]> git.sesse.net Git - ffmpeg/blob - libavcodec/mpegvideo_enc.c
Merge commit 'c23ce454b3e33634a188d6facfd2b7182af5af93'
[ffmpeg] / libavcodec / mpegvideo_enc.c
1 /*
2  * The simplest mpeg encoder (well, it was the simplest!)
3  * Copyright (c) 2000,2001 Fabrice Bellard
4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5  *
6  * 4MV & hq & B-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
7  *
8  * This file is part of FFmpeg.
9  *
10  * FFmpeg is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * FFmpeg is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with FFmpeg; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23  */
24
25 /**
26  * @file
27  * The simplest mpeg encoder (well, it was the simplest!).
28  */
29
30 #include <stdint.h>
31
32 #include "libavutil/internal.h"
33 #include "libavutil/intmath.h"
34 #include "libavutil/mathematics.h"
35 #include "libavutil/pixdesc.h"
36 #include "libavutil/opt.h"
37 #include "libavutil/timer.h"
38 #include "avcodec.h"
39 #include "dct.h"
40 #include "idctdsp.h"
41 #include "mpeg12.h"
42 #include "mpegvideo.h"
43 #include "h261.h"
44 #include "h263.h"
45 #include "mjpegenc_common.h"
46 #include "mathops.h"
47 #include "mpegutils.h"
48 #include "mjpegenc.h"
49 #include "msmpeg4.h"
50 #include "pixblockdsp.h"
51 #include "qpeldsp.h"
52 #include "faandct.h"
53 #include "thread.h"
54 #include "aandcttab.h"
55 #include "flv.h"
56 #include "mpeg4video.h"
57 #include "internal.h"
58 #include "bytestream.h"
59 #include <limits.h>
60 #include "sp5x.h"
61
/* Prototypes of file-local (static) helpers, declared up front so they can
 * be referenced before their definitions. denoise_dct_c() and
 * dct_quantize_trellis_c() are installed as callbacks by
 * ff_dct_encode_init(). */
62 static int encode_picture(MpegEncContext *s, int picture_number);
63 static int dct_quantize_refine(MpegEncContext *s, int16_t *block, int16_t *weight, int16_t *orig, int n, int qscale);
64 static int sse_mb(MpegEncContext *s);
65 static void denoise_dct_c(MpegEncContext *s, int16_t *block);
66 static int dct_quantize_trellis_c(MpegEncContext *s, int16_t *block, int n, int qscale, int *overflow);
67
/* Shared default tables; MPV_encode_defaults() points s->me.mv_penalty and
 * s->fcode_tab at them. */
68 static uint8_t default_mv_penalty[MAX_FCODE + 1][MAX_MV * 2 + 1];
69 static uint8_t default_fcode_tab[MAX_MV * 2 + 1];
70
/* Option table made of the FF_MPV_COMMON_OPTS macro followed by a NULL
 * terminator entry. */
71 const AVOption ff_mpv_generic_options[] = {
72     FF_MPV_COMMON_OPTS
73     { NULL },
74 };
75
76 void ff_convert_matrix(MpegEncContext *s, int (*qmat)[64],
77                        uint16_t (*qmat16)[2][64],
78                        const uint16_t *quant_matrix,
79                        int bias, int qmin, int qmax, int intra)
80 {
81     FDCTDSPContext *fdsp = &s->fdsp;
82     int qscale;
83     int shift = 0;
84
85     for (qscale = qmin; qscale <= qmax; qscale++) {
86         int i;
87         if (fdsp->fdct == ff_jpeg_fdct_islow_8  ||
88             fdsp->fdct == ff_jpeg_fdct_islow_10 ||
89             fdsp->fdct == ff_faandct) {
90             for (i = 0; i < 64; i++) {
91                 const int j = s->idsp.idct_permutation[i];
92                 /* 16 <= qscale * quant_matrix[i] <= 7905
93                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
94                  *             19952 <=              x  <= 249205026
95                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
96                  *           3444240 >= (1 << 36) / (x) >= 275 */
97
98                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
99                                         (qscale * quant_matrix[j]));
100             }
101         } else if (fdsp->fdct == ff_fdct_ifast) {
102             for (i = 0; i < 64; i++) {
103                 const int j = s->idsp.idct_permutation[i];
104                 /* 16 <= qscale * quant_matrix[i] <= 7905
105                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
106                  *             19952 <=              x  <= 249205026
107                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
108                  *           3444240 >= (1 << 36) / (x) >= 275 */
109
110                 qmat[qscale][i] = (int)((UINT64_C(1) << (QMAT_SHIFT + 14)) /
111                                         (ff_aanscales[i] * (int64_t)qscale * quant_matrix[j]));
112             }
113         } else {
114             for (i = 0; i < 64; i++) {
115                 const int j = s->idsp.idct_permutation[i];
116                 /* We can safely suppose that 16 <= quant_matrix[i] <= 255
117                  * Assume x = qscale * quant_matrix[i]
118                  * So             16 <=              x  <= 7905
119                  * so (1 << 19) / 16 >= (1 << 19) / (x) >= (1 << 19) / 7905
120                  * so          32768 >= (1 << 19) / (x) >= 67 */
121                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
122                                         (qscale * quant_matrix[j]));
123                 //qmat  [qscale][i] = (1 << QMAT_SHIFT_MMX) /
124                 //                    (qscale * quant_matrix[i]);
125                 qmat16[qscale][0][i] = (1 << QMAT_SHIFT_MMX) /
126                                        (qscale * quant_matrix[j]);
127
128                 if (qmat16[qscale][0][i] == 0 ||
129                     qmat16[qscale][0][i] == 128 * 256)
130                     qmat16[qscale][0][i] = 128 * 256 - 1;
131                 qmat16[qscale][1][i] =
132                     ROUNDED_DIV(bias << (16 - QUANT_BIAS_SHIFT),
133                                 qmat16[qscale][0][i]);
134             }
135         }
136
137         for (i = intra; i < 64; i++) {
138             int64_t max = 8191;
139             if (fdsp->fdct == ff_fdct_ifast) {
140                 max = (8191LL * ff_aanscales[i]) >> 14;
141             }
142             while (((max * qmat[qscale][i]) >> shift) > INT_MAX) {
143                 shift++;
144             }
145         }
146     }
147     if (shift) {
148         av_log(NULL, AV_LOG_INFO,
149                "Warning, QMAT_SHIFT is larger than %d, overflows possible\n",
150                QMAT_SHIFT - shift);
151     }
152 }
153
154 static inline void update_qscale(MpegEncContext *s)
155 {
156     s->qscale = (s->lambda * 139 + FF_LAMBDA_SCALE * 64) >>
157                 (FF_LAMBDA_SHIFT + 7);
158     s->qscale = av_clip(s->qscale, s->avctx->qmin, s->avctx->qmax);
159
160     s->lambda2 = (s->lambda * s->lambda + FF_LAMBDA_SCALE / 2) >>
161                  FF_LAMBDA_SHIFT;
162 }
163
164 void ff_write_quant_matrix(PutBitContext *pb, uint16_t *matrix)
165 {
166     int i;
167
168     if (matrix) {
169         put_bits(pb, 1, 1);
170         for (i = 0; i < 64; i++) {
171             put_bits(pb, 8, matrix[ff_zigzag_direct[i]]);
172         }
173     } else
174         put_bits(pb, 1, 0);
175 }
176
177 /**
178  * init s->current_picture.qscale_table from s->lambda_table
179  */
180 void ff_init_qscale_tab(MpegEncContext *s)
181 {
182     int8_t * const qscale_table = s->current_picture.qscale_table;
183     int i;
184
185     for (i = 0; i < s->mb_num; i++) {
186         unsigned int lam = s->lambda_table[s->mb_index2xy[i]];
187         int qp = (lam * 139 + FF_LAMBDA_SCALE * 64) >> (FF_LAMBDA_SHIFT + 7);
188         qscale_table[s->mb_index2xy[i]] = av_clip(qp, s->avctx->qmin,
189                                                   s->avctx->qmax);
190     }
191 }
192
193 static void update_duplicate_context_after_me(MpegEncContext *dst,
194                                               MpegEncContext *src)
195 {
196 #define COPY(a) dst->a= src->a
197     COPY(pict_type);
198     COPY(current_picture);
199     COPY(f_code);
200     COPY(b_code);
201     COPY(qscale);
202     COPY(lambda);
203     COPY(lambda2);
204     COPY(picture_in_gop_number);
205     COPY(gop_picture_number);
206     COPY(frame_pred_frame_dct); // FIXME don't set in encode_header
207     COPY(progressive_frame);    // FIXME don't set in encode_header
208     COPY(partitioned_frame);    // FIXME don't set in encode_header
209 #undef COPY
210 }
211
212 /**
213  * Set the given MpegEncContext to defaults for encoding.
214  * the changed fields will not depend upon the prior state of the MpegEncContext.
215  */
216 static void MPV_encode_defaults(MpegEncContext *s)
217 {
218     int i;
219     ff_MPV_common_defaults(s);
220
221     for (i = -16; i < 16; i++) {
222         default_fcode_tab[i + MAX_MV] = 1;
223     }
224     s->me.mv_penalty = default_mv_penalty;
225     s->fcode_tab     = default_fcode_tab;
226
227     s->input_picture_number  = 0;
228     s->picture_in_gop_number = 0;
229 }
230
231 av_cold int ff_dct_encode_init(MpegEncContext *s) {
232     if (ARCH_X86)
233         ff_dct_encode_init_x86(s);
234
235     if (CONFIG_H263_ENCODER)
236         ff_h263dsp_init(&s->h263dsp);
237     if (!s->dct_quantize)
238         s->dct_quantize = ff_dct_quantize_c;
239     if (!s->denoise_dct)
240         s->denoise_dct  = denoise_dct_c;
241     s->fast_dct_quantize = s->dct_quantize;
242     if (s->avctx->trellis)
243         s->dct_quantize  = dct_quantize_trellis_c;
244
245     return 0;
246 }
247
248 /* init video encoder */
249 av_cold int ff_MPV_encode_init(AVCodecContext *avctx)
250 {
251     MpegEncContext *s = avctx->priv_data;
252     int i, ret, format_supported;
253
254     MPV_encode_defaults(s);
255
256     switch (avctx->codec_id) {
257     case AV_CODEC_ID_MPEG2VIDEO:
258         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P &&
259             avctx->pix_fmt != AV_PIX_FMT_YUV422P) {
260             av_log(avctx, AV_LOG_ERROR,
261                    "only YUV420 and YUV422 are supported\n");
262             return -1;
263         }
264         break;
265     case AV_CODEC_ID_MJPEG:
266     case AV_CODEC_ID_AMV:
267         format_supported = 0;
268         /* JPEG color space */
269         if (avctx->pix_fmt == AV_PIX_FMT_YUVJ420P ||
270             avctx->pix_fmt == AV_PIX_FMT_YUVJ422P ||
271             avctx->pix_fmt == AV_PIX_FMT_YUVJ444P ||
272             (avctx->color_range == AVCOL_RANGE_JPEG &&
273              (avctx->pix_fmt == AV_PIX_FMT_YUV420P ||
274               avctx->pix_fmt == AV_PIX_FMT_YUV422P ||
275               avctx->pix_fmt == AV_PIX_FMT_YUV444P)))
276             format_supported = 1;
277         /* MPEG color space */
278         else if (avctx->strict_std_compliance <= FF_COMPLIANCE_UNOFFICIAL &&
279                  (avctx->pix_fmt == AV_PIX_FMT_YUV420P ||
280                   avctx->pix_fmt == AV_PIX_FMT_YUV422P ||
281                   avctx->pix_fmt == AV_PIX_FMT_YUV444P))
282             format_supported = 1;
283
284         if (!format_supported) {
285             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in jpeg\n");
286             return -1;
287         }
288         break;
289     default:
290         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P) {
291             av_log(avctx, AV_LOG_ERROR, "only YUV420 is supported\n");
292             return -1;
293         }
294     }
295
296     switch (avctx->pix_fmt) {
297     case AV_PIX_FMT_YUVJ444P:
298     case AV_PIX_FMT_YUV444P:
299         s->chroma_format = CHROMA_444;
300         break;
301     case AV_PIX_FMT_YUVJ422P:
302     case AV_PIX_FMT_YUV422P:
303         s->chroma_format = CHROMA_422;
304         break;
305     case AV_PIX_FMT_YUVJ420P:
306     case AV_PIX_FMT_YUV420P:
307     default:
308         s->chroma_format = CHROMA_420;
309         break;
310     }
311
312     s->bit_rate = avctx->bit_rate;
313     s->width    = avctx->width;
314     s->height   = avctx->height;
315     if (avctx->gop_size > 600 &&
316         avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL) {
317         av_log(avctx, AV_LOG_WARNING,
318                "keyframe interval too large!, reducing it from %d to %d\n",
319                avctx->gop_size, 600);
320         avctx->gop_size = 600;
321     }
322     s->gop_size     = avctx->gop_size;
323     s->avctx        = avctx;
324     s->flags        = avctx->flags;
325     s->flags2       = avctx->flags2;
326     if (avctx->max_b_frames > MAX_B_FRAMES) {
327         av_log(avctx, AV_LOG_ERROR, "Too many B-frames requested, maximum "
328                "is %d.\n", MAX_B_FRAMES);
329         avctx->max_b_frames = MAX_B_FRAMES;
330     }
331     s->max_b_frames = avctx->max_b_frames;
332     s->codec_id     = avctx->codec->id;
333     s->strict_std_compliance = avctx->strict_std_compliance;
334     s->quarter_sample     = (avctx->flags & CODEC_FLAG_QPEL) != 0;
335     s->mpeg_quant         = avctx->mpeg_quant;
336     s->rtp_mode           = !!avctx->rtp_payload_size;
337     s->intra_dc_precision = avctx->intra_dc_precision;
338
339     // work around differences in how applications specify DC precision
340     if (s->intra_dc_precision < 0) {
341         s->intra_dc_precision += 8;
342     } else if (s->intra_dc_precision >= 8)
343         s->intra_dc_precision -= 8;
344
345     if (s->intra_dc_precision < 0) {
346         av_log(avctx, AV_LOG_ERROR,
347                 "intra dc precision must be positive, note some applications use"
348                 " 0 and some 8 as base meaning 8bit, the value must not be smaller than that\n");
349         return AVERROR(EINVAL);
350     }
351
352     if (s->intra_dc_precision > (avctx->codec_id == AV_CODEC_ID_MPEG2VIDEO ? 3 : 0)) {
353         av_log(avctx, AV_LOG_ERROR, "intra dc precision too large\n");
354         return AVERROR(EINVAL);
355     }
356     s->user_specified_pts = AV_NOPTS_VALUE;
357
358     if (s->gop_size <= 1) {
359         s->intra_only = 1;
360         s->gop_size   = 12;
361     } else {
362         s->intra_only = 0;
363     }
364
365     s->me_method = avctx->me_method;
366
367     /* Fixed QSCALE */
368     s->fixed_qscale = !!(avctx->flags & CODEC_FLAG_QSCALE);
369
370     s->adaptive_quant = (s->avctx->lumi_masking ||
371                          s->avctx->dark_masking ||
372                          s->avctx->temporal_cplx_masking ||
373                          s->avctx->spatial_cplx_masking  ||
374                          s->avctx->p_masking      ||
375                          s->avctx->border_masking ||
376                          (s->mpv_flags & FF_MPV_FLAG_QP_RD)) &&
377                         !s->fixed_qscale;
378
379     s->loop_filter      = !!(s->flags & CODEC_FLAG_LOOP_FILTER);
380
381     if (avctx->rc_max_rate && !avctx->rc_buffer_size) {
382         switch(avctx->codec_id) {
383         case AV_CODEC_ID_MPEG1VIDEO:
384         case AV_CODEC_ID_MPEG2VIDEO:
385             avctx->rc_buffer_size = FFMAX(avctx->rc_max_rate, 15000000) * 112L / 15000000 * 16384;
386             break;
387         case AV_CODEC_ID_MPEG4:
388         case AV_CODEC_ID_MSMPEG4V1:
389         case AV_CODEC_ID_MSMPEG4V2:
390         case AV_CODEC_ID_MSMPEG4V3:
391             if       (avctx->rc_max_rate >= 15000000) {
392                 avctx->rc_buffer_size = 320 + (avctx->rc_max_rate - 15000000L) * (760-320) / (38400000 - 15000000);
393             } else if(avctx->rc_max_rate >=  2000000) {
394                 avctx->rc_buffer_size =  80 + (avctx->rc_max_rate -  2000000L) * (320- 80) / (15000000 -  2000000);
395             } else if(avctx->rc_max_rate >=   384000) {
396                 avctx->rc_buffer_size =  40 + (avctx->rc_max_rate -   384000L) * ( 80- 40) / ( 2000000 -   384000);
397             } else
398                 avctx->rc_buffer_size = 40;
399             avctx->rc_buffer_size *= 16384;
400             break;
401         }
402         if (avctx->rc_buffer_size) {
403             av_log(avctx, AV_LOG_INFO, "Automatically choosing VBV buffer size of %d kbyte\n", avctx->rc_buffer_size/8192);
404         }
405     }
406
407     if ((!avctx->rc_max_rate) != (!avctx->rc_buffer_size)) {
408         av_log(avctx, AV_LOG_ERROR, "Either both buffer size and max rate or neither must be specified\n");
409         return -1;
410     }
411
412     if (avctx->rc_min_rate && avctx->rc_max_rate != avctx->rc_min_rate) {
413         av_log(avctx, AV_LOG_INFO,
414                "Warning min_rate > 0 but min_rate != max_rate isn't recommended!\n");
415     }
416
417     if (avctx->rc_min_rate && avctx->rc_min_rate > avctx->bit_rate) {
418         av_log(avctx, AV_LOG_ERROR, "bitrate below min bitrate\n");
419         return -1;
420     }
421
422     if (avctx->rc_max_rate && avctx->rc_max_rate < avctx->bit_rate) {
423         av_log(avctx, AV_LOG_ERROR, "bitrate above max bitrate\n");
424         return -1;
425     }
426
427     if (avctx->rc_max_rate &&
428         avctx->rc_max_rate == avctx->bit_rate &&
429         avctx->rc_max_rate != avctx->rc_min_rate) {
430         av_log(avctx, AV_LOG_INFO,
431                "impossible bitrate constraints, this will fail\n");
432     }
433
434     if (avctx->rc_buffer_size &&
435         avctx->bit_rate * (int64_t)avctx->time_base.num >
436             avctx->rc_buffer_size * (int64_t)avctx->time_base.den) {
437         av_log(avctx, AV_LOG_ERROR, "VBV buffer too small for bitrate\n");
438         return -1;
439     }
440
441     if (!s->fixed_qscale &&
442         avctx->bit_rate * av_q2d(avctx->time_base) >
443             avctx->bit_rate_tolerance) {
444         av_log(avctx, AV_LOG_WARNING,
445                "bitrate tolerance %d too small for bitrate %d, overriding\n", avctx->bit_rate_tolerance, avctx->bit_rate);
446         avctx->bit_rate_tolerance = 5 * avctx->bit_rate * av_q2d(avctx->time_base);
447     }
448
449     if (s->avctx->rc_max_rate &&
450         s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
451         (s->codec_id == AV_CODEC_ID_MPEG1VIDEO ||
452          s->codec_id == AV_CODEC_ID_MPEG2VIDEO) &&
453         90000LL * (avctx->rc_buffer_size - 1) >
454             s->avctx->rc_max_rate * 0xFFFFLL) {
455         av_log(avctx, AV_LOG_INFO,
456                "Warning vbv_delay will be set to 0xFFFF (=VBR) as the "
457                "specified vbv buffer is too large for the given bitrate!\n");
458     }
459
460     if ((s->flags & CODEC_FLAG_4MV)  && s->codec_id != AV_CODEC_ID_MPEG4 &&
461         s->codec_id != AV_CODEC_ID_H263 && s->codec_id != AV_CODEC_ID_H263P &&
462         s->codec_id != AV_CODEC_ID_FLV1) {
463         av_log(avctx, AV_LOG_ERROR, "4MV not supported by codec\n");
464         return -1;
465     }
466
467     if (s->obmc && s->avctx->mb_decision != FF_MB_DECISION_SIMPLE) {
468         av_log(avctx, AV_LOG_ERROR,
469                "OBMC is only supported with simple mb decision\n");
470         return -1;
471     }
472
473     if (s->quarter_sample && s->codec_id != AV_CODEC_ID_MPEG4) {
474         av_log(avctx, AV_LOG_ERROR, "qpel not supported by codec\n");
475         return -1;
476     }
477
478     if (s->max_b_frames                    &&
479         s->codec_id != AV_CODEC_ID_MPEG4      &&
480         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
481         s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
482         av_log(avctx, AV_LOG_ERROR, "b frames not supported by codec\n");
483         return -1;
484     }
485     if (s->max_b_frames < 0) {
486         av_log(avctx, AV_LOG_ERROR,
487                "max b frames must be 0 or positive for mpegvideo based encoders\n");
488         return -1;
489     }
490
491     if ((s->codec_id == AV_CODEC_ID_MPEG4 ||
492          s->codec_id == AV_CODEC_ID_H263  ||
493          s->codec_id == AV_CODEC_ID_H263P) &&
494         (avctx->sample_aspect_ratio.num > 255 ||
495          avctx->sample_aspect_ratio.den > 255)) {
496         av_log(avctx, AV_LOG_WARNING,
497                "Invalid pixel aspect ratio %i/%i, limit is 255/255 reducing\n",
498                avctx->sample_aspect_ratio.num, avctx->sample_aspect_ratio.den);
499         av_reduce(&avctx->sample_aspect_ratio.num, &avctx->sample_aspect_ratio.den,
500                    avctx->sample_aspect_ratio.num,  avctx->sample_aspect_ratio.den, 255);
501     }
502
503     if ((s->codec_id == AV_CODEC_ID_H263  ||
504          s->codec_id == AV_CODEC_ID_H263P) &&
505         (avctx->width  > 2048 ||
506          avctx->height > 1152 )) {
507         av_log(avctx, AV_LOG_ERROR, "H.263 does not support resolutions above 2048x1152\n");
508         return -1;
509     }
510     if ((s->codec_id == AV_CODEC_ID_H263  ||
511          s->codec_id == AV_CODEC_ID_H263P) &&
512         ((avctx->width &3) ||
513          (avctx->height&3) )) {
514         av_log(avctx, AV_LOG_ERROR, "w/h must be a multiple of 4\n");
515         return -1;
516     }
517
518     if (s->codec_id == AV_CODEC_ID_MPEG1VIDEO &&
519         (avctx->width  > 4095 ||
520          avctx->height > 4095 )) {
521         av_log(avctx, AV_LOG_ERROR, "MPEG-1 does not support resolutions above 4095x4095\n");
522         return -1;
523     }
524
525     if (s->codec_id == AV_CODEC_ID_MPEG2VIDEO &&
526         (avctx->width  > 16383 ||
527          avctx->height > 16383 )) {
528         av_log(avctx, AV_LOG_ERROR, "MPEG-2 does not support resolutions above 16383x16383\n");
529         return -1;
530     }
531
532     if (s->codec_id == AV_CODEC_ID_RV10 &&
533         (avctx->width &15 ||
534          avctx->height&15 )) {
535         av_log(avctx, AV_LOG_ERROR, "width and height must be a multiple of 16\n");
536         return AVERROR(EINVAL);
537     }
538
539     if (s->codec_id == AV_CODEC_ID_RV20 &&
540         (avctx->width &3 ||
541          avctx->height&3 )) {
542         av_log(avctx, AV_LOG_ERROR, "width and height must be a multiple of 4\n");
543         return AVERROR(EINVAL);
544     }
545
546     if ((s->codec_id == AV_CODEC_ID_WMV1 ||
547          s->codec_id == AV_CODEC_ID_WMV2) &&
548          avctx->width & 1) {
549          av_log(avctx, AV_LOG_ERROR, "width must be multiple of 2\n");
550          return -1;
551     }
552
553     if ((s->flags & (CODEC_FLAG_INTERLACED_DCT | CODEC_FLAG_INTERLACED_ME)) &&
554         s->codec_id != AV_CODEC_ID_MPEG4 && s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
555         av_log(avctx, AV_LOG_ERROR, "interlacing not supported by codec\n");
556         return -1;
557     }
558
559     // FIXME mpeg2 uses that too
560     if (s->mpeg_quant && (   s->codec_id != AV_CODEC_ID_MPEG4
561                           && s->codec_id != AV_CODEC_ID_MPEG2VIDEO)) {
562         av_log(avctx, AV_LOG_ERROR,
563                "mpeg2 style quantization not supported by codec\n");
564         return -1;
565     }
566
567     if ((s->mpv_flags & FF_MPV_FLAG_CBP_RD) && !avctx->trellis) {
568         av_log(avctx, AV_LOG_ERROR, "CBP RD needs trellis quant\n");
569         return -1;
570     }
571
572     if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) &&
573         s->avctx->mb_decision != FF_MB_DECISION_RD) {
574         av_log(avctx, AV_LOG_ERROR, "QP RD needs mbd=2\n");
575         return -1;
576     }
577
578     if (s->avctx->scenechange_threshold < 1000000000 &&
579         (s->flags & CODEC_FLAG_CLOSED_GOP)) {
580         av_log(avctx, AV_LOG_ERROR,
581                "closed gop with scene change detection are not supported yet, "
582                "set threshold to 1000000000\n");
583         return -1;
584     }
585
586     if (s->flags & CODEC_FLAG_LOW_DELAY) {
587         if (s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
588             av_log(avctx, AV_LOG_ERROR,
589                   "low delay forcing is only available for mpeg2\n");
590             return -1;
591         }
592         if (s->max_b_frames != 0) {
593             av_log(avctx, AV_LOG_ERROR,
594                    "b frames cannot be used with low delay\n");
595             return -1;
596         }
597     }
598
599     if (s->q_scale_type == 1) {
600         if (avctx->qmax > 12) {
601             av_log(avctx, AV_LOG_ERROR,
602                    "non linear quant only supports qmax <= 12 currently\n");
603             return -1;
604         }
605     }
606
607     if (s->avctx->thread_count > 1         &&
608         s->codec_id != AV_CODEC_ID_MPEG4      &&
609         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
610         s->codec_id != AV_CODEC_ID_MPEG2VIDEO &&
611         s->codec_id != AV_CODEC_ID_MJPEG      &&
612         (s->codec_id != AV_CODEC_ID_H263P)) {
613         av_log(avctx, AV_LOG_ERROR,
614                "multi threaded encoding not supported by codec\n");
615         return -1;
616     }
617
618     if (s->avctx->thread_count < 1) {
619         av_log(avctx, AV_LOG_ERROR,
620                "automatic thread number detection not supported by codec, "
621                "patch welcome\n");
622         return -1;
623     }
624
625     if (s->avctx->slices > 1 || s->avctx->thread_count > 1)
626         s->rtp_mode = 1;
627
628     if (s->avctx->thread_count > 1 && s->codec_id == AV_CODEC_ID_H263P)
629         s->h263_slice_structured = 1;
630
631     if (!avctx->time_base.den || !avctx->time_base.num) {
632         av_log(avctx, AV_LOG_ERROR, "framerate not set\n");
633         return -1;
634     }
635
636     i = (INT_MAX / 2 + 128) >> 8;
637     if (avctx->mb_threshold >= i) {
638         av_log(avctx, AV_LOG_ERROR, "mb_threshold too large, max is %d\n",
639                i - 1);
640         return -1;
641     }
642
643     if (avctx->b_frame_strategy && (avctx->flags & CODEC_FLAG_PASS2)) {
644         av_log(avctx, AV_LOG_INFO,
645                "notice: b_frame_strategy only affects the first pass\n");
646         avctx->b_frame_strategy = 0;
647     }
648
649     i = av_gcd(avctx->time_base.den, avctx->time_base.num);
650     if (i > 1) {
651         av_log(avctx, AV_LOG_INFO, "removing common factors from framerate\n");
652         avctx->time_base.den /= i;
653         avctx->time_base.num /= i;
654         //return -1;
655     }
656
657     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG1VIDEO || s->codec_id == AV_CODEC_ID_MPEG2VIDEO || s->codec_id == AV_CODEC_ID_MJPEG || s->codec_id==AV_CODEC_ID_AMV) {
658         // (a + x * 3 / 8) / x
659         s->intra_quant_bias = 3 << (QUANT_BIAS_SHIFT - 3);
660         s->inter_quant_bias = 0;
661     } else {
662         s->intra_quant_bias = 0;
663         // (a - x / 4) / x
664         s->inter_quant_bias = -(1 << (QUANT_BIAS_SHIFT - 2));
665     }
666
667     if (avctx->qmin > avctx->qmax || avctx->qmin <= 0) {
668         av_log(avctx, AV_LOG_ERROR, "qmin and or qmax are invalid, they must be 0 < min <= max\n");
669         return AVERROR(EINVAL);
670     }
671
672     if (avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
673         s->intra_quant_bias = avctx->intra_quant_bias;
674     if (avctx->inter_quant_bias != FF_DEFAULT_QUANT_BIAS)
675         s->inter_quant_bias = avctx->inter_quant_bias;
676
677     av_log(avctx, AV_LOG_DEBUG, "intra_quant_bias = %d inter_quant_bias = %d\n",s->intra_quant_bias,s->inter_quant_bias);
678
679     if (avctx->codec_id == AV_CODEC_ID_MPEG4 &&
680         s->avctx->time_base.den > (1 << 16) - 1) {
681         av_log(avctx, AV_LOG_ERROR,
682                "timebase %d/%d not supported by MPEG 4 standard, "
683                "the maximum admitted value for the timebase denominator "
684                "is %d\n", s->avctx->time_base.num, s->avctx->time_base.den,
685                (1 << 16) - 1);
686         return -1;
687     }
688     s->time_increment_bits = av_log2(s->avctx->time_base.den - 1) + 1;
689
690     switch (avctx->codec->id) {
691     case AV_CODEC_ID_MPEG1VIDEO:
692         s->out_format = FMT_MPEG1;
693         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
694         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
695         break;
696     case AV_CODEC_ID_MPEG2VIDEO:
697         s->out_format = FMT_MPEG1;
698         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
699         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
700         s->rtp_mode   = 1;
701         break;
702     case AV_CODEC_ID_MJPEG:
703     case AV_CODEC_ID_AMV:
704         s->out_format = FMT_MJPEG;
705         s->intra_only = 1; /* force intra only for jpeg */
706         if (!CONFIG_MJPEG_ENCODER ||
707             ff_mjpeg_encode_init(s) < 0)
708             return -1;
709         avctx->delay = 0;
710         s->low_delay = 1;
711         break;
712     case AV_CODEC_ID_H261:
713         if (!CONFIG_H261_ENCODER)
714             return -1;
715         if (ff_h261_get_picture_format(s->width, s->height) < 0) {
716             av_log(avctx, AV_LOG_ERROR,
717                    "The specified picture size of %dx%d is not valid for the "
718                    "H.261 codec.\nValid sizes are 176x144, 352x288\n",
719                     s->width, s->height);
720             return -1;
721         }
722         s->out_format = FMT_H261;
723         avctx->delay  = 0;
724         s->low_delay  = 1;
725         break;
726     case AV_CODEC_ID_H263:
727         if (!CONFIG_H263_ENCODER)
728             return -1;
729         if (ff_match_2uint16(ff_h263_format, FF_ARRAY_ELEMS(ff_h263_format),
730                              s->width, s->height) == 8) {
731             av_log(avctx, AV_LOG_ERROR,
732                    "The specified picture size of %dx%d is not valid for "
733                    "the H.263 codec.\nValid sizes are 128x96, 176x144, "
734                    "352x288, 704x576, and 1408x1152. "
735                    "Try H.263+.\n", s->width, s->height);
736             return -1;
737         }
738         s->out_format = FMT_H263;
739         avctx->delay  = 0;
740         s->low_delay  = 1;
741         break;
742     case AV_CODEC_ID_H263P:
743         s->out_format = FMT_H263;
744         s->h263_plus  = 1;
745         /* Fx */
746         s->h263_aic        = (avctx->flags & CODEC_FLAG_AC_PRED) ? 1 : 0;
747         s->modified_quant  = s->h263_aic;
748         s->loop_filter     = (avctx->flags & CODEC_FLAG_LOOP_FILTER) ? 1 : 0;
749         s->unrestricted_mv = s->obmc || s->loop_filter || s->umvplus;
750
751         /* /Fx */
752         /* These are just to be sure */
753         avctx->delay = 0;
754         s->low_delay = 1;
755         break;
756     case AV_CODEC_ID_FLV1:
757         s->out_format      = FMT_H263;
758         s->h263_flv        = 2; /* format = 1; 11-bit codes */
759         s->unrestricted_mv = 1;
760         s->rtp_mode  = 0; /* don't allow GOB */
761         avctx->delay = 0;
762         s->low_delay = 1;
763         break;
764     case AV_CODEC_ID_RV10:
765         s->out_format = FMT_H263;
766         avctx->delay  = 0;
767         s->low_delay  = 1;
768         break;
769     case AV_CODEC_ID_RV20:
770         s->out_format      = FMT_H263;
771         avctx->delay       = 0;
772         s->low_delay       = 1;
773         s->modified_quant  = 1;
774         s->h263_aic        = 1;
775         s->h263_plus       = 1;
776         s->loop_filter     = 1;
777         s->unrestricted_mv = 0;
778         break;
779     case AV_CODEC_ID_MPEG4:
780         s->out_format      = FMT_H263;
781         s->h263_pred       = 1;
782         s->unrestricted_mv = 1;
783         s->low_delay       = s->max_b_frames ? 0 : 1;
784         avctx->delay       = s->low_delay ? 0 : (s->max_b_frames + 1);
785         break;
786     case AV_CODEC_ID_MSMPEG4V2:
787         s->out_format      = FMT_H263;
788         s->h263_pred       = 1;
789         s->unrestricted_mv = 1;
790         s->msmpeg4_version = 2;
791         avctx->delay       = 0;
792         s->low_delay       = 1;
793         break;
794     case AV_CODEC_ID_MSMPEG4V3:
795         s->out_format        = FMT_H263;
796         s->h263_pred         = 1;
797         s->unrestricted_mv   = 1;
798         s->msmpeg4_version   = 3;
799         s->flipflop_rounding = 1;
800         avctx->delay         = 0;
801         s->low_delay         = 1;
802         break;
803     case AV_CODEC_ID_WMV1:
804         s->out_format        = FMT_H263;
805         s->h263_pred         = 1;
806         s->unrestricted_mv   = 1;
807         s->msmpeg4_version   = 4;
808         s->flipflop_rounding = 1;
809         avctx->delay         = 0;
810         s->low_delay         = 1;
811         break;
812     case AV_CODEC_ID_WMV2:
813         s->out_format        = FMT_H263;
814         s->h263_pred         = 1;
815         s->unrestricted_mv   = 1;
816         s->msmpeg4_version   = 5;
817         s->flipflop_rounding = 1;
818         avctx->delay         = 0;
819         s->low_delay         = 1;
820         break;
821     default:
822         return -1;
823     }
824
825     avctx->has_b_frames = !s->low_delay;
826
827     s->encoding = 1;
828
829     s->progressive_frame    =
830     s->progressive_sequence = !(avctx->flags & (CODEC_FLAG_INTERLACED_DCT |
831                                                 CODEC_FLAG_INTERLACED_ME) ||
832                                 s->alternate_scan);
833
834     /* init */
835     if (ff_MPV_common_init(s) < 0)
836         return -1;
837
838     ff_fdctdsp_init(&s->fdsp, avctx);
839     ff_mpegvideoencdsp_init(&s->mpvencdsp, avctx);
840     ff_pixblockdsp_init(&s->pdsp, avctx);
841     ff_qpeldsp_init(&s->qdsp);
842
843     s->avctx->coded_frame = s->current_picture.f;
844
845     if (s->msmpeg4_version) {
846         FF_ALLOCZ_OR_GOTO(s->avctx, s->ac_stats,
847                           2 * 2 * (MAX_LEVEL + 1) *
848                           (MAX_RUN + 1) * 2 * sizeof(int), fail);
849     }
850     FF_ALLOCZ_OR_GOTO(s->avctx, s->avctx->stats_out, 256, fail);
851
852     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix,   64 * 32 * sizeof(int), fail);
853     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_chroma_intra_matrix, 64 * 32 * sizeof(int), fail);
854     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix,   64 * 32 * sizeof(int), fail);
855     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
856     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_chroma_intra_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
857     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
858     FF_ALLOCZ_OR_GOTO(s->avctx, s->input_picture,
859                       MAX_PICTURE_COUNT * sizeof(Picture *), fail);
860     FF_ALLOCZ_OR_GOTO(s->avctx, s->reordered_input_picture,
861                       MAX_PICTURE_COUNT * sizeof(Picture *), fail);
862
863     if (s->avctx->noise_reduction) {
864         FF_ALLOCZ_OR_GOTO(s->avctx, s->dct_offset,
865                           2 * 64 * sizeof(uint16_t), fail);
866     }
867
868     ff_dct_encode_init(s);
869
870     if ((CONFIG_H263P_ENCODER || CONFIG_RV20_ENCODER) && s->modified_quant)
871         s->chroma_qscale_table = ff_h263_chroma_qscale_table;
872
873     s->quant_precision = 5;
874
875     ff_set_cmp(&s->dsp, s->dsp.ildct_cmp, s->avctx->ildct_cmp);
876     ff_set_cmp(&s->dsp, s->dsp.frame_skip_cmp, s->avctx->frame_skip_cmp);
877
878     if (CONFIG_H261_ENCODER && s->out_format == FMT_H261)
879         ff_h261_encode_init(s);
880     if (CONFIG_H263_ENCODER && s->out_format == FMT_H263)
881         ff_h263_encode_init(s);
882     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
883         ff_msmpeg4_encode_init(s);
884     if ((CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
885         && s->out_format == FMT_MPEG1)
886         ff_mpeg1_encode_init(s);
887
888     /* init q matrix */
889     for (i = 0; i < 64; i++) {
890         int j = s->idsp.idct_permutation[i];
891         if (CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4 &&
892             s->mpeg_quant) {
893             s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
894             s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
895         } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
896             s->intra_matrix[j] =
897             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
898         } else {
899             /* mpeg1/2 */
900             s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
901             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
902         }
903         if (s->avctx->intra_matrix)
904             s->intra_matrix[j] = s->avctx->intra_matrix[i];
905         if (s->avctx->inter_matrix)
906             s->inter_matrix[j] = s->avctx->inter_matrix[i];
907     }
908
909     /* precompute matrix */
910     /* for mjpeg, we do include qscale in the matrix */
911     if (s->out_format != FMT_MJPEG) {
912         ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
913                           s->intra_matrix, s->intra_quant_bias, avctx->qmin,
914                           31, 1);
915         ff_convert_matrix(s, s->q_inter_matrix, s->q_inter_matrix16,
916                           s->inter_matrix, s->inter_quant_bias, avctx->qmin,
917                           31, 0);
918     }
919
920     if (ff_rate_control_init(s) < 0)
921         return -1;
922
923 #if FF_API_ERROR_RATE
924     FF_DISABLE_DEPRECATION_WARNINGS
925     if (avctx->error_rate)
926         s->error_rate = avctx->error_rate;
927     FF_ENABLE_DEPRECATION_WARNINGS;
928 #endif
929
930 #if FF_API_NORMALIZE_AQP
931     FF_DISABLE_DEPRECATION_WARNINGS
932     if (avctx->flags & CODEC_FLAG_NORMALIZE_AQP)
933         s->mpv_flags |= FF_MPV_FLAG_NAQ;
934     FF_ENABLE_DEPRECATION_WARNINGS;
935 #endif
936
937 #if FF_API_MV0
938     FF_DISABLE_DEPRECATION_WARNINGS
939     if (avctx->flags & CODEC_FLAG_MV0)
940         s->mpv_flags |= FF_MPV_FLAG_MV0;
941     FF_ENABLE_DEPRECATION_WARNINGS
942 #endif
943
944     if (avctx->b_frame_strategy == 2) {
945         for (i = 0; i < s->max_b_frames + 2; i++) {
946             s->tmp_frames[i] = av_frame_alloc();
947             if (!s->tmp_frames[i])
948                 return AVERROR(ENOMEM);
949
950             s->tmp_frames[i]->format = AV_PIX_FMT_YUV420P;
951             s->tmp_frames[i]->width  = s->width  >> avctx->brd_scale;
952             s->tmp_frames[i]->height = s->height >> avctx->brd_scale;
953
954             ret = av_frame_get_buffer(s->tmp_frames[i], 32);
955             if (ret < 0)
956                 return ret;
957         }
958     }
959
960     return 0;
961 fail:
962     ff_MPV_encode_end(avctx);
963     return AVERROR_UNKNOWN;
964 }
965
966 av_cold int ff_MPV_encode_end(AVCodecContext *avctx)
967 {
968     MpegEncContext *s = avctx->priv_data;
969     int i;
970
971     ff_rate_control_uninit(s);
972
973     ff_MPV_common_end(s);
974     if (CONFIG_MJPEG_ENCODER &&
975         s->out_format == FMT_MJPEG)
976         ff_mjpeg_encode_close(s);
977
978     av_freep(&avctx->extradata);
979
980     for (i = 0; i < FF_ARRAY_ELEMS(s->tmp_frames); i++)
981         av_frame_free(&s->tmp_frames[i]);
982
983     ff_free_picture_tables(&s->new_picture);
984     ff_mpeg_unref_picture(s, &s->new_picture);
985
986     av_freep(&s->avctx->stats_out);
987     av_freep(&s->ac_stats);
988
989     if(s->q_chroma_intra_matrix   != s->q_intra_matrix  ) av_freep(&s->q_chroma_intra_matrix);
990     if(s->q_chroma_intra_matrix16 != s->q_intra_matrix16) av_freep(&s->q_chroma_intra_matrix16);
991     s->q_chroma_intra_matrix=   NULL;
992     s->q_chroma_intra_matrix16= NULL;
993     av_freep(&s->q_intra_matrix);
994     av_freep(&s->q_inter_matrix);
995     av_freep(&s->q_intra_matrix16);
996     av_freep(&s->q_inter_matrix16);
997     av_freep(&s->input_picture);
998     av_freep(&s->reordered_input_picture);
999     av_freep(&s->dct_offset);
1000
1001     return 0;
1002 }
1003
/**
 * Sum of absolute differences between a 16x16 block of samples and a
 * single reference value.
 *
 * @param src    top-left sample of the block
 * @param ref    value every sample is compared against
 * @param stride distance in bytes between the starts of consecutive rows
 * @return the accumulated absolute error over the 256 samples
 */
static int get_sae(uint8_t *src, int ref, int stride)
{
    int total = 0;
    int row, col;

    for (row = 0; row < 16; row++) {
        const uint8_t *line = src + row * stride;

        for (col = 0; col < 16; col++)
            total += FFABS(line[col] - ref);
    }

    return total;
}
1017
1018 static int get_intra_count(MpegEncContext *s, uint8_t *src,
1019                            uint8_t *ref, int stride)
1020 {
1021     int x, y, w, h;
1022     int acc = 0;
1023
1024     w = s->width  & ~15;
1025     h = s->height & ~15;
1026
1027     for (y = 0; y < h; y += 16) {
1028         for (x = 0; x < w; x += 16) {
1029             int offset = x + y * stride;
1030             int sad  = s->dsp.sad[0](NULL, src + offset, ref + offset, stride,
1031                                      16);
1032             int mean = (s->mpvencdsp.pix_sum(src + offset, stride) + 128) >> 8;
1033             int sae  = get_sae(src + offset, mean, stride);
1034
1035             acc += sae + 500 < sad;
1036         }
1037     }
1038     return acc;
1039 }
1040
1041
1042 static int load_input_picture(MpegEncContext *s, const AVFrame *pic_arg)
1043 {
1044     Picture *pic = NULL;
1045     int64_t pts;
1046     int i, display_picture_number = 0, ret;
1047     const int encoding_delay = s->max_b_frames ? s->max_b_frames :
1048                                                  (s->low_delay ? 0 : 1);
1049     int direct = 1;
1050
1051     if (pic_arg) {
1052         pts = pic_arg->pts;
1053         display_picture_number = s->input_picture_number++;
1054
1055         if (pts != AV_NOPTS_VALUE) {
1056             if (s->user_specified_pts != AV_NOPTS_VALUE) {
1057                 int64_t last = s->user_specified_pts;
1058
1059                 if (pts <= last) {
1060                     av_log(s->avctx, AV_LOG_ERROR,
1061                            "Invalid pts (%"PRId64") <= last (%"PRId64")\n",
1062                            pts, last);
1063                     return AVERROR(EINVAL);
1064                 }
1065
1066                 if (!s->low_delay && display_picture_number == 1)
1067                     s->dts_delta = pts - last;
1068             }
1069             s->user_specified_pts = pts;
1070         } else {
1071             if (s->user_specified_pts != AV_NOPTS_VALUE) {
1072                 s->user_specified_pts =
1073                 pts = s->user_specified_pts + 1;
1074                 av_log(s->avctx, AV_LOG_INFO,
1075                        "Warning: AVFrame.pts=? trying to guess (%"PRId64")\n",
1076                        pts);
1077             } else {
1078                 pts = display_picture_number;
1079             }
1080         }
1081     }
1082
1083     if (pic_arg) {
1084         if (!pic_arg->buf[0])
1085             direct = 0;
1086         if (pic_arg->linesize[0] != s->linesize)
1087             direct = 0;
1088         if (pic_arg->linesize[1] != s->uvlinesize)
1089             direct = 0;
1090         if (pic_arg->linesize[2] != s->uvlinesize)
1091             direct = 0;
1092         if ((s->width & 15) || (s->height & 15))
1093             direct = 0;
1094         if (((intptr_t)(pic_arg->data[0])) & (STRIDE_ALIGN-1))
1095             direct = 0;
1096         if (s->linesize & (STRIDE_ALIGN-1))
1097             direct = 0;
1098
1099         av_dlog(s->avctx, "%d %d %"PTRDIFF_SPECIFIER" %"PTRDIFF_SPECIFIER"\n", pic_arg->linesize[0],
1100                 pic_arg->linesize[1], s->linesize, s->uvlinesize);
1101
1102         if (direct) {
1103             i = ff_find_unused_picture(s, 1);
1104             if (i < 0)
1105                 return i;
1106
1107             pic = &s->picture[i];
1108             pic->reference = 3;
1109
1110             if ((ret = av_frame_ref(pic->f, pic_arg)) < 0)
1111                 return ret;
1112             if (ff_alloc_picture(s, pic, 1) < 0) {
1113                 return -1;
1114             }
1115         } else {
1116             i = ff_find_unused_picture(s, 0);
1117             if (i < 0)
1118                 return i;
1119
1120             pic = &s->picture[i];
1121             pic->reference = 3;
1122
1123             if (ff_alloc_picture(s, pic, 0) < 0) {
1124                 return -1;
1125             }
1126
1127             if (pic->f->data[0] + INPLACE_OFFSET == pic_arg->data[0] &&
1128                 pic->f->data[1] + INPLACE_OFFSET == pic_arg->data[1] &&
1129                 pic->f->data[2] + INPLACE_OFFSET == pic_arg->data[2]) {
1130                 // empty
1131             } else {
1132                 int h_chroma_shift, v_chroma_shift;
1133                 av_pix_fmt_get_chroma_sub_sample(s->avctx->pix_fmt,
1134                                                  &h_chroma_shift,
1135                                                  &v_chroma_shift);
1136
1137                 for (i = 0; i < 3; i++) {
1138                     int src_stride = pic_arg->linesize[i];
1139                     int dst_stride = i ? s->uvlinesize : s->linesize;
1140                     int h_shift = i ? h_chroma_shift : 0;
1141                     int v_shift = i ? v_chroma_shift : 0;
1142                     int w = s->width  >> h_shift;
1143                     int h = s->height >> v_shift;
1144                     uint8_t *src = pic_arg->data[i];
1145                     uint8_t *dst = pic->f->data[i];
1146                     int vpad = 16;
1147
1148                     if (   s->codec_id == AV_CODEC_ID_MPEG2VIDEO
1149                         && !s->progressive_sequence
1150                         && FFALIGN(s->height, 32) - s->height > 16)
1151                         vpad = 32;
1152
1153                     if (!s->avctx->rc_buffer_size)
1154                         dst += INPLACE_OFFSET;
1155
1156                     if (src_stride == dst_stride)
1157                         memcpy(dst, src, src_stride * h);
1158                     else {
1159                         int h2 = h;
1160                         uint8_t *dst2 = dst;
1161                         while (h2--) {
1162                             memcpy(dst2, src, w);
1163                             dst2 += dst_stride;
1164                             src += src_stride;
1165                         }
1166                     }
1167                     if ((s->width & 15) || (s->height & (vpad-1))) {
1168                         s->mpvencdsp.draw_edges(dst, dst_stride,
1169                                                 w, h,
1170                                                 16>>h_shift,
1171                                                 vpad>>v_shift,
1172                                                 EDGE_BOTTOM);
1173                     }
1174                 }
1175             }
1176         }
1177         ret = av_frame_copy_props(pic->f, pic_arg);
1178         if (ret < 0)
1179             return ret;
1180
1181         pic->f->display_picture_number = display_picture_number;
1182         pic->f->pts = pts; // we set this here to avoid modifiying pic_arg
1183     }
1184
1185     /* shift buffer entries */
1186     for (i = 1; i < MAX_PICTURE_COUNT /*s->encoding_delay + 1*/; i++)
1187         s->input_picture[i - 1] = s->input_picture[i];
1188
1189     s->input_picture[encoding_delay] = (Picture*) pic;
1190
1191     return 0;
1192 }
1193
1194 static int skip_check(MpegEncContext *s, Picture *p, Picture *ref)
1195 {
1196     int x, y, plane;
1197     int score = 0;
1198     int64_t score64 = 0;
1199
1200     for (plane = 0; plane < 3; plane++) {
1201         const int stride = p->f->linesize[plane];
1202         const int bw = plane ? 1 : 2;
1203         for (y = 0; y < s->mb_height * bw; y++) {
1204             for (x = 0; x < s->mb_width * bw; x++) {
1205                 int off = p->shared ? 0 : 16;
1206                 uint8_t *dptr = p->f->data[plane] + 8 * (x + y * stride) + off;
1207                 uint8_t *rptr = ref->f->data[plane] + 8 * (x + y * stride);
1208                 int v   = s->dsp.frame_skip_cmp[1](s, dptr, rptr, stride, 8);
1209
1210                 switch (FFABS(s->avctx->frame_skip_exp)) {
1211                 case 0: score    =  FFMAX(score, v);          break;
1212                 case 1: score   += FFABS(v);                  break;
1213                 case 2: score64 += v * (int64_t)v;                       break;
1214                 case 3: score64 += FFABS(v * (int64_t)v * v);            break;
1215                 case 4: score64 += (v * (int64_t)v) * (v * (int64_t)v);  break;
1216                 }
1217             }
1218         }
1219     }
1220     emms_c();
1221
1222     if (score)
1223         score64 = score;
1224     if (s->avctx->frame_skip_exp < 0)
1225         score64 = pow(score64 / (double)(s->mb_width * s->mb_height),
1226                       -1.0/s->avctx->frame_skip_exp);
1227
1228     if (score64 < s->avctx->frame_skip_threshold)
1229         return 1;
1230     if (score64 < ((s->avctx->frame_skip_factor * (int64_t)s->lambda) >> 8))
1231         return 1;
1232     return 0;
1233 }
1234
1235 static int encode_frame(AVCodecContext *c, AVFrame *frame)
1236 {
1237     AVPacket pkt = { 0 };
1238     int ret, got_output;
1239
1240     av_init_packet(&pkt);
1241     ret = avcodec_encode_video2(c, &pkt, frame, &got_output);
1242     if (ret < 0)
1243         return ret;
1244
1245     ret = pkt.size;
1246     av_free_packet(&pkt);
1247     return ret;
1248 }
1249
1250 static int estimate_best_b_count(MpegEncContext *s)
1251 {
1252     AVCodec *codec    = avcodec_find_encoder(s->avctx->codec_id);
1253     AVCodecContext *c = avcodec_alloc_context3(NULL);
1254     const int scale = s->avctx->brd_scale;
1255     int i, j, out_size, p_lambda, b_lambda, lambda2;
1256     int64_t best_rd  = INT64_MAX;
1257     int best_b_count = -1;
1258
1259     av_assert0(scale >= 0 && scale <= 3);
1260
1261     //emms_c();
1262     //s->next_picture_ptr->quality;
1263     p_lambda = s->last_lambda_for[AV_PICTURE_TYPE_P];
1264     //p_lambda * FFABS(s->avctx->b_quant_factor) + s->avctx->b_quant_offset;
1265     b_lambda = s->last_lambda_for[AV_PICTURE_TYPE_B];
1266     if (!b_lambda) // FIXME we should do this somewhere else
1267         b_lambda = p_lambda;
1268     lambda2  = (b_lambda * b_lambda + (1 << FF_LAMBDA_SHIFT) / 2) >>
1269                FF_LAMBDA_SHIFT;
1270
1271     c->width        = s->width  >> scale;
1272     c->height       = s->height >> scale;
1273     c->flags        = CODEC_FLAG_QSCALE | CODEC_FLAG_PSNR;
1274     c->flags       |= s->avctx->flags & CODEC_FLAG_QPEL;
1275     c->mb_decision  = s->avctx->mb_decision;
1276     c->me_cmp       = s->avctx->me_cmp;
1277     c->mb_cmp       = s->avctx->mb_cmp;
1278     c->me_sub_cmp   = s->avctx->me_sub_cmp;
1279     c->pix_fmt      = AV_PIX_FMT_YUV420P;
1280     c->time_base    = s->avctx->time_base;
1281     c->max_b_frames = s->max_b_frames;
1282
1283     if (avcodec_open2(c, codec, NULL) < 0)
1284         return -1;
1285
1286     for (i = 0; i < s->max_b_frames + 2; i++) {
1287         Picture pre_input, *pre_input_ptr = i ? s->input_picture[i - 1] :
1288                                                 s->next_picture_ptr;
1289         uint8_t *data[4];
1290
1291         if (pre_input_ptr && (!i || s->input_picture[i - 1])) {
1292             pre_input = *pre_input_ptr;
1293             memcpy(data, pre_input_ptr->f->data, sizeof(data));
1294
1295             if (!pre_input.shared && i) {
1296                 data[0] += INPLACE_OFFSET;
1297                 data[1] += INPLACE_OFFSET;
1298                 data[2] += INPLACE_OFFSET;
1299             }
1300
1301             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[0],
1302                                        s->tmp_frames[i]->linesize[0],
1303                                        data[0],
1304                                        pre_input.f->linesize[0],
1305                                        c->width, c->height);
1306             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[1],
1307                                        s->tmp_frames[i]->linesize[1],
1308                                        data[1],
1309                                        pre_input.f->linesize[1],
1310                                        c->width >> 1, c->height >> 1);
1311             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[2],
1312                                        s->tmp_frames[i]->linesize[2],
1313                                        data[2],
1314                                        pre_input.f->linesize[2],
1315                                        c->width >> 1, c->height >> 1);
1316         }
1317     }
1318
1319     for (j = 0; j < s->max_b_frames + 1; j++) {
1320         int64_t rd = 0;
1321
1322         if (!s->input_picture[j])
1323             break;
1324
1325         c->error[0] = c->error[1] = c->error[2] = 0;
1326
1327         s->tmp_frames[0]->pict_type = AV_PICTURE_TYPE_I;
1328         s->tmp_frames[0]->quality   = 1 * FF_QP2LAMBDA;
1329
1330         out_size = encode_frame(c, s->tmp_frames[0]);
1331
1332         //rd += (out_size * lambda2) >> FF_LAMBDA_SHIFT;
1333
1334         for (i = 0; i < s->max_b_frames + 1; i++) {
1335             int is_p = i % (j + 1) == j || i == s->max_b_frames;
1336
1337             s->tmp_frames[i + 1]->pict_type = is_p ?
1338                                      AV_PICTURE_TYPE_P : AV_PICTURE_TYPE_B;
1339             s->tmp_frames[i + 1]->quality   = is_p ? p_lambda : b_lambda;
1340
1341             out_size = encode_frame(c, s->tmp_frames[i + 1]);
1342
1343             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1344         }
1345
1346         /* get the delayed frames */
1347         while (out_size) {
1348             out_size = encode_frame(c, NULL);
1349             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1350         }
1351
1352         rd += c->error[0] + c->error[1] + c->error[2];
1353
1354         if (rd < best_rd) {
1355             best_rd = rd;
1356             best_b_count = j;
1357         }
1358     }
1359
1360     avcodec_close(c);
1361     av_freep(&c);
1362
1363     return best_b_count;
1364 }
1365
1366 static int select_input_picture(MpegEncContext *s)
1367 {
1368     int i, ret;
1369
1370     for (i = 1; i < MAX_PICTURE_COUNT; i++)
1371         s->reordered_input_picture[i - 1] = s->reordered_input_picture[i];
1372     s->reordered_input_picture[MAX_PICTURE_COUNT - 1] = NULL;
1373
1374     /* set next picture type & ordering */
1375     if (s->reordered_input_picture[0] == NULL && s->input_picture[0]) {
1376         if (s->avctx->frame_skip_threshold || s->avctx->frame_skip_factor) {
1377             if (s->picture_in_gop_number < s->gop_size &&
1378                 s->next_picture_ptr &&
1379                 skip_check(s, s->input_picture[0], s->next_picture_ptr)) {
1380                 // FIXME check that te gop check above is +-1 correct
1381                 av_frame_unref(s->input_picture[0]->f);
1382
1383                 ff_vbv_update(s, 0);
1384
1385                 goto no_output_pic;
1386             }
1387         }
1388
1389         if (/*s->picture_in_gop_number >= s->gop_size ||*/
1390             s->next_picture_ptr == NULL || s->intra_only) {
1391             s->reordered_input_picture[0] = s->input_picture[0];
1392             s->reordered_input_picture[0]->f->pict_type = AV_PICTURE_TYPE_I;
1393             s->reordered_input_picture[0]->f->coded_picture_number =
1394                 s->coded_picture_number++;
1395         } else {
1396             int b_frames;
1397
1398             if (s->flags & CODEC_FLAG_PASS2) {
1399                 for (i = 0; i < s->max_b_frames + 1; i++) {
1400                     int pict_num = s->input_picture[0]->f->display_picture_number + i;
1401
1402                     if (pict_num >= s->rc_context.num_entries)
1403                         break;
1404                     if (!s->input_picture[i]) {
1405                         s->rc_context.entry[pict_num - 1].new_pict_type = AV_PICTURE_TYPE_P;
1406                         break;
1407                     }
1408
1409                     s->input_picture[i]->f->pict_type =
1410                         s->rc_context.entry[pict_num].new_pict_type;
1411                 }
1412             }
1413
1414             if (s->avctx->b_frame_strategy == 0) {
1415                 b_frames = s->max_b_frames;
1416                 while (b_frames && !s->input_picture[b_frames])
1417                     b_frames--;
1418             } else if (s->avctx->b_frame_strategy == 1) {
1419                 for (i = 1; i < s->max_b_frames + 1; i++) {
1420                     if (s->input_picture[i] &&
1421                         s->input_picture[i]->b_frame_score == 0) {
1422                         s->input_picture[i]->b_frame_score =
1423                             get_intra_count(s,
1424                                             s->input_picture[i    ]->f->data[0],
1425                                             s->input_picture[i - 1]->f->data[0],
1426                                             s->linesize) + 1;
1427                     }
1428                 }
1429                 for (i = 0; i < s->max_b_frames + 1; i++) {
1430                     if (s->input_picture[i] == NULL ||
1431                         s->input_picture[i]->b_frame_score - 1 >
1432                             s->mb_num / s->avctx->b_sensitivity)
1433                         break;
1434                 }
1435
1436                 b_frames = FFMAX(0, i - 1);
1437
1438                 /* reset scores */
1439                 for (i = 0; i < b_frames + 1; i++) {
1440                     s->input_picture[i]->b_frame_score = 0;
1441                 }
1442             } else if (s->avctx->b_frame_strategy == 2) {
1443                 b_frames = estimate_best_b_count(s);
1444             } else {
1445                 av_log(s->avctx, AV_LOG_ERROR, "illegal b frame strategy\n");
1446                 b_frames = 0;
1447             }
1448
1449             emms_c();
1450
1451             for (i = b_frames - 1; i >= 0; i--) {
1452                 int type = s->input_picture[i]->f->pict_type;
1453                 if (type && type != AV_PICTURE_TYPE_B)
1454                     b_frames = i;
1455             }
1456             if (s->input_picture[b_frames]->f->pict_type == AV_PICTURE_TYPE_B &&
1457                 b_frames == s->max_b_frames) {
1458                 av_log(s->avctx, AV_LOG_ERROR,
1459                        "warning, too many b frames in a row\n");
1460             }
1461
1462             if (s->picture_in_gop_number + b_frames >= s->gop_size) {
1463                 if ((s->mpv_flags & FF_MPV_FLAG_STRICT_GOP) &&
1464                     s->gop_size > s->picture_in_gop_number) {
1465                     b_frames = s->gop_size - s->picture_in_gop_number - 1;
1466                 } else {
1467                     if (s->flags & CODEC_FLAG_CLOSED_GOP)
1468                         b_frames = 0;
1469                     s->input_picture[b_frames]->f->pict_type = AV_PICTURE_TYPE_I;
1470                 }
1471             }
1472
1473             if ((s->flags & CODEC_FLAG_CLOSED_GOP) && b_frames &&
1474                 s->input_picture[b_frames]->f->pict_type == AV_PICTURE_TYPE_I)
1475                 b_frames--;
1476
1477             s->reordered_input_picture[0] = s->input_picture[b_frames];
1478             if (s->reordered_input_picture[0]->f->pict_type != AV_PICTURE_TYPE_I)
1479                 s->reordered_input_picture[0]->f->pict_type = AV_PICTURE_TYPE_P;
1480             s->reordered_input_picture[0]->f->coded_picture_number =
1481                 s->coded_picture_number++;
1482             for (i = 0; i < b_frames; i++) {
1483                 s->reordered_input_picture[i + 1] = s->input_picture[i];
1484                 s->reordered_input_picture[i + 1]->f->pict_type =
1485                     AV_PICTURE_TYPE_B;
1486                 s->reordered_input_picture[i + 1]->f->coded_picture_number =
1487                     s->coded_picture_number++;
1488             }
1489         }
1490     }
1491 no_output_pic:
1492     if (s->reordered_input_picture[0]) {
1493         s->reordered_input_picture[0]->reference =
1494            s->reordered_input_picture[0]->f->pict_type !=
1495                AV_PICTURE_TYPE_B ? 3 : 0;
1496
1497         ff_mpeg_unref_picture(s, &s->new_picture);
1498         if ((ret = ff_mpeg_ref_picture(s, &s->new_picture, s->reordered_input_picture[0])))
1499             return ret;
1500
1501         if (s->reordered_input_picture[0]->shared || s->avctx->rc_buffer_size) {
1502             // input is a shared pix, so we can't modifiy it -> alloc a new
1503             // one & ensure that the shared one is reuseable
1504
1505             Picture *pic;
1506             int i = ff_find_unused_picture(s, 0);
1507             if (i < 0)
1508                 return i;
1509             pic = &s->picture[i];
1510
1511             pic->reference = s->reordered_input_picture[0]->reference;
1512             if (ff_alloc_picture(s, pic, 0) < 0) {
1513                 return -1;
1514             }
1515
1516             ret = av_frame_copy_props(pic->f, s->reordered_input_picture[0]->f);
1517             if (ret < 0)
1518                 return ret;
1519
1520             /* mark us unused / free shared pic */
1521             av_frame_unref(s->reordered_input_picture[0]->f);
1522             s->reordered_input_picture[0]->shared = 0;
1523
1524             s->current_picture_ptr = pic;
1525         } else {
1526             // input is not a shared pix -> reuse buffer for current_pix
1527             s->current_picture_ptr = s->reordered_input_picture[0];
1528             for (i = 0; i < 4; i++) {
1529                 s->new_picture.f->data[i] += INPLACE_OFFSET;
1530             }
1531         }
1532         ff_mpeg_unref_picture(s, &s->current_picture);
1533         if ((ret = ff_mpeg_ref_picture(s, &s->current_picture,
1534                                        s->current_picture_ptr)) < 0)
1535             return ret;
1536
1537         s->picture_number = s->new_picture.f->display_picture_number;
1538     } else {
1539         ff_mpeg_unref_picture(s, &s->new_picture);
1540     }
1541     return 0;
1542 }
1543
1544 static void frame_end(MpegEncContext *s)
1545 {
1546     if (s->unrestricted_mv &&
1547         s->current_picture.reference &&
1548         !s->intra_only) {
1549         const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(s->avctx->pix_fmt);
1550         int hshift = desc->log2_chroma_w;
1551         int vshift = desc->log2_chroma_h;
1552         s->mpvencdsp.draw_edges(s->current_picture.f->data[0],
1553                                 s->current_picture.f->linesize[0],
1554                                 s->h_edge_pos, s->v_edge_pos,
1555                                 EDGE_WIDTH, EDGE_WIDTH,
1556                                 EDGE_TOP | EDGE_BOTTOM);
1557         s->mpvencdsp.draw_edges(s->current_picture.f->data[1],
1558                                 s->current_picture.f->linesize[1],
1559                                 s->h_edge_pos >> hshift,
1560                                 s->v_edge_pos >> vshift,
1561                                 EDGE_WIDTH >> hshift,
1562                                 EDGE_WIDTH >> vshift,
1563                                 EDGE_TOP | EDGE_BOTTOM);
1564         s->mpvencdsp.draw_edges(s->current_picture.f->data[2],
1565                                 s->current_picture.f->linesize[2],
1566                                 s->h_edge_pos >> hshift,
1567                                 s->v_edge_pos >> vshift,
1568                                 EDGE_WIDTH >> hshift,
1569                                 EDGE_WIDTH >> vshift,
1570                                 EDGE_TOP | EDGE_BOTTOM);
1571     }
1572
1573     emms_c();
1574
1575     s->last_pict_type                 = s->pict_type;
1576     s->last_lambda_for [s->pict_type] = s->current_picture_ptr->f->quality;
1577     if (s->pict_type!= AV_PICTURE_TYPE_B)
1578         s->last_non_b_pict_type = s->pict_type;
1579
1580     s->avctx->coded_frame = s->current_picture_ptr->f;
1581
1582 }
1583
1584 static void update_noise_reduction(MpegEncContext *s)
1585 {
1586     int intra, i;
1587
1588     for (intra = 0; intra < 2; intra++) {
1589         if (s->dct_count[intra] > (1 << 16)) {
1590             for (i = 0; i < 64; i++) {
1591                 s->dct_error_sum[intra][i] >>= 1;
1592             }
1593             s->dct_count[intra] >>= 1;
1594         }
1595
1596         for (i = 0; i < 64; i++) {
1597             s->dct_offset[intra][i] = (s->avctx->noise_reduction *
1598                                        s->dct_count[intra] +
1599                                        s->dct_error_sum[intra][i] / 2) /
1600                                       (s->dct_error_sum[intra][i] + 1);
1601         }
1602     }
1603 }
1604
1605 static int frame_start(MpegEncContext *s)
1606 {
1607     int ret;
1608
1609     /* mark & release old frames */
1610     if (s->pict_type != AV_PICTURE_TYPE_B && s->last_picture_ptr &&
1611         s->last_picture_ptr != s->next_picture_ptr &&
1612         s->last_picture_ptr->f->buf[0]) {
1613         ff_mpeg_unref_picture(s, s->last_picture_ptr);
1614     }
1615
1616     s->current_picture_ptr->f->pict_type = s->pict_type;
1617     s->current_picture_ptr->f->key_frame = s->pict_type == AV_PICTURE_TYPE_I;
1618
1619     ff_mpeg_unref_picture(s, &s->current_picture);
1620     if ((ret = ff_mpeg_ref_picture(s, &s->current_picture,
1621                                    s->current_picture_ptr)) < 0)
1622         return ret;
1623
1624     if (s->pict_type != AV_PICTURE_TYPE_B) {
1625         s->last_picture_ptr = s->next_picture_ptr;
1626         if (!s->droppable)
1627             s->next_picture_ptr = s->current_picture_ptr;
1628     }
1629
1630     if (s->last_picture_ptr) {
1631         ff_mpeg_unref_picture(s, &s->last_picture);
1632         if (s->last_picture_ptr->f->buf[0] &&
1633             (ret = ff_mpeg_ref_picture(s, &s->last_picture,
1634                                        s->last_picture_ptr)) < 0)
1635             return ret;
1636     }
1637     if (s->next_picture_ptr) {
1638         ff_mpeg_unref_picture(s, &s->next_picture);
1639         if (s->next_picture_ptr->f->buf[0] &&
1640             (ret = ff_mpeg_ref_picture(s, &s->next_picture,
1641                                        s->next_picture_ptr)) < 0)
1642             return ret;
1643     }
1644
1645     if (s->picture_structure!= PICT_FRAME) {
1646         int i;
1647         for (i = 0; i < 4; i++) {
1648             if (s->picture_structure == PICT_BOTTOM_FIELD) {
1649                 s->current_picture.f->data[i] +=
1650                     s->current_picture.f->linesize[i];
1651             }
1652             s->current_picture.f->linesize[i] *= 2;
1653             s->last_picture.f->linesize[i]    *= 2;
1654             s->next_picture.f->linesize[i]    *= 2;
1655         }
1656     }
1657
1658     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG2VIDEO) {
1659         s->dct_unquantize_intra = s->dct_unquantize_mpeg2_intra;
1660         s->dct_unquantize_inter = s->dct_unquantize_mpeg2_inter;
1661     } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
1662         s->dct_unquantize_intra = s->dct_unquantize_h263_intra;
1663         s->dct_unquantize_inter = s->dct_unquantize_h263_inter;
1664     } else {
1665         s->dct_unquantize_intra = s->dct_unquantize_mpeg1_intra;
1666         s->dct_unquantize_inter = s->dct_unquantize_mpeg1_inter;
1667     }
1668
1669     if (s->dct_error_sum) {
1670         av_assert2(s->avctx->noise_reduction && s->encoding);
1671         update_noise_reduction(s);
1672     }
1673
1674     return 0;
1675 }
1676
1677 int ff_MPV_encode_picture(AVCodecContext *avctx, AVPacket *pkt,
1678                           const AVFrame *pic_arg, int *got_packet)
1679 {
1680     MpegEncContext *s = avctx->priv_data;
1681     int i, stuffing_count, ret;
1682     int context_count = s->slice_context_count;
1683
1684     s->picture_in_gop_number++;
1685
1686     if (load_input_picture(s, pic_arg) < 0)
1687         return -1;
1688
1689     if (select_input_picture(s) < 0) {
1690         return -1;
1691     }
1692
1693     /* output? */
1694     if (s->new_picture.f->data[0]) {
1695         if ((ret = ff_alloc_packet2(avctx, pkt, s->mb_width*s->mb_height*(MAX_MB_BYTES+100)+10000)) < 0)
1696             return ret;
1697         if (s->mb_info) {
1698             s->mb_info_ptr = av_packet_new_side_data(pkt,
1699                                  AV_PKT_DATA_H263_MB_INFO,
1700                                  s->mb_width*s->mb_height*12);
1701             s->prev_mb_info = s->last_mb_info = s->mb_info_size = 0;
1702         }
1703
1704         for (i = 0; i < context_count; i++) {
1705             int start_y = s->thread_context[i]->start_mb_y;
1706             int   end_y = s->thread_context[i]->  end_mb_y;
1707             int h       = s->mb_height;
1708             uint8_t *start = pkt->data + (size_t)(((int64_t) pkt->size) * start_y / h);
1709             uint8_t *end   = pkt->data + (size_t)(((int64_t) pkt->size) *   end_y / h);
1710
1711             init_put_bits(&s->thread_context[i]->pb, start, end - start);
1712         }
1713
1714         s->pict_type = s->new_picture.f->pict_type;
1715         //emms_c();
1716         ret = frame_start(s);
1717         if (ret < 0)
1718             return ret;
1719 vbv_retry:
1720         if (encode_picture(s, s->picture_number) < 0)
1721             return -1;
1722
1723         avctx->header_bits = s->header_bits;
1724         avctx->mv_bits     = s->mv_bits;
1725         avctx->misc_bits   = s->misc_bits;
1726         avctx->i_tex_bits  = s->i_tex_bits;
1727         avctx->p_tex_bits  = s->p_tex_bits;
1728         avctx->i_count     = s->i_count;
1729         // FIXME f/b_count in avctx
1730         avctx->p_count     = s->mb_num - s->i_count - s->skip_count;
1731         avctx->skip_count  = s->skip_count;
1732
1733         frame_end(s);
1734
1735         if (CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG)
1736             ff_mjpeg_encode_picture_trailer(&s->pb, s->header_bits);
1737
1738         if (avctx->rc_buffer_size) {
1739             RateControlContext *rcc = &s->rc_context;
1740             int max_size = rcc->buffer_index * avctx->rc_max_available_vbv_use;
1741
1742             if (put_bits_count(&s->pb) > max_size &&
1743                 s->lambda < s->avctx->lmax) {
1744                 s->next_lambda = FFMAX(s->lambda + 1, s->lambda *
1745                                        (s->qscale + 1) / s->qscale);
1746                 if (s->adaptive_quant) {
1747                     int i;
1748                     for (i = 0; i < s->mb_height * s->mb_stride; i++)
1749                         s->lambda_table[i] =
1750                             FFMAX(s->lambda_table[i] + 1,
1751                                   s->lambda_table[i] * (s->qscale + 1) /
1752                                   s->qscale);
1753                 }
1754                 s->mb_skipped = 0;        // done in frame_start()
1755                 // done in encode_picture() so we must undo it
1756                 if (s->pict_type == AV_PICTURE_TYPE_P) {
1757                     if (s->flipflop_rounding          ||
1758                         s->codec_id == AV_CODEC_ID_H263P ||
1759                         s->codec_id == AV_CODEC_ID_MPEG4)
1760                         s->no_rounding ^= 1;
1761                 }
1762                 if (s->pict_type != AV_PICTURE_TYPE_B) {
1763                     s->time_base       = s->last_time_base;
1764                     s->last_non_b_time = s->time - s->pp_time;
1765                 }
1766                 for (i = 0; i < context_count; i++) {
1767                     PutBitContext *pb = &s->thread_context[i]->pb;
1768                     init_put_bits(pb, pb->buf, pb->buf_end - pb->buf);
1769                 }
1770                 goto vbv_retry;
1771             }
1772
1773             av_assert0(s->avctx->rc_max_rate);
1774         }
1775
1776         if (s->flags & CODEC_FLAG_PASS1)
1777             ff_write_pass1_stats(s);
1778
1779         for (i = 0; i < 4; i++) {
1780             s->current_picture_ptr->f->error[i] =
1781             s->current_picture.f->error[i] =
1782                 s->current_picture.error[i];
1783             avctx->error[i] += s->current_picture_ptr->f->error[i];
1784         }
1785
1786         if (s->flags & CODEC_FLAG_PASS1)
1787             assert(avctx->header_bits + avctx->mv_bits + avctx->misc_bits +
1788                    avctx->i_tex_bits + avctx->p_tex_bits ==
1789                        put_bits_count(&s->pb));
1790         flush_put_bits(&s->pb);
1791         s->frame_bits  = put_bits_count(&s->pb);
1792
1793         stuffing_count = ff_vbv_update(s, s->frame_bits);
1794         s->stuffing_bits = 8*stuffing_count;
1795         if (stuffing_count) {
1796             if (s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb) >> 3) <
1797                     stuffing_count + 50) {
1798                 av_log(s->avctx, AV_LOG_ERROR, "stuffing too large\n");
1799                 return -1;
1800             }
1801
1802             switch (s->codec_id) {
1803             case AV_CODEC_ID_MPEG1VIDEO:
1804             case AV_CODEC_ID_MPEG2VIDEO:
1805                 while (stuffing_count--) {
1806                     put_bits(&s->pb, 8, 0);
1807                 }
1808             break;
1809             case AV_CODEC_ID_MPEG4:
1810                 put_bits(&s->pb, 16, 0);
1811                 put_bits(&s->pb, 16, 0x1C3);
1812                 stuffing_count -= 4;
1813                 while (stuffing_count--) {
1814                     put_bits(&s->pb, 8, 0xFF);
1815                 }
1816             break;
1817             default:
1818                 av_log(s->avctx, AV_LOG_ERROR, "vbv buffer overflow\n");
1819             }
1820             flush_put_bits(&s->pb);
1821             s->frame_bits  = put_bits_count(&s->pb);
1822         }
1823
1824         /* update mpeg1/2 vbv_delay for CBR */
1825         if (s->avctx->rc_max_rate                          &&
1826             s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
1827             s->out_format == FMT_MPEG1                     &&
1828             90000LL * (avctx->rc_buffer_size - 1) <=
1829                 s->avctx->rc_max_rate * 0xFFFFLL) {
1830             int vbv_delay, min_delay;
1831             double inbits  = s->avctx->rc_max_rate *
1832                              av_q2d(s->avctx->time_base);
1833             int    minbits = s->frame_bits - 8 *
1834                              (s->vbv_delay_ptr - s->pb.buf - 1);
1835             double bits    = s->rc_context.buffer_index + minbits - inbits;
1836
1837             if (bits < 0)
1838                 av_log(s->avctx, AV_LOG_ERROR,
1839                        "Internal error, negative bits\n");
1840
1841             assert(s->repeat_first_field == 0);
1842
1843             vbv_delay = bits * 90000 / s->avctx->rc_max_rate;
1844             min_delay = (minbits * 90000LL + s->avctx->rc_max_rate - 1) /
1845                         s->avctx->rc_max_rate;
1846
1847             vbv_delay = FFMAX(vbv_delay, min_delay);
1848
1849             av_assert0(vbv_delay < 0xFFFF);
1850
1851             s->vbv_delay_ptr[0] &= 0xF8;
1852             s->vbv_delay_ptr[0] |= vbv_delay >> 13;
1853             s->vbv_delay_ptr[1]  = vbv_delay >> 5;
1854             s->vbv_delay_ptr[2] &= 0x07;
1855             s->vbv_delay_ptr[2] |= vbv_delay << 3;
1856             avctx->vbv_delay     = vbv_delay * 300;
1857         }
1858         s->total_bits     += s->frame_bits;
1859         avctx->frame_bits  = s->frame_bits;
1860
1861         pkt->pts = s->current_picture.f->pts;
1862         if (!s->low_delay && s->pict_type != AV_PICTURE_TYPE_B) {
1863             if (!s->current_picture.f->coded_picture_number)
1864                 pkt->dts = pkt->pts - s->dts_delta;
1865             else
1866                 pkt->dts = s->reordered_pts;
1867             s->reordered_pts = pkt->pts;
1868         } else
1869             pkt->dts = pkt->pts;
1870         if (s->current_picture.f->key_frame)
1871             pkt->flags |= AV_PKT_FLAG_KEY;
1872         if (s->mb_info)
1873             av_packet_shrink_side_data(pkt, AV_PKT_DATA_H263_MB_INFO, s->mb_info_size);
1874     } else {
1875         s->frame_bits = 0;
1876     }
1877
1878     /* release non-reference frames */
1879     for (i = 0; i < MAX_PICTURE_COUNT; i++) {
1880         if (!s->picture[i].reference)
1881             ff_mpeg_unref_picture(s, &s->picture[i]);
1882     }
1883
1884     av_assert1((s->frame_bits & 7) == 0);
1885
1886     pkt->size = s->frame_bits / 8;
1887     *got_packet = !!pkt->size;
1888     return 0;
1889 }
1890
1891 static inline void dct_single_coeff_elimination(MpegEncContext *s,
1892                                                 int n, int threshold)
1893 {
1894     static const char tab[64] = {
1895         3, 2, 2, 1, 1, 1, 1, 1,
1896         1, 1, 1, 1, 1, 1, 1, 1,
1897         1, 1, 1, 1, 1, 1, 1, 1,
1898         0, 0, 0, 0, 0, 0, 0, 0,
1899         0, 0, 0, 0, 0, 0, 0, 0,
1900         0, 0, 0, 0, 0, 0, 0, 0,
1901         0, 0, 0, 0, 0, 0, 0, 0,
1902         0, 0, 0, 0, 0, 0, 0, 0
1903     };
1904     int score = 0;
1905     int run = 0;
1906     int i;
1907     int16_t *block = s->block[n];
1908     const int last_index = s->block_last_index[n];
1909     int skip_dc;
1910
1911     if (threshold < 0) {
1912         skip_dc = 0;
1913         threshold = -threshold;
1914     } else
1915         skip_dc = 1;
1916
1917     /* Are all we could set to zero already zero? */
1918     if (last_index <= skip_dc - 1)
1919         return;
1920
1921     for (i = 0; i <= last_index; i++) {
1922         const int j = s->intra_scantable.permutated[i];
1923         const int level = FFABS(block[j]);
1924         if (level == 1) {
1925             if (skip_dc && i == 0)
1926                 continue;
1927             score += tab[run];
1928             run = 0;
1929         } else if (level > 1) {
1930             return;
1931         } else {
1932             run++;
1933         }
1934     }
1935     if (score >= threshold)
1936         return;
1937     for (i = skip_dc; i <= last_index; i++) {
1938         const int j = s->intra_scantable.permutated[i];
1939         block[j] = 0;
1940     }
1941     if (block[0])
1942         s->block_last_index[n] = 0;
1943     else
1944         s->block_last_index[n] = -1;
1945 }
1946
1947 static inline void clip_coeffs(MpegEncContext *s, int16_t *block,
1948                                int last_index)
1949 {
1950     int i;
1951     const int maxlevel = s->max_qcoeff;
1952     const int minlevel = s->min_qcoeff;
1953     int overflow = 0;
1954
1955     if (s->mb_intra) {
1956         i = 1; // skip clipping of intra dc
1957     } else
1958         i = 0;
1959
1960     for (; i <= last_index; i++) {
1961         const int j = s->intra_scantable.permutated[i];
1962         int level = block[j];
1963
1964         if (level > maxlevel) {
1965             level = maxlevel;
1966             overflow++;
1967         } else if (level < minlevel) {
1968             level = minlevel;
1969             overflow++;
1970         }
1971
1972         block[j] = level;
1973     }
1974
1975     if (overflow && s->avctx->mb_decision == FF_MB_DECISION_SIMPLE)
1976         av_log(s->avctx, AV_LOG_INFO,
1977                "warning, clipping %d dct coefficients to %d..%d\n",
1978                overflow, minlevel, maxlevel);
1979 }
1980
/**
 * Fill an 8x8 weight table from the local activity of an 8x8 pixel block.
 *
 * For every pixel the sum and the sum of squares are gathered over its
 * neighbourhood (up to 3x3, cropped at the block borders), and the weight
 * is 36 * sqrt(count * sqr - sum * sum) / count.
 *
 * @param weight output table of 64 entries, stored row by row
 * @param ptr    top-left pixel of the 8x8 block
 * @param stride distance between two rows of ptr
 */
static void get_visual_weight(int16_t *weight, uint8_t *ptr, int stride)
{
    int idx;

    // FIXME optimize
    for (idx = 0; idx < 64; idx++) {
        const int x  = idx & 7;
        const int y  = idx >> 3;
        const int x0 = FFMAX(x - 1, 0);
        const int x1 = FFMIN(8, x + 2);
        const int y0 = FFMAX(y - 1, 0);
        const int y1 = FFMIN(8, y + 2);
        int sum   = 0;
        int sqr   = 0;
        int count = 0;
        int nx, ny;

        for (ny = y0; ny < y1; ny++) {
            for (nx = x0; nx < x1; nx++) {
                const int v = ptr[nx + ny * stride];

                sum += v;
                sqr += v * v;
                count++;
            }
        }

        weight[idx] = (36 * ff_sqrt(count * sqr - sum * sum)) / count;
    }
}
2004
2005 static av_always_inline void encode_mb_internal(MpegEncContext *s,
2006                                                 int motion_x, int motion_y,
2007                                                 int mb_block_height,
2008                                                 int mb_block_width,
2009                                                 int mb_block_count)
2010 {
2011     int16_t weight[12][64];
2012     int16_t orig[12][64];
2013     const int mb_x = s->mb_x;
2014     const int mb_y = s->mb_y;
2015     int i;
2016     int skip_dct[12];
2017     int dct_offset = s->linesize * 8; // default for progressive frames
2018     int uv_dct_offset = s->uvlinesize * 8;
2019     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
2020     ptrdiff_t wrap_y, wrap_c;
2021
2022     for (i = 0; i < mb_block_count; i++)
2023         skip_dct[i] = s->skipdct;
2024
2025     if (s->adaptive_quant) {
2026         const int last_qp = s->qscale;
2027         const int mb_xy = mb_x + mb_y * s->mb_stride;
2028
2029         s->lambda = s->lambda_table[mb_xy];
2030         update_qscale(s);
2031
2032         if (!(s->mpv_flags & FF_MPV_FLAG_QP_RD)) {
2033             s->qscale = s->current_picture_ptr->qscale_table[mb_xy];
2034             s->dquant = s->qscale - last_qp;
2035
2036             if (s->out_format == FMT_H263) {
2037                 s->dquant = av_clip(s->dquant, -2, 2);
2038
2039                 if (s->codec_id == AV_CODEC_ID_MPEG4) {
2040                     if (!s->mb_intra) {
2041                         if (s->pict_type == AV_PICTURE_TYPE_B) {
2042                             if (s->dquant & 1 || s->mv_dir & MV_DIRECT)
2043                                 s->dquant = 0;
2044                         }
2045                         if (s->mv_type == MV_TYPE_8X8)
2046                             s->dquant = 0;
2047                     }
2048                 }
2049             }
2050         }
2051         ff_set_qscale(s, last_qp + s->dquant);
2052     } else if (s->mpv_flags & FF_MPV_FLAG_QP_RD)
2053         ff_set_qscale(s, s->qscale + s->dquant);
2054
2055     wrap_y = s->linesize;
2056     wrap_c = s->uvlinesize;
2057     ptr_y  = s->new_picture.f->data[0] +
2058              (mb_y * 16 * wrap_y)              + mb_x * 16;
2059     ptr_cb = s->new_picture.f->data[1] +
2060              (mb_y * mb_block_height * wrap_c) + mb_x * mb_block_width;
2061     ptr_cr = s->new_picture.f->data[2] +
2062              (mb_y * mb_block_height * wrap_c) + mb_x * mb_block_width;
2063
2064     if((mb_x * 16 + 16 > s->width || mb_y * 16 + 16 > s->height) && s->codec_id != AV_CODEC_ID_AMV){
2065         uint8_t *ebuf = s->edge_emu_buffer + 36 * wrap_y;
2066         int cw = (s->width  + s->chroma_x_shift) >> s->chroma_x_shift;
2067         int ch = (s->height + s->chroma_y_shift) >> s->chroma_y_shift;
2068         s->vdsp.emulated_edge_mc(ebuf, ptr_y,
2069                                  wrap_y, wrap_y,
2070                                  16, 16, mb_x * 16, mb_y * 16,
2071                                  s->width, s->height);
2072         ptr_y = ebuf;
2073         s->vdsp.emulated_edge_mc(ebuf + 16 * wrap_y, ptr_cb,
2074                                  wrap_c, wrap_c,
2075                                  mb_block_width, mb_block_height,
2076                                  mb_x * mb_block_width, mb_y * mb_block_height,
2077                                  cw, ch);
2078         ptr_cb = ebuf + 16 * wrap_y;
2079         s->vdsp.emulated_edge_mc(ebuf + 16 * wrap_y + 16, ptr_cr,
2080                                  wrap_c, wrap_c,
2081                                  mb_block_width, mb_block_height,
2082                                  mb_x * mb_block_width, mb_y * mb_block_height,
2083                                  cw, ch);
2084         ptr_cr = ebuf + 16 * wrap_y + 16;
2085     }
2086
2087     if (s->mb_intra) {
2088         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
2089             int progressive_score, interlaced_score;
2090
2091             s->interlaced_dct = 0;
2092             progressive_score = s->dsp.ildct_cmp[4](s, ptr_y,
2093                                                     NULL, wrap_y, 8) +
2094                                 s->dsp.ildct_cmp[4](s, ptr_y + wrap_y * 8,
2095                                                     NULL, wrap_y, 8) - 400;
2096
2097             if (progressive_score > 0) {
2098                 interlaced_score = s->dsp.ildct_cmp[4](s, ptr_y,
2099                                                        NULL, wrap_y * 2, 8) +
2100                                    s->dsp.ildct_cmp[4](s, ptr_y + wrap_y,
2101                                                        NULL, wrap_y * 2, 8);
2102                 if (progressive_score > interlaced_score) {
2103                     s->interlaced_dct = 1;
2104
2105                     dct_offset = wrap_y;
2106                     uv_dct_offset = wrap_c;
2107                     wrap_y <<= 1;
2108                     if (s->chroma_format == CHROMA_422 ||
2109                         s->chroma_format == CHROMA_444)
2110                         wrap_c <<= 1;
2111                 }
2112             }
2113         }
2114
2115         s->pdsp.get_pixels(s->block[0], ptr_y,                  wrap_y);
2116         s->pdsp.get_pixels(s->block[1], ptr_y + 8,              wrap_y);
2117         s->pdsp.get_pixels(s->block[2], ptr_y + dct_offset,     wrap_y);
2118         s->pdsp.get_pixels(s->block[3], ptr_y + dct_offset + 8, wrap_y);
2119
2120         if (s->flags & CODEC_FLAG_GRAY) {
2121             skip_dct[4] = 1;
2122             skip_dct[5] = 1;
2123         } else {
2124             s->pdsp.get_pixels(s->block[4], ptr_cb, wrap_c);
2125             s->pdsp.get_pixels(s->block[5], ptr_cr, wrap_c);
2126             if (!s->chroma_y_shift && s->chroma_x_shift) { /* 422 */
2127                 s->pdsp.get_pixels(s->block[6], ptr_cb + uv_dct_offset, wrap_c);
2128                 s->pdsp.get_pixels(s->block[7], ptr_cr + uv_dct_offset, wrap_c);
2129             } else if (!s->chroma_y_shift && !s->chroma_x_shift) { /* 444 */
2130                 s->pdsp.get_pixels(s->block[ 6], ptr_cb + 8, wrap_c);
2131                 s->pdsp.get_pixels(s->block[ 7], ptr_cr + 8, wrap_c);
2132                 s->pdsp.get_pixels(s->block[ 8], ptr_cb + uv_dct_offset, wrap_c);
2133                 s->pdsp.get_pixels(s->block[ 9], ptr_cr + uv_dct_offset, wrap_c);
2134                 s->pdsp.get_pixels(s->block[10], ptr_cb + uv_dct_offset + 8, wrap_c);
2135                 s->pdsp.get_pixels(s->block[11], ptr_cr + uv_dct_offset + 8, wrap_c);
2136             }
2137         }
2138     } else {
2139         op_pixels_func (*op_pix)[4];
2140         qpel_mc_func (*op_qpix)[16];
2141         uint8_t *dest_y, *dest_cb, *dest_cr;
2142
2143         dest_y  = s->dest[0];
2144         dest_cb = s->dest[1];
2145         dest_cr = s->dest[2];
2146
2147         if ((!s->no_rounding) || s->pict_type == AV_PICTURE_TYPE_B) {
2148             op_pix  = s->hdsp.put_pixels_tab;
2149             op_qpix = s->qdsp.put_qpel_pixels_tab;
2150         } else {
2151             op_pix  = s->hdsp.put_no_rnd_pixels_tab;
2152             op_qpix = s->qdsp.put_no_rnd_qpel_pixels_tab;
2153         }
2154
2155         if (s->mv_dir & MV_DIR_FORWARD) {
2156             ff_MPV_motion(s, dest_y, dest_cb, dest_cr, 0,
2157                           s->last_picture.f->data,
2158                           op_pix, op_qpix);
2159             op_pix  = s->hdsp.avg_pixels_tab;
2160             op_qpix = s->qdsp.avg_qpel_pixels_tab;
2161         }
2162         if (s->mv_dir & MV_DIR_BACKWARD) {
2163             ff_MPV_motion(s, dest_y, dest_cb, dest_cr, 1,
2164                           s->next_picture.f->data,
2165                           op_pix, op_qpix);
2166         }
2167
2168         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
2169             int progressive_score, interlaced_score;
2170
2171             s->interlaced_dct = 0;
2172             progressive_score = s->dsp.ildct_cmp[0](s, dest_y,
2173                                                     ptr_y,              wrap_y,
2174                                                     8) +
2175                                 s->dsp.ildct_cmp[0](s, dest_y + wrap_y * 8,
2176                                                     ptr_y + wrap_y * 8, wrap_y,
2177                                                     8) - 400;
2178
2179             if (s->avctx->ildct_cmp == FF_CMP_VSSE)
2180                 progressive_score -= 400;
2181
2182             if (progressive_score > 0) {
2183                 interlaced_score = s->dsp.ildct_cmp[0](s, dest_y,
2184                                                        ptr_y,
2185                                                        wrap_y * 2, 8) +
2186                                    s->dsp.ildct_cmp[0](s, dest_y + wrap_y,
2187                                                        ptr_y + wrap_y,
2188                                                        wrap_y * 2, 8);
2189
2190                 if (progressive_score > interlaced_score) {
2191                     s->interlaced_dct = 1;
2192
2193                     dct_offset = wrap_y;
2194                     uv_dct_offset = wrap_c;
2195                     wrap_y <<= 1;
2196                     if (s->chroma_format == CHROMA_422)
2197                         wrap_c <<= 1;
2198                 }
2199             }
2200         }
2201
2202         s->pdsp.diff_pixels(s->block[0], ptr_y, dest_y, wrap_y);
2203         s->pdsp.diff_pixels(s->block[1], ptr_y + 8, dest_y + 8, wrap_y);
2204         s->pdsp.diff_pixels(s->block[2], ptr_y + dct_offset,
2205                             dest_y + dct_offset, wrap_y);
2206         s->pdsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8,
2207                             dest_y + dct_offset + 8, wrap_y);
2208
2209         if (s->flags & CODEC_FLAG_GRAY) {
2210             skip_dct[4] = 1;
2211             skip_dct[5] = 1;
2212         } else {
2213             s->pdsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
2214             s->pdsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
2215             if (!s->chroma_y_shift) { /* 422 */
2216                 s->pdsp.diff_pixels(s->block[6], ptr_cb + uv_dct_offset,
2217                                     dest_cb + uv_dct_offset, wrap_c);
2218                 s->pdsp.diff_pixels(s->block[7], ptr_cr + uv_dct_offset,
2219                                     dest_cr + uv_dct_offset, wrap_c);
2220             }
2221         }
2222         /* pre quantization */
2223         if (s->current_picture.mc_mb_var[s->mb_stride * mb_y + mb_x] <
2224                 2 * s->qscale * s->qscale) {
2225             // FIXME optimize
2226             if (s->dsp.sad[1](NULL, ptr_y , dest_y,
2227                               wrap_y, 8) < 20 * s->qscale)
2228                 skip_dct[0] = 1;
2229             if (s->dsp.sad[1](NULL, ptr_y + 8,
2230                               dest_y + 8, wrap_y, 8) < 20 * s->qscale)
2231                 skip_dct[1] = 1;
2232             if (s->dsp.sad[1](NULL, ptr_y + dct_offset,
2233                               dest_y + dct_offset, wrap_y, 8) < 20 * s->qscale)
2234                 skip_dct[2] = 1;
2235             if (s->dsp.sad[1](NULL, ptr_y + dct_offset + 8,
2236                               dest_y + dct_offset + 8,
2237                               wrap_y, 8) < 20 * s->qscale)
2238                 skip_dct[3] = 1;
2239             if (s->dsp.sad[1](NULL, ptr_cb, dest_cb,
2240                               wrap_c, 8) < 20 * s->qscale)
2241                 skip_dct[4] = 1;
2242             if (s->dsp.sad[1](NULL, ptr_cr, dest_cr,
2243                               wrap_c, 8) < 20 * s->qscale)
2244                 skip_dct[5] = 1;
2245             if (!s->chroma_y_shift) { /* 422 */
2246                 if (s->dsp.sad[1](NULL, ptr_cb + uv_dct_offset,
2247                                   dest_cb + uv_dct_offset,
2248                                   wrap_c, 8) < 20 * s->qscale)
2249                     skip_dct[6] = 1;
2250                 if (s->dsp.sad[1](NULL, ptr_cr + uv_dct_offset,
2251                                   dest_cr + uv_dct_offset,
2252                                   wrap_c, 8) < 20 * s->qscale)
2253                     skip_dct[7] = 1;
2254             }
2255         }
2256     }
2257
2258     if (s->quantizer_noise_shaping) {
2259         if (!skip_dct[0])
2260             get_visual_weight(weight[0], ptr_y                 , wrap_y);
2261         if (!skip_dct[1])
2262             get_visual_weight(weight[1], ptr_y              + 8, wrap_y);
2263         if (!skip_dct[2])
2264             get_visual_weight(weight[2], ptr_y + dct_offset    , wrap_y);
2265         if (!skip_dct[3])
2266             get_visual_weight(weight[3], ptr_y + dct_offset + 8, wrap_y);
2267         if (!skip_dct[4])
2268             get_visual_weight(weight[4], ptr_cb                , wrap_c);
2269         if (!skip_dct[5])
2270             get_visual_weight(weight[5], ptr_cr                , wrap_c);
2271         if (!s->chroma_y_shift) { /* 422 */
2272             if (!skip_dct[6])
2273                 get_visual_weight(weight[6], ptr_cb + uv_dct_offset,
2274                                   wrap_c);
2275             if (!skip_dct[7])
2276                 get_visual_weight(weight[7], ptr_cr + uv_dct_offset,
2277                                   wrap_c);
2278         }
2279         memcpy(orig[0], s->block[0], sizeof(int16_t) * 64 * mb_block_count);
2280     }
2281
2282     /* DCT & quantize */
2283     av_assert2(s->out_format != FMT_MJPEG || s->qscale == 8);
2284     {
2285         for (i = 0; i < mb_block_count; i++) {
2286             if (!skip_dct[i]) {
2287                 int overflow;
2288                 s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow);
2289                 // FIXME we could decide to change to quantizer instead of
2290                 // clipping
2291                 // JS: I don't think that would be a good idea it could lower
2292                 //     quality instead of improve it. Just INTRADC clipping
2293                 //     deserves changes in quantizer
2294                 if (overflow)
2295                     clip_coeffs(s, s->block[i], s->block_last_index[i]);
2296             } else
2297                 s->block_last_index[i] = -1;
2298         }
2299         if (s->quantizer_noise_shaping) {
2300             for (i = 0; i < mb_block_count; i++) {
2301                 if (!skip_dct[i]) {
2302                     s->block_last_index[i] =
2303                         dct_quantize_refine(s, s->block[i], weight[i],
2304                                             orig[i], i, s->qscale);
2305                 }
2306             }
2307         }
2308
2309         if (s->luma_elim_threshold && !s->mb_intra)
2310             for (i = 0; i < 4; i++)
2311                 dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
2312         if (s->chroma_elim_threshold && !s->mb_intra)
2313             for (i = 4; i < mb_block_count; i++)
2314                 dct_single_coeff_elimination(s, i, s->chroma_elim_threshold);
2315
2316         if (s->mpv_flags & FF_MPV_FLAG_CBP_RD) {
2317             for (i = 0; i < mb_block_count; i++) {
2318                 if (s->block_last_index[i] == -1)
2319                     s->coded_score[i] = INT_MAX / 256;
2320             }
2321         }
2322     }
2323
2324     if ((s->flags & CODEC_FLAG_GRAY) && s->mb_intra) {
2325         s->block_last_index[4] =
2326         s->block_last_index[5] = 0;
2327         s->block[4][0] =
2328         s->block[5][0] = (1024 + s->c_dc_scale / 2) / s->c_dc_scale;
2329         if (!s->chroma_y_shift) { /* 422 / 444 */
2330             for (i=6; i<12; i++) {
2331                 s->block_last_index[i] = 0;
2332                 s->block[i][0] = s->block[4][0];
2333             }
2334         }
2335     }
2336
2337     // non c quantize code returns incorrect block_last_index FIXME
2338     if (s->alternate_scan && s->dct_quantize != ff_dct_quantize_c) {
2339         for (i = 0; i < mb_block_count; i++) {
2340             int j;
2341             if (s->block_last_index[i] > 0) {
2342                 for (j = 63; j > 0; j--) {
2343                     if (s->block[i][s->intra_scantable.permutated[j]])
2344                         break;
2345                 }
2346                 s->block_last_index[i] = j;
2347             }
2348         }
2349     }
2350
2351     /* huffman encode */
2352     switch(s->codec_id){ //FIXME funct ptr could be slightly faster
2353     case AV_CODEC_ID_MPEG1VIDEO:
2354     case AV_CODEC_ID_MPEG2VIDEO:
2355         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
2356             ff_mpeg1_encode_mb(s, s->block, motion_x, motion_y);
2357         break;
2358     case AV_CODEC_ID_MPEG4:
2359         if (CONFIG_MPEG4_ENCODER)
2360             ff_mpeg4_encode_mb(s, s->block, motion_x, motion_y);
2361         break;
2362     case AV_CODEC_ID_MSMPEG4V2:
2363     case AV_CODEC_ID_MSMPEG4V3:
2364     case AV_CODEC_ID_WMV1:
2365         if (CONFIG_MSMPEG4_ENCODER)
2366             ff_msmpeg4_encode_mb(s, s->block, motion_x, motion_y);
2367         break;
2368     case AV_CODEC_ID_WMV2:
2369         if (CONFIG_WMV2_ENCODER)
2370             ff_wmv2_encode_mb(s, s->block, motion_x, motion_y);
2371         break;
2372     case AV_CODEC_ID_H261:
2373         if (CONFIG_H261_ENCODER)
2374             ff_h261_encode_mb(s, s->block, motion_x, motion_y);
2375         break;
2376     case AV_CODEC_ID_H263:
2377     case AV_CODEC_ID_H263P:
2378     case AV_CODEC_ID_FLV1:
2379     case AV_CODEC_ID_RV10:
2380     case AV_CODEC_ID_RV20:
2381         if (CONFIG_H263_ENCODER)
2382             ff_h263_encode_mb(s, s->block, motion_x, motion_y);
2383         break;
2384     case AV_CODEC_ID_MJPEG:
2385     case AV_CODEC_ID_AMV:
2386         if (CONFIG_MJPEG_ENCODER)
2387             ff_mjpeg_encode_mb(s, s->block);
2388         break;
2389     default:
2390         av_assert1(0);
2391     }
2392 }
2393
2394 static av_always_inline void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
2395 {
2396     if (s->chroma_format == CHROMA_420) encode_mb_internal(s, motion_x, motion_y,  8, 8, 6);
2397     else if (s->chroma_format == CHROMA_422) encode_mb_internal(s, motion_x, motion_y, 16, 8, 8);
2398     else encode_mb_internal(s, motion_x, motion_y, 16, 16, 12);
2399 }
2400
2401 static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int type){
2402     int i;
2403
2404     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2405
2406     /* mpeg1 */
2407     d->mb_skip_run= s->mb_skip_run;
2408     for(i=0; i<3; i++)
2409         d->last_dc[i] = s->last_dc[i];
2410
2411     /* statistics */
2412     d->mv_bits= s->mv_bits;
2413     d->i_tex_bits= s->i_tex_bits;
2414     d->p_tex_bits= s->p_tex_bits;
2415     d->i_count= s->i_count;
2416     d->f_count= s->f_count;
2417     d->b_count= s->b_count;
2418     d->skip_count= s->skip_count;
2419     d->misc_bits= s->misc_bits;
2420     d->last_bits= 0;
2421
2422     d->mb_skipped= 0;
2423     d->qscale= s->qscale;
2424     d->dquant= s->dquant;
2425
2426     d->esc3_level_length= s->esc3_level_length;
2427 }
2428
2429 static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){
2430     int i;
2431
2432     memcpy(d->mv, s->mv, 2*4*2*sizeof(int));
2433     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2434
2435     /* mpeg1 */
2436     d->mb_skip_run= s->mb_skip_run;
2437     for(i=0; i<3; i++)
2438         d->last_dc[i] = s->last_dc[i];
2439
2440     /* statistics */
2441     d->mv_bits= s->mv_bits;
2442     d->i_tex_bits= s->i_tex_bits;
2443     d->p_tex_bits= s->p_tex_bits;
2444     d->i_count= s->i_count;
2445     d->f_count= s->f_count;
2446     d->b_count= s->b_count;
2447     d->skip_count= s->skip_count;
2448     d->misc_bits= s->misc_bits;
2449
2450     d->mb_intra= s->mb_intra;
2451     d->mb_skipped= s->mb_skipped;
2452     d->mv_type= s->mv_type;
2453     d->mv_dir= s->mv_dir;
2454     d->pb= s->pb;
2455     if(s->data_partitioning){
2456         d->pb2= s->pb2;
2457         d->tex_pb= s->tex_pb;
2458     }
2459     d->block= s->block;
2460     for(i=0; i<8; i++)
2461         d->block_last_index[i]= s->block_last_index[i];
2462     d->interlaced_dct= s->interlaced_dct;
2463     d->qscale= s->qscale;
2464
2465     d->esc3_level_length= s->esc3_level_length;
2466 }
2467
2468 static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type,
2469                            PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2],
2470                            int *dmin, int *next_block, int motion_x, int motion_y)
2471 {
2472     int score;
2473     uint8_t *dest_backup[3];
2474
2475     copy_context_before_encode(s, backup, type);
2476
2477     s->block= s->blocks[*next_block];
2478     s->pb= pb[*next_block];
2479     if(s->data_partitioning){
2480         s->pb2   = pb2   [*next_block];
2481         s->tex_pb= tex_pb[*next_block];
2482     }
2483
2484     if(*next_block){
2485         memcpy(dest_backup, s->dest, sizeof(s->dest));
2486         s->dest[0] = s->rd_scratchpad;
2487         s->dest[1] = s->rd_scratchpad + 16*s->linesize;
2488         s->dest[2] = s->rd_scratchpad + 16*s->linesize + 8;
2489         av_assert0(s->linesize >= 32); //FIXME
2490     }
2491
2492     encode_mb(s, motion_x, motion_y);
2493
2494     score= put_bits_count(&s->pb);
2495     if(s->data_partitioning){
2496         score+= put_bits_count(&s->pb2);
2497         score+= put_bits_count(&s->tex_pb);
2498     }
2499
2500     if(s->avctx->mb_decision == FF_MB_DECISION_RD){
2501         ff_MPV_decode_mb(s, s->block);
2502
2503         score *= s->lambda2;
2504         score += sse_mb(s) << FF_LAMBDA_SHIFT;
2505     }
2506
2507     if(*next_block){
2508         memcpy(s->dest, dest_backup, sizeof(s->dest));
2509     }
2510
2511     if(score<*dmin){
2512         *dmin= score;
2513         *next_block^=1;
2514
2515         copy_context_after_encode(best, s, type);
2516     }
2517 }
2518
2519 static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride){
2520     uint32_t *sq = ff_square_tab + 256;
2521     int acc=0;
2522     int x,y;
2523
2524     if(w==16 && h==16)
2525         return s->dsp.sse[0](NULL, src1, src2, stride, 16);
2526     else if(w==8 && h==8)
2527         return s->dsp.sse[1](NULL, src1, src2, stride, 8);
2528
2529     for(y=0; y<h; y++){
2530         for(x=0; x<w; x++){
2531             acc+= sq[src1[x + y*stride] - src2[x + y*stride]];
2532         }
2533     }
2534
2535     av_assert2(acc>=0);
2536
2537     return acc;
2538 }
2539
2540 static int sse_mb(MpegEncContext *s){
2541     int w= 16;
2542     int h= 16;
2543
2544     if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
2545     if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
2546
2547     if(w==16 && h==16)
2548       if(s->avctx->mb_cmp == FF_CMP_NSSE){
2549         return  s->dsp.nsse[0](s, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
2550                +s->dsp.nsse[1](s, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
2551                +s->dsp.nsse[1](s, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
2552       }else{
2553         return  s->dsp.sse[0](NULL, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
2554                +s->dsp.sse[1](NULL, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
2555                +s->dsp.sse[1](NULL, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
2556       }
2557     else
2558         return  sse(s, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], w, h, s->linesize)
2559                +sse(s, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize)
2560                +sse(s, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize);
2561 }
2562
2563 static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){
2564     MpegEncContext *s= *(void**)arg;
2565
2566
2567     s->me.pre_pass=1;
2568     s->me.dia_size= s->avctx->pre_dia_size;
2569     s->first_slice_line=1;
2570     for(s->mb_y= s->end_mb_y-1; s->mb_y >= s->start_mb_y; s->mb_y--) {
2571         for(s->mb_x=s->mb_width-1; s->mb_x >=0 ;s->mb_x--) {
2572             ff_pre_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2573         }
2574         s->first_slice_line=0;
2575     }
2576
2577     s->me.pre_pass=0;
2578
2579     return 0;
2580 }
2581
2582 static int estimate_motion_thread(AVCodecContext *c, void *arg){
2583     MpegEncContext *s= *(void**)arg;
2584
2585     ff_check_alignment();
2586
2587     s->me.dia_size= s->avctx->dia_size;
2588     s->first_slice_line=1;
2589     for(s->mb_y= s->start_mb_y; s->mb_y < s->end_mb_y; s->mb_y++) {
2590         s->mb_x=0; //for block init below
2591         ff_init_block_index(s);
2592         for(s->mb_x=0; s->mb_x < s->mb_width; s->mb_x++) {
2593             s->block_index[0]+=2;
2594             s->block_index[1]+=2;
2595             s->block_index[2]+=2;
2596             s->block_index[3]+=2;
2597
2598             /* compute motion vector & mb_type and store in context */
2599             if(s->pict_type==AV_PICTURE_TYPE_B)
2600                 ff_estimate_b_frame_motion(s, s->mb_x, s->mb_y);
2601             else
2602                 ff_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2603         }
2604         s->first_slice_line=0;
2605     }
2606     return 0;
2607 }
2608
2609 static int mb_var_thread(AVCodecContext *c, void *arg){
2610     MpegEncContext *s= *(void**)arg;
2611     int mb_x, mb_y;
2612
2613     ff_check_alignment();
2614
2615     for(mb_y=s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2616         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2617             int xx = mb_x * 16;
2618             int yy = mb_y * 16;
2619             uint8_t *pix = s->new_picture.f->data[0] + (yy * s->linesize) + xx;
2620             int varc;
2621             int sum = s->mpvencdsp.pix_sum(pix, s->linesize);
2622
2623             varc = (s->mpvencdsp.pix_norm1(pix, s->linesize) -
2624                     (((unsigned) sum * sum) >> 8) + 500 + 128) >> 8;
2625
2626             s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc;
2627             s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
2628             s->me.mb_var_sum_temp    += varc;
2629         }
2630     }
2631     return 0;
2632 }
2633
2634 static void write_slice_end(MpegEncContext *s){
2635     if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4){
2636         if(s->partitioned_frame){
2637             ff_mpeg4_merge_partitions(s);
2638         }
2639
2640         ff_mpeg4_stuffing(&s->pb);
2641     }else if(CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG){
2642         ff_mjpeg_encode_stuffing(s);
2643     }
2644
2645     avpriv_align_put_bits(&s->pb);
2646     flush_put_bits(&s->pb);
2647
2648     if((s->flags&CODEC_FLAG_PASS1) && !s->partitioned_frame)
2649         s->misc_bits+= get_bits_diff(s);
2650 }
2651
2652 static void write_mb_info(MpegEncContext *s)
2653 {
2654     uint8_t *ptr = s->mb_info_ptr + s->mb_info_size - 12;
2655     int offset = put_bits_count(&s->pb);
2656     int mba  = s->mb_x + s->mb_width * (s->mb_y % s->gob_index);
2657     int gobn = s->mb_y / s->gob_index;
2658     int pred_x, pred_y;
2659     if (CONFIG_H263_ENCODER)
2660         ff_h263_pred_motion(s, 0, 0, &pred_x, &pred_y);
2661     bytestream_put_le32(&ptr, offset);
2662     bytestream_put_byte(&ptr, s->qscale);
2663     bytestream_put_byte(&ptr, gobn);
2664     bytestream_put_le16(&ptr, mba);
2665     bytestream_put_byte(&ptr, pred_x); /* hmv1 */
2666     bytestream_put_byte(&ptr, pred_y); /* vmv1 */
2667     /* 4MV not implemented */
2668     bytestream_put_byte(&ptr, 0); /* hmv2 */
2669     bytestream_put_byte(&ptr, 0); /* vmv2 */
2670 }
2671
2672 static void update_mb_info(MpegEncContext *s, int startcode)
2673 {
2674     if (!s->mb_info)
2675         return;
2676     if (put_bits_count(&s->pb) - s->prev_mb_info*8 >= s->mb_info*8) {
2677         s->mb_info_size += 12;
2678         s->prev_mb_info = s->last_mb_info;
2679     }
2680     if (startcode) {
2681         s->prev_mb_info = put_bits_count(&s->pb)/8;
2682         /* This might have incremented mb_info_size above, and we return without
2683          * actually writing any info into that slot yet. But in that case,
2684          * this will be called again at the start of the after writing the
2685          * start code, actually writing the mb info. */
2686         return;
2687     }
2688
2689     s->last_mb_info = put_bits_count(&s->pb)/8;
2690     if (!s->mb_info_size)
2691         s->mb_info_size += 12;
2692     write_mb_info(s);
2693 }
2694
2695 static int encode_thread(AVCodecContext *c, void *arg){
2696     MpegEncContext *s= *(void**)arg;
2697     int mb_x, mb_y, pdif = 0;
2698     int chr_h= 16>>s->chroma_y_shift;
2699     int i, j;
2700     MpegEncContext best_s, backup_s;
2701     uint8_t bit_buf[2][MAX_MB_BYTES];
2702     uint8_t bit_buf2[2][MAX_MB_BYTES];
2703     uint8_t bit_buf_tex[2][MAX_MB_BYTES];
2704     PutBitContext pb[2], pb2[2], tex_pb[2];
2705
2706     ff_check_alignment();
2707
2708     for(i=0; i<2; i++){
2709         init_put_bits(&pb    [i], bit_buf    [i], MAX_MB_BYTES);
2710         init_put_bits(&pb2   [i], bit_buf2   [i], MAX_MB_BYTES);
2711         init_put_bits(&tex_pb[i], bit_buf_tex[i], MAX_MB_BYTES);
2712     }
2713
2714     s->last_bits= put_bits_count(&s->pb);
2715     s->mv_bits=0;
2716     s->misc_bits=0;
2717     s->i_tex_bits=0;
2718     s->p_tex_bits=0;
2719     s->i_count=0;
2720     s->f_count=0;
2721     s->b_count=0;
2722     s->skip_count=0;
2723
2724     for(i=0; i<3; i++){
2725         /* init last dc values */
2726         /* note: quant matrix value (8) is implied here */
2727         s->last_dc[i] = 128 << s->intra_dc_precision;
2728
2729         s->current_picture.error[i] = 0;
2730     }
2731     if(s->codec_id==AV_CODEC_ID_AMV){
2732         s->last_dc[0] = 128*8/13;
2733         s->last_dc[1] = 128*8/14;
2734         s->last_dc[2] = 128*8/14;
2735     }
2736     s->mb_skip_run = 0;
2737     memset(s->last_mv, 0, sizeof(s->last_mv));
2738
2739     s->last_mv_dir = 0;
2740
2741     switch(s->codec_id){
2742     case AV_CODEC_ID_H263:
2743     case AV_CODEC_ID_H263P:
2744     case AV_CODEC_ID_FLV1:
2745         if (CONFIG_H263_ENCODER)
2746             s->gob_index = ff_h263_get_gob_height(s);
2747         break;
2748     case AV_CODEC_ID_MPEG4:
2749         if(CONFIG_MPEG4_ENCODER && s->partitioned_frame)
2750             ff_mpeg4_init_partitions(s);
2751         break;
2752     }
2753
2754     s->resync_mb_x=0;
2755     s->resync_mb_y=0;
2756     s->first_slice_line = 1;
2757     s->ptr_lastgob = s->pb.buf;
2758     for(mb_y= s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2759         s->mb_x=0;
2760         s->mb_y= mb_y;
2761
2762         ff_set_qscale(s, s->qscale);
2763         ff_init_block_index(s);
2764
2765         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2766             int xy= mb_y*s->mb_stride + mb_x; // removed const, H261 needs to adjust this
2767             int mb_type= s->mb_type[xy];
2768 //            int d;
2769             int dmin= INT_MAX;
2770             int dir;
2771
2772             if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < MAX_MB_BYTES){
2773                 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2774                 return -1;
2775             }
2776             if(s->data_partitioning){
2777                 if(   s->pb2   .buf_end - s->pb2   .buf - (put_bits_count(&s->    pb2)>>3) < MAX_MB_BYTES
2778                    || s->tex_pb.buf_end - s->tex_pb.buf - (put_bits_count(&s->tex_pb )>>3) < MAX_MB_BYTES){
2779                     av_log(s->avctx, AV_LOG_ERROR, "encoded partitioned frame too large\n");
2780                     return -1;
2781                 }
2782             }
2783
2784             s->mb_x = mb_x;
2785             s->mb_y = mb_y;  // moved into loop, can get changed by H.261
2786             ff_update_block_index(s);
2787
2788             if(CONFIG_H261_ENCODER && s->codec_id == AV_CODEC_ID_H261){
2789                 ff_h261_reorder_mb_index(s);
2790                 xy= s->mb_y*s->mb_stride + s->mb_x;
2791                 mb_type= s->mb_type[xy];
2792             }
2793
2794             /* write gob / video packet header  */
2795             if(s->rtp_mode){
2796                 int current_packet_size, is_gob_start;
2797
2798                 current_packet_size= ((put_bits_count(&s->pb)+7)>>3) - (s->ptr_lastgob - s->pb.buf);
2799
2800                 is_gob_start= s->avctx->rtp_payload_size && current_packet_size >= s->avctx->rtp_payload_size && mb_y + mb_x>0;
2801
2802                 if(s->start_mb_y == mb_y && mb_y > 0 && mb_x==0) is_gob_start=1;
2803
2804                 switch(s->codec_id){
2805                 case AV_CODEC_ID_H263:
2806                 case AV_CODEC_ID_H263P:
2807                     if(!s->h263_slice_structured)
2808                         if(s->mb_x || s->mb_y%s->gob_index) is_gob_start=0;
2809                     break;
2810                 case AV_CODEC_ID_MPEG2VIDEO:
2811                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
2812                 case AV_CODEC_ID_MPEG1VIDEO:
2813                     if(s->mb_skip_run) is_gob_start=0;
2814                     break;
2815                 case AV_CODEC_ID_MJPEG:
2816                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
2817                     break;
2818                 }
2819
2820                 if(is_gob_start){
2821                     if(s->start_mb_y != mb_y || mb_x!=0){
2822                         write_slice_end(s);
2823
2824                         if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4 && s->partitioned_frame){
2825                             ff_mpeg4_init_partitions(s);
2826                         }
2827                     }
2828
2829                     av_assert2((put_bits_count(&s->pb)&7) == 0);
2830                     current_packet_size= put_bits_ptr(&s->pb) - s->ptr_lastgob;
2831
2832                     if (s->error_rate && s->resync_mb_x + s->resync_mb_y > 0) {
2833                         int r= put_bits_count(&s->pb)/8 + s->picture_number + 16 + s->mb_x + s->mb_y;
2834                         int d = 100 / s->error_rate;
2835                         if(r % d == 0){
2836                             current_packet_size=0;
2837                             s->pb.buf_ptr= s->ptr_lastgob;
2838                             assert(put_bits_ptr(&s->pb) == s->ptr_lastgob);
2839                         }
2840                     }
2841
2842                     if (s->avctx->rtp_callback){
2843                         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width + mb_x - s->resync_mb_x;
2844                         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, current_packet_size, number_mb);
2845                     }
2846                     update_mb_info(s, 1);
2847
2848                     switch(s->codec_id){
2849                     case AV_CODEC_ID_MPEG4:
2850                         if (CONFIG_MPEG4_ENCODER) {
2851                             ff_mpeg4_encode_video_packet_header(s);
2852                             ff_mpeg4_clean_buffers(s);
2853                         }
2854                     break;
2855                     case AV_CODEC_ID_MPEG1VIDEO:
2856                     case AV_CODEC_ID_MPEG2VIDEO:
2857                         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER) {
2858                             ff_mpeg1_encode_slice_header(s);
2859                             ff_mpeg1_clean_buffers(s);
2860                         }
2861                     break;
2862                     case AV_CODEC_ID_H263:
2863                     case AV_CODEC_ID_H263P:
2864                         if (CONFIG_H263_ENCODER)
2865                             ff_h263_encode_gob_header(s, mb_y);
2866                     break;
2867                     }
2868
2869                     if(s->flags&CODEC_FLAG_PASS1){
2870                         int bits= put_bits_count(&s->pb);
2871                         s->misc_bits+= bits - s->last_bits;
2872                         s->last_bits= bits;
2873                     }
2874
2875                     s->ptr_lastgob += current_packet_size;
2876                     s->first_slice_line=1;
2877                     s->resync_mb_x=mb_x;
2878                     s->resync_mb_y=mb_y;
2879                 }
2880             }
2881
2882             if(  (s->resync_mb_x   == s->mb_x)
2883                && s->resync_mb_y+1 == s->mb_y){
2884                 s->first_slice_line=0;
2885             }
2886
2887             s->mb_skipped=0;
2888             s->dquant=0; //only for QP_RD
2889
2890             update_mb_info(s, 0);
2891
2892             if (mb_type & (mb_type-1) || (s->mpv_flags & FF_MPV_FLAG_QP_RD)) { // more than 1 MB type possible or FF_MPV_FLAG_QP_RD
2893                 int next_block=0;
2894                 int pb_bits_count, pb2_bits_count, tex_pb_bits_count;
2895
2896                 copy_context_before_encode(&backup_s, s, -1);
2897                 backup_s.pb= s->pb;
2898                 best_s.data_partitioning= s->data_partitioning;
2899                 best_s.partitioned_frame= s->partitioned_frame;
2900                 if(s->data_partitioning){
2901                     backup_s.pb2= s->pb2;
2902                     backup_s.tex_pb= s->tex_pb;
2903                 }
2904
2905                 if(mb_type&CANDIDATE_MB_TYPE_INTER){
2906                     s->mv_dir = MV_DIR_FORWARD;
2907                     s->mv_type = MV_TYPE_16X16;
2908                     s->mb_intra= 0;
2909                     s->mv[0][0][0] = s->p_mv_table[xy][0];
2910                     s->mv[0][0][1] = s->p_mv_table[xy][1];
2911                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER, pb, pb2, tex_pb,
2912                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2913                 }
2914                 if(mb_type&CANDIDATE_MB_TYPE_INTER_I){
2915                     s->mv_dir = MV_DIR_FORWARD;
2916                     s->mv_type = MV_TYPE_FIELD;
2917                     s->mb_intra= 0;
2918                     for(i=0; i<2; i++){
2919                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
2920                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
2921                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
2922                     }
2923                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER_I, pb, pb2, tex_pb,
2924                                  &dmin, &next_block, 0, 0);
2925                 }
2926                 if(mb_type&CANDIDATE_MB_TYPE_SKIPPED){
2927                     s->mv_dir = MV_DIR_FORWARD;
2928                     s->mv_type = MV_TYPE_16X16;
2929                     s->mb_intra= 0;
2930                     s->mv[0][0][0] = 0;
2931                     s->mv[0][0][1] = 0;
2932                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_SKIPPED, pb, pb2, tex_pb,
2933                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2934                 }
2935                 if(mb_type&CANDIDATE_MB_TYPE_INTER4V){
2936                     s->mv_dir = MV_DIR_FORWARD;
2937                     s->mv_type = MV_TYPE_8X8;
2938                     s->mb_intra= 0;
2939                     for(i=0; i<4; i++){
2940                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
2941                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
2942                     }
2943                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER4V, pb, pb2, tex_pb,
2944                                  &dmin, &next_block, 0, 0);
2945                 }
2946                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD){
2947                     s->mv_dir = MV_DIR_FORWARD;
2948                     s->mv_type = MV_TYPE_16X16;
2949                     s->mb_intra= 0;
2950                     s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
2951                     s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
2952                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD, pb, pb2, tex_pb,
2953                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2954                 }
2955                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD){
2956                     s->mv_dir = MV_DIR_BACKWARD;
2957                     s->mv_type = MV_TYPE_16X16;
2958                     s->mb_intra= 0;
2959                     s->mv[1][0][0] = s->b_back_mv_table[xy][0];
2960                     s->mv[1][0][1] = s->b_back_mv_table[xy][1];
2961                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD, pb, pb2, tex_pb,
2962                                  &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]);
2963                 }
2964                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR){
2965                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2966                     s->mv_type = MV_TYPE_16X16;
2967                     s->mb_intra= 0;
2968                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
2969                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
2970                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
2971                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
2972                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR, pb, pb2, tex_pb,
2973                                  &dmin, &next_block, 0, 0);
2974                 }
2975                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD_I){
2976                     s->mv_dir = MV_DIR_FORWARD;
2977                     s->mv_type = MV_TYPE_FIELD;
2978                     s->mb_intra= 0;
2979                     for(i=0; i<2; i++){
2980                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
2981                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
2982                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
2983                     }
2984                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD_I, pb, pb2, tex_pb,
2985                                  &dmin, &next_block, 0, 0);
2986                 }
2987                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD_I){
2988                     s->mv_dir = MV_DIR_BACKWARD;
2989                     s->mv_type = MV_TYPE_FIELD;
2990                     s->mb_intra= 0;
2991                     for(i=0; i<2; i++){
2992                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
2993                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
2994                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
2995                     }
2996                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD_I, pb, pb2, tex_pb,
2997                                  &dmin, &next_block, 0, 0);
2998                 }
2999                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR_I){
3000                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3001                     s->mv_type = MV_TYPE_FIELD;
3002                     s->mb_intra= 0;
3003                     for(dir=0; dir<2; dir++){
3004                         for(i=0; i<2; i++){
3005                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
3006                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
3007                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
3008                         }
3009                     }
3010                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR_I, pb, pb2, tex_pb,
3011                                  &dmin, &next_block, 0, 0);
3012                 }
3013                 if(mb_type&CANDIDATE_MB_TYPE_INTRA){
3014                     s->mv_dir = 0;
3015                     s->mv_type = MV_TYPE_16X16;
3016                     s->mb_intra= 1;
3017                     s->mv[0][0][0] = 0;
3018                     s->mv[0][0][1] = 0;
3019                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTRA, pb, pb2, tex_pb,
3020                                  &dmin, &next_block, 0, 0);
3021                     if(s->h263_pred || s->h263_aic){
3022                         if(best_s.mb_intra)
3023                             s->mbintra_table[mb_x + mb_y*s->mb_stride]=1;
3024                         else
3025                             ff_clean_intra_table_entries(s); //old mode?
3026                     }
3027                 }
3028
3029                 if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) && dmin < INT_MAX) {
3030                     if(best_s.mv_type==MV_TYPE_16X16){ //FIXME move 4mv after QPRD
3031                         const int last_qp= backup_s.qscale;
3032                         int qpi, qp, dc[6];
3033                         int16_t ac[6][16];
3034                         const int mvdir= (best_s.mv_dir&MV_DIR_BACKWARD) ? 1 : 0;
3035                         static const int dquant_tab[4]={-1,1,-2,2};
3036                         int storecoefs = s->mb_intra && s->dc_val[0];
3037
3038                         av_assert2(backup_s.dquant == 0);
3039
3040                         //FIXME intra
3041                         s->mv_dir= best_s.mv_dir;
3042                         s->mv_type = MV_TYPE_16X16;
3043                         s->mb_intra= best_s.mb_intra;
3044                         s->mv[0][0][0] = best_s.mv[0][0][0];
3045                         s->mv[0][0][1] = best_s.mv[0][0][1];
3046                         s->mv[1][0][0] = best_s.mv[1][0][0];
3047                         s->mv[1][0][1] = best_s.mv[1][0][1];
3048
3049                         qpi = s->pict_type == AV_PICTURE_TYPE_B ? 2 : 0;
3050                         for(; qpi<4; qpi++){
3051                             int dquant= dquant_tab[qpi];
3052                             qp= last_qp + dquant;
3053                             if(qp < s->avctx->qmin || qp > s->avctx->qmax)
3054                                 continue;
3055                             backup_s.dquant= dquant;
3056                             if(storecoefs){
3057                                 for(i=0; i<6; i++){
3058                                     dc[i]= s->dc_val[0][ s->block_index[i] ];
3059                                     memcpy(ac[i], s->ac_val[0][s->block_index[i]], sizeof(int16_t)*16);
3060                                 }
3061                             }
3062
3063                             encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
3064                                          &dmin, &next_block, s->mv[mvdir][0][0], s->mv[mvdir][0][1]);
3065                             if(best_s.qscale != qp){
3066                                 if(storecoefs){
3067                                     for(i=0; i<6; i++){
3068                                         s->dc_val[0][ s->block_index[i] ]= dc[i];
3069                                         memcpy(s->ac_val[0][s->block_index[i]], ac[i], sizeof(int16_t)*16);
3070                                     }
3071                                 }
3072                             }
3073                         }
3074                     }
3075                 }
3076                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT){
3077                     int mx= s->b_direct_mv_table[xy][0];
3078                     int my= s->b_direct_mv_table[xy][1];
3079
3080                     backup_s.dquant = 0;
3081                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
3082                     s->mb_intra= 0;
3083                     ff_mpeg4_set_direct_mv(s, mx, my);
3084                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
3085                                  &dmin, &next_block, mx, my);
3086                 }
3087                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT0){
3088                     backup_s.dquant = 0;
3089                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
3090                     s->mb_intra= 0;
3091                     ff_mpeg4_set_direct_mv(s, 0, 0);
3092                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
3093                                  &dmin, &next_block, 0, 0);
3094                 }
3095                 if (!best_s.mb_intra && s->mpv_flags & FF_MPV_FLAG_SKIP_RD) {
3096                     int coded=0;
3097                     for(i=0; i<6; i++)
3098                         coded |= s->block_last_index[i];
3099                     if(coded){
3100                         int mx,my;
3101                         memcpy(s->mv, best_s.mv, sizeof(s->mv));
3102                         if(CONFIG_MPEG4_ENCODER && best_s.mv_dir & MV_DIRECT){
3103                             mx=my=0; //FIXME find the one we actually used
3104                             ff_mpeg4_set_direct_mv(s, mx, my);
3105                         }else if(best_s.mv_dir&MV_DIR_BACKWARD){
3106                             mx= s->mv[1][0][0];
3107                             my= s->mv[1][0][1];
3108                         }else{
3109                             mx= s->mv[0][0][0];
3110                             my= s->mv[0][0][1];
3111                         }
3112
3113                         s->mv_dir= best_s.mv_dir;
3114                         s->mv_type = best_s.mv_type;
3115                         s->mb_intra= 0;
3116 /*                        s->mv[0][0][0] = best_s.mv[0][0][0];
3117                         s->mv[0][0][1] = best_s.mv[0][0][1];
3118                         s->mv[1][0][0] = best_s.mv[1][0][0];
3119                         s->mv[1][0][1] = best_s.mv[1][0][1];*/
3120                         backup_s.dquant= 0;
3121                         s->skipdct=1;
3122                         encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
3123                                         &dmin, &next_block, mx, my);
3124                         s->skipdct=0;
3125                     }
3126                 }
3127
3128                 s->current_picture.qscale_table[xy] = best_s.qscale;
3129
3130                 copy_context_after_encode(s, &best_s, -1);
3131
3132                 pb_bits_count= put_bits_count(&s->pb);
3133                 flush_put_bits(&s->pb);
3134                 avpriv_copy_bits(&backup_s.pb, bit_buf[next_block^1], pb_bits_count);
3135                 s->pb= backup_s.pb;
3136
3137                 if(s->data_partitioning){
3138                     pb2_bits_count= put_bits_count(&s->pb2);
3139                     flush_put_bits(&s->pb2);
3140                     avpriv_copy_bits(&backup_s.pb2, bit_buf2[next_block^1], pb2_bits_count);
3141                     s->pb2= backup_s.pb2;
3142
3143                     tex_pb_bits_count= put_bits_count(&s->tex_pb);
3144                     flush_put_bits(&s->tex_pb);
3145                     avpriv_copy_bits(&backup_s.tex_pb, bit_buf_tex[next_block^1], tex_pb_bits_count);
3146                     s->tex_pb= backup_s.tex_pb;
3147                 }
3148                 s->last_bits= put_bits_count(&s->pb);
3149
3150                 if (CONFIG_H263_ENCODER &&
3151                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
3152                     ff_h263_update_motion_val(s);
3153
3154                 if(next_block==0){ //FIXME 16 vs linesize16
3155                     s->hdsp.put_pixels_tab[0][0](s->dest[0], s->rd_scratchpad                     , s->linesize  ,16);
3156                     s->hdsp.put_pixels_tab[1][0](s->dest[1], s->rd_scratchpad + 16*s->linesize    , s->uvlinesize, 8);
3157                     s->hdsp.put_pixels_tab[1][0](s->dest[2], s->rd_scratchpad + 16*s->linesize + 8, s->uvlinesize, 8);
3158                 }
3159
3160                 if(s->avctx->mb_decision == FF_MB_DECISION_BITS)
3161                     ff_MPV_decode_mb(s, s->block);
3162             } else {
3163                 int motion_x = 0, motion_y = 0;
3164                 s->mv_type=MV_TYPE_16X16;
3165                 // only one MB-Type possible
3166
3167                 switch(mb_type){
3168                 case CANDIDATE_MB_TYPE_INTRA:
3169                     s->mv_dir = 0;
3170                     s->mb_intra= 1;
3171                     motion_x= s->mv[0][0][0] = 0;
3172                     motion_y= s->mv[0][0][1] = 0;
3173                     break;
3174                 case CANDIDATE_MB_TYPE_INTER:
3175                     s->mv_dir = MV_DIR_FORWARD;
3176                     s->mb_intra= 0;
3177                     motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
3178                     motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
3179                     break;
3180                 case CANDIDATE_MB_TYPE_INTER_I:
3181                     s->mv_dir = MV_DIR_FORWARD;
3182                     s->mv_type = MV_TYPE_FIELD;
3183                     s->mb_intra= 0;
3184                     for(i=0; i<2; i++){
3185                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
3186                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
3187                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
3188                     }
3189                     break;
3190                 case CANDIDATE_MB_TYPE_INTER4V:
3191                     s->mv_dir = MV_DIR_FORWARD;
3192                     s->mv_type = MV_TYPE_8X8;
3193                     s->mb_intra= 0;
3194                     for(i=0; i<4; i++){
3195                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
3196                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
3197                     }
3198                     break;
3199                 case CANDIDATE_MB_TYPE_DIRECT:
3200                     if (CONFIG_MPEG4_ENCODER) {
3201                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
3202                         s->mb_intra= 0;
3203                         motion_x=s->b_direct_mv_table[xy][0];
3204                         motion_y=s->b_direct_mv_table[xy][1];
3205                         ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
3206                     }
3207                     break;
3208                 case CANDIDATE_MB_TYPE_DIRECT0:
3209                     if (CONFIG_MPEG4_ENCODER) {
3210                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
3211                         s->mb_intra= 0;
3212                         ff_mpeg4_set_direct_mv(s, 0, 0);
3213                     }
3214                     break;
3215                 case CANDIDATE_MB_TYPE_BIDIR:
3216                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3217                     s->mb_intra= 0;
3218                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
3219                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
3220                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
3221                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
3222                     break;
3223                 case CANDIDATE_MB_TYPE_BACKWARD:
3224                     s->mv_dir = MV_DIR_BACKWARD;
3225                     s->mb_intra= 0;
3226                     motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
3227                     motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
3228                     break;
3229                 case CANDIDATE_MB_TYPE_FORWARD:
3230                     s->mv_dir = MV_DIR_FORWARD;
3231                     s->mb_intra= 0;
3232                     motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
3233                     motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
3234                     break;
3235                 case CANDIDATE_MB_TYPE_FORWARD_I:
3236                     s->mv_dir = MV_DIR_FORWARD;
3237                     s->mv_type = MV_TYPE_FIELD;
3238                     s->mb_intra= 0;
3239                     for(i=0; i<2; i++){
3240                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
3241                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
3242                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
3243                     }
3244                     break;
3245                 case CANDIDATE_MB_TYPE_BACKWARD_I:
3246                     s->mv_dir = MV_DIR_BACKWARD;
3247                     s->mv_type = MV_TYPE_FIELD;
3248                     s->mb_intra= 0;
3249                     for(i=0; i<2; i++){
3250                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
3251                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
3252                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
3253                     }
3254                     break;
3255                 case CANDIDATE_MB_TYPE_BIDIR_I:
3256                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3257                     s->mv_type = MV_TYPE_FIELD;
3258                     s->mb_intra= 0;
3259                     for(dir=0; dir<2; dir++){
3260                         for(i=0; i<2; i++){
3261                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
3262                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
3263                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
3264                         }
3265                     }
3266                     break;
3267                 default:
3268                     av_log(s->avctx, AV_LOG_ERROR, "illegal MB type\n");
3269                 }
3270
3271                 encode_mb(s, motion_x, motion_y);
3272
3273                 // RAL: Update last macroblock type
3274                 s->last_mv_dir = s->mv_dir;
3275
3276                 if (CONFIG_H263_ENCODER &&
3277                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
3278                     ff_h263_update_motion_val(s);
3279
3280                 ff_MPV_decode_mb(s, s->block);
3281             }
3282
3283             /* clean the MV table in IPS frames for direct mode in B frames */
3284             if(s->mb_intra /* && I,P,S_TYPE */){
3285                 s->p_mv_table[xy][0]=0;
3286                 s->p_mv_table[xy][1]=0;
3287             }
3288
3289             if(s->flags&CODEC_FLAG_PSNR){
3290                 int w= 16;
3291                 int h= 16;
3292
3293                 if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
3294                 if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
3295
3296                 s->current_picture.error[0] += sse(
3297                     s, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
3298                     s->dest[0], w, h, s->linesize);
3299                 s->current_picture.error[1] += sse(
3300                     s, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3301                     s->dest[1], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3302                 s->current_picture.error[2] += sse(
3303                     s, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3304                     s->dest[2], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3305             }
3306             if(s->loop_filter){
3307                 if(CONFIG_H263_ENCODER && s->out_format == FMT_H263)
3308                     ff_h263_loop_filter(s);
3309             }
3310             av_dlog(s->avctx, "MB %d %d bits\n",
3311                     s->mb_x + s->mb_y * s->mb_stride, put_bits_count(&s->pb));
3312         }
3313     }
3314
3315     //not beautiful here but we must write it before flushing so it has to be here
3316     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type == AV_PICTURE_TYPE_I)
3317         ff_msmpeg4_encode_ext_header(s);
3318
3319     write_slice_end(s);
3320
3321     /* Send the last GOB if RTP */
3322     if (s->avctx->rtp_callback) {
3323         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width - s->resync_mb_x;
3324         pdif = put_bits_ptr(&s->pb) - s->ptr_lastgob;
3325         /* Call the RTP callback to send the last GOB */
3326         emms_c();
3327         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, pdif, number_mb);
3328     }
3329
3330     return 0;
3331 }
3332
/*
 * Add src->field onto dst->field, then clear src->field.
 * Relies on pointers named `dst` and `src` being in scope at the use site.
 * Wrapped in do { } while (0) so that the two statements always act as one,
 * even when the macro is the sole body of an unbraced if/else/for.
 */
#define MERGE(field)                \
    do {                            \
        dst->field += src->field;   \
        src->field  = 0;            \
    } while (0)
3334 static void merge_context_after_me(MpegEncContext *dst, MpegEncContext *src){
3335     MERGE(me.scene_change_score);
3336     MERGE(me.mc_mb_var_sum_temp);
3337     MERGE(me.mb_var_sum_temp);
3338 }
3339
3340 static void merge_context_after_encode(MpegEncContext *dst, MpegEncContext *src){
3341     int i;
3342
3343     MERGE(dct_count[0]); //note, the other dct vars are not part of the context
3344     MERGE(dct_count[1]);
3345     MERGE(mv_bits);
3346     MERGE(i_tex_bits);
3347     MERGE(p_tex_bits);
3348     MERGE(i_count);
3349     MERGE(f_count);
3350     MERGE(b_count);
3351     MERGE(skip_count);
3352     MERGE(misc_bits);
3353     MERGE(er.error_count);
3354     MERGE(padding_bug_score);
3355     MERGE(current_picture.error[0]);
3356     MERGE(current_picture.error[1]);
3357     MERGE(current_picture.error[2]);
3358
3359     if(dst->avctx->noise_reduction){
3360         for(i=0; i<64; i++){
3361             MERGE(dct_error_sum[0][i]);
3362             MERGE(dct_error_sum[1][i]);
3363         }
3364     }
3365
3366     assert(put_bits_count(&src->pb) % 8 ==0);
3367     assert(put_bits_count(&dst->pb) % 8 ==0);
3368     avpriv_copy_bits(&dst->pb, src->pb.buf, put_bits_count(&src->pb));
3369     flush_put_bits(&dst->pb);
3370 }
3371
3372 static int estimate_qp(MpegEncContext *s, int dry_run){
3373     if (s->next_lambda){
3374         s->current_picture_ptr->f->quality =
3375         s->current_picture.f->quality = s->next_lambda;
3376         if(!dry_run) s->next_lambda= 0;
3377     } else if (!s->fixed_qscale) {
3378         s->current_picture_ptr->f->quality =
3379         s->current_picture.f->quality = ff_rate_estimate_qscale(s, dry_run);
3380         if (s->current_picture.f->quality < 0)
3381             return -1;
3382     }
3383
3384     if(s->adaptive_quant){
3385         switch(s->codec_id){
3386         case AV_CODEC_ID_MPEG4:
3387             if (CONFIG_MPEG4_ENCODER)
3388                 ff_clean_mpeg4_qscales(s);
3389             break;
3390         case AV_CODEC_ID_H263:
3391         case AV_CODEC_ID_H263P:
3392         case AV_CODEC_ID_FLV1:
3393             if (CONFIG_H263_ENCODER)
3394                 ff_clean_h263_qscales(s);
3395             break;
3396         default:
3397             ff_init_qscale_tab(s);
3398         }
3399
3400         s->lambda= s->lambda_table[0];
3401         //FIXME broken
3402     }else
3403         s->lambda = s->current_picture.f->quality;
3404     update_qscale(s);
3405     return 0;
3406 }
3407
3408 /* must be called before writing the header */
3409 static void set_frame_distances(MpegEncContext * s){
3410     av_assert1(s->current_picture_ptr->f->pts != AV_NOPTS_VALUE);
3411     s->time = s->current_picture_ptr->f->pts * s->avctx->time_base.num;
3412
3413     if(s->pict_type==AV_PICTURE_TYPE_B){
3414         s->pb_time= s->pp_time - (s->last_non_b_time - s->time);
3415         assert(s->pb_time > 0 && s->pb_time < s->pp_time);
3416     }else{
3417         s->pp_time= s->time - s->last_non_b_time;
3418         s->last_non_b_time= s->time;
3419         assert(s->picture_number==0 || s->pp_time > 0);
3420     }
3421 }
3422
3423 static int encode_picture(MpegEncContext *s, int picture_number)
3424 {
3425     int i, ret;
3426     int bits;
3427     int context_count = s->slice_context_count;
3428
3429     s->picture_number = picture_number;
3430
3431     /* Reset the average MB variance */
3432     s->me.mb_var_sum_temp    =
3433     s->me.mc_mb_var_sum_temp = 0;
3434
3435     /* we need to initialize some time vars before we can encode b-frames */
3436     // RAL: Condition added for MPEG1VIDEO
3437     if (s->codec_id == AV_CODEC_ID_MPEG1VIDEO || s->codec_id == AV_CODEC_ID_MPEG2VIDEO || (s->h263_pred && !s->msmpeg4_version))
3438         set_frame_distances(s);
3439     if(CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4)
3440         ff_set_mpeg4_time(s);
3441
3442     s->me.scene_change_score=0;
3443
3444 //    s->lambda= s->current_picture_ptr->quality; //FIXME qscale / ... stuff for ME rate distortion
3445
3446     if(s->pict_type==AV_PICTURE_TYPE_I){
3447         if(s->msmpeg4_version >= 3) s->no_rounding=1;
3448         else                        s->no_rounding=0;
3449     }else if(s->pict_type!=AV_PICTURE_TYPE_B){
3450         if(s->flipflop_rounding || s->codec_id == AV_CODEC_ID_H263P || s->codec_id == AV_CODEC_ID_MPEG4)
3451             s->no_rounding ^= 1;
3452     }
3453
3454     if(s->flags & CODEC_FLAG_PASS2){
3455         if (estimate_qp(s,1) < 0)
3456             return -1;
3457         ff_get_2pass_fcode(s);
3458     }else if(!(s->flags & CODEC_FLAG_QSCALE)){
3459         if(s->pict_type==AV_PICTURE_TYPE_B)
3460             s->lambda= s->last_lambda_for[s->pict_type];
3461         else
3462             s->lambda= s->last_lambda_for[s->last_non_b_pict_type];
3463         update_qscale(s);
3464     }
3465
3466     if(s->codec_id != AV_CODEC_ID_AMV && s->codec_id != AV_CODEC_ID_MJPEG){
3467         if(s->q_chroma_intra_matrix   != s->q_intra_matrix  ) av_freep(&s->q_chroma_intra_matrix);
3468         if(s->q_chroma_intra_matrix16 != s->q_intra_matrix16) av_freep(&s->q_chroma_intra_matrix16);
3469         s->q_chroma_intra_matrix   = s->q_intra_matrix;
3470         s->q_chroma_intra_matrix16 = s->q_intra_matrix16;
3471     }
3472
3473     s->mb_intra=0; //for the rate distortion & bit compare functions
3474     for(i=1; i<context_count; i++){
3475         ret = ff_update_duplicate_context(s->thread_context[i], s);
3476         if (ret < 0)
3477             return ret;
3478     }
3479
3480     if(ff_init_me(s)<0)
3481         return -1;
3482
3483     /* Estimate motion for every MB */
3484     if(s->pict_type != AV_PICTURE_TYPE_I){
3485         s->lambda = (s->lambda * s->avctx->me_penalty_compensation + 128)>>8;
3486         s->lambda2= (s->lambda2* (int64_t)s->avctx->me_penalty_compensation + 128)>>8;
3487         if (s->pict_type != AV_PICTURE_TYPE_B) {
3488             if((s->avctx->pre_me && s->last_non_b_pict_type==AV_PICTURE_TYPE_I) || s->avctx->pre_me==2){
3489                 s->avctx->execute(s->avctx, pre_estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3490             }
3491         }
3492
3493         s->avctx->execute(s->avctx, estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3494     }else /* if(s->pict_type == AV_PICTURE_TYPE_I) */{
3495         /* I-Frame */
3496         for(i=0; i<s->mb_stride*s->mb_height; i++)
3497             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3498
3499         if(!s->fixed_qscale){
3500             /* finding spatial complexity for I-frame rate control */
3501             s->avctx->execute(s->avctx, mb_var_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3502         }
3503     }
3504     for(i=1; i<context_count; i++){
3505         merge_context_after_me(s, s->thread_context[i]);
3506     }
3507     s->current_picture.mc_mb_var_sum= s->current_picture_ptr->mc_mb_var_sum= s->me.mc_mb_var_sum_temp;
3508     s->current_picture.   mb_var_sum= s->current_picture_ptr->   mb_var_sum= s->me.   mb_var_sum_temp;
3509     emms_c();
3510
3511     if(s->me.scene_change_score > s->avctx->scenechange_threshold && s->pict_type == AV_PICTURE_TYPE_P){
3512         s->pict_type= AV_PICTURE_TYPE_I;
3513         for(i=0; i<s->mb_stride*s->mb_height; i++)
3514             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3515         if(s->msmpeg4_version >= 3)
3516             s->no_rounding=1;
3517         av_dlog(s, "Scene change detected, encoding as I Frame %"PRId64" %"PRId64"\n",
3518                 s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
3519     }
3520
3521     if(!s->umvplus){
3522         if(s->pict_type==AV_PICTURE_TYPE_P || s->pict_type==AV_PICTURE_TYPE_S) {
3523             s->f_code= ff_get_best_fcode(s, s->p_mv_table, CANDIDATE_MB_TYPE_INTER);
3524
3525             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3526                 int a,b;
3527                 a= ff_get_best_fcode(s, s->p_field_mv_table[0][0], CANDIDATE_MB_TYPE_INTER_I); //FIXME field_select
3528                 b= ff_get_best_fcode(s, s->p_field_mv_table[1][1], CANDIDATE_MB_TYPE_INTER_I);
3529                 s->f_code= FFMAX3(s->f_code, a, b);
3530             }
3531
3532             ff_fix_long_p_mvs(s);
3533             ff_fix_long_mvs(s, NULL, 0, s->p_mv_table, s->f_code, CANDIDATE_MB_TYPE_INTER, 0);
3534             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3535                 int j;
3536                 for(i=0; i<2; i++){
3537                     for(j=0; j<2; j++)
3538                         ff_fix_long_mvs(s, s->p_field_select_table[i], j,
3539                                         s->p_field_mv_table[i][j], s->f_code, CANDIDATE_MB_TYPE_INTER_I, 0);
3540                 }
3541             }
3542         }
3543
3544         if(s->pict_type==AV_PICTURE_TYPE_B){
3545             int a, b;
3546
3547             a = ff_get_best_fcode(s, s->b_forw_mv_table, CANDIDATE_MB_TYPE_FORWARD);
3548             b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3549             s->f_code = FFMAX(a, b);
3550
3551             a = ff_get_best_fcode(s, s->b_back_mv_table, CANDIDATE_MB_TYPE_BACKWARD);
3552             b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3553             s->b_code = FFMAX(a, b);
3554
3555             ff_fix_long_mvs(s, NULL, 0, s->b_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_FORWARD, 1);
3556             ff_fix_long_mvs(s, NULL, 0, s->b_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BACKWARD, 1);
3557             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3558             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3559             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3560                 int dir, j;
3561                 for(dir=0; dir<2; dir++){
3562                     for(i=0; i<2; i++){
3563                         for(j=0; j<2; j++){
3564                             int type= dir ? (CANDIDATE_MB_TYPE_BACKWARD_I|CANDIDATE_MB_TYPE_BIDIR_I)
3565                                           : (CANDIDATE_MB_TYPE_FORWARD_I |CANDIDATE_MB_TYPE_BIDIR_I);
3566                             ff_fix_long_mvs(s, s->b_field_select_table[dir][i], j,
3567                                             s->b_field_mv_table[dir][i][j], dir ? s->b_code : s->f_code, type, 1);
3568                         }
3569                     }
3570                 }
3571             }
3572         }
3573     }
3574
3575     if (estimate_qp(s, 0) < 0)
3576         return -1;
3577
3578     if(s->qscale < 3 && s->max_qcoeff<=128 && s->pict_type==AV_PICTURE_TYPE_I && !(s->flags & CODEC_FLAG_QSCALE))
3579         s->qscale= 3; //reduce clipping problems
3580
3581     if (s->out_format == FMT_MJPEG) {
3582         const uint16_t *  luma_matrix = ff_mpeg1_default_intra_matrix;
3583         const uint16_t *chroma_matrix = ff_mpeg1_default_intra_matrix;
3584
3585         if (s->avctx->intra_matrix) {
3586             chroma_matrix =
3587             luma_matrix = s->avctx->intra_matrix;
3588         }
3589         if (s->avctx->chroma_intra_matrix)
3590             chroma_matrix = s->avctx->chroma_intra_matrix;
3591
3592         /* for mjpeg, we do include qscale in the matrix */
3593         for(i=1;i<64;i++){
3594             int j = s->idsp.idct_permutation[i];
3595
3596             s->chroma_intra_matrix[j] = av_clip_uint8((chroma_matrix[i] * s->qscale) >> 3);
3597             s->       intra_matrix[j] = av_clip_uint8((  luma_matrix[i] * s->qscale) >> 3);
3598         }
3599         s->y_dc_scale_table=
3600         s->c_dc_scale_table= ff_mpeg2_dc_scale_table[s->intra_dc_precision];
3601         s->chroma_intra_matrix[0] =
3602         s->intra_matrix[0] = ff_mpeg2_dc_scale_table[s->intra_dc_precision][8];
3603         ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
3604                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
3605         ff_convert_matrix(s, s->q_chroma_intra_matrix, s->q_chroma_intra_matrix16,
3606                        s->chroma_intra_matrix, s->intra_quant_bias, 8, 8, 1);
3607         s->qscale= 8;
3608     }
3609     if(s->codec_id == AV_CODEC_ID_AMV){
3610         static const uint8_t y[32]={13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13};
3611         static const uint8_t c[32]={14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14};
3612         for(i=1;i<64;i++){
3613             int j= s->idsp.idct_permutation[ff_zigzag_direct[i]];
3614
3615             s->intra_matrix[j] = sp5x_quant_table[5*2+0][i];
3616             s->chroma_intra_matrix[j] = sp5x_quant_table[5*2+1][i];
3617         }
3618         s->y_dc_scale_table= y;
3619         s->c_dc_scale_table= c;
3620         s->intra_matrix[0] = 13;
3621         s->chroma_intra_matrix[0] = 14;
3622         ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
3623                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
3624         ff_convert_matrix(s, s->q_chroma_intra_matrix, s->q_chroma_intra_matrix16,
3625                        s->chroma_intra_matrix, s->intra_quant_bias, 8, 8, 1);
3626         s->qscale= 8;
3627     }
3628
3629     //FIXME var duplication
3630     s->current_picture_ptr->f->key_frame =
3631     s->current_picture.f->key_frame = s->pict_type == AV_PICTURE_TYPE_I; //FIXME pic_ptr
3632     s->current_picture_ptr->f->pict_type =
3633     s->current_picture.f->pict_type = s->pict_type;
3634
3635     if (s->current_picture.f->key_frame)
3636         s->picture_in_gop_number=0;
3637
3638     s->mb_x = s->mb_y = 0;
3639     s->last_bits= put_bits_count(&s->pb);
3640     switch(s->out_format) {
3641     case FMT_MJPEG:
3642         if (CONFIG_MJPEG_ENCODER)
3643             ff_mjpeg_encode_picture_header(s->avctx, &s->pb, &s->intra_scantable,
3644                                            s->intra_matrix, s->chroma_intra_matrix);
3645         break;
3646     case FMT_H261:
3647         if (CONFIG_H261_ENCODER)
3648             ff_h261_encode_picture_header(s, picture_number);
3649         break;
3650     case FMT_H263:
3651         if (CONFIG_WMV2_ENCODER && s->codec_id == AV_CODEC_ID_WMV2)
3652             ff_wmv2_encode_picture_header(s, picture_number);
3653         else if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
3654             ff_msmpeg4_encode_picture_header(s, picture_number);
3655         else if (CONFIG_MPEG4_ENCODER && s->h263_pred)
3656             ff_mpeg4_encode_picture_header(s, picture_number);
3657         else if (CONFIG_RV10_ENCODER && s->codec_id == AV_CODEC_ID_RV10)
3658             ff_rv10_encode_picture_header(s, picture_number);
3659         else if (CONFIG_RV20_ENCODER && s->codec_id == AV_CODEC_ID_RV20)
3660             ff_rv20_encode_picture_header(s, picture_number);
3661         else if (CONFIG_FLV_ENCODER && s->codec_id == AV_CODEC_ID_FLV1)
3662             ff_flv_encode_picture_header(s, picture_number);
3663         else if (CONFIG_H263_ENCODER)
3664             ff_h263_encode_picture_header(s, picture_number);
3665         break;
3666     case FMT_MPEG1:
3667         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
3668             ff_mpeg1_encode_picture_header(s, picture_number);
3669         break;
3670     default:
3671         av_assert0(0);
3672     }
3673     bits= put_bits_count(&s->pb);
3674     s->header_bits= bits - s->last_bits;
3675
3676     for(i=1; i<context_count; i++){
3677         update_duplicate_context_after_me(s->thread_context[i], s);
3678     }
3679     s->avctx->execute(s->avctx, encode_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3680     for(i=1; i<context_count; i++){
3681         merge_context_after_encode(s, s->thread_context[i]);
3682     }
3683     emms_c();
3684     return 0;
3685 }
3686
3687 static void denoise_dct_c(MpegEncContext *s, int16_t *block){
3688     const int intra= s->mb_intra;
3689     int i;
3690
3691     s->dct_count[intra]++;
3692
3693     for(i=0; i<64; i++){
3694         int level= block[i];
3695
3696         if(level){
3697             if(level>0){
3698                 s->dct_error_sum[intra][i] += level;
3699                 level -= s->dct_offset[intra][i];
3700                 if(level<0) level=0;
3701             }else{
3702                 s->dct_error_sum[intra][i] -= level;
3703                 level += s->dct_offset[intra][i];
3704                 if(level>0) level=0;
3705             }
3706             block[i]= level;
3707         }
3708     }
3709 }
3710
/**
 * Rate-distortion optimised ("trellis") quantisation of one block.
 *
 * Runs s->fdsp.fdct() on block (followed by s->denoise_dct() when
 * s->dct_error_sum is set), derives up to two candidate quantised levels per
 * coefficient and then picks the run/level sequence with the lowest
 * "distortion + lambda * code length" score, using the intra or inter AC VLC
 * length tables of s.
 *
 * @param s        encoder context; s->coded_score[n] is written
 * @param block    coefficient block, transformed and quantised in place; the
 *                 chosen levels are stored at s->intra_scantable.permutated
 *                 positions
 * @param n        block index; n < 4 selects s->y_dc_scale and
 *                 s->q_intra_matrix, otherwise the chroma counterparts
 * @param qscale   quantiser scale, also used to index the q matrices
 * @param overflow set to non-zero when the OR of the candidate magnitudes
 *                 exceeds s->max_qcoeff
 * @return scan index of the last coefficient that was kept; start_i - 1
 *         (0 for intra, -1 for inter) when no coefficient survives, and -1
 *         when the single-coefficient special case decides to code nothing
 */
3711 static int dct_quantize_trellis_c(MpegEncContext *s,
3712                                   int16_t *block, int n,
3713                                   int qscale, int *overflow){
3714     const int *qmat;
3715     const uint8_t *scantable= s->intra_scantable.scantable;
3716     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3717     int max=0;
3718     unsigned int threshold1, threshold2;
3719     int bias=0;
         /* run_tab[i+1] / level_tab[i+1]: best run and level found for a
          * non-zero coefficient at scan position i; score_tab[i+1]: its score */
3720     int run_tab[65];
3721     int level_tab[65];
3722     int score_tab[65];
         /* scan positions that are still worth considering as predecessors */
3723     int survivor[65];
3724     int survivor_count;
3725     int last_run=0;
3726     int last_level=0;
3727     int last_score= 0;
3728     int last_i;
         /* coeff[k][i]: k-th candidate level at scan position i,
          * coeff_count[i]: number of valid candidates (1 or 2) */
3729     int coeff[2][64];
3730     int coeff_count[64];
3731     int qmul, qadd, start_i, last_non_zero, i, dc;
3732     const int esc_length= s->ac_esc_length;
3733     uint8_t * length;
3734     uint8_t * last_length;
3735     const int lambda= s->lambda2 >> (FF_LAMBDA_SHIFT - 6);
3736
3737     s->fdsp.fdct(block);
3738
3739     if(s->dct_error_sum)
3740         s->denoise_dct(s, block);
         /* dequantisation constants used by the H.263/H.261 branch below */
3741     qmul= qscale*16;
3742     qadd= ((qscale-1)|1)*8;
3743
3744     if (s->mb_intra) {
3745         int q;
3746         if (!s->h263_aic) {
3747             if (n < 4)
3748                 q = s->y_dc_scale;
3749             else
3750                 q = s->c_dc_scale;
3751             q = q << 3;
3752         } else{
3753             /* For AIC we skip quant/dequant of INTRADC */
3754             q = 1 << 3;
3755             qadd=0;
3756         }
3757
3758         /* note: block[0] is assumed to be positive */
3759         block[0] = (block[0] + (q >> 1)) / q;
             /* the DC coefficient is done; the search covers positions 1..63 */
3760         start_i = 1;
3761         last_non_zero = 0;
3762         qmat = n < 4 ? s->q_intra_matrix[qscale] : s->q_chroma_intra_matrix[qscale];
3763         if(s->mpeg_quant || s->out_format == FMT_MPEG1)
3764             bias= 1<<(QMAT_SHIFT-1);
3765         length     = s->intra_ac_vlc_length;
3766         last_length= s->intra_ac_vlc_last_length;
3767     } else {
3768         start_i = 0;
3769         last_non_zero = -1;
3770         qmat = s->q_inter_matrix[qscale];
3771         length     = s->inter_ac_vlc_length;
3772         last_length= s->inter_ac_vlc_last_length;
3773     }
3774     last_i= start_i;
3775
         /* (unsigned)(level + threshold1) > threshold2 is a single-compare
          * test for level lying outside [-threshold1, threshold1] */
3776     threshold1= (1<<QMAT_SHIFT) - bias - 1;
3777     threshold2= (threshold1<<1);
3778
         /* find the last scan position whose scaled value passes the threshold */
3779     for(i=63; i>=start_i; i--) {
3780         const int j = scantable[i];
3781         int level = block[j] * qmat[j];
3782
3783         if(((unsigned)(level+threshold1))>threshold2){
3784             last_non_zero = i;
3785             break;
3786         }
3787     }
3788
         /* build the candidate levels for every position up to last_non_zero */
3789     for(i=start_i; i<=last_non_zero; i++) {
3790         const int j = scantable[i];
3791         int level = block[j] * qmat[j];
3792
3793 //        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
3794 //           || bias-level >= (1<<(QMAT_SHIFT - 3))){
3795         if(((unsigned)(level+threshold1))>threshold2){
                 /* candidate 0: the biased, shifted level; candidate 1: one
                  * step closer to zero (only valid when the magnitude is >= 2) */
3796             if(level>0){
3797                 level= (bias + level)>>QMAT_SHIFT;
3798                 coeff[0][i]= level;
3799                 coeff[1][i]= level-1;
3800 //                coeff[2][k]= level-2;
3801             }else{
3802                 level= (bias - level)>>QMAT_SHIFT;
3803                 coeff[0][i]= -level;
3804                 coeff[1][i]= -level+1;
3805 //                coeff[2][k]= -level+2;
3806             }
3807             coeff_count[i]= FFMIN(level, 2);
3808             av_assert2(coeff_count[i]);
3809             max |=level;
3810         }else{
                 /* below the threshold: the only candidate is +1 or -1,
                  * taking the sign of the scaled value */
3811             coeff[0][i]= (level>>31)|1;
3812             coeff_count[i]= 1;
3813         }
3814     }
3815
3816     *overflow= s->max_qcoeff < max; //overflow might have happened
3817
         /* nothing passed the threshold: clear the searched range and stop */
3818     if(last_non_zero < start_i){
3819         memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
3820         return last_non_zero;
3821     }
3822
3823     score_tab[start_i]= 0;
3824     survivor[0]= start_i;
3825     survivor_count= 1;
3826
         /* forward pass: for each position pick the best (run, level) pair
          * given the surviving predecessors */
3827     for(i=start_i; i<=last_non_zero; i++){
3828         int level_index, j, zero_distortion;
3829         int dct_coeff= FFABS(block[ scantable[i] ]);
3830         int best_score=256*256*256*120;
3831
             /* ff_fdct_ifast output is rescaled with ff_inv_aanscales before
              * it is compared against dequantised values */
3832         if (s->fdsp.fdct == ff_fdct_ifast)
3833             dct_coeff= (dct_coeff*ff_inv_aanscales[ scantable[i] ]) >> 12;
3834         zero_distortion= dct_coeff*dct_coeff;
3835
3836         for(level_index=0; level_index < coeff_count[i]; level_index++){
3837             int distortion;
3838             int level= coeff[level_index][i];
3839             const int alevel= FFABS(level);
3840             int unquant_coeff;
3841
3842             av_assert2(level);
3843
                 /* dequantise the candidate magnitude with the formula that
                  * matches the output format */
3844             if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
3845                 unquant_coeff= alevel*qmul + qadd;
3846             }else{ //MPEG1
3847                 j = s->idsp.idct_permutation[scantable[i]]; // FIXME: optimize
3848                 if(s->mb_intra){
3849                         unquant_coeff = (int)(  alevel  * qscale * s->intra_matrix[j]) >> 3;
3850                         unquant_coeff =   (unquant_coeff - 1) | 1;
3851                 }else{
3852                         unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[j])) >> 4;
3853                         unquant_coeff =   (unquant_coeff - 1) | 1;
3854                 }
3855                 unquant_coeff<<= 3;
3856             }
3857
                 /* squared error of this candidate relative to coding a zero */
3858             distortion= (unquant_coeff - dct_coeff) * (unquant_coeff - dct_coeff) - zero_distortion;
                 /* level + 64 inside 0..127 uses the length tables, anything
                  * else is charged esc_length */
3859             level+=64;
3860             if((level&(~127)) == 0){
3861                 for(j=survivor_count-1; j>=0; j--){
3862                     int run= i - survivor[j];
3863                     int score= distortion + length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3864                     score += score_tab[i-run];
3865
3866                     if(score < best_score){
3867                         best_score= score;
3868                         run_tab[i+1]= run;
3869                         level_tab[i+1]= level-64;
3870                     }
3871                 }
3872
                     /* H.263/H.261: additionally score this coefficient as the
                      * final one, using the separate last_length table */
3873                 if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
3874                     for(j=survivor_count-1; j>=0; j--){
3875                         int run= i - survivor[j];
3876                         int score= distortion + last_length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3877                         score += score_tab[i-run];
3878                         if(score < last_score){
3879                             last_score= score;
3880                             last_run= run;
3881                             last_level= level-64;
3882                             last_i= i+1;
3883                         }
3884                     }
3885                 }
3886             }else{
3887                 distortion += esc_length*lambda;
3888                 for(j=survivor_count-1; j>=0; j--){
3889                     int run= i - survivor[j];
3890                     int score= distortion + score_tab[i-run];
3891
3892                     if(score < best_score){
3893                         best_score= score;
3894                         run_tab[i+1]= run;
3895                         level_tab[i+1]= level-64;
3896                     }
3897                 }
3898
3899                 if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
3900                   for(j=survivor_count-1; j>=0; j--){
3901                         int run= i - survivor[j];
3902                         int score= distortion + score_tab[i-run];
3903                         if(score < last_score){
3904                             last_score= score;
3905                             last_run= run;
3906                             last_level= level-64;
3907                             last_i= i+1;
3908                         }
3909                     }
3910                 }
3911             }
3912         }
3913
3914         score_tab[i+1]= best_score;
3915
             /* drop survivors that score worse than the new position; for
              * longer blocks a tolerance of lambda is allowed */
3916         //Note: there is a vlc code in mpeg4 which is 1 bit shorter than another one with a shorter run and the same level
3917         if(last_non_zero <= 27){
3918             for(; survivor_count; survivor_count--){
3919                 if(score_tab[ survivor[survivor_count-1] ] <= best_score)
3920                     break;
3921             }
3922         }else{
3923             for(; survivor_count; survivor_count--){
3924                 if(score_tab[ survivor[survivor_count-1] ] <= best_score + lambda)
3925                     break;
3926             }
3927         }
3928
3929         survivor[ survivor_count++ ]= i+1;
3930     }
3931
         /* other formats: choose the end position afterwards, adding
          * 2*lambda for every end position other than 0 */
3932     if(s->out_format != FMT_H263 && s->out_format != FMT_H261){
3933         last_score= 256*256*256*120;
3934         for(i= survivor[0]; i<=last_non_zero + 1; i++){
3935             int score= score_tab[i];
3936             if(i) score += lambda*2; //FIXME exacter?
3937
3938             if(score < last_score){
3939                 last_score= score;
3940                 last_i= i;
3941                 last_level= level_tab[i];
3942                 last_run= run_tab[i];
3943             }
3944         }
3945     }
3946
3947     s->coded_score[n] = last_score;
3948
         /* remember |block[0]| before the searched range is cleared */
3949     dc= FFABS(block[0]);
3950     last_non_zero= last_i - 1;
3951     memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
3952
3953     if(last_non_zero < start_i)
3954         return last_non_zero;
3955
         /* special case: only position 0 is left and it was part of the
          * search (start_i == 0); re-evaluate it against dc directly */
3956     if(last_non_zero == 0 && start_i == 0){
3957         int best_level= 0;
3958         int best_score= dc * dc;
3959
3960         for(i=0; i<coeff_count[0]; i++){
3961             int level= coeff[i][0];
3962             int alevel= FFABS(level);
3963             int unquant_coeff, score, distortion;
3964
3965             if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
3966                     unquant_coeff= (alevel*qmul + qadd)>>3;
3967             }else{ //MPEG1
3968                     unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[0])) >> 4;
3969                     unquant_coeff =   (unquant_coeff - 1) | 1;
3970             }
3971             unquant_coeff = (unquant_coeff + 4) >> 3;
3972             unquant_coeff<<= 3 + 3;
3973
3974             distortion= (unquant_coeff - dc) * (unquant_coeff - dc);
3975             level+=64;
3976             if((level&(~127)) == 0) score= distortion + last_length[UNI_AC_ENC_INDEX(0, level)]*lambda;
3977             else                    score= distortion + esc_length*lambda;
3978
3979             if(score < best_score){
3980                 best_score= score;
3981                 best_level= level - 64;
3982             }
3983         }
3984         block[0]= best_level;
3985         s->coded_score[n] = best_score - dc*dc;
3986         if(best_level == 0) return -1;
3987         else                return last_non_zero;
3988     }
3989
         /* backtrack: store the final coefficient, then follow run_tab /
          * level_tab towards start_i, writing at permutated positions */
3990     i= last_i;
3991     av_assert2(last_level);
3992
3993     block[ perm_scantable[last_non_zero] ]= last_level;
3994     i -= last_run + 1;
3995
3996     for(; i>start_i; i -= run_tab[i] + 1){
3997         block[ perm_scantable[i-1] ]= level_tab[i];
3998     }
3999
4000     return last_non_zero;
4001 }
4002
4003 //#define REFINE_STATS 1
4004 static int16_t basis[64][64];
4005
4006 static void build_basis(uint8_t *perm){
4007     int i, j, x, y;
4008     emms_c();
4009     for(i=0; i<8; i++){
4010         for(j=0; j<8; j++){
4011             for(y=0; y<8; y++){
4012                 for(x=0; x<8; x++){
4013                     double s= 0.25*(1<<BASIS_SHIFT);
4014                     int index= 8*i + j;
4015                     int perm_index= perm[index];
4016                     if(i==0) s*= sqrt(0.5);
4017                     if(j==0) s*= sqrt(0.5);
4018                     basis[perm_index][8*x + y]= lrintf(s * cos((M_PI/8.0)*i*(x+0.5)) * cos((M_PI/8.0)*j*(y+0.5)));
4019                 }
4020             }
4021         }
4022     }
4023 }
4024
/**
 * Iteratively refine an already quantised block.
 *
 * Keeps a residual rem[] that is built from orig[] and the dequantised
 * coefficients of block. Each iteration tries changing the intra DC and every
 * eligible AC coefficient by +1 and -1, scores each trial as
 * (change in VLC length) * lambda + s->mpvencdsp.try_8x8basis(), applies the
 * single best change that beats the "no change" score and repeats until no
 * change helps. The search effort depends on s->quantizer_noise_shaping.
 *
 * @param s      encoder context; s->block_last_index[n] gives the initial
 *               last non-zero index
 * @param block  quantised levels at s->intra_scantable.permutated positions,
 *               modified in place
 * @param weight 64 per-sample weights; overwritten with remapped values
 *               (the inline note gives their range as 16 .. 63)
 * @param orig   64 samples that are only read (used as orig[i] << RECON_SHIFT)
 * @param n      block index; n < 4 selects s->y_dc_scale, else s->c_dc_scale
 * @param qscale quantiser scale used for dequantisation
 * @return scan index of the last non-zero coefficient after refinement
 */
4025 static int dct_quantize_refine(MpegEncContext *s, //FIXME breaks denoise?
4026                         int16_t *block, int16_t *weight, int16_t *orig,
4027                         int n, int qscale){
4028     int16_t rem[64];
4029     LOCAL_ALIGNED_16(int16_t, d1, [64]);
4030     const uint8_t *scantable= s->intra_scantable.scantable;
4031     const uint8_t *perm_scantable= s->intra_scantable.permutated;
4032 //    unsigned int threshold1, threshold2;
4033 //    int bias=0;
         /* run_tab[k]: number of zeros preceding the k-th non-zero coefficient */
4034     int run_tab[65];
4035     int prev_run=0;
4036     int prev_level=0;
4037     int qmul, qadd, start_i, last_non_zero, i, dc;
4038     uint8_t * length;
4039     uint8_t * last_length;
4040     int lambda;
4041     int rle_index, run, q = 1, sum; //q is only used when s->mb_intra is true
4042 #ifdef REFINE_STATS
4043 static int count=0;
4044 static int after_last=0;
4045 static int to_zero=0;
4046 static int from_zero=0;
4047 static int raise=0;
4048 static int lower=0;
4049 static int messed_sign=0;
4050 #endif
4051
         /* basis[0][0] == 0 is used as the "table not built yet" marker */
4052     if(basis[0][0] == 0)
4053         build_basis(s->idsp.idct_permutation);
4054
4055     qmul= qscale*2;
4056     qadd= (qscale-1)|1;
4057     if (s->mb_intra) {
4058         if (!s->h263_aic) {
4059             if (n < 4)
4060                 q = s->y_dc_scale;
4061             else
4062                 q = s->c_dc_scale;
4063         } else{
4064             /* For AIC we skip quant/dequant of INTRADC */
4065             q = 1;
4066             qadd=0;
4067         }
4068         q <<= RECON_SHIFT-3;
4069         /* note: block[0] is assumed to be positive */
4070         dc= block[0]*q;
4071 //        block[0] = (block[0] + (q >> 1)) / q;
4072         start_i = 1;
4073 //        if(s->mpeg_quant || s->out_format == FMT_MPEG1)
4074 //            bias= 1<<(QMAT_SHIFT-1);
4075         length     = s->intra_ac_vlc_length;
4076         last_length= s->intra_ac_vlc_last_length;
4077     } else {
4078         dc= 0;
4079         start_i = 0;
4080         length     = s->inter_ac_vlc_length;
4081         last_length= s->inter_ac_vlc_last_length;
4082     }
4083     last_non_zero = s->block_last_index[n];
4084
4085 #ifdef REFINE_STATS
4086 {START_TIMER
4087 #endif
         /* start the residual from (dc + rounding constant) - scaled original */
4088     dc += (1<<(RECON_SHIFT-1));
4089     for(i=0; i<64; i++){
4090         rem[i]= dc - (orig[i]<<RECON_SHIFT); //FIXME  use orig directly instead of copying to rem[]
4091     }
4092 #ifdef REFINE_STATS
4093 STOP_TIMER("memset rem[]")}
4094 #endif
         /* remap the weights in place and accumulate sum(w*w) for lambda */
4095     sum=0;
4096     for(i=0; i<64; i++){
4097         int one= 36;
4098         int qns=4;
4099         int w;
4100
4101         w= FFABS(weight[i]) + qns*one;
4102         w= 15 + (48*qns*one + w/2)/w; // 16 .. 63
4103
4104         weight[i] = w;
4105 //        w=weight[i] = (63*qns + (w/2)) / w;
4106
4107         av_assert2(w>0);
4108         av_assert2(w<(1<<6));
4109         sum += w*w;
4110     }
4111     lambda= sum*(uint64_t)s->lambda2 >> (FF_LAMBDA_SHIFT - 6 + 6 + 6 + 6);
4112 #ifdef REFINE_STATS
4113 {START_TIMER
4114 #endif
         /* add every dequantised non-zero coefficient to rem[] and record
          * the zero runs in run_tab[] */
4115     run=0;
4116     rle_index=0;
4117     for(i=start_i; i<=last_non_zero; i++){
4118         int j= perm_scantable[i];
4119         const int level= block[j];
4120         int coeff;
4121
4122         if(level){
4123             if(level<0) coeff= qmul*level - qadd;
4124             else        coeff= qmul*level + qadd;
4125             run_tab[rle_index++]=run;
4126             run=0;
4127
4128             s->mpvencdsp.add_8x8basis(rem, basis[j], coeff);
4129         }else{
4130             run++;
4131         }
4132     }
4133 #ifdef REFINE_STATS
4134 if(last_non_zero>0){
4135 STOP_TIMER("init rem[]")
4136 }
4137 }
4138
4139 {START_TIMER
4140 #endif
         /* one iteration = evaluate all single +-1 changes, apply the best */
4141     for(;;){
             /* baseline: score of leaving the block unchanged */
4142         int best_score = s->mpvencdsp.try_8x8basis(rem, weight, basis[0], 0);
4143         int best_coeff=0;
4144         int best_change=0;
4145         int run2, best_unquant_change=0, analyze_gradient;
4146 #ifdef REFINE_STATS
4147 {START_TIMER
4148 #endif
4149         analyze_gradient = last_non_zero > 2 || s->quantizer_noise_shaping >= 3;
4150
             /* d1 = fdct of the weighted residual; its signs are used below
              * to reject some zero -> +-1 changes without scoring them */
4151         if(analyze_gradient){
4152 #ifdef REFINE_STATS
4153 {START_TIMER
4154 #endif
4155             for(i=0; i<64; i++){
4156                 int w= weight[i];
4157
4158                 d1[i] = (rem[i]*w*w + (1<<(RECON_SHIFT+12-1)))>>(RECON_SHIFT+12);
4159             }
4160 #ifdef REFINE_STATS
4161 STOP_TIMER("rem*w*w")}
4162 {START_TIMER
4163 #endif
4164             s->fdsp.fdct(d1);
4165 #ifdef REFINE_STATS
4166 STOP_TIMER("dct")}
4167 #endif
4168         }
4169
             /* intra blocks: try the DC coefficient one step down and up;
              * no bit-cost term is added for these trials */
4170         if(start_i){
4171             const int level= block[0];
4172             int change, old_coeff;
4173
4174             av_assert2(s->mb_intra);
4175
4176             old_coeff= q*level;
4177
4178             for(change=-1; change<=1; change+=2){
4179                 int new_level= level + change;
4180                 int score, new_coeff;
4181
4182                 new_coeff= q*new_level;
4183                 if(new_coeff >= 2048 || new_coeff < 0)
4184                     continue;
4185
4186                 score = s->mpvencdsp.try_8x8basis(rem, weight, basis[0],
4187                                                   new_coeff - old_coeff);
4188                 if(score<best_score){
4189                     best_score= score;
4190                     best_coeff= 0;
4191                     best_change= change;
4192                     best_unquant_change= new_coeff - old_coeff;
4193                 }
4194             }
4195         }
4196
             /* run: zeros since the previous non-zero coefficient;
              * run2: zeros remaining before the next non-zero coefficient */
4197         run=0;
4198         rle_index=0;
4199         run2= run_tab[rle_index++];
4200         prev_level=0;
4201         prev_run=0;
4202
4203         for(i=start_i; i<64; i++){
4204             int j= perm_scantable[i];
4205             const int level= block[j];
4206             int change, old_coeff;
4207
                 /* below noise-shaping level 3 at most one position past the
                  * current last non-zero coefficient is considered */
4208             if(s->quantizer_noise_shaping < 3 && i > last_non_zero + 1)
4209                 break;
4210
4211             if(level){
4212                 if(level<0) old_coeff= qmul*level - qadd;
4213                 else        old_coeff= qmul*level + qadd;
4214                 run2= run_tab[rle_index++]; //FIXME ! maybe after last
4215             }else{
4216                 old_coeff=0;
4217                 run2--;
4218                 av_assert2(run2>=0 || i >= last_non_zero );
4219             }
4220
4221             for(change=-1; change<=1; change+=2){
4222                 int new_level= level + change;
4223                 int score, new_coeff, unquant_change;
4224
4225                 score=0;
                     /* below noise-shaping level 2 magnitudes may only shrink */
4226                 if(s->quantizer_noise_shaping < 2 && FFABS(new_level) > FFABS(level))
4227                    continue;
4228
4229                 if(new_level){
4230                     if(new_level<0) new_coeff= qmul*new_level - qadd;
4231                     else            new_coeff= qmul*new_level + qadd;
4232                     if(new_coeff >= 2048 || new_coeff <= -2048)
4233                         continue;
4234                     //FIXME check for overflow
4235
4236                     if(level){
                             /* non-zero -> non-zero: same run, different level */
4237                         if(level < 63 && level > -63){
4238                             if(i < last_non_zero)
4239                                 score +=   length[UNI_AC_ENC_INDEX(run, new_level+64)]
4240                                          - length[UNI_AC_ENC_INDEX(run, level+64)];
4241                             else
4242                                 score +=   last_length[UNI_AC_ENC_INDEX(run, new_level+64)]
4243                                          - last_length[UNI_AC_ENC_INDEX(run, level+64)];
4244                         }
4245                     }else{
                             /* zero -> +-1: a new coefficient splits a run */
4246                         av_assert2(FFABS(new_level)==1);
4247
4248                         if(analyze_gradient){
4249                             int g= d1[ scantable[i] ];
                                 /* skip when g is non-zero and has the same
                                  * sign as the new level */
4250                             if(g && (g^new_level) >= 0)
4251                                 continue;
4252                         }
4253
4254                         if(i < last_non_zero){
4255                             int next_i= i + run2 + 1;
4256                             int next_level= block[ perm_scantable[next_i] ] + 64;
4257
4258                             if(next_level&(~127))
4259                                 next_level= 0;
4260
4261                             if(next_i < last_non_zero)
4262                                 score +=   length[UNI_AC_ENC_INDEX(run, 65)]
4263                                          + length[UNI_AC_ENC_INDEX(run2, next_level)]
4264                                          - length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
4265                             else
4266                                 score +=  length[UNI_AC_ENC_INDEX(run, 65)]
4267                                         + last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4268                                         - last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
4269                         }else{
                                 /* the new coefficient becomes the last one; the
                                  * previous one switches from "last" to normal */
4270                             score += last_length[UNI_AC_ENC_INDEX(run, 65)];
4271                             if(prev_level){
4272                                 score +=  length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4273                                         - last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4274                             }
4275                         }
4276                     }
4277                 }else{
                         /* +-1 -> zero: the coefficient disappears and its
                          * neighbouring runs merge */
4278                     new_coeff=0;
4279                     av_assert2(FFABS(level)==1);
4280
4281                     if(i < last_non_zero){
4282                         int next_i= i + run2 + 1;
4283                         int next_level= block[ perm_scantable[next_i] ] + 64;
4284
4285                         if(next_level&(~127))
4286                             next_level= 0;
4287
4288                         if(next_i < last_non_zero)
4289                             score +=   length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4290                                      - length[UNI_AC_ENC_INDEX(run2, next_level)]
4291                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4292                         else
4293                             score +=   last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4294                                      - last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4295                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4296                     }else{
4297                         score += -last_length[UNI_AC_ENC_INDEX(run, 65)];
4298                         if(prev_level){
4299                             score +=  last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4300                                     - length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4301                         }
4302                     }
4303                 }
4304
                     /* weight the length delta, then add the residual score */
4305                 score *= lambda;
4306
4307                 unquant_change= new_coeff - old_coeff;
4308                 av_assert2((score < 100*lambda && score > -100*lambda) || lambda==0);
4309
4310                 score += s->mpvencdsp.try_8x8basis(rem, weight, basis[j],
4311                                                    unquant_change);
4312                 if(score<best_score){
4313                     best_score= score;
4314                     best_coeff= i;
4315                     best_change= change;
4316                     best_unquant_change= unquant_change;
4317                 }
4318             }
                 /* remember the most recent non-zero coefficient; prev_level
                  * is stored with the +64 offset, or 0 when out of 0..127 */
4319             if(level){
4320                 prev_level= level + 64;
4321                 if(prev_level&(~127))
4322                     prev_level= 0;
4323                 prev_run= run;
4324                 run=0;
4325             }else{
4326                 run++;
4327             }
4328         }
4329 #ifdef REFINE_STATS
4330 STOP_TIMER("iterative step")}
4331 #endif
4332
             /* apply the winning change, or stop when nothing improved */
4333         if(best_change){
4334             int j= perm_scantable[ best_coeff ];
4335
4336             block[j] += best_change;
4337
4338             if(best_coeff > last_non_zero){
4339                 last_non_zero= best_coeff;
4340                 av_assert2(block[j]);
4341 #ifdef REFINE_STATS
4342 after_last++;
4343 #endif
4344             }else{
4345 #ifdef REFINE_STATS
4346 if(block[j]){
4347     if(block[j] - best_change){
4348         if(FFABS(block[j]) > FFABS(block[j] - best_change)){
4349             raise++;
4350         }else{
4351             lower++;
4352         }
4353     }else{
4354         from_zero++;
4355     }
4356 }else{
4357     to_zero++;
4358 }
4359 #endif
                     /* the change may have zeroed the last coefficient:
                      * walk back to the new last non-zero position */
4360                 for(; last_non_zero>=start_i; last_non_zero--){
4361                     if(block[perm_scantable[last_non_zero]])
4362                         break;
4363                 }
4364             }
4365 #ifdef REFINE_STATS
4366 count++;
4367 if(256*256*256*64 % count == 0){
4368     av_log(s->avctx, AV_LOG_DEBUG, "after_last:%d to_zero:%d from_zero:%d raise:%d lower:%d sign:%d xyp:%d/%d/%d\n", after_last, to_zero, from_zero, raise, lower, messed_sign, s->mb_x, s->mb_y, s->picture_number);
4369 }
4370 #endif
                 /* rebuild run_tab[] for the modified block; the j declared in
                  * this loop shadows the outer j, which is still used below */
4371             run=0;
4372             rle_index=0;
4373             for(i=start_i; i<=last_non_zero; i++){
4374                 int j= perm_scantable[i];
4375                 const int level= block[j];
4376
4377                  if(level){
4378                      run_tab[rle_index++]=run;
4379                      run=0;
4380                  }else{
4381                      run++;
4382                  }
4383             }
4384
                 /* fold the applied change into the residual */
4385             s->mpvencdsp.add_8x8basis(rem, basis[j], best_unquant_change);
4386         }else{
4387             break;
4388         }
4389     }
4390 #ifdef REFINE_STATS
4391 if(last_non_zero>0){
4392 STOP_TIMER("iterative search")
4393 }
4394 }
4395 #endif
4396
4397     return last_non_zero;
4398 }
4399
4400 int ff_dct_quantize_c(MpegEncContext *s,
4401                         int16_t *block, int n,
4402                         int qscale, int *overflow)
4403 {
4404     int i, j, level, last_non_zero, q, start_i;
4405     const int *qmat;
4406     const uint8_t *scantable= s->intra_scantable.scantable;
4407     int bias;
4408     int max=0;
4409     unsigned int threshold1, threshold2;
4410
4411     s->fdsp.fdct(block);
4412
4413     if(s->dct_error_sum)
4414         s->denoise_dct(s, block);
4415
4416     if (s->mb_intra) {
4417         if (!s->h263_aic) {
4418             if (n < 4)
4419                 q = s->y_dc_scale;
4420             else
4421                 q = s->c_dc_scale;
4422             q = q << 3;
4423         } else
4424             /* For AIC we skip quant/dequant of INTRADC */
4425             q = 1 << 3;
4426
4427         /* note: block[0] is assumed to be positive */
4428         block[0] = (block[0] + (q >> 1)) / q;
4429         start_i = 1;
4430         last_non_zero = 0;
4431         qmat = n < 4 ? s->q_intra_matrix[qscale] : s->q_chroma_intra_matrix[qscale];
4432         bias= s->intra_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4433     } else {
4434         start_i = 0;
4435         last_non_zero = -1;
4436         qmat = s->q_inter_matrix[qscale];
4437         bias= s->inter_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4438     }
4439     threshold1= (1<<QMAT_SHIFT) - bias - 1;
4440     threshold2= (threshold1<<1);
4441     for(i=63;i>=start_i;i--) {
4442         j = scantable[i];
4443         level = block[j] * qmat[j];
4444
4445         if(((unsigned)(level+threshold1))>threshold2){
4446             last_non_zero = i;
4447             break;
4448         }else{
4449             block[j]=0;
4450         }
4451     }
4452     for(i=start_i; i<=last_non_zero; i++) {
4453         j = scantable[i];
4454         level = block[j] * qmat[j];
4455
4456 //        if(   bias+level >= (1<<QMAT_SHIFT)
4457 //           || bias-level >= (1<<QMAT_SHIFT)){
4458         if(((unsigned)(level+threshold1))>threshold2){
4459             if(level>0){
4460                 level= (bias + level)>>QMAT_SHIFT;
4461                 block[j]= level;
4462             }else{
4463                 level= (bias - level)>>QMAT_SHIFT;
4464                 block[j]= -level;
4465             }
4466             max |=level;
4467         }else{
4468             block[j]=0;
4469         }
4470     }
4471     *overflow= s->max_qcoeff < max; //overflow might have happened
4472
4473     /* we need this permutation so that we correct the IDCT, we only permute the !=0 elements */
4474     if (s->idsp.idct_permutation_type != FF_NO_IDCT_PERM)
4475         ff_block_permute(block, s->idsp.idct_permutation,
4476                          scantable, last_non_zero);
4477
4478     return last_non_zero;
4479 }
4480
/* Byte offset of member x inside MpegEncContext, for use in AVOption tables. */
#define OFFSET(x) offsetof(MpegEncContext, x)
/* Flag set for options that are video encoding parameters. The replacement
 * list is parenthesised so that it stays a single operand wherever the macro
 * is expanded (e.g. next to '&' or '=='). */
#define VE (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM)
/*
 * Private option table of the H.263 encoder.
 *
 * Every explicit entry names an option, gives its help text and binds it to an
 * AV_OPT_TYPE_INT member of MpegEncContext located through OFFSET(); all of
 * them carry the VE flags. The three values after the { .i64 = ... }
 * initialiser are presumably minimum and maximum followed by the flags, as
 * laid out by AVOption -- TODO confirm against the AVOption declaration.
 * FF_MPV_COMMON_OPTS is defined outside this view; it presumably appends the
 * options shared by the MPV-based encoders. The { NULL } entry ends the table.
 */
4483 static const AVOption h263_options[] = {
4484     { "obmc",         "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4485     { "structured_slices","Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4486     { "mb_info",      "emit macroblock info for RFC 2190 packetization, the parameter value is the maximum payload size", OFFSET(mb_info), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VE },
4487     FF_MPV_COMMON_OPTS
4488     { NULL },
4489 };
4490
4491 static const AVClass h263_class = {
4492     .class_name = "H.263 encoder",
4493     .item_name  = av_default_item_name,
4494     .option     = h263_options,
4495     .version    = LIBAVUTIL_VERSION_INT,
4496 };
4497
4498 AVCodec ff_h263_encoder = {
4499     .name           = "h263",
4500     .long_name      = NULL_IF_CONFIG_SMALL("H.263 / H.263-1996"),
4501     .type           = AVMEDIA_TYPE_VIDEO,
4502     .id             = AV_CODEC_ID_H263,
4503     .priv_data_size = sizeof(MpegEncContext),
4504     .init           = ff_MPV_encode_init,
4505     .encode2        = ff_MPV_encode_picture,
4506     .close          = ff_MPV_encode_end,
4507     .pix_fmts= (const enum AVPixelFormat[]){AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE},
4508     .priv_class     = &h263_class,
4509 };
4510
/*
 * Private option table of the H.263+ encoder.
 *
 * Every explicit entry names an option, gives its help text and binds it to an
 * AV_OPT_TYPE_INT member of MpegEncContext located through OFFSET(); each has
 * a { .i64 = 0 } default initialiser followed by 0, 1 (presumably the allowed
 * range -- TODO confirm against the AVOption declaration) and the VE flags.
 * FF_MPV_COMMON_OPTS is defined outside this view; it presumably appends the
 * options shared by the MPV-based encoders. The { NULL } entry ends the table.
 */
4511 static const AVOption h263p_options[] = {
4512     { "umv",        "Use unlimited motion vectors.",    OFFSET(umvplus), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4513     { "aiv",        "Use alternative inter VLC.",       OFFSET(alt_inter_vlc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4514     { "obmc",       "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4515     { "structured_slices", "Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4516     FF_MPV_COMMON_OPTS
4517     { NULL },
4518 };
4519 static const AVClass h263p_class = {
4520     .class_name = "H.263p encoder",
4521     .item_name  = av_default_item_name,
4522     .option     = h263p_options,
4523     .version    = LIBAVUTIL_VERSION_INT,
4524 };
4525
4526 AVCodec ff_h263p_encoder = {
4527     .name           = "h263p",
4528     .long_name      = NULL_IF_CONFIG_SMALL("H.263+ / H.263-1998 / H.263 version 2"),
4529     .type           = AVMEDIA_TYPE_VIDEO,
4530     .id             = AV_CODEC_ID_H263P,
4531     .priv_data_size = sizeof(MpegEncContext),
4532     .init           = ff_MPV_encode_init,
4533     .encode2        = ff_MPV_encode_picture,
4534     .close          = ff_MPV_encode_end,
4535     .capabilities   = CODEC_CAP_SLICE_THREADS,
4536     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4537     .priv_class     = &h263p_class,
4538 };
4539
4540 FF_MPV_GENERIC_CLASS(msmpeg4v2)
4541
4542 AVCodec ff_msmpeg4v2_encoder = {
4543     .name           = "msmpeg4v2",
4544     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 2"),
4545     .type           = AVMEDIA_TYPE_VIDEO,
4546     .id             = AV_CODEC_ID_MSMPEG4V2,
4547     .priv_data_size = sizeof(MpegEncContext),
4548     .init           = ff_MPV_encode_init,
4549     .encode2        = ff_MPV_encode_picture,
4550     .close          = ff_MPV_encode_end,
4551     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4552     .priv_class     = &msmpeg4v2_class,
4553 };
4554
4555 FF_MPV_GENERIC_CLASS(msmpeg4v3)
4556
4557 AVCodec ff_msmpeg4v3_encoder = {
4558     .name           = "msmpeg4",
4559     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 3"),
4560     .type           = AVMEDIA_TYPE_VIDEO,
4561     .id             = AV_CODEC_ID_MSMPEG4V3,
4562     .priv_data_size = sizeof(MpegEncContext),
4563     .init           = ff_MPV_encode_init,
4564     .encode2        = ff_MPV_encode_picture,
4565     .close          = ff_MPV_encode_end,
4566     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4567     .priv_class     = &msmpeg4v3_class,
4568 };
4569
4570 FF_MPV_GENERIC_CLASS(wmv1)
4571
4572 AVCodec ff_wmv1_encoder = {
4573     .name           = "wmv1",
4574     .long_name      = NULL_IF_CONFIG_SMALL("Windows Media Video 7"),
4575     .type           = AVMEDIA_TYPE_VIDEO,
4576     .id             = AV_CODEC_ID_WMV1,
4577     .priv_data_size = sizeof(MpegEncContext),
4578     .init           = ff_MPV_encode_init,
4579     .encode2        = ff_MPV_encode_picture,
4580     .close          = ff_MPV_encode_end,
4581     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4582     .priv_class     = &wmv1_class,
4583 };