git.sesse.net Git - ffmpeg/blob - libavcodec/mpegvideo_enc.c
avcodec/mpegvideo_enc: dont use direct mode for unaligned input
[ffmpeg] / libavcodec / mpegvideo_enc.c
1 /*
2  * The simplest mpeg encoder (well, it was the simplest!)
3  * Copyright (c) 2000,2001 Fabrice Bellard
4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5  *
6  * 4MV & hq & B-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
7  *
8  * This file is part of FFmpeg.
9  *
10  * FFmpeg is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * FFmpeg is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with FFmpeg; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23  */
24
25 /**
26  * @file
27  * The simplest mpeg encoder (well, it was the simplest!).
28  */
29
30 #include <stdint.h>
31
32 #include "libavutil/internal.h"
33 #include "libavutil/intmath.h"
34 #include "libavutil/mathematics.h"
35 #include "libavutil/pixdesc.h"
36 #include "libavutil/opt.h"
37 #include "libavutil/timer.h"
38 #include "avcodec.h"
39 #include "dct.h"
40 #include "dsputil.h"
41 #include "mpeg12.h"
42 #include "mpegvideo.h"
43 #include "h261.h"
44 #include "h263.h"
45 #include "mathops.h"
46 #include "mjpegenc.h"
47 #include "msmpeg4.h"
48 #include "faandct.h"
49 #include "thread.h"
50 #include "aandcttab.h"
51 #include "flv.h"
52 #include "mpeg4video.h"
53 #include "internal.h"
54 #include "bytestream.h"
55 #include <limits.h>
56 #include "sp5x.h"
57
58 static int encode_picture(MpegEncContext *s, int picture_number);
59 static int dct_quantize_refine(MpegEncContext *s, int16_t *block, int16_t *weight, int16_t *orig, int n, int qscale);
60 static int sse_mb(MpegEncContext *s);
61 static void denoise_dct_c(MpegEncContext *s, int16_t *block);
62 static int dct_quantize_trellis_c(MpegEncContext *s, int16_t *block, int n, int qscale, int *overflow);
63
64 static uint8_t default_mv_penalty[MAX_FCODE + 1][MAX_MV * 2 + 1];
65 static uint8_t default_fcode_tab[MAX_MV * 2 + 1];
66
67 const AVOption ff_mpv_generic_options[] = {
68     FF_MPV_COMMON_OPTS
69     { NULL },
70 };
71
72 void ff_convert_matrix(DSPContext *dsp, int (*qmat)[64],
73                        uint16_t (*qmat16)[2][64],
74                        const uint16_t *quant_matrix,
75                        int bias, int qmin, int qmax, int intra)
76 {
77     int qscale;
78     int shift = 0;
79
80     for (qscale = qmin; qscale <= qmax; qscale++) {
81         int i;
82         if (dsp->fdct == ff_jpeg_fdct_islow_8 ||
83             dsp->fdct == ff_jpeg_fdct_islow_10 ||
84             dsp->fdct == ff_faandct) {
85             for (i = 0; i < 64; i++) {
86                 const int j = dsp->idct_permutation[i];
87                 /* 16 <= qscale * quant_matrix[i] <= 7905
88                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
89                  *             19952 <=              x  <= 249205026
90                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
91                  *           3444240 >= (1 << 36) / (x) >= 275 */
92
93                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
94                                         (qscale * quant_matrix[j]));
95             }
96         } else if (dsp->fdct == ff_fdct_ifast) {
97             for (i = 0; i < 64; i++) {
98                 const int j = dsp->idct_permutation[i];
99                 /* 16 <= qscale * quant_matrix[i] <= 7905
100                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
101                  *             19952 <=              x  <= 249205026
102                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
103                  *           3444240 >= (1 << 36) / (x) >= 275 */
104
105                 qmat[qscale][i] = (int)((UINT64_C(1) << (QMAT_SHIFT + 14)) /
106                                         (ff_aanscales[i] * (int64_t)qscale * quant_matrix[j]));
107             }
108         } else {
109             for (i = 0; i < 64; i++) {
110                 const int j = dsp->idct_permutation[i];
111                 /* We can safely suppose that 16 <= quant_matrix[i] <= 255
112                  * Assume x = qscale * quant_matrix[i]
113                  * So             16 <=              x  <= 7905
114                  * so (1 << 19) / 16 >= (1 << 19) / (x) >= (1 << 19) / 7905
115                  * so          32768 >= (1 << 19) / (x) >= 67 */
116                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
117                                         (qscale * quant_matrix[j]));
118                 //qmat  [qscale][i] = (1 << QMAT_SHIFT_MMX) /
119                 //                    (qscale * quant_matrix[i]);
120                 qmat16[qscale][0][i] = (1 << QMAT_SHIFT_MMX) /
121                                        (qscale * quant_matrix[j]);
122
123                 if (qmat16[qscale][0][i] == 0 ||
124                     qmat16[qscale][0][i] == 128 * 256)
125                     qmat16[qscale][0][i] = 128 * 256 - 1;
126                 qmat16[qscale][1][i] =
127                     ROUNDED_DIV(bias << (16 - QUANT_BIAS_SHIFT),
128                                 qmat16[qscale][0][i]);
129             }
130         }
131
132         for (i = intra; i < 64; i++) {
133             int64_t max = 8191;
134             if (dsp->fdct == ff_fdct_ifast) {
135                 max = (8191LL * ff_aanscales[i]) >> 14;
136             }
137             while (((max * qmat[qscale][i]) >> shift) > INT_MAX) {
138                 shift++;
139             }
140         }
141     }
142     if (shift) {
143         av_log(NULL, AV_LOG_INFO,
144                "Warning, QMAT_SHIFT is larger than %d, overflows possible\n",
145                QMAT_SHIFT - shift);
146     }
147 }
148
149 static inline void update_qscale(MpegEncContext *s)
150 {
151     s->qscale = (s->lambda * 139 + FF_LAMBDA_SCALE * 64) >>
152                 (FF_LAMBDA_SHIFT + 7);
153     s->qscale = av_clip(s->qscale, s->avctx->qmin, s->avctx->qmax);
154
155     s->lambda2 = (s->lambda * s->lambda + FF_LAMBDA_SCALE / 2) >>
156                  FF_LAMBDA_SHIFT;
157 }
158
159 void ff_write_quant_matrix(PutBitContext *pb, uint16_t *matrix)
160 {
161     int i;
162
163     if (matrix) {
164         put_bits(pb, 1, 1);
165         for (i = 0; i < 64; i++) {
166             put_bits(pb, 8, matrix[ff_zigzag_direct[i]]);
167         }
168     } else
169         put_bits(pb, 1, 0);
170 }
171
172 /**
173  * init s->current_picture.qscale_table from s->lambda_table
174  */
175 void ff_init_qscale_tab(MpegEncContext *s)
176 {
177     int8_t * const qscale_table = s->current_picture.qscale_table;
178     int i;
179
180     for (i = 0; i < s->mb_num; i++) {
181         unsigned int lam = s->lambda_table[s->mb_index2xy[i]];
182         int qp = (lam * 139 + FF_LAMBDA_SCALE * 64) >> (FF_LAMBDA_SHIFT + 7);
183         qscale_table[s->mb_index2xy[i]] = av_clip(qp, s->avctx->qmin,
184                                                   s->avctx->qmax);
185     }
186 }
187
188 static void update_duplicate_context_after_me(MpegEncContext *dst,
189                                               MpegEncContext *src)
190 {
191 #define COPY(a) dst->a= src->a
192     COPY(pict_type);
193     COPY(current_picture);
194     COPY(f_code);
195     COPY(b_code);
196     COPY(qscale);
197     COPY(lambda);
198     COPY(lambda2);
199     COPY(picture_in_gop_number);
200     COPY(gop_picture_number);
201     COPY(frame_pred_frame_dct); // FIXME don't set in encode_header
202     COPY(progressive_frame);    // FIXME don't set in encode_header
203     COPY(partitioned_frame);    // FIXME don't set in encode_header
204 #undef COPY
205 }
206
207 /**
208  * Set the given MpegEncContext to defaults for encoding.
209  * the changed fields will not depend upon the prior state of the MpegEncContext.
210  */
211 static void MPV_encode_defaults(MpegEncContext *s)
212 {
213     int i;
214     ff_MPV_common_defaults(s);
215
216     for (i = -16; i < 16; i++) {
217         default_fcode_tab[i + MAX_MV] = 1;
218     }
219     s->me.mv_penalty = default_mv_penalty;
220     s->fcode_tab     = default_fcode_tab;
221
222     s->input_picture_number  = 0;
223     s->picture_in_gop_number = 0;
224 }
225
226 av_cold int ff_dct_encode_init(MpegEncContext *s) {
227     if (ARCH_X86)
228         ff_dct_encode_init_x86(s);
229
230     if (CONFIG_H263_ENCODER)
231         ff_h263dsp_init(&s->h263dsp);
232     if (!s->dct_quantize)
233         s->dct_quantize = ff_dct_quantize_c;
234     if (!s->denoise_dct)
235         s->denoise_dct  = denoise_dct_c;
236     s->fast_dct_quantize = s->dct_quantize;
237     if (s->avctx->trellis)
238         s->dct_quantize  = dct_quantize_trellis_c;
239
240     return 0;
241 }
242
243 /* init video encoder */
244 av_cold int ff_MPV_encode_init(AVCodecContext *avctx)
245 {
246     MpegEncContext *s = avctx->priv_data;
247     int i, ret;
248
249     MPV_encode_defaults(s);
250
251     switch (avctx->codec_id) {
252     case AV_CODEC_ID_MPEG2VIDEO:
253         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P &&
254             avctx->pix_fmt != AV_PIX_FMT_YUV422P) {
255             av_log(avctx, AV_LOG_ERROR,
256                    "only YUV420 and YUV422 are supported\n");
257             return -1;
258         }
259         break;
260     case AV_CODEC_ID_MJPEG:
261     case AV_CODEC_ID_AMV:
262         if (avctx->pix_fmt != AV_PIX_FMT_YUVJ420P &&
263             avctx->pix_fmt != AV_PIX_FMT_YUVJ422P &&
264             avctx->pix_fmt != AV_PIX_FMT_YUVJ444P &&
265             ((avctx->pix_fmt != AV_PIX_FMT_YUV420P &&
266               avctx->pix_fmt != AV_PIX_FMT_YUV422P &&
267               avctx->pix_fmt != AV_PIX_FMT_YUV444P) ||
268              avctx->strict_std_compliance > FF_COMPLIANCE_UNOFFICIAL)) {
269             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in jpeg\n");
270             return -1;
271         }
272         break;
273     default:
274         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P) {
275             av_log(avctx, AV_LOG_ERROR, "only YUV420 is supported\n");
276             return -1;
277         }
278     }
279
280     switch (avctx->pix_fmt) {
281     case AV_PIX_FMT_YUVJ444P:
282     case AV_PIX_FMT_YUV444P:
283         s->chroma_format = CHROMA_444;
284         break;
285     case AV_PIX_FMT_YUVJ422P:
286     case AV_PIX_FMT_YUV422P:
287         s->chroma_format = CHROMA_422;
288         break;
289     case AV_PIX_FMT_YUVJ420P:
290     case AV_PIX_FMT_YUV420P:
291     default:
292         s->chroma_format = CHROMA_420;
293         break;
294     }
295
296     s->bit_rate = avctx->bit_rate;
297     s->width    = avctx->width;
298     s->height   = avctx->height;
299     if (avctx->gop_size > 600 &&
300         avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL) {
301         av_log(avctx, AV_LOG_WARNING,
302                "keyframe interval too large!, reducing it from %d to %d\n",
303                avctx->gop_size, 600);
304         avctx->gop_size = 600;
305     }
306     s->gop_size     = avctx->gop_size;
307     s->avctx        = avctx;
308     s->flags        = avctx->flags;
309     s->flags2       = avctx->flags2;
310     if (avctx->max_b_frames > MAX_B_FRAMES) {
311         av_log(avctx, AV_LOG_ERROR, "Too many B-frames requested, maximum "
312                "is %d.\n", MAX_B_FRAMES);
313         avctx->max_b_frames = MAX_B_FRAMES;
314     }
315     s->max_b_frames = avctx->max_b_frames;
316     s->codec_id     = avctx->codec->id;
317     s->strict_std_compliance = avctx->strict_std_compliance;
318     s->quarter_sample     = (avctx->flags & CODEC_FLAG_QPEL) != 0;
319     s->mpeg_quant         = avctx->mpeg_quant;
320     s->rtp_mode           = !!avctx->rtp_payload_size;
321     s->intra_dc_precision = avctx->intra_dc_precision;
322     s->user_specified_pts = AV_NOPTS_VALUE;
323
324     if (s->gop_size <= 1) {
325         s->intra_only = 1;
326         s->gop_size   = 12;
327     } else {
328         s->intra_only = 0;
329     }
330
331     s->me_method = avctx->me_method;
332
333     /* Fixed QSCALE */
334     s->fixed_qscale = !!(avctx->flags & CODEC_FLAG_QSCALE);
335
336     s->adaptive_quant = (s->avctx->lumi_masking ||
337                          s->avctx->dark_masking ||
338                          s->avctx->temporal_cplx_masking ||
339                          s->avctx->spatial_cplx_masking  ||
340                          s->avctx->p_masking      ||
341                          s->avctx->border_masking ||
342                          (s->mpv_flags & FF_MPV_FLAG_QP_RD)) &&
343                         !s->fixed_qscale;
344
345     s->loop_filter      = !!(s->flags & CODEC_FLAG_LOOP_FILTER);
346
347     if (avctx->rc_max_rate && !avctx->rc_buffer_size) {
348         switch(avctx->codec_id) {
349         case AV_CODEC_ID_MPEG1VIDEO:
350         case AV_CODEC_ID_MPEG2VIDEO:
351             avctx->rc_buffer_size = FFMAX(avctx->rc_max_rate, 15000000) * 112L / 15000000 * 16384;
352             break;
353         case AV_CODEC_ID_MPEG4:
354         case AV_CODEC_ID_MSMPEG4V1:
355         case AV_CODEC_ID_MSMPEG4V2:
356         case AV_CODEC_ID_MSMPEG4V3:
357             if       (avctx->rc_max_rate >= 15000000) {
358                 avctx->rc_buffer_size = 320 + (avctx->rc_max_rate - 15000000L) * (760-320) / (38400000 - 15000000);
359             } else if(avctx->rc_max_rate >=  2000000) {
360                 avctx->rc_buffer_size =  80 + (avctx->rc_max_rate -  2000000L) * (320- 80) / (15000000 -  2000000);
361             } else if(avctx->rc_max_rate >=   384000) {
362                 avctx->rc_buffer_size =  40 + (avctx->rc_max_rate -   384000L) * ( 80- 40) / ( 2000000 -   384000);
363             } else
364                 avctx->rc_buffer_size = 40;
365             avctx->rc_buffer_size *= 16384;
366             break;
367         }
368         if (avctx->rc_buffer_size) {
369             av_log(avctx, AV_LOG_INFO, "Automatically choosing VBV buffer size of %d kbyte\n", avctx->rc_buffer_size/8192);
370         }
371     }
372
373     if ((!avctx->rc_max_rate) != (!avctx->rc_buffer_size)) {
374         av_log(avctx, AV_LOG_ERROR, "Either both buffer size and max rate or neither must be specified\n");
375         if (avctx->rc_max_rate && !avctx->rc_buffer_size)
376             return -1;
377     }
378
379     if (avctx->rc_min_rate && avctx->rc_max_rate != avctx->rc_min_rate) {
380         av_log(avctx, AV_LOG_INFO,
381                "Warning min_rate > 0 but min_rate != max_rate isn't recommended!\n");
382     }
383
384     if (avctx->rc_min_rate && avctx->rc_min_rate > avctx->bit_rate) {
385         av_log(avctx, AV_LOG_ERROR, "bitrate below min bitrate\n");
386         return -1;
387     }
388
389     if (avctx->rc_max_rate && avctx->rc_max_rate < avctx->bit_rate) {
390         av_log(avctx, AV_LOG_ERROR, "bitrate above max bitrate\n");
391         return -1;
392     }
393
394     if (avctx->rc_max_rate &&
395         avctx->rc_max_rate == avctx->bit_rate &&
396         avctx->rc_max_rate != avctx->rc_min_rate) {
397         av_log(avctx, AV_LOG_INFO,
398                "impossible bitrate constraints, this will fail\n");
399     }
400
401     if (avctx->rc_buffer_size &&
402         avctx->bit_rate * (int64_t)avctx->time_base.num >
403             avctx->rc_buffer_size * (int64_t)avctx->time_base.den) {
404         av_log(avctx, AV_LOG_ERROR, "VBV buffer too small for bitrate\n");
405         return -1;
406     }
407
408     if (!s->fixed_qscale &&
409         avctx->bit_rate * av_q2d(avctx->time_base) >
410             avctx->bit_rate_tolerance) {
411         av_log(avctx, AV_LOG_ERROR,
412                "bitrate tolerance %d too small for bitrate %d\n", avctx->bit_rate_tolerance, avctx->bit_rate);
413         return -1;
414     }
415
416     if (s->avctx->rc_max_rate &&
417         s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
418         (s->codec_id == AV_CODEC_ID_MPEG1VIDEO ||
419          s->codec_id == AV_CODEC_ID_MPEG2VIDEO) &&
420         90000LL * (avctx->rc_buffer_size - 1) >
421             s->avctx->rc_max_rate * 0xFFFFLL) {
422         av_log(avctx, AV_LOG_INFO,
423                "Warning vbv_delay will be set to 0xFFFF (=VBR) as the "
424                "specified vbv buffer is too large for the given bitrate!\n");
425     }
426
427     if ((s->flags & CODEC_FLAG_4MV)  && s->codec_id != AV_CODEC_ID_MPEG4 &&
428         s->codec_id != AV_CODEC_ID_H263 && s->codec_id != AV_CODEC_ID_H263P &&
429         s->codec_id != AV_CODEC_ID_FLV1) {
430         av_log(avctx, AV_LOG_ERROR, "4MV not supported by codec\n");
431         return -1;
432     }
433
434     if (s->obmc && s->avctx->mb_decision != FF_MB_DECISION_SIMPLE) {
435         av_log(avctx, AV_LOG_ERROR,
436                "OBMC is only supported with simple mb decision\n");
437         return -1;
438     }
439
440     if (s->quarter_sample && s->codec_id != AV_CODEC_ID_MPEG4) {
441         av_log(avctx, AV_LOG_ERROR, "qpel not supported by codec\n");
442         return -1;
443     }
444
445     if (s->max_b_frames                    &&
446         s->codec_id != AV_CODEC_ID_MPEG4      &&
447         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
448         s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
449         av_log(avctx, AV_LOG_ERROR, "b frames not supported by codec\n");
450         return -1;
451     }
452     if (s->max_b_frames < 0) {
453         av_log(avctx, AV_LOG_ERROR,
454                "max b frames must be 0 or positive for mpegvideo based encoders\n");
455         return -1;
456     }
457
458     if ((s->codec_id == AV_CODEC_ID_MPEG4 ||
459          s->codec_id == AV_CODEC_ID_H263  ||
460          s->codec_id == AV_CODEC_ID_H263P) &&
461         (avctx->sample_aspect_ratio.num > 255 ||
462          avctx->sample_aspect_ratio.den > 255)) {
463         av_log(avctx, AV_LOG_WARNING,
464                "Invalid pixel aspect ratio %i/%i, limit is 255/255 reducing\n",
465                avctx->sample_aspect_ratio.num, avctx->sample_aspect_ratio.den);
466         av_reduce(&avctx->sample_aspect_ratio.num, &avctx->sample_aspect_ratio.den,
467                    avctx->sample_aspect_ratio.num,  avctx->sample_aspect_ratio.den, 255);
468     }
469
470     if ((s->codec_id == AV_CODEC_ID_H263  ||
471          s->codec_id == AV_CODEC_ID_H263P) &&
472         (avctx->width  > 2048 ||
473          avctx->height > 1152 )) {
474         av_log(avctx, AV_LOG_ERROR, "H.263 does not support resolutions above 2048x1152\n");
475         return -1;
476     }
477     if ((s->codec_id == AV_CODEC_ID_H263  ||
478          s->codec_id == AV_CODEC_ID_H263P) &&
479         ((avctx->width &3) ||
480          (avctx->height&3) )) {
481         av_log(avctx, AV_LOG_ERROR, "w/h must be a multiple of 4\n");
482         return -1;
483     }
484
485     if (s->codec_id == AV_CODEC_ID_MPEG1VIDEO &&
486         (avctx->width  > 4095 ||
487          avctx->height > 4095 )) {
488         av_log(avctx, AV_LOG_ERROR, "MPEG-1 does not support resolutions above 4095x4095\n");
489         return -1;
490     }
491
492     if (s->codec_id == AV_CODEC_ID_MPEG2VIDEO &&
493         (avctx->width  > 16383 ||
494          avctx->height > 16383 )) {
495         av_log(avctx, AV_LOG_ERROR, "MPEG-2 does not support resolutions above 16383x16383\n");
496         return -1;
497     }
498
499     if (s->codec_id == AV_CODEC_ID_RV10 &&
500         (avctx->width &15 ||
501          avctx->height&15 )) {
502         av_log(avctx, AV_LOG_ERROR, "width and height must be a multiple of 16\n");
503         return AVERROR(EINVAL);
504     }
505
506     if (s->codec_id == AV_CODEC_ID_RV20 &&
507         (avctx->width &3 ||
508          avctx->height&3 )) {
509         av_log(avctx, AV_LOG_ERROR, "width and height must be a multiple of 4\n");
510         return AVERROR(EINVAL);
511     }
512
513     if ((s->codec_id == AV_CODEC_ID_WMV1 ||
514          s->codec_id == AV_CODEC_ID_WMV2) &&
515          avctx->width & 1) {
516          av_log(avctx, AV_LOG_ERROR, "width must be multiple of 2\n");
517          return -1;
518     }
519
520     if ((s->flags & (CODEC_FLAG_INTERLACED_DCT | CODEC_FLAG_INTERLACED_ME)) &&
521         s->codec_id != AV_CODEC_ID_MPEG4 && s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
522         av_log(avctx, AV_LOG_ERROR, "interlacing not supported by codec\n");
523         return -1;
524     }
525
526     // FIXME mpeg2 uses that too
527     if (s->mpeg_quant && (   s->codec_id != AV_CODEC_ID_MPEG4
528                           && s->codec_id != AV_CODEC_ID_MPEG2VIDEO)) {
529         av_log(avctx, AV_LOG_ERROR,
530                "mpeg2 style quantization not supported by codec\n");
531         return -1;
532     }
533
534     if ((s->mpv_flags & FF_MPV_FLAG_CBP_RD) && !avctx->trellis) {
535         av_log(avctx, AV_LOG_ERROR, "CBP RD needs trellis quant\n");
536         return -1;
537     }
538
539     if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) &&
540         s->avctx->mb_decision != FF_MB_DECISION_RD) {
541         av_log(avctx, AV_LOG_ERROR, "QP RD needs mbd=2\n");
542         return -1;
543     }
544
545     if (s->avctx->scenechange_threshold < 1000000000 &&
546         (s->flags & CODEC_FLAG_CLOSED_GOP)) {
547         av_log(avctx, AV_LOG_ERROR,
548                "closed gop with scene change detection are not supported yet, "
549                "set threshold to 1000000000\n");
550         return -1;
551     }
552
553     if (s->flags & CODEC_FLAG_LOW_DELAY) {
554         if (s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
555             av_log(avctx, AV_LOG_ERROR,
556                   "low delay forcing is only available for mpeg2\n");
557             return -1;
558         }
559         if (s->max_b_frames != 0) {
560             av_log(avctx, AV_LOG_ERROR,
561                    "b frames cannot be used with low delay\n");
562             return -1;
563         }
564     }
565
566     if (s->q_scale_type == 1) {
567         if (avctx->qmax > 12) {
568             av_log(avctx, AV_LOG_ERROR,
569                    "non linear quant only supports qmax <= 12 currently\n");
570             return -1;
571         }
572     }
573
574     if (s->avctx->thread_count > 1         &&
575         s->codec_id != AV_CODEC_ID_MPEG4      &&
576         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
577         s->codec_id != AV_CODEC_ID_MPEG2VIDEO &&
578         s->codec_id != AV_CODEC_ID_MJPEG      &&
579         (s->codec_id != AV_CODEC_ID_H263P)) {
580         av_log(avctx, AV_LOG_ERROR,
581                "multi threaded encoding not supported by codec\n");
582         return -1;
583     }
584
585     if (s->avctx->thread_count < 1) {
586         av_log(avctx, AV_LOG_ERROR,
587                "automatic thread number detection not supported by codec, "
588                "patch welcome\n");
589         return -1;
590     }
591
592     if (s->avctx->slices > 1 || s->avctx->thread_count > 1)
593         s->rtp_mode = 1;
594
595     if (s->avctx->thread_count > 1 && s->codec_id == AV_CODEC_ID_H263P)
596         s->h263_slice_structured = 1;
597
598     if (!avctx->time_base.den || !avctx->time_base.num) {
599         av_log(avctx, AV_LOG_ERROR, "framerate not set\n");
600         return -1;
601     }
602
603     i = (INT_MAX / 2 + 128) >> 8;
604     if (avctx->mb_threshold >= i) {
605         av_log(avctx, AV_LOG_ERROR, "mb_threshold too large, max is %d\n",
606                i - 1);
607         return -1;
608     }
609
610     if (avctx->b_frame_strategy && (avctx->flags & CODEC_FLAG_PASS2)) {
611         av_log(avctx, AV_LOG_INFO,
612                "notice: b_frame_strategy only affects the first pass\n");
613         avctx->b_frame_strategy = 0;
614     }
615
616     i = av_gcd(avctx->time_base.den, avctx->time_base.num);
617     if (i > 1) {
618         av_log(avctx, AV_LOG_INFO, "removing common factors from framerate\n");
619         avctx->time_base.den /= i;
620         avctx->time_base.num /= i;
621         //return -1;
622     }
623
624     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG1VIDEO || s->codec_id == AV_CODEC_ID_MPEG2VIDEO || s->codec_id == AV_CODEC_ID_MJPEG || s->codec_id==AV_CODEC_ID_AMV) {
625         // (a + x * 3 / 8) / x
626         s->intra_quant_bias = 3 << (QUANT_BIAS_SHIFT - 3);
627         s->inter_quant_bias = 0;
628     } else {
629         s->intra_quant_bias = 0;
630         // (a - x / 4) / x
631         s->inter_quant_bias = -(1 << (QUANT_BIAS_SHIFT - 2));
632     }
633
634     if (avctx->qmin > avctx->qmax || avctx->qmin <= 0) {
635         av_log(avctx, AV_LOG_ERROR, "qmin and or qmax are invalid, they must be 0 < min <= max\n");
636         return AVERROR(EINVAL);
637     }
638
639     if (avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
640         s->intra_quant_bias = avctx->intra_quant_bias;
641     if (avctx->inter_quant_bias != FF_DEFAULT_QUANT_BIAS)
642         s->inter_quant_bias = avctx->inter_quant_bias;
643
644     av_log(avctx, AV_LOG_DEBUG, "intra_quant_bias = %d inter_quant_bias = %d\n",s->intra_quant_bias,s->inter_quant_bias);
645
646     if (avctx->codec_id == AV_CODEC_ID_MPEG4 &&
647         s->avctx->time_base.den > (1 << 16) - 1) {
648         av_log(avctx, AV_LOG_ERROR,
649                "timebase %d/%d not supported by MPEG 4 standard, "
650                "the maximum admitted value for the timebase denominator "
651                "is %d\n", s->avctx->time_base.num, s->avctx->time_base.den,
652                (1 << 16) - 1);
653         return -1;
654     }
655     s->time_increment_bits = av_log2(s->avctx->time_base.den - 1) + 1;
656
657     switch (avctx->codec->id) {
658     case AV_CODEC_ID_MPEG1VIDEO:
659         s->out_format = FMT_MPEG1;
660         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
661         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
662         break;
663     case AV_CODEC_ID_MPEG2VIDEO:
664         s->out_format = FMT_MPEG1;
665         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
666         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
667         s->rtp_mode   = 1;
668         break;
669     case AV_CODEC_ID_MJPEG:
670     case AV_CODEC_ID_AMV:
671         s->out_format = FMT_MJPEG;
672         s->intra_only = 1; /* force intra only for jpeg */
673         if (!CONFIG_MJPEG_ENCODER ||
674             ff_mjpeg_encode_init(s) < 0)
675             return -1;
676         avctx->delay = 0;
677         s->low_delay = 1;
678         break;
679     case AV_CODEC_ID_H261:
680         if (!CONFIG_H261_ENCODER)
681             return -1;
682         if (ff_h261_get_picture_format(s->width, s->height) < 0) {
683             av_log(avctx, AV_LOG_ERROR,
684                    "The specified picture size of %dx%d is not valid for the "
685                    "H.261 codec.\nValid sizes are 176x144, 352x288\n",
686                     s->width, s->height);
687             return -1;
688         }
689         s->out_format = FMT_H261;
690         avctx->delay  = 0;
691         s->low_delay  = 1;
692         break;
693     case AV_CODEC_ID_H263:
694         if (!CONFIG_H263_ENCODER)
695             return -1;
696         if (ff_match_2uint16(ff_h263_format, FF_ARRAY_ELEMS(ff_h263_format),
697                              s->width, s->height) == 8) {
698             av_log(avctx, AV_LOG_ERROR,
699                    "The specified picture size of %dx%d is not valid for "
700                    "the H.263 codec.\nValid sizes are 128x96, 176x144, "
701                    "352x288, 704x576, and 1408x1152. "
702                    "Try H.263+.\n", s->width, s->height);
703             return -1;
704         }
705         s->out_format = FMT_H263;
706         avctx->delay  = 0;
707         s->low_delay  = 1;
708         break;
709     case AV_CODEC_ID_H263P:
710         s->out_format = FMT_H263;
711         s->h263_plus  = 1;
712         /* Fx */
713         s->h263_aic        = (avctx->flags & CODEC_FLAG_AC_PRED) ? 1 : 0;
714         s->modified_quant  = s->h263_aic;
715         s->loop_filter     = (avctx->flags & CODEC_FLAG_LOOP_FILTER) ? 1 : 0;
716         s->unrestricted_mv = s->obmc || s->loop_filter || s->umvplus;
717
718         /* /Fx */
719         /* These are just to be sure */
720         avctx->delay = 0;
721         s->low_delay = 1;
722         break;
723     case AV_CODEC_ID_FLV1:
724         s->out_format      = FMT_H263;
725         s->h263_flv        = 2; /* format = 1; 11-bit codes */
726         s->unrestricted_mv = 1;
727         s->rtp_mode  = 0; /* don't allow GOB */
728         avctx->delay = 0;
729         s->low_delay = 1;
730         break;
731     case AV_CODEC_ID_RV10:
732         s->out_format = FMT_H263;
733         avctx->delay  = 0;
734         s->low_delay  = 1;
735         break;
736     case AV_CODEC_ID_RV20:
737         s->out_format      = FMT_H263;
738         avctx->delay       = 0;
739         s->low_delay       = 1;
740         s->modified_quant  = 1;
741         s->h263_aic        = 1;
742         s->h263_plus       = 1;
743         s->loop_filter     = 1;
744         s->unrestricted_mv = 0;
745         break;
746     case AV_CODEC_ID_MPEG4:
747         s->out_format      = FMT_H263;
748         s->h263_pred       = 1;
749         s->unrestricted_mv = 1;
750         s->low_delay       = s->max_b_frames ? 0 : 1;
751         avctx->delay       = s->low_delay ? 0 : (s->max_b_frames + 1);
752         break;
753     case AV_CODEC_ID_MSMPEG4V2:
754         s->out_format      = FMT_H263;
755         s->h263_pred       = 1;
756         s->unrestricted_mv = 1;
757         s->msmpeg4_version = 2;
758         avctx->delay       = 0;
759         s->low_delay       = 1;
760         break;
761     case AV_CODEC_ID_MSMPEG4V3:
762         s->out_format        = FMT_H263;
763         s->h263_pred         = 1;
764         s->unrestricted_mv   = 1;
765         s->msmpeg4_version   = 3;
766         s->flipflop_rounding = 1;
767         avctx->delay         = 0;
768         s->low_delay         = 1;
769         break;
770     case AV_CODEC_ID_WMV1:
771         s->out_format        = FMT_H263;
772         s->h263_pred         = 1;
773         s->unrestricted_mv   = 1;
774         s->msmpeg4_version   = 4;
775         s->flipflop_rounding = 1;
776         avctx->delay         = 0;
777         s->low_delay         = 1;
778         break;
779     case AV_CODEC_ID_WMV2:
780         s->out_format        = FMT_H263;
781         s->h263_pred         = 1;
782         s->unrestricted_mv   = 1;
783         s->msmpeg4_version   = 5;
784         s->flipflop_rounding = 1;
785         avctx->delay         = 0;
786         s->low_delay         = 1;
787         break;
788     default:
789         return -1;
790     }
791
792     avctx->has_b_frames = !s->low_delay;
793
794     s->encoding = 1;
795
796     s->progressive_frame    =
797     s->progressive_sequence = !(avctx->flags & (CODEC_FLAG_INTERLACED_DCT |
798                                                 CODEC_FLAG_INTERLACED_ME) ||
799                                 s->alternate_scan);
800
801     /* init */
802     if (ff_MPV_common_init(s) < 0)
803         return -1;
804
805     s->avctx->coded_frame = &s->current_picture.f;
806
807     if (s->msmpeg4_version) {
808         FF_ALLOCZ_OR_GOTO(s->avctx, s->ac_stats,
809                           2 * 2 * (MAX_LEVEL + 1) *
810                           (MAX_RUN + 1) * 2 * sizeof(int), fail);
811     }
812     FF_ALLOCZ_OR_GOTO(s->avctx, s->avctx->stats_out, 256, fail);
813
814     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix,   64 * 32 * sizeof(int), fail);
815     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_chroma_intra_matrix, 64 * 32 * sizeof(int), fail);
816     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix,   64 * 32 * sizeof(int), fail);
817     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
818     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_chroma_intra_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
819     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
820     FF_ALLOCZ_OR_GOTO(s->avctx, s->input_picture,
821                       MAX_PICTURE_COUNT * sizeof(Picture *), fail);
822     FF_ALLOCZ_OR_GOTO(s->avctx, s->reordered_input_picture,
823                       MAX_PICTURE_COUNT * sizeof(Picture *), fail);
824
825     if (s->avctx->noise_reduction) {
826         FF_ALLOCZ_OR_GOTO(s->avctx, s->dct_offset,
827                           2 * 64 * sizeof(uint16_t), fail);
828     }
829
830     ff_dct_encode_init(s);
831
832     if ((CONFIG_H263P_ENCODER || CONFIG_RV20_ENCODER) && s->modified_quant)
833         s->chroma_qscale_table = ff_h263_chroma_qscale_table;
834
835     s->quant_precision = 5;
836
837     ff_set_cmp(&s->dsp, s->dsp.ildct_cmp, s->avctx->ildct_cmp);
838     ff_set_cmp(&s->dsp, s->dsp.frame_skip_cmp, s->avctx->frame_skip_cmp);
839
840     if (CONFIG_H261_ENCODER && s->out_format == FMT_H261)
841         ff_h261_encode_init(s);
842     if (CONFIG_H263_ENCODER && s->out_format == FMT_H263)
843         ff_h263_encode_init(s);
844     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
845         ff_msmpeg4_encode_init(s);
846     if ((CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
847         && s->out_format == FMT_MPEG1)
848         ff_mpeg1_encode_init(s);
849
850     /* init q matrix */
851     for (i = 0; i < 64; i++) {
852         int j = s->dsp.idct_permutation[i];
853         if (CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4 &&
854             s->mpeg_quant) {
855             s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
856             s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
857         } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
858             s->intra_matrix[j] =
859             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
860         } else {
861             /* mpeg1/2 */
862             s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
863             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
864         }
865         if (s->avctx->intra_matrix)
866             s->intra_matrix[j] = s->avctx->intra_matrix[i];
867         if (s->avctx->inter_matrix)
868             s->inter_matrix[j] = s->avctx->inter_matrix[i];
869     }
870
871     /* precompute matrix */
872     /* for mjpeg, we do include qscale in the matrix */
873     if (s->out_format != FMT_MJPEG) {
874         ff_convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
875                           s->intra_matrix, s->intra_quant_bias, avctx->qmin,
876                           31, 1);
877         ff_convert_matrix(&s->dsp, s->q_inter_matrix, s->q_inter_matrix16,
878                           s->inter_matrix, s->inter_quant_bias, avctx->qmin,
879                           31, 0);
880     }
881
882     if (ff_rate_control_init(s) < 0)
883         return -1;
884
885 #if FF_API_ERROR_RATE
886     FF_DISABLE_DEPRECATION_WARNINGS
887     if (avctx->error_rate)
888         s->error_rate = avctx->error_rate;
889     FF_ENABLE_DEPRECATION_WARNINGS;
890 #endif
891
892     if (avctx->b_frame_strategy == 2) {
893         for (i = 0; i < s->max_b_frames + 2; i++) {
894             s->tmp_frames[i] = av_frame_alloc();
895             if (!s->tmp_frames[i])
896                 return AVERROR(ENOMEM);
897
898             s->tmp_frames[i]->format = AV_PIX_FMT_YUV420P;
899             s->tmp_frames[i]->width  = s->width  >> avctx->brd_scale;
900             s->tmp_frames[i]->height = s->height >> avctx->brd_scale;
901
902             ret = av_frame_get_buffer(s->tmp_frames[i], 32);
903             if (ret < 0)
904                 return ret;
905         }
906     }
907
908     return 0;
909 fail:
910     ff_MPV_encode_end(avctx);
911     return AVERROR_UNKNOWN;
912 }
913
914 av_cold int ff_MPV_encode_end(AVCodecContext *avctx)
915 {
916     MpegEncContext *s = avctx->priv_data;
917     int i;
918
919     ff_rate_control_uninit(s);
920
921     ff_MPV_common_end(s);
922     if (CONFIG_MJPEG_ENCODER &&
923         s->out_format == FMT_MJPEG)
924         ff_mjpeg_encode_close(s);
925
926     av_freep(&avctx->extradata);
927
928     for (i = 0; i < FF_ARRAY_ELEMS(s->tmp_frames); i++)
929         av_frame_free(&s->tmp_frames[i]);
930
931     ff_free_picture_tables(&s->new_picture);
932     ff_mpeg_unref_picture(s, &s->new_picture);
933
934     av_freep(&s->avctx->stats_out);
935     av_freep(&s->ac_stats);
936
937     if(s->q_chroma_intra_matrix   != s->q_intra_matrix  ) av_freep(&s->q_chroma_intra_matrix);
938     if(s->q_chroma_intra_matrix16 != s->q_intra_matrix16) av_freep(&s->q_chroma_intra_matrix16);
939     s->q_chroma_intra_matrix=   NULL;
940     s->q_chroma_intra_matrix16= NULL;
941     av_freep(&s->q_intra_matrix);
942     av_freep(&s->q_inter_matrix);
943     av_freep(&s->q_intra_matrix16);
944     av_freep(&s->q_inter_matrix16);
945     av_freep(&s->input_picture);
946     av_freep(&s->reordered_input_picture);
947     av_freep(&s->dct_offset);
948
949     return 0;
950 }
951
/**
 * Sum of absolute errors of a 16x16 block against a constant value.
 *
 * @param src    top-left pixel of the block (only read, never written)
 * @param ref    value every pixel of the block is compared with
 * @param stride distance between the starts of two consecutive rows
 * @return sum over all 256 pixels of |pixel - ref|
 */
static int get_sae(const uint8_t *src, int ref, int stride)
{
    int x, y;
    int acc = 0;

    for (y = 0; y < 16; y++) {
        const uint8_t *row = src + y * stride;

        for (x = 0; x < 16; x++) {
            const int diff = row[x] - ref;

            /* absolute difference */
            acc += diff >= 0 ? diff : -diff;
        }
    }

    return acc;
}
965
966 static int get_intra_count(MpegEncContext *s, uint8_t *src,
967                            uint8_t *ref, int stride)
968 {
969     int x, y, w, h;
970     int acc = 0;
971
972     w = s->width  & ~15;
973     h = s->height & ~15;
974
975     for (y = 0; y < h; y += 16) {
976         for (x = 0; x < w; x += 16) {
977             int offset = x + y * stride;
978             int sad  = s->dsp.sad[0](NULL, src + offset, ref + offset, stride,
979                                      16);
980             int mean = (s->dsp.pix_sum(src + offset, stride) + 128) >> 8;
981             int sae  = get_sae(src + offset, mean, stride);
982
983             acc += sae + 500 < sad;
984         }
985     }
986     return acc;
987 }
988
989
990 static int load_input_picture(MpegEncContext *s, const AVFrame *pic_arg)
991 {
992     Picture *pic = NULL;
993     int64_t pts;
994     int i, display_picture_number = 0, ret;
995     const int encoding_delay = s->max_b_frames ? s->max_b_frames :
996                                                  (s->low_delay ? 0 : 1);
997     int direct = 1;
998
999     if (pic_arg) {
1000         pts = pic_arg->pts;
1001         display_picture_number = s->input_picture_number++;
1002
1003         if (pts != AV_NOPTS_VALUE) {
1004             if (s->user_specified_pts != AV_NOPTS_VALUE) {
1005                 int64_t last = s->user_specified_pts;
1006
1007                 if (pts <= last) {
1008                     av_log(s->avctx, AV_LOG_ERROR,
1009                            "Invalid pts (%"PRId64") <= last (%"PRId64")\n",
1010                            pts, last);
1011                     return AVERROR(EINVAL);
1012                 }
1013
1014                 if (!s->low_delay && display_picture_number == 1)
1015                     s->dts_delta = pts - last;
1016             }
1017             s->user_specified_pts = pts;
1018         } else {
1019             if (s->user_specified_pts != AV_NOPTS_VALUE) {
1020                 s->user_specified_pts =
1021                 pts = s->user_specified_pts + 1;
1022                 av_log(s->avctx, AV_LOG_INFO,
1023                        "Warning: AVFrame.pts=? trying to guess (%"PRId64")\n",
1024                        pts);
1025             } else {
1026                 pts = display_picture_number;
1027             }
1028         }
1029     }
1030
1031     if (pic_arg) {
1032         if (!pic_arg->buf[0])
1033             direct = 0;
1034         if (pic_arg->linesize[0] != s->linesize)
1035             direct = 0;
1036         if (pic_arg->linesize[1] != s->uvlinesize)
1037             direct = 0;
1038         if (pic_arg->linesize[2] != s->uvlinesize)
1039             direct = 0;
1040         if ((s->width & 15) || (s->height & 15))
1041             direct = 0;
1042         if (((intptr_t)(pic_arg->data[0])) & (STRIDE_ALIGN-1))
1043             direct = 0;
1044         if (s->linesize & (STRIDE_ALIGN-1))
1045             direct = 0;
1046
1047         av_dlog(s->avctx, "%d %d %td %td\n", pic_arg->linesize[0],
1048                 pic_arg->linesize[1], s->linesize, s->uvlinesize);
1049
1050         if (direct) {
1051             i = ff_find_unused_picture(s, 1);
1052             if (i < 0)
1053                 return i;
1054
1055             pic = &s->picture[i];
1056             pic->reference = 3;
1057
1058             if ((ret = av_frame_ref(&pic->f, pic_arg)) < 0)
1059                 return ret;
1060             if (ff_alloc_picture(s, pic, 1) < 0) {
1061                 return -1;
1062             }
1063         } else {
1064             i = ff_find_unused_picture(s, 0);
1065             if (i < 0)
1066                 return i;
1067
1068             pic = &s->picture[i];
1069             pic->reference = 3;
1070
1071             if (ff_alloc_picture(s, pic, 0) < 0) {
1072                 return -1;
1073             }
1074
1075             if (pic->f.data[0] + INPLACE_OFFSET == pic_arg->data[0] &&
1076                 pic->f.data[1] + INPLACE_OFFSET == pic_arg->data[1] &&
1077                 pic->f.data[2] + INPLACE_OFFSET == pic_arg->data[2]) {
1078                 // empty
1079             } else {
1080                 int h_chroma_shift, v_chroma_shift;
1081                 av_pix_fmt_get_chroma_sub_sample(s->avctx->pix_fmt,
1082                                                  &h_chroma_shift,
1083                                                  &v_chroma_shift);
1084
1085                 for (i = 0; i < 3; i++) {
1086                     int src_stride = pic_arg->linesize[i];
1087                     int dst_stride = i ? s->uvlinesize : s->linesize;
1088                     int h_shift = i ? h_chroma_shift : 0;
1089                     int v_shift = i ? v_chroma_shift : 0;
1090                     int w = s->width  >> h_shift;
1091                     int h = s->height >> v_shift;
1092                     uint8_t *src = pic_arg->data[i];
1093                     uint8_t *dst = pic->f.data[i];
1094
1095                     if (s->codec_id == AV_CODEC_ID_AMV && !(s->avctx->flags & CODEC_FLAG_EMU_EDGE)) {
1096                         h = ((s->height + 15)/16*16) >> v_shift;
1097                     }
1098
1099                     if (!s->avctx->rc_buffer_size)
1100                         dst += INPLACE_OFFSET;
1101
1102                     if (src_stride == dst_stride)
1103                         memcpy(dst, src, src_stride * h);
1104                     else {
1105                         int h2 = h;
1106                         uint8_t *dst2 = dst;
1107                         while (h2--) {
1108                             memcpy(dst2, src, w);
1109                             dst2 += dst_stride;
1110                             src += src_stride;
1111                         }
1112                     }
1113                     if ((s->width & 15) || (s->height & 15)) {
1114                         s->dsp.draw_edges(dst, dst_stride,
1115                                           w, h,
1116                                           16>>h_shift,
1117                                           16>>v_shift,
1118                                           EDGE_BOTTOM);
1119                     }
1120                 }
1121             }
1122         }
1123         ret = av_frame_copy_props(&pic->f, pic_arg);
1124         if (ret < 0)
1125             return ret;
1126
1127         pic->f.display_picture_number = display_picture_number;
1128         pic->f.pts = pts; // we set this here to avoid modifiying pic_arg
1129     }
1130
1131     /* shift buffer entries */
1132     for (i = 1; i < MAX_PICTURE_COUNT /*s->encoding_delay + 1*/; i++)
1133         s->input_picture[i - 1] = s->input_picture[i];
1134
1135     s->input_picture[encoding_delay] = (Picture*) pic;
1136
1137     return 0;
1138 }
1139
1140 static int skip_check(MpegEncContext *s, Picture *p, Picture *ref)
1141 {
1142     int x, y, plane;
1143     int score = 0;
1144     int64_t score64 = 0;
1145
1146     for (plane = 0; plane < 3; plane++) {
1147         const int stride = p->f.linesize[plane];
1148         const int bw = plane ? 1 : 2;
1149         for (y = 0; y < s->mb_height * bw; y++) {
1150             for (x = 0; x < s->mb_width * bw; x++) {
1151                 int off = p->shared ? 0 : 16;
1152                 uint8_t *dptr = p->f.data[plane] + 8 * (x + y * stride) + off;
1153                 uint8_t *rptr = ref->f.data[plane] + 8 * (x + y * stride);
1154                 int v   = s->dsp.frame_skip_cmp[1](s, dptr, rptr, stride, 8);
1155
1156                 switch (FFABS(s->avctx->frame_skip_exp)) {
1157                 case 0: score    =  FFMAX(score, v);          break;
1158                 case 1: score   += FFABS(v);                  break;
1159                 case 2: score64 += v * (int64_t)v;                       break;
1160                 case 3: score64 += FFABS(v * (int64_t)v * v);            break;
1161                 case 4: score64 += (v * (int64_t)v) * (v * (int64_t)v);  break;
1162                 }
1163             }
1164         }
1165     }
1166     emms_c();
1167
1168     if (score)
1169         score64 = score;
1170     if (s->avctx->frame_skip_exp < 0)
1171         score64 = pow(score64 / (double)(s->mb_width * s->mb_height),
1172                       -1.0/s->avctx->frame_skip_exp);
1173
1174     if (score64 < s->avctx->frame_skip_threshold)
1175         return 1;
1176     if (score64 < ((s->avctx->frame_skip_factor * (int64_t)s->lambda) >> 8))
1177         return 1;
1178     return 0;
1179 }
1180
1181 static int encode_frame(AVCodecContext *c, AVFrame *frame)
1182 {
1183     AVPacket pkt = { 0 };
1184     int ret, got_output;
1185
1186     av_init_packet(&pkt);
1187     ret = avcodec_encode_video2(c, &pkt, frame, &got_output);
1188     if (ret < 0)
1189         return ret;
1190
1191     ret = pkt.size;
1192     av_free_packet(&pkt);
1193     return ret;
1194 }
1195
1196 static int estimate_best_b_count(MpegEncContext *s)
1197 {
1198     AVCodec *codec    = avcodec_find_encoder(s->avctx->codec_id);
1199     AVCodecContext *c = avcodec_alloc_context3(NULL);
1200     const int scale = s->avctx->brd_scale;
1201     int i, j, out_size, p_lambda, b_lambda, lambda2;
1202     int64_t best_rd  = INT64_MAX;
1203     int best_b_count = -1;
1204
1205     av_assert0(scale >= 0 && scale <= 3);
1206
1207     //emms_c();
1208     //s->next_picture_ptr->quality;
1209     p_lambda = s->last_lambda_for[AV_PICTURE_TYPE_P];
1210     //p_lambda * FFABS(s->avctx->b_quant_factor) + s->avctx->b_quant_offset;
1211     b_lambda = s->last_lambda_for[AV_PICTURE_TYPE_B];
1212     if (!b_lambda) // FIXME we should do this somewhere else
1213         b_lambda = p_lambda;
1214     lambda2  = (b_lambda * b_lambda + (1 << FF_LAMBDA_SHIFT) / 2) >>
1215                FF_LAMBDA_SHIFT;
1216
1217     c->width        = s->width  >> scale;
1218     c->height       = s->height >> scale;
1219     c->flags        = CODEC_FLAG_QSCALE | CODEC_FLAG_PSNR |
1220                       CODEC_FLAG_INPUT_PRESERVED;
1221     c->flags       |= s->avctx->flags & CODEC_FLAG_QPEL;
1222     c->mb_decision  = s->avctx->mb_decision;
1223     c->me_cmp       = s->avctx->me_cmp;
1224     c->mb_cmp       = s->avctx->mb_cmp;
1225     c->me_sub_cmp   = s->avctx->me_sub_cmp;
1226     c->pix_fmt      = AV_PIX_FMT_YUV420P;
1227     c->time_base    = s->avctx->time_base;
1228     c->max_b_frames = s->max_b_frames;
1229
1230     if (avcodec_open2(c, codec, NULL) < 0)
1231         return -1;
1232
1233     for (i = 0; i < s->max_b_frames + 2; i++) {
1234         Picture pre_input, *pre_input_ptr = i ? s->input_picture[i - 1] :
1235                                                 s->next_picture_ptr;
1236
1237         if (pre_input_ptr && (!i || s->input_picture[i - 1])) {
1238             pre_input = *pre_input_ptr;
1239
1240             if (!pre_input.shared && i) {
1241                 pre_input.f.data[0] += INPLACE_OFFSET;
1242                 pre_input.f.data[1] += INPLACE_OFFSET;
1243                 pre_input.f.data[2] += INPLACE_OFFSET;
1244             }
1245
1246             s->dsp.shrink[scale](s->tmp_frames[i]->data[0], s->tmp_frames[i]->linesize[0],
1247                                  pre_input.f.data[0], pre_input.f.linesize[0],
1248                                  c->width,      c->height);
1249             s->dsp.shrink[scale](s->tmp_frames[i]->data[1], s->tmp_frames[i]->linesize[1],
1250                                  pre_input.f.data[1], pre_input.f.linesize[1],
1251                                  c->width >> 1, c->height >> 1);
1252             s->dsp.shrink[scale](s->tmp_frames[i]->data[2], s->tmp_frames[i]->linesize[2],
1253                                  pre_input.f.data[2], pre_input.f.linesize[2],
1254                                  c->width >> 1, c->height >> 1);
1255         }
1256     }
1257
1258     for (j = 0; j < s->max_b_frames + 1; j++) {
1259         int64_t rd = 0;
1260
1261         if (!s->input_picture[j])
1262             break;
1263
1264         c->error[0] = c->error[1] = c->error[2] = 0;
1265
1266         s->tmp_frames[0]->pict_type = AV_PICTURE_TYPE_I;
1267         s->tmp_frames[0]->quality   = 1 * FF_QP2LAMBDA;
1268
1269         out_size = encode_frame(c, s->tmp_frames[0]);
1270
1271         //rd += (out_size * lambda2) >> FF_LAMBDA_SHIFT;
1272
1273         for (i = 0; i < s->max_b_frames + 1; i++) {
1274             int is_p = i % (j + 1) == j || i == s->max_b_frames;
1275
1276             s->tmp_frames[i + 1]->pict_type = is_p ?
1277                                      AV_PICTURE_TYPE_P : AV_PICTURE_TYPE_B;
1278             s->tmp_frames[i + 1]->quality   = is_p ? p_lambda : b_lambda;
1279
1280             out_size = encode_frame(c, s->tmp_frames[i + 1]);
1281
1282             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1283         }
1284
1285         /* get the delayed frames */
1286         while (out_size) {
1287             out_size = encode_frame(c, NULL);
1288             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1289         }
1290
1291         rd += c->error[0] + c->error[1] + c->error[2];
1292
1293         if (rd < best_rd) {
1294             best_rd = rd;
1295             best_b_count = j;
1296         }
1297     }
1298
1299     avcodec_close(c);
1300     av_freep(&c);
1301
1302     return best_b_count;
1303 }
1304
1305 static int select_input_picture(MpegEncContext *s)
1306 {
1307     int i, ret;
1308
1309     for (i = 1; i < MAX_PICTURE_COUNT; i++)
1310         s->reordered_input_picture[i - 1] = s->reordered_input_picture[i];
1311     s->reordered_input_picture[MAX_PICTURE_COUNT - 1] = NULL;
1312
1313     /* set next picture type & ordering */
1314     if (s->reordered_input_picture[0] == NULL && s->input_picture[0]) {
1315         if (s->avctx->frame_skip_threshold || s->avctx->frame_skip_factor) {
1316             if (s->picture_in_gop_number < s->gop_size &&
1317                 s->next_picture_ptr &&
1318                 skip_check(s, s->input_picture[0], s->next_picture_ptr)) {
1319                 // FIXME check that te gop check above is +-1 correct
1320                 av_frame_unref(&s->input_picture[0]->f);
1321
1322                 ff_vbv_update(s, 0);
1323
1324                 goto no_output_pic;
1325             }
1326         }
1327
1328         if (/*s->picture_in_gop_number >= s->gop_size ||*/
1329             s->next_picture_ptr == NULL || s->intra_only) {
1330             s->reordered_input_picture[0] = s->input_picture[0];
1331             s->reordered_input_picture[0]->f.pict_type = AV_PICTURE_TYPE_I;
1332             s->reordered_input_picture[0]->f.coded_picture_number =
1333                 s->coded_picture_number++;
1334         } else {
1335             int b_frames;
1336
1337             if (s->flags & CODEC_FLAG_PASS2) {
1338                 for (i = 0; i < s->max_b_frames + 1; i++) {
1339                     int pict_num = s->input_picture[0]->f.display_picture_number + i;
1340
1341                     if (pict_num >= s->rc_context.num_entries)
1342                         break;
1343                     if (!s->input_picture[i]) {
1344                         s->rc_context.entry[pict_num - 1].new_pict_type = AV_PICTURE_TYPE_P;
1345                         break;
1346                     }
1347
1348                     s->input_picture[i]->f.pict_type =
1349                         s->rc_context.entry[pict_num].new_pict_type;
1350                 }
1351             }
1352
1353             if (s->avctx->b_frame_strategy == 0) {
1354                 b_frames = s->max_b_frames;
1355                 while (b_frames && !s->input_picture[b_frames])
1356                     b_frames--;
1357             } else if (s->avctx->b_frame_strategy == 1) {
1358                 for (i = 1; i < s->max_b_frames + 1; i++) {
1359                     if (s->input_picture[i] &&
1360                         s->input_picture[i]->b_frame_score == 0) {
1361                         s->input_picture[i]->b_frame_score =
1362                             get_intra_count(s,
1363                                             s->input_picture[i    ]->f.data[0],
1364                                             s->input_picture[i - 1]->f.data[0],
1365                                             s->linesize) + 1;
1366                     }
1367                 }
1368                 for (i = 0; i < s->max_b_frames + 1; i++) {
1369                     if (s->input_picture[i] == NULL ||
1370                         s->input_picture[i]->b_frame_score - 1 >
1371                             s->mb_num / s->avctx->b_sensitivity)
1372                         break;
1373                 }
1374
1375                 b_frames = FFMAX(0, i - 1);
1376
1377                 /* reset scores */
1378                 for (i = 0; i < b_frames + 1; i++) {
1379                     s->input_picture[i]->b_frame_score = 0;
1380                 }
1381             } else if (s->avctx->b_frame_strategy == 2) {
1382                 b_frames = estimate_best_b_count(s);
1383             } else {
1384                 av_log(s->avctx, AV_LOG_ERROR, "illegal b frame strategy\n");
1385                 b_frames = 0;
1386             }
1387
1388             emms_c();
1389
1390             for (i = b_frames - 1; i >= 0; i--) {
1391                 int type = s->input_picture[i]->f.pict_type;
1392                 if (type && type != AV_PICTURE_TYPE_B)
1393                     b_frames = i;
1394             }
1395             if (s->input_picture[b_frames]->f.pict_type == AV_PICTURE_TYPE_B &&
1396                 b_frames == s->max_b_frames) {
1397                 av_log(s->avctx, AV_LOG_ERROR,
1398                        "warning, too many b frames in a row\n");
1399             }
1400
1401             if (s->picture_in_gop_number + b_frames >= s->gop_size) {
1402                 if ((s->mpv_flags & FF_MPV_FLAG_STRICT_GOP) &&
1403                     s->gop_size > s->picture_in_gop_number) {
1404                     b_frames = s->gop_size - s->picture_in_gop_number - 1;
1405                 } else {
1406                     if (s->flags & CODEC_FLAG_CLOSED_GOP)
1407                         b_frames = 0;
1408                     s->input_picture[b_frames]->f.pict_type = AV_PICTURE_TYPE_I;
1409                 }
1410             }
1411
1412             if ((s->flags & CODEC_FLAG_CLOSED_GOP) && b_frames &&
1413                 s->input_picture[b_frames]->f.pict_type == AV_PICTURE_TYPE_I)
1414                 b_frames--;
1415
1416             s->reordered_input_picture[0] = s->input_picture[b_frames];
1417             if (s->reordered_input_picture[0]->f.pict_type != AV_PICTURE_TYPE_I)
1418                 s->reordered_input_picture[0]->f.pict_type = AV_PICTURE_TYPE_P;
1419             s->reordered_input_picture[0]->f.coded_picture_number =
1420                 s->coded_picture_number++;
1421             for (i = 0; i < b_frames; i++) {
1422                 s->reordered_input_picture[i + 1] = s->input_picture[i];
1423                 s->reordered_input_picture[i + 1]->f.pict_type =
1424                     AV_PICTURE_TYPE_B;
1425                 s->reordered_input_picture[i + 1]->f.coded_picture_number =
1426                     s->coded_picture_number++;
1427             }
1428         }
1429     }
1430 no_output_pic:
1431     if (s->reordered_input_picture[0]) {
1432         s->reordered_input_picture[0]->reference =
1433            s->reordered_input_picture[0]->f.pict_type !=
1434                AV_PICTURE_TYPE_B ? 3 : 0;
1435
1436         ff_mpeg_unref_picture(s, &s->new_picture);
1437         if ((ret = ff_mpeg_ref_picture(s, &s->new_picture, s->reordered_input_picture[0])))
1438             return ret;
1439
1440         if (s->reordered_input_picture[0]->shared || s->avctx->rc_buffer_size) {
1441             // input is a shared pix, so we can't modifiy it -> alloc a new
1442             // one & ensure that the shared one is reuseable
1443
1444             Picture *pic;
1445             int i = ff_find_unused_picture(s, 0);
1446             if (i < 0)
1447                 return i;
1448             pic = &s->picture[i];
1449
1450             pic->reference = s->reordered_input_picture[0]->reference;
1451             if (ff_alloc_picture(s, pic, 0) < 0) {
1452                 return -1;
1453             }
1454
1455             ret = av_frame_copy_props(&pic->f, &s->reordered_input_picture[0]->f);
1456             if (ret < 0)
1457                 return ret;
1458
1459             /* mark us unused / free shared pic */
1460             av_frame_unref(&s->reordered_input_picture[0]->f);
1461             s->reordered_input_picture[0]->shared = 0;
1462
1463             s->current_picture_ptr = pic;
1464         } else {
1465             // input is not a shared pix -> reuse buffer for current_pix
1466             s->current_picture_ptr = s->reordered_input_picture[0];
1467             for (i = 0; i < 4; i++) {
1468                 s->new_picture.f.data[i] += INPLACE_OFFSET;
1469             }
1470         }
1471         ff_mpeg_unref_picture(s, &s->current_picture);
1472         if ((ret = ff_mpeg_ref_picture(s, &s->current_picture,
1473                                        s->current_picture_ptr)) < 0)
1474             return ret;
1475
1476         s->picture_number = s->new_picture.f.display_picture_number;
1477     } else {
1478         ff_mpeg_unref_picture(s, &s->new_picture);
1479     }
1480     return 0;
1481 }
1482
1483 static void frame_end(MpegEncContext *s)
1484 {
1485     if (s->unrestricted_mv &&
1486         s->current_picture.reference &&
1487         !s->intra_only) {
1488         const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(s->avctx->pix_fmt);
1489         int hshift = desc->log2_chroma_w;
1490         int vshift = desc->log2_chroma_h;
1491         s->dsp.draw_edges(s->current_picture.f.data[0], s->current_picture.f.linesize[0],
1492                           s->h_edge_pos, s->v_edge_pos,
1493                           EDGE_WIDTH, EDGE_WIDTH,
1494                           EDGE_TOP | EDGE_BOTTOM);
1495         s->dsp.draw_edges(s->current_picture.f.data[1], s->current_picture.f.linesize[1],
1496                           s->h_edge_pos >> hshift, s->v_edge_pos >> vshift,
1497                           EDGE_WIDTH >> hshift, EDGE_WIDTH >> vshift,
1498                           EDGE_TOP | EDGE_BOTTOM);
1499         s->dsp.draw_edges(s->current_picture.f.data[2], s->current_picture.f.linesize[2],
1500                           s->h_edge_pos >> hshift, s->v_edge_pos >> vshift,
1501                           EDGE_WIDTH >> hshift, EDGE_WIDTH >> vshift,
1502                           EDGE_TOP | EDGE_BOTTOM);
1503     }
1504
1505     emms_c();
1506
1507     s->last_pict_type                 = s->pict_type;
1508     s->last_lambda_for [s->pict_type] = s->current_picture_ptr->f.quality;
1509     if (s->pict_type!= AV_PICTURE_TYPE_B)
1510         s->last_non_b_pict_type = s->pict_type;
1511
1512     s->avctx->coded_frame = &s->current_picture_ptr->f;
1513
1514 }
1515
1516 static void update_noise_reduction(MpegEncContext *s)
1517 {
1518     int intra, i;
1519
1520     for (intra = 0; intra < 2; intra++) {
1521         if (s->dct_count[intra] > (1 << 16)) {
1522             for (i = 0; i < 64; i++) {
1523                 s->dct_error_sum[intra][i] >>= 1;
1524             }
1525             s->dct_count[intra] >>= 1;
1526         }
1527
1528         for (i = 0; i < 64; i++) {
1529             s->dct_offset[intra][i] = (s->avctx->noise_reduction *
1530                                        s->dct_count[intra] +
1531                                        s->dct_error_sum[intra][i] / 2) /
1532                                       (s->dct_error_sum[intra][i] + 1);
1533         }
1534     }
1535 }
1536
1537 static int frame_start(MpegEncContext *s)
1538 {
1539     int ret;
1540
1541     /* mark & release old frames */
1542     if (s->pict_type != AV_PICTURE_TYPE_B && s->last_picture_ptr &&
1543         s->last_picture_ptr != s->next_picture_ptr &&
1544         s->last_picture_ptr->f.buf[0]) {
1545         ff_mpeg_unref_picture(s, s->last_picture_ptr);
1546     }
1547
1548     s->current_picture_ptr->f.pict_type = s->pict_type;
1549     s->current_picture_ptr->f.key_frame = s->pict_type == AV_PICTURE_TYPE_I;
1550
1551     ff_mpeg_unref_picture(s, &s->current_picture);
1552     if ((ret = ff_mpeg_ref_picture(s, &s->current_picture,
1553                                    s->current_picture_ptr)) < 0)
1554         return ret;
1555
1556     if (s->pict_type != AV_PICTURE_TYPE_B) {
1557         s->last_picture_ptr = s->next_picture_ptr;
1558         if (!s->droppable)
1559             s->next_picture_ptr = s->current_picture_ptr;
1560     }
1561
1562     if (s->last_picture_ptr) {
1563         ff_mpeg_unref_picture(s, &s->last_picture);
1564         if (s->last_picture_ptr->f.buf[0] &&
1565             (ret = ff_mpeg_ref_picture(s, &s->last_picture,
1566                                        s->last_picture_ptr)) < 0)
1567             return ret;
1568     }
1569     if (s->next_picture_ptr) {
1570         ff_mpeg_unref_picture(s, &s->next_picture);
1571         if (s->next_picture_ptr->f.buf[0] &&
1572             (ret = ff_mpeg_ref_picture(s, &s->next_picture,
1573                                        s->next_picture_ptr)) < 0)
1574             return ret;
1575     }
1576
1577     if (s->picture_structure!= PICT_FRAME) {
1578         int i;
1579         for (i = 0; i < 4; i++) {
1580             if (s->picture_structure == PICT_BOTTOM_FIELD) {
1581                 s->current_picture.f.data[i] +=
1582                     s->current_picture.f.linesize[i];
1583             }
1584             s->current_picture.f.linesize[i] *= 2;
1585             s->last_picture.f.linesize[i]    *= 2;
1586             s->next_picture.f.linesize[i]    *= 2;
1587         }
1588     }
1589
1590     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG2VIDEO) {
1591         s->dct_unquantize_intra = s->dct_unquantize_mpeg2_intra;
1592         s->dct_unquantize_inter = s->dct_unquantize_mpeg2_inter;
1593     } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
1594         s->dct_unquantize_intra = s->dct_unquantize_h263_intra;
1595         s->dct_unquantize_inter = s->dct_unquantize_h263_inter;
1596     } else {
1597         s->dct_unquantize_intra = s->dct_unquantize_mpeg1_intra;
1598         s->dct_unquantize_inter = s->dct_unquantize_mpeg1_inter;
1599     }
1600
1601     if (s->dct_error_sum) {
1602         av_assert2(s->avctx->noise_reduction && s->encoding);
1603         update_noise_reduction(s);
1604     }
1605
1606     return 0;
1607 }
1608
1609 int ff_MPV_encode_picture(AVCodecContext *avctx, AVPacket *pkt,
1610                           const AVFrame *pic_arg, int *got_packet)
1611 {
1612     MpegEncContext *s = avctx->priv_data;
1613     int i, stuffing_count, ret;
1614     int context_count = s->slice_context_count;
1615
1616     s->picture_in_gop_number++;
1617
1618     if (load_input_picture(s, pic_arg) < 0)
1619         return -1;
1620
1621     if (select_input_picture(s) < 0) {
1622         return -1;
1623     }
1624
1625     /* output? */
1626     if (s->new_picture.f.data[0]) {
1627         if ((ret = ff_alloc_packet2(avctx, pkt, s->mb_width*s->mb_height*(MAX_MB_BYTES+100)+10000)) < 0)
1628             return ret;
1629         if (s->mb_info) {
1630             s->mb_info_ptr = av_packet_new_side_data(pkt,
1631                                  AV_PKT_DATA_H263_MB_INFO,
1632                                  s->mb_width*s->mb_height*12);
1633             s->prev_mb_info = s->last_mb_info = s->mb_info_size = 0;
1634         }
1635
1636         for (i = 0; i < context_count; i++) {
1637             int start_y = s->thread_context[i]->start_mb_y;
1638             int   end_y = s->thread_context[i]->  end_mb_y;
1639             int h       = s->mb_height;
1640             uint8_t *start = pkt->data + (size_t)(((int64_t) pkt->size) * start_y / h);
1641             uint8_t *end   = pkt->data + (size_t)(((int64_t) pkt->size) *   end_y / h);
1642
1643             init_put_bits(&s->thread_context[i]->pb, start, end - start);
1644         }
1645
1646         s->pict_type = s->new_picture.f.pict_type;
1647         //emms_c();
1648         ret = frame_start(s);
1649         if (ret < 0)
1650             return ret;
1651 vbv_retry:
1652         if (encode_picture(s, s->picture_number) < 0)
1653             return -1;
1654
1655         avctx->header_bits = s->header_bits;
1656         avctx->mv_bits     = s->mv_bits;
1657         avctx->misc_bits   = s->misc_bits;
1658         avctx->i_tex_bits  = s->i_tex_bits;
1659         avctx->p_tex_bits  = s->p_tex_bits;
1660         avctx->i_count     = s->i_count;
1661         // FIXME f/b_count in avctx
1662         avctx->p_count     = s->mb_num - s->i_count - s->skip_count;
1663         avctx->skip_count  = s->skip_count;
1664
1665         frame_end(s);
1666
1667         if (CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG)
1668             ff_mjpeg_encode_picture_trailer(&s->pb, s->header_bits);
1669
1670         if (avctx->rc_buffer_size) {
1671             RateControlContext *rcc = &s->rc_context;
1672             int max_size = rcc->buffer_index * avctx->rc_max_available_vbv_use;
1673
1674             if (put_bits_count(&s->pb) > max_size &&
1675                 s->lambda < s->avctx->lmax) {
1676                 s->next_lambda = FFMAX(s->lambda + 1, s->lambda *
1677                                        (s->qscale + 1) / s->qscale);
1678                 if (s->adaptive_quant) {
1679                     int i;
1680                     for (i = 0; i < s->mb_height * s->mb_stride; i++)
1681                         s->lambda_table[i] =
1682                             FFMAX(s->lambda_table[i] + 1,
1683                                   s->lambda_table[i] * (s->qscale + 1) /
1684                                   s->qscale);
1685                 }
1686                 s->mb_skipped = 0;        // done in frame_start()
1687                 // done in encode_picture() so we must undo it
1688                 if (s->pict_type == AV_PICTURE_TYPE_P) {
1689                     if (s->flipflop_rounding          ||
1690                         s->codec_id == AV_CODEC_ID_H263P ||
1691                         s->codec_id == AV_CODEC_ID_MPEG4)
1692                         s->no_rounding ^= 1;
1693                 }
1694                 if (s->pict_type != AV_PICTURE_TYPE_B) {
1695                     s->time_base       = s->last_time_base;
1696                     s->last_non_b_time = s->time - s->pp_time;
1697                 }
1698                 for (i = 0; i < context_count; i++) {
1699                     PutBitContext *pb = &s->thread_context[i]->pb;
1700                     init_put_bits(pb, pb->buf, pb->buf_end - pb->buf);
1701                 }
1702                 goto vbv_retry;
1703             }
1704
1705             assert(s->avctx->rc_max_rate);
1706         }
1707
1708         if (s->flags & CODEC_FLAG_PASS1)
1709             ff_write_pass1_stats(s);
1710
1711         for (i = 0; i < 4; i++) {
1712             s->current_picture_ptr->f.error[i] = s->current_picture.f.error[i];
1713             avctx->error[i] += s->current_picture_ptr->f.error[i];
1714         }
1715
1716         if (s->flags & CODEC_FLAG_PASS1)
1717             assert(avctx->header_bits + avctx->mv_bits + avctx->misc_bits +
1718                    avctx->i_tex_bits + avctx->p_tex_bits ==
1719                        put_bits_count(&s->pb));
1720         flush_put_bits(&s->pb);
1721         s->frame_bits  = put_bits_count(&s->pb);
1722
1723         stuffing_count = ff_vbv_update(s, s->frame_bits);
1724         s->stuffing_bits = 8*stuffing_count;
1725         if (stuffing_count) {
1726             if (s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb) >> 3) <
1727                     stuffing_count + 50) {
1728                 av_log(s->avctx, AV_LOG_ERROR, "stuffing too large\n");
1729                 return -1;
1730             }
1731
1732             switch (s->codec_id) {
1733             case AV_CODEC_ID_MPEG1VIDEO:
1734             case AV_CODEC_ID_MPEG2VIDEO:
1735                 while (stuffing_count--) {
1736                     put_bits(&s->pb, 8, 0);
1737                 }
1738             break;
1739             case AV_CODEC_ID_MPEG4:
1740                 put_bits(&s->pb, 16, 0);
1741                 put_bits(&s->pb, 16, 0x1C3);
1742                 stuffing_count -= 4;
1743                 while (stuffing_count--) {
1744                     put_bits(&s->pb, 8, 0xFF);
1745                 }
1746             break;
1747             default:
1748                 av_log(s->avctx, AV_LOG_ERROR, "vbv buffer overflow\n");
1749             }
1750             flush_put_bits(&s->pb);
1751             s->frame_bits  = put_bits_count(&s->pb);
1752         }
1753
1754         /* update mpeg1/2 vbv_delay for CBR */
1755         if (s->avctx->rc_max_rate                          &&
1756             s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
1757             s->out_format == FMT_MPEG1                     &&
1758             90000LL * (avctx->rc_buffer_size - 1) <=
1759                 s->avctx->rc_max_rate * 0xFFFFLL) {
1760             int vbv_delay, min_delay;
1761             double inbits  = s->avctx->rc_max_rate *
1762                              av_q2d(s->avctx->time_base);
1763             int    minbits = s->frame_bits - 8 *
1764                              (s->vbv_delay_ptr - s->pb.buf - 1);
1765             double bits    = s->rc_context.buffer_index + minbits - inbits;
1766
1767             if (bits < 0)
1768                 av_log(s->avctx, AV_LOG_ERROR,
1769                        "Internal error, negative bits\n");
1770
1771             assert(s->repeat_first_field == 0);
1772
1773             vbv_delay = bits * 90000 / s->avctx->rc_max_rate;
1774             min_delay = (minbits * 90000LL + s->avctx->rc_max_rate - 1) /
1775                         s->avctx->rc_max_rate;
1776
1777             vbv_delay = FFMAX(vbv_delay, min_delay);
1778
1779             av_assert0(vbv_delay < 0xFFFF);
1780
1781             s->vbv_delay_ptr[0] &= 0xF8;
1782             s->vbv_delay_ptr[0] |= vbv_delay >> 13;
1783             s->vbv_delay_ptr[1]  = vbv_delay >> 5;
1784             s->vbv_delay_ptr[2] &= 0x07;
1785             s->vbv_delay_ptr[2] |= vbv_delay << 3;
1786             avctx->vbv_delay     = vbv_delay * 300;
1787         }
1788         s->total_bits     += s->frame_bits;
1789         avctx->frame_bits  = s->frame_bits;
1790
1791         pkt->pts = s->current_picture.f.pts;
1792         if (!s->low_delay && s->pict_type != AV_PICTURE_TYPE_B) {
1793             if (!s->current_picture.f.coded_picture_number)
1794                 pkt->dts = pkt->pts - s->dts_delta;
1795             else
1796                 pkt->dts = s->reordered_pts;
1797             s->reordered_pts = pkt->pts;
1798         } else
1799             pkt->dts = pkt->pts;
1800         if (s->current_picture.f.key_frame)
1801             pkt->flags |= AV_PKT_FLAG_KEY;
1802         if (s->mb_info)
1803             av_packet_shrink_side_data(pkt, AV_PKT_DATA_H263_MB_INFO, s->mb_info_size);
1804     } else {
1805         s->frame_bits = 0;
1806     }
1807
1808     /* release non-reference frames */
1809     for (i = 0; i < MAX_PICTURE_COUNT; i++) {
1810         if (!s->picture[i].reference)
1811             ff_mpeg_unref_picture(s, &s->picture[i]);
1812     }
1813
1814     assert((s->frame_bits & 7) == 0);
1815
1816     pkt->size = s->frame_bits / 8;
1817     *got_packet = !!pkt->size;
1818     return 0;
1819 }
1820
1821 static inline void dct_single_coeff_elimination(MpegEncContext *s,
1822                                                 int n, int threshold)
1823 {
1824     static const char tab[64] = {
1825         3, 2, 2, 1, 1, 1, 1, 1,
1826         1, 1, 1, 1, 1, 1, 1, 1,
1827         1, 1, 1, 1, 1, 1, 1, 1,
1828         0, 0, 0, 0, 0, 0, 0, 0,
1829         0, 0, 0, 0, 0, 0, 0, 0,
1830         0, 0, 0, 0, 0, 0, 0, 0,
1831         0, 0, 0, 0, 0, 0, 0, 0,
1832         0, 0, 0, 0, 0, 0, 0, 0
1833     };
1834     int score = 0;
1835     int run = 0;
1836     int i;
1837     int16_t *block = s->block[n];
1838     const int last_index = s->block_last_index[n];
1839     int skip_dc;
1840
1841     if (threshold < 0) {
1842         skip_dc = 0;
1843         threshold = -threshold;
1844     } else
1845         skip_dc = 1;
1846
1847     /* Are all we could set to zero already zero? */
1848     if (last_index <= skip_dc - 1)
1849         return;
1850
1851     for (i = 0; i <= last_index; i++) {
1852         const int j = s->intra_scantable.permutated[i];
1853         const int level = FFABS(block[j]);
1854         if (level == 1) {
1855             if (skip_dc && i == 0)
1856                 continue;
1857             score += tab[run];
1858             run = 0;
1859         } else if (level > 1) {
1860             return;
1861         } else {
1862             run++;
1863         }
1864     }
1865     if (score >= threshold)
1866         return;
1867     for (i = skip_dc; i <= last_index; i++) {
1868         const int j = s->intra_scantable.permutated[i];
1869         block[j] = 0;
1870     }
1871     if (block[0])
1872         s->block_last_index[n] = 0;
1873     else
1874         s->block_last_index[n] = -1;
1875 }
1876
1877 static inline void clip_coeffs(MpegEncContext *s, int16_t *block,
1878                                int last_index)
1879 {
1880     int i;
1881     const int maxlevel = s->max_qcoeff;
1882     const int minlevel = s->min_qcoeff;
1883     int overflow = 0;
1884
1885     if (s->mb_intra) {
1886         i = 1; // skip clipping of intra dc
1887     } else
1888         i = 0;
1889
1890     for (; i <= last_index; i++) {
1891         const int j = s->intra_scantable.permutated[i];
1892         int level = block[j];
1893
1894         if (level > maxlevel) {
1895             level = maxlevel;
1896             overflow++;
1897         } else if (level < minlevel) {
1898             level = minlevel;
1899             overflow++;
1900         }
1901
1902         block[j] = level;
1903     }
1904
1905     if (overflow && s->avctx->mb_decision == FF_MB_DECISION_SIMPLE)
1906         av_log(s->avctx, AV_LOG_INFO,
1907                "warning, clipping %d dct coefficients to %d..%d\n",
1908                overflow, minlevel, maxlevel);
1909 }
1910
/**
 * Compute a weight for each pixel of an 8x8 block from its neighbourhood.
 *
 * For every position the pixels of the surrounding 3x3 window (cut off at
 * the borders of the 8x8 block) are accumulated and the weight is
 * 36 * sqrt(count * sum_of_squares - sum^2) / count.
 *
 * @param weight output, 64 entries in row-major 8x8 order
 * @param ptr    top-left pixel of the 8x8 block
 * @param stride distance between two rows of ptr
 */
static void get_visual_weight(int16_t *weight, uint8_t *ptr, int stride)
{
    int row, col;

    // FIXME optimize
    for (row = 0; row < 8; row++) {
        const int top    = FFMAX(row - 1, 0);
        const int bottom = FFMIN(8, row + 2);

        for (col = 0; col < 8; col++) {
            const int left  = FFMAX(col - 1, 0);
            const int right = FFMIN(8, col + 2);
            int total    = 0;
            int total_sq = 0;
            int samples  = 0;
            int ny, nx;

            for (ny = top; ny < bottom; ny++) {
                for (nx = left; nx < right; nx++) {
                    const int pix = ptr[nx + ny * stride];

                    total    += pix;
                    total_sq += pix * pix;
                    samples++;
                }
            }
            weight[col + 8 * row] =
                (36 * ff_sqrt(samples * total_sq - total * total)) / samples;
        }
    }
}
1934
/**
 * Encode the macroblock at (s->mb_x, s->mb_y).
 *
 * Steps performed, in order:
 *  - pick the quantizer for this macroblock,
 *  - fetch the source pixels (through the edge emulation buffer when the
 *    macroblock reaches past the picture border),
 *  - intra: load the pixels into s->block[]; non-intra: build the motion
 *    compensated prediction in s->dest[] and store the difference,
 *  - optionally select interlaced DCT,
 *  - DCT + quantize every block that is not skipped, with optional
 *    noise shaping and coefficient elimination,
 *  - call the codec specific macroblock writer.
 *
 * @param s               encoder context
 * @param motion_x        passed on unchanged to the codec specific writer
 * @param motion_y        passed on unchanged to the codec specific writer
 * @param mb_block_height height of the chroma part of one macroblock
 * @param mb_block_width  width of the chroma part of one macroblock
 * @param mb_block_count  number of 8x8 blocks in one macroblock
 */
1935 static av_always_inline void encode_mb_internal(MpegEncContext *s,
1936                                                 int motion_x, int motion_y,
1937                                                 int mb_block_height,
1938                                                 int mb_block_width,
1939                                                 int mb_block_count)
1940 {
1941     int16_t weight[12][64];
1942     int16_t orig[12][64];
1943     const int mb_x = s->mb_x;
1944     const int mb_y = s->mb_y;
1945     int i;
         /* nonzero entry: block is not transformed, block_last_index becomes -1 */
1946     int skip_dct[12];
1947     int dct_offset = s->linesize * 8; // default for progressive frames
1948     int uv_dct_offset = s->uvlinesize * 8;
1949     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
1950     ptrdiff_t wrap_y, wrap_c;
1951
1952     for (i = 0; i < mb_block_count; i++)
1953         skip_dct[i] = s->skipdct;
1954
         /* quantizer selection: with adaptive quantization lambda comes from
          * the per macroblock table; unless QP_RD is set the qscale is taken
          * from the picture's qscale_table and dquant is the change relative
          * to the previous qscale */
1955     if (s->adaptive_quant) {
1956         const int last_qp = s->qscale;
1957         const int mb_xy = mb_x + mb_y * s->mb_stride;
1958
1959         s->lambda = s->lambda_table[mb_xy];
1960         update_qscale(s);
1961
1962         if (!(s->mpv_flags & FF_MPV_FLAG_QP_RD)) {
1963             s->qscale = s->current_picture_ptr->qscale_table[mb_xy];
1964             s->dquant = s->qscale - last_qp;
1965
1966             if (s->out_format == FMT_H263) {
                     /* dquant is limited to -2..2 for this output format */
1967                 s->dquant = av_clip(s->dquant, -2, 2);
1968
1969                 if (s->codec_id == AV_CODEC_ID_MPEG4) {
                         /* non-intra MPEG-4 macroblocks: no odd dquant and no
                          * dquant with direct mode in B pictures, no dquant
                          * at all with 8x8 motion vectors */
1970                     if (!s->mb_intra) {
1971                         if (s->pict_type == AV_PICTURE_TYPE_B) {
1972                             if (s->dquant & 1 || s->mv_dir & MV_DIRECT)
1973                                 s->dquant = 0;
1974                         }
1975                         if (s->mv_type == MV_TYPE_8X8)
1976                             s->dquant = 0;
1977                     }
1978                 }
1979             }
1980         }
1981         ff_set_qscale(s, last_qp + s->dquant);
1982     } else if (s->mpv_flags & FF_MPV_FLAG_QP_RD)
1983         ff_set_qscale(s, s->qscale + s->dquant);
1984
         /* source pointers of this macroblock inside the picture to encode */
1985     wrap_y = s->linesize;
1986     wrap_c = s->uvlinesize;
1987     ptr_y  = s->new_picture.f.data[0] +
1988              (mb_y * 16 * wrap_y)              + mb_x * 16;
1989     ptr_cb = s->new_picture.f.data[1] +
1990              (mb_y * mb_block_height * wrap_c) + mb_x * mb_block_width;
1991     ptr_cr = s->new_picture.f.data[2] +
1992              (mb_y * mb_block_height * wrap_c) + mb_x * mb_block_width;
1993
         /* the macroblock reaches past the right or bottom picture border
          * (not done for AMV): copy it into the edge emulation buffer with
          * emulated_edge_mc() and read the source from there instead */
1994     if((mb_x * 16 + 16 > s->width || mb_y * 16 + 16 > s->height) && s->codec_id != AV_CODEC_ID_AMV){
1995         uint8_t *ebuf = s->edge_emu_buffer + 32;
1996         int cw = (s->width  + s->chroma_x_shift) >> s->chroma_x_shift;
1997         int ch = (s->height + s->chroma_y_shift) >> s->chroma_y_shift;
1998         s->vdsp.emulated_edge_mc(ebuf, ptr_y,
1999                                  wrap_y, wrap_y,
2000                                  16, 16, mb_x * 16, mb_y * 16,
2001                                  s->width, s->height);
2002         ptr_y = ebuf;
2003         s->vdsp.emulated_edge_mc(ebuf + 18 * wrap_y, ptr_cb,
2004                                  wrap_c, wrap_c,
2005                                  mb_block_width, mb_block_height,
2006                                  mb_x * mb_block_width, mb_y * mb_block_height,
2007                                  cw, ch);
2008         ptr_cb = ebuf + 18 * wrap_y;
2009         s->vdsp.emulated_edge_mc(ebuf + 18 * wrap_y + 16, ptr_cr,
2010                                  wrap_c, wrap_c,
2011                                  mb_block_width, mb_block_height,
2012                                  mb_x * mb_block_width, mb_y * mb_block_height,
2013                                  cw, ch);
2014         ptr_cr = ebuf + 18 * wrap_y + 16;
2015     }
2016
2017     if (s->mb_intra) {
2018         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
2019             int progressive_score, interlaced_score;
2020
                 /* compare the cost of the two 16x8 halves (progressive)
                  * with the cost of the two fields (interlaced); the -400
                  * makes the progressive variant win unless the interlaced
                  * one is clearly cheaper */
2021             s->interlaced_dct = 0;
2022             progressive_score = s->dsp.ildct_cmp[4](s, ptr_y,
2023                                                     NULL, wrap_y, 8) +
2024                                 s->dsp.ildct_cmp[4](s, ptr_y + wrap_y * 8,
2025                                                     NULL, wrap_y, 8) - 400;
2026
2027             if (progressive_score > 0) {
2028                 interlaced_score = s->dsp.ildct_cmp[4](s, ptr_y,
2029                                                        NULL, wrap_y * 2, 8) +
2030                                    s->dsp.ildct_cmp[4](s, ptr_y + wrap_y,
2031                                                        NULL, wrap_y * 2, 8);
2032                 if (progressive_score > interlaced_score) {
2033                     s->interlaced_dct = 1;
2034
                         /* the lower blocks now start one line below the
                          * upper ones and every block uses every 2nd line */
2035                     dct_offset = wrap_y;
2036                     uv_dct_offset = wrap_c;
2037                     wrap_y <<= 1;
2038                     if (s->chroma_format == CHROMA_422 ||
2039                         s->chroma_format == CHROMA_444)
2040                         wrap_c <<= 1;
2041                 }
2042             }
2043         }
2044
             /* intra: the blocks hold the source pixels themselves */
2045         s->dsp.get_pixels(s->block[0], ptr_y                  , wrap_y);
2046         s->dsp.get_pixels(s->block[1], ptr_y              + 8 , wrap_y);
2047         s->dsp.get_pixels(s->block[2], ptr_y + dct_offset     , wrap_y);
2048         s->dsp.get_pixels(s->block[3], ptr_y + dct_offset + 8 , wrap_y);
2049
2050         if (s->flags & CODEC_FLAG_GRAY) {
2051             skip_dct[4] = 1;
2052             skip_dct[5] = 1;
2053         } else {
2054             s->dsp.get_pixels(s->block[4], ptr_cb, wrap_c);
2055             s->dsp.get_pixels(s->block[5], ptr_cr, wrap_c);
2056             if (!s->chroma_y_shift && s->chroma_x_shift) { /* 422 */
2057                 s->dsp.get_pixels(s->block[6], ptr_cb + uv_dct_offset, wrap_c);
2058                 s->dsp.get_pixels(s->block[7], ptr_cr + uv_dct_offset, wrap_c);
2059             } else if (!s->chroma_y_shift && !s->chroma_x_shift) { /* 444 */
2060                 s->dsp.get_pixels(s->block[6], ptr_cb + 8, wrap_c);
2061                 s->dsp.get_pixels(s->block[7], ptr_cr + 8, wrap_c);
2062                 s->dsp.get_pixels(s->block[8], ptr_cb + uv_dct_offset, wrap_c);
2063                 s->dsp.get_pixels(s->block[9], ptr_cr + uv_dct_offset, wrap_c);
2064                 s->dsp.get_pixels(s->block[10], ptr_cb + uv_dct_offset + 8, wrap_c);
2065                 s->dsp.get_pixels(s->block[11], ptr_cr + uv_dct_offset + 8, wrap_c);
2066             }
2067         }
2068     } else {
2069         op_pixels_func (*op_pix)[4];
2070         qpel_mc_func (*op_qpix)[16];
2071         uint8_t *dest_y, *dest_cb, *dest_cr;
2072
             /* non-intra: the prediction is built in s->dest[] */
2073         dest_y  = s->dest[0];
2074         dest_cb = s->dest[1];
2075         dest_cr = s->dest[2];
2076
2077         if ((!s->no_rounding) || s->pict_type == AV_PICTURE_TYPE_B) {
2078             op_pix  = s->hdsp.put_pixels_tab;
2079             op_qpix = s->dsp.put_qpel_pixels_tab;
2080         } else {
2081             op_pix  = s->hdsp.put_no_rnd_pixels_tab;
2082             op_qpix = s->dsp.put_no_rnd_qpel_pixels_tab;
2083         }
2084
             /* the forward prediction is written with the put functions; a
              * following backward prediction then uses the avg functions */
2085         if (s->mv_dir & MV_DIR_FORWARD) {
2086             ff_MPV_motion(s, dest_y, dest_cb, dest_cr, 0,
2087                           s->last_picture.f.data,
2088                           op_pix, op_qpix);
2089             op_pix  = s->hdsp.avg_pixels_tab;
2090             op_qpix = s->dsp.avg_qpel_pixels_tab;
2091         }
2092         if (s->mv_dir & MV_DIR_BACKWARD) {
2093             ff_MPV_motion(s, dest_y, dest_cb, dest_cr, 1,
2094                           s->next_picture.f.data,
2095                           op_pix, op_qpix);
2096         }
2097
2098         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
2099             int progressive_score, interlaced_score;
2100
                 /* same decision as in the intra case, but measured on the
                  * difference between prediction and source */
2101             s->interlaced_dct = 0;
2102             progressive_score = s->dsp.ildct_cmp[0](s, dest_y,
2103                                                     ptr_y,              wrap_y,
2104                                                     8) +
2105                                 s->dsp.ildct_cmp[0](s, dest_y + wrap_y * 8,
2106                                                     ptr_y + wrap_y * 8, wrap_y,
2107                                                     8) - 400;
2108
2109             if (s->avctx->ildct_cmp == FF_CMP_VSSE)
2110                 progressive_score -= 400;
2111
2112             if (progressive_score > 0) {
2113                 interlaced_score = s->dsp.ildct_cmp[0](s, dest_y,
2114                                                        ptr_y,
2115                                                        wrap_y * 2, 8) +
2116                                    s->dsp.ildct_cmp[0](s, dest_y + wrap_y,
2117                                                        ptr_y + wrap_y,
2118                                                        wrap_y * 2, 8);
2119
2120                 if (progressive_score > interlaced_score) {
2121                     s->interlaced_dct = 1;
2122
2123                     dct_offset = wrap_y;
2124                     uv_dct_offset = wrap_c;
2125                     wrap_y <<= 1;
2126                     if (s->chroma_format == CHROMA_422)
2127                         wrap_c <<= 1;
2128                 }
2129             }
2130         }
2131
             /* the blocks hold source minus prediction */
2132         s->dsp.diff_pixels(s->block[0], ptr_y, dest_y, wrap_y);
2133         s->dsp.diff_pixels(s->block[1], ptr_y + 8, dest_y + 8, wrap_y);
2134         s->dsp.diff_pixels(s->block[2], ptr_y + dct_offset,
2135                            dest_y + dct_offset, wrap_y);
2136         s->dsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8,
2137                            dest_y + dct_offset + 8, wrap_y);
2138
2139         if (s->flags & CODEC_FLAG_GRAY) {
2140             skip_dct[4] = 1;
2141             skip_dct[5] = 1;
2142         } else {
2143             s->dsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
2144             s->dsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
2145             if (!s->chroma_y_shift) { /* 422 */
2146                 s->dsp.diff_pixels(s->block[6], ptr_cb + uv_dct_offset,
2147                                    dest_cb + uv_dct_offset, wrap_c);
2148                 s->dsp.diff_pixels(s->block[7], ptr_cr + uv_dct_offset,
2149                                    dest_cr + uv_dct_offset, wrap_c);
2150             }
2151         }
2152         /* pre quantization */
             /* when the stored mc_mb_var of this macroblock is below
              * 2 * qscale^2, every block whose SAD against the prediction is
              * below 20 * qscale is skipped */
2153         if (s->current_picture.mc_mb_var[s->mb_stride * mb_y + mb_x] <
2154                 2 * s->qscale * s->qscale) {
2155             // FIXME optimize
2156             if (s->dsp.sad[1](NULL, ptr_y , dest_y,
2157                               wrap_y, 8) < 20 * s->qscale)
2158                 skip_dct[0] = 1;
2159             if (s->dsp.sad[1](NULL, ptr_y + 8,
2160                               dest_y + 8, wrap_y, 8) < 20 * s->qscale)
2161                 skip_dct[1] = 1;
2162             if (s->dsp.sad[1](NULL, ptr_y + dct_offset,
2163                               dest_y + dct_offset, wrap_y, 8) < 20 * s->qscale)
2164                 skip_dct[2] = 1;
2165             if (s->dsp.sad[1](NULL, ptr_y + dct_offset + 8,
2166                               dest_y + dct_offset + 8,
2167                               wrap_y, 8) < 20 * s->qscale)
2168                 skip_dct[3] = 1;
2169             if (s->dsp.sad[1](NULL, ptr_cb, dest_cb,
2170                               wrap_c, 8) < 20 * s->qscale)
2171                 skip_dct[4] = 1;
2172             if (s->dsp.sad[1](NULL, ptr_cr, dest_cr,
2173                               wrap_c, 8) < 20 * s->qscale)
2174                 skip_dct[5] = 1;
2175             if (!s->chroma_y_shift) { /* 422 */
2176                 if (s->dsp.sad[1](NULL, ptr_cb + uv_dct_offset,
2177                                   dest_cb + uv_dct_offset,
2178                                   wrap_c, 8) < 20 * s->qscale)
2179                     skip_dct[6] = 1;
2180                 if (s->dsp.sad[1](NULL, ptr_cr + uv_dct_offset,
2181                                   dest_cr + uv_dct_offset,
2182                                   wrap_c, 8) < 20 * s->qscale)
2183                     skip_dct[7] = 1;
2184             }
2185         }
2186     }
2187
         /* noise shaping needs a weight per source pixel and a copy of the
          * unquantized blocks for dct_quantize_refine() below.
          * NOTE(review): only weight[0..7] are filled here while the refine
          * loop runs over mb_block_count blocks - confirm whether 12 block
          * macroblocks can reach this path with noise shaping enabled */
2188     if (s->quantizer_noise_shaping) {
2189         if (!skip_dct[0])
2190             get_visual_weight(weight[0], ptr_y                 , wrap_y);
2191         if (!skip_dct[1])
2192             get_visual_weight(weight[1], ptr_y              + 8, wrap_y);
2193         if (!skip_dct[2])
2194             get_visual_weight(weight[2], ptr_y + dct_offset    , wrap_y);
2195         if (!skip_dct[3])
2196             get_visual_weight(weight[3], ptr_y + dct_offset + 8, wrap_y);
2197         if (!skip_dct[4])
2198             get_visual_weight(weight[4], ptr_cb                , wrap_c);
2199         if (!skip_dct[5])
2200             get_visual_weight(weight[5], ptr_cr                , wrap_c);
2201         if (!s->chroma_y_shift) { /* 422 */
2202             if (!skip_dct[6])
2203                 get_visual_weight(weight[6], ptr_cb + uv_dct_offset,
2204                                   wrap_c);
2205             if (!skip_dct[7])
2206                 get_visual_weight(weight[7], ptr_cr + uv_dct_offset,
2207                                   wrap_c);
2208         }
2209         memcpy(orig[0], s->block[0], sizeof(int16_t) * 64 * mb_block_count);
2210     }
2211
2212     /* DCT & quantize */
2213     av_assert2(s->out_format != FMT_MJPEG || s->qscale == 8);
2214     {
2215         for (i = 0; i < mb_block_count; i++) {
2216             if (!skip_dct[i]) {
2217                 int overflow;
2218                 s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow);
2219                 // FIXME we could decide to change to quantizer instead of
2220                 // clipping
2221                 // JS: I don't think that would be a good idea it could lower
2222                 //     quality instead of improve it. Just INTRADC clipping
2223                 //     deserves changes in quantizer
2224                 if (overflow)
2225                     clip_coeffs(s, s->block[i], s->block_last_index[i]);
2226             } else
2227                 s->block_last_index[i] = -1;
2228         }
2229         if (s->quantizer_noise_shaping) {
2230             for (i = 0; i < mb_block_count; i++) {
2231                 if (!skip_dct[i]) {
2232                     s->block_last_index[i] =
2233                         dct_quantize_refine(s, s->block[i], weight[i],
2234                                             orig[i], i, s->qscale);
2235                 }
2236             }
2237         }
2238
             /* coefficient elimination is applied to non-intra macroblocks
              * only: blocks 0..3 use the luma threshold, the remaining
              * blocks the chroma threshold */
2239         if (s->luma_elim_threshold && !s->mb_intra)
2240             for (i = 0; i < 4; i++)
2241                 dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
2242         if (s->chroma_elim_threshold && !s->mb_intra)
2243             for (i = 4; i < mb_block_count; i++)
2244                 dct_single_coeff_elimination(s, i, s->chroma_elim_threshold);
2245
2246         if (s->mpv_flags & FF_MPV_FLAG_CBP_RD) {
2247             for (i = 0; i < mb_block_count; i++) {
2248                 if (s->block_last_index[i] == -1)
2249                     s->coded_score[i] = INT_MAX / 256;
2250             }
2251         }
2252     }
2253
         /* gray intra macroblock: every chroma block consists of a single
          * first coefficient with the value (1024 + c_dc_scale / 2) / c_dc_scale */
2254     if ((s->flags & CODEC_FLAG_GRAY) && s->mb_intra) {
2255         s->block_last_index[4] =
2256         s->block_last_index[5] = 0;
2257         s->block[4][0] =
2258         s->block[5][0] = (1024 + s->c_dc_scale / 2) / s->c_dc_scale;
2259         if (!s->chroma_y_shift) { /* 422 / 444 */
2260             for (i=6; i<12; i++) {
2261                 s->block_last_index[i] = 0;
2262                 s->block[i][0] = s->block[4][0];
2263             }
2264         }
2265     }
2266
2267     // non c quantize code returns incorrect block_last_index FIXME
         /* with alternate scan and a quantizer other than ff_dct_quantize_c,
          * search the last nonzero coefficient again in scan order */
2268     if (s->alternate_scan && s->dct_quantize != ff_dct_quantize_c) {
2269         for (i = 0; i < mb_block_count; i++) {
2270             int j;
2271             if (s->block_last_index[i] > 0) {
2272                 for (j = 63; j > 0; j--) {
2273                     if (s->block[i][s->intra_scantable.permutated[j]])
2274                         break;
2275                 }
2276                 s->block_last_index[i] = j;
2277             }
2278         }
2279     }
2280
2281     /* huffman encode */
         /* hand the quantized blocks to the macroblock writer of the codec;
          * each call is guarded by the matching CONFIG_*_ENCODER constant */
2282     switch(s->codec_id){ //FIXME funct ptr could be slightly faster
2283     case AV_CODEC_ID_MPEG1VIDEO:
2284     case AV_CODEC_ID_MPEG2VIDEO:
2285         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
2286             ff_mpeg1_encode_mb(s, s->block, motion_x, motion_y);
2287         break;
2288     case AV_CODEC_ID_MPEG4:
2289         if (CONFIG_MPEG4_ENCODER)
2290             ff_mpeg4_encode_mb(s, s->block, motion_x, motion_y);
2291         break;
2292     case AV_CODEC_ID_MSMPEG4V2:
2293     case AV_CODEC_ID_MSMPEG4V3:
2294     case AV_CODEC_ID_WMV1:
2295         if (CONFIG_MSMPEG4_ENCODER)
2296             ff_msmpeg4_encode_mb(s, s->block, motion_x, motion_y);
2297         break;
2298     case AV_CODEC_ID_WMV2:
2299         if (CONFIG_WMV2_ENCODER)
2300             ff_wmv2_encode_mb(s, s->block, motion_x, motion_y);
2301         break;
2302     case AV_CODEC_ID_H261:
2303         if (CONFIG_H261_ENCODER)
2304             ff_h261_encode_mb(s, s->block, motion_x, motion_y);
2305         break;
2306     case AV_CODEC_ID_H263:
2307     case AV_CODEC_ID_H263P:
2308     case AV_CODEC_ID_FLV1:
2309     case AV_CODEC_ID_RV10:
2310     case AV_CODEC_ID_RV20:
2311         if (CONFIG_H263_ENCODER)
2312             ff_h263_encode_mb(s, s->block, motion_x, motion_y);
2313         break;
2314     case AV_CODEC_ID_MJPEG:
2315     case AV_CODEC_ID_AMV:
2316         if (CONFIG_MJPEG_ENCODER)
2317             ff_mjpeg_encode_mb(s, s->block);
2318         break;
2319     default:
2320         av_assert1(0);
2321     }
2322 }
2323
2324 static av_always_inline void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
2325 {
2326     if (s->chroma_format == CHROMA_420) encode_mb_internal(s, motion_x, motion_y,  8, 8, 6);
2327     else if (s->chroma_format == CHROMA_422) encode_mb_internal(s, motion_x, motion_y, 16, 8, 8);
2328     else encode_mb_internal(s, motion_x, motion_y, 16, 16, 12);
2329 }
2330
2331 static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int type){
2332     int i;
2333
2334     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2335
2336     /* mpeg1 */
2337     d->mb_skip_run= s->mb_skip_run;
2338     for(i=0; i<3; i++)
2339         d->last_dc[i] = s->last_dc[i];
2340
2341     /* statistics */
2342     d->mv_bits= s->mv_bits;
2343     d->i_tex_bits= s->i_tex_bits;
2344     d->p_tex_bits= s->p_tex_bits;
2345     d->i_count= s->i_count;
2346     d->f_count= s->f_count;
2347     d->b_count= s->b_count;
2348     d->skip_count= s->skip_count;
2349     d->misc_bits= s->misc_bits;
2350     d->last_bits= 0;
2351
2352     d->mb_skipped= 0;
2353     d->qscale= s->qscale;
2354     d->dquant= s->dquant;
2355
2356     d->esc3_level_length= s->esc3_level_length;
2357 }
2358
2359 static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){
2360     int i;
2361
2362     memcpy(d->mv, s->mv, 2*4*2*sizeof(int));
2363     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2364
2365     /* mpeg1 */
2366     d->mb_skip_run= s->mb_skip_run;
2367     for(i=0; i<3; i++)
2368         d->last_dc[i] = s->last_dc[i];
2369
2370     /* statistics */
2371     d->mv_bits= s->mv_bits;
2372     d->i_tex_bits= s->i_tex_bits;
2373     d->p_tex_bits= s->p_tex_bits;
2374     d->i_count= s->i_count;
2375     d->f_count= s->f_count;
2376     d->b_count= s->b_count;
2377     d->skip_count= s->skip_count;
2378     d->misc_bits= s->misc_bits;
2379
2380     d->mb_intra= s->mb_intra;
2381     d->mb_skipped= s->mb_skipped;
2382     d->mv_type= s->mv_type;
2383     d->mv_dir= s->mv_dir;
2384     d->pb= s->pb;
2385     if(s->data_partitioning){
2386         d->pb2= s->pb2;
2387         d->tex_pb= s->tex_pb;
2388     }
2389     d->block= s->block;
2390     for(i=0; i<8; i++)
2391         d->block_last_index[i]= s->block_last_index[i];
2392     d->interlaced_dct= s->interlaced_dct;
2393     d->qscale= s->qscale;
2394
2395     d->esc3_level_length= s->esc3_level_length;
2396 }
2397
2398 static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type,
2399                            PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2],
2400                            int *dmin, int *next_block, int motion_x, int motion_y)
2401 {
2402     int score;
2403     uint8_t *dest_backup[3];
2404
2405     copy_context_before_encode(s, backup, type);
2406
2407     s->block= s->blocks[*next_block];
2408     s->pb= pb[*next_block];
2409     if(s->data_partitioning){
2410         s->pb2   = pb2   [*next_block];
2411         s->tex_pb= tex_pb[*next_block];
2412     }
2413
2414     if(*next_block){
2415         memcpy(dest_backup, s->dest, sizeof(s->dest));
2416         s->dest[0] = s->rd_scratchpad;
2417         s->dest[1] = s->rd_scratchpad + 16*s->linesize;
2418         s->dest[2] = s->rd_scratchpad + 16*s->linesize + 8;
2419         assert(s->linesize >= 32); //FIXME
2420     }
2421
2422     encode_mb(s, motion_x, motion_y);
2423
2424     score= put_bits_count(&s->pb);
2425     if(s->data_partitioning){
2426         score+= put_bits_count(&s->pb2);
2427         score+= put_bits_count(&s->tex_pb);
2428     }
2429
2430     if(s->avctx->mb_decision == FF_MB_DECISION_RD){
2431         ff_MPV_decode_mb(s, s->block);
2432
2433         score *= s->lambda2;
2434         score += sse_mb(s) << FF_LAMBDA_SHIFT;
2435     }
2436
2437     if(*next_block){
2438         memcpy(s->dest, dest_backup, sizeof(s->dest));
2439     }
2440
2441     if(score<*dmin){
2442         *dmin= score;
2443         *next_block^=1;
2444
2445         copy_context_after_encode(best, s, type);
2446     }
2447 }
2448
2449 static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride){
2450     uint32_t *sq = ff_square_tab + 256;
2451     int acc=0;
2452     int x,y;
2453
2454     if(w==16 && h==16)
2455         return s->dsp.sse[0](NULL, src1, src2, stride, 16);
2456     else if(w==8 && h==8)
2457         return s->dsp.sse[1](NULL, src1, src2, stride, 8);
2458
2459     for(y=0; y<h; y++){
2460         for(x=0; x<w; x++){
2461             acc+= sq[src1[x + y*stride] - src2[x + y*stride]];
2462         }
2463     }
2464
2465     av_assert2(acc>=0);
2466
2467     return acc;
2468 }
2469
2470 static int sse_mb(MpegEncContext *s){
2471     int w= 16;
2472     int h= 16;
2473
2474     if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
2475     if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
2476
2477     if(w==16 && h==16)
2478       if(s->avctx->mb_cmp == FF_CMP_NSSE){
2479         return  s->dsp.nsse[0](s, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
2480                +s->dsp.nsse[1](s, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
2481                +s->dsp.nsse[1](s, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
2482       }else{
2483         return  s->dsp.sse[0](NULL, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
2484                +s->dsp.sse[1](NULL, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
2485                +s->dsp.sse[1](NULL, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
2486       }
2487     else
2488         return  sse(s, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], w, h, s->linesize)
2489                +sse(s, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize)
2490                +sse(s, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize);
2491 }
2492
2493 static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){
2494     MpegEncContext *s= *(void**)arg;
2495
2496
2497     s->me.pre_pass=1;
2498     s->me.dia_size= s->avctx->pre_dia_size;
2499     s->first_slice_line=1;
2500     for(s->mb_y= s->end_mb_y-1; s->mb_y >= s->start_mb_y; s->mb_y--) {
2501         for(s->mb_x=s->mb_width-1; s->mb_x >=0 ;s->mb_x--) {
2502             ff_pre_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2503         }
2504         s->first_slice_line=0;
2505     }
2506
2507     s->me.pre_pass=0;
2508
2509     return 0;
2510 }
2511
2512 static int estimate_motion_thread(AVCodecContext *c, void *arg){
2513     MpegEncContext *s= *(void**)arg;
2514
2515     ff_check_alignment();
2516
2517     s->me.dia_size= s->avctx->dia_size;
2518     s->first_slice_line=1;
2519     for(s->mb_y= s->start_mb_y; s->mb_y < s->end_mb_y; s->mb_y++) {
2520         s->mb_x=0; //for block init below
2521         ff_init_block_index(s);
2522         for(s->mb_x=0; s->mb_x < s->mb_width; s->mb_x++) {
2523             s->block_index[0]+=2;
2524             s->block_index[1]+=2;
2525             s->block_index[2]+=2;
2526             s->block_index[3]+=2;
2527
2528             /* compute motion vector & mb_type and store in context */
2529             if(s->pict_type==AV_PICTURE_TYPE_B)
2530                 ff_estimate_b_frame_motion(s, s->mb_x, s->mb_y);
2531             else
2532                 ff_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2533         }
2534         s->first_slice_line=0;
2535     }
2536     return 0;
2537 }
2538
2539 static int mb_var_thread(AVCodecContext *c, void *arg){
2540     MpegEncContext *s= *(void**)arg;
2541     int mb_x, mb_y;
2542
2543     ff_check_alignment();
2544
2545     for(mb_y=s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2546         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2547             int xx = mb_x * 16;
2548             int yy = mb_y * 16;
2549             uint8_t *pix = s->new_picture.f.data[0] + (yy * s->linesize) + xx;
2550             int varc;
2551             int sum = s->dsp.pix_sum(pix, s->linesize);
2552
2553             varc = (s->dsp.pix_norm1(pix, s->linesize) - (((unsigned)sum*sum)>>8) + 500 + 128)>>8;
2554
2555             s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc;
2556             s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
2557             s->me.mb_var_sum_temp    += varc;
2558         }
2559     }
2560     return 0;
2561 }
2562
2563 static void write_slice_end(MpegEncContext *s){
2564     if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4){
2565         if(s->partitioned_frame){
2566             ff_mpeg4_merge_partitions(s);
2567         }
2568
2569         ff_mpeg4_stuffing(&s->pb);
2570     }else if(CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG){
2571         ff_mjpeg_encode_stuffing(s);
2572     }
2573
2574     avpriv_align_put_bits(&s->pb);
2575     flush_put_bits(&s->pb);
2576
2577     if((s->flags&CODEC_FLAG_PASS1) && !s->partitioned_frame)
2578         s->misc_bits+= get_bits_diff(s);
2579 }
2580
2581 static void write_mb_info(MpegEncContext *s)
2582 {
2583     uint8_t *ptr = s->mb_info_ptr + s->mb_info_size - 12;
2584     int offset = put_bits_count(&s->pb);
2585     int mba  = s->mb_x + s->mb_width * (s->mb_y % s->gob_index);
2586     int gobn = s->mb_y / s->gob_index;
2587     int pred_x, pred_y;
2588     if (CONFIG_H263_ENCODER)
2589         ff_h263_pred_motion(s, 0, 0, &pred_x, &pred_y);
2590     bytestream_put_le32(&ptr, offset);
2591     bytestream_put_byte(&ptr, s->qscale);
2592     bytestream_put_byte(&ptr, gobn);
2593     bytestream_put_le16(&ptr, mba);
2594     bytestream_put_byte(&ptr, pred_x); /* hmv1 */
2595     bytestream_put_byte(&ptr, pred_y); /* vmv1 */
2596     /* 4MV not implemented */
2597     bytestream_put_byte(&ptr, 0); /* hmv2 */
2598     bytestream_put_byte(&ptr, 0); /* vmv2 */
2599 }
2600
2601 static void update_mb_info(MpegEncContext *s, int startcode)
2602 {
2603     if (!s->mb_info)
2604         return;
2605     if (put_bits_count(&s->pb) - s->prev_mb_info*8 >= s->mb_info*8) {
2606         s->mb_info_size += 12;
2607         s->prev_mb_info = s->last_mb_info;
2608     }
2609     if (startcode) {
2610         s->prev_mb_info = put_bits_count(&s->pb)/8;
2611         /* This might have incremented mb_info_size above, and we return without
2612          * actually writing any info into that slot yet. But in that case,
2613          * this will be called again at the start of the after writing the
2614          * start code, actually writing the mb info. */
2615         return;
2616     }
2617
2618     s->last_mb_info = put_bits_count(&s->pb)/8;
2619     if (!s->mb_info_size)
2620         s->mb_info_size += 12;
2621     write_mb_info(s);
2622 }
2623
2624 static int encode_thread(AVCodecContext *c, void *arg){
2625     MpegEncContext *s= *(void**)arg;
2626     int mb_x, mb_y, pdif = 0;
2627     int chr_h= 16>>s->chroma_y_shift;
2628     int i, j;
2629     MpegEncContext best_s, backup_s;
2630     uint8_t bit_buf[2][MAX_MB_BYTES];
2631     uint8_t bit_buf2[2][MAX_MB_BYTES];
2632     uint8_t bit_buf_tex[2][MAX_MB_BYTES];
2633     PutBitContext pb[2], pb2[2], tex_pb[2];
2634
2635     ff_check_alignment();
2636
2637     for(i=0; i<2; i++){
2638         init_put_bits(&pb    [i], bit_buf    [i], MAX_MB_BYTES);
2639         init_put_bits(&pb2   [i], bit_buf2   [i], MAX_MB_BYTES);
2640         init_put_bits(&tex_pb[i], bit_buf_tex[i], MAX_MB_BYTES);
2641     }
2642
2643     s->last_bits= put_bits_count(&s->pb);
2644     s->mv_bits=0;
2645     s->misc_bits=0;
2646     s->i_tex_bits=0;
2647     s->p_tex_bits=0;
2648     s->i_count=0;
2649     s->f_count=0;
2650     s->b_count=0;
2651     s->skip_count=0;
2652
2653     for(i=0; i<3; i++){
2654         /* init last dc values */
2655         /* note: quant matrix value (8) is implied here */
2656         s->last_dc[i] = 128 << s->intra_dc_precision;
2657
2658         s->current_picture.f.error[i] = 0;
2659     }
2660     if(s->codec_id==AV_CODEC_ID_AMV){
2661         s->last_dc[0] = 128*8/13;
2662         s->last_dc[1] = 128*8/14;
2663         s->last_dc[2] = 128*8/14;
2664     }
2665     s->mb_skip_run = 0;
2666     memset(s->last_mv, 0, sizeof(s->last_mv));
2667
2668     s->last_mv_dir = 0;
2669
2670     switch(s->codec_id){
2671     case AV_CODEC_ID_H263:
2672     case AV_CODEC_ID_H263P:
2673     case AV_CODEC_ID_FLV1:
2674         if (CONFIG_H263_ENCODER)
2675             s->gob_index = ff_h263_get_gob_height(s);
2676         break;
2677     case AV_CODEC_ID_MPEG4:
2678         if(CONFIG_MPEG4_ENCODER && s->partitioned_frame)
2679             ff_mpeg4_init_partitions(s);
2680         break;
2681     }
2682
2683     s->resync_mb_x=0;
2684     s->resync_mb_y=0;
2685     s->first_slice_line = 1;
2686     s->ptr_lastgob = s->pb.buf;
2687     for(mb_y= s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2688         s->mb_x=0;
2689         s->mb_y= mb_y;
2690
2691         ff_set_qscale(s, s->qscale);
2692         ff_init_block_index(s);
2693
2694         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2695             int xy= mb_y*s->mb_stride + mb_x; // removed const, H261 needs to adjust this
2696             int mb_type= s->mb_type[xy];
2697 //            int d;
2698             int dmin= INT_MAX;
2699             int dir;
2700
2701             if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < MAX_MB_BYTES){
2702                 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2703                 return -1;
2704             }
2705             if(s->data_partitioning){
2706                 if(   s->pb2   .buf_end - s->pb2   .buf - (put_bits_count(&s->    pb2)>>3) < MAX_MB_BYTES
2707                    || s->tex_pb.buf_end - s->tex_pb.buf - (put_bits_count(&s->tex_pb )>>3) < MAX_MB_BYTES){
2708                     av_log(s->avctx, AV_LOG_ERROR, "encoded partitioned frame too large\n");
2709                     return -1;
2710                 }
2711             }
2712
2713             s->mb_x = mb_x;
2714             s->mb_y = mb_y;  // moved into loop, can get changed by H.261
2715             ff_update_block_index(s);
2716
2717             if(CONFIG_H261_ENCODER && s->codec_id == AV_CODEC_ID_H261){
2718                 ff_h261_reorder_mb_index(s);
2719                 xy= s->mb_y*s->mb_stride + s->mb_x;
2720                 mb_type= s->mb_type[xy];
2721             }
2722
2723             /* write gob / video packet header  */
2724             if(s->rtp_mode){
2725                 int current_packet_size, is_gob_start;
2726
2727                 current_packet_size= ((put_bits_count(&s->pb)+7)>>3) - (s->ptr_lastgob - s->pb.buf);
2728
2729                 is_gob_start= s->avctx->rtp_payload_size && current_packet_size >= s->avctx->rtp_payload_size && mb_y + mb_x>0;
2730
2731                 if(s->start_mb_y == mb_y && mb_y > 0 && mb_x==0) is_gob_start=1;
2732
2733                 switch(s->codec_id){
2734                 case AV_CODEC_ID_H263:
2735                 case AV_CODEC_ID_H263P:
2736                     if(!s->h263_slice_structured)
2737                         if(s->mb_x || s->mb_y%s->gob_index) is_gob_start=0;
2738                     break;
2739                 case AV_CODEC_ID_MPEG2VIDEO:
2740                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
2741                 case AV_CODEC_ID_MPEG1VIDEO:
2742                     if(s->mb_skip_run) is_gob_start=0;
2743                     break;
2744                 case AV_CODEC_ID_MJPEG:
2745                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
2746                     break;
2747                 }
2748
2749                 if(is_gob_start){
2750                     if(s->start_mb_y != mb_y || mb_x!=0){
2751                         write_slice_end(s);
2752
2753                         if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4 && s->partitioned_frame){
2754                             ff_mpeg4_init_partitions(s);
2755                         }
2756                     }
2757
2758                     av_assert2((put_bits_count(&s->pb)&7) == 0);
2759                     current_packet_size= put_bits_ptr(&s->pb) - s->ptr_lastgob;
2760
2761                     if (s->error_rate && s->resync_mb_x + s->resync_mb_y > 0) {
2762                         int r= put_bits_count(&s->pb)/8 + s->picture_number + 16 + s->mb_x + s->mb_y;
2763                         int d = 100 / s->error_rate;
2764                         if(r % d == 0){
2765                             current_packet_size=0;
2766                             s->pb.buf_ptr= s->ptr_lastgob;
2767                             assert(put_bits_ptr(&s->pb) == s->ptr_lastgob);
2768                         }
2769                     }
2770
2771                     if (s->avctx->rtp_callback){
2772                         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width + mb_x - s->resync_mb_x;
2773                         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, current_packet_size, number_mb);
2774                     }
2775                     update_mb_info(s, 1);
2776
2777                     switch(s->codec_id){
2778                     case AV_CODEC_ID_MPEG4:
2779                         if (CONFIG_MPEG4_ENCODER) {
2780                             ff_mpeg4_encode_video_packet_header(s);
2781                             ff_mpeg4_clean_buffers(s);
2782                         }
2783                     break;
2784                     case AV_CODEC_ID_MPEG1VIDEO:
2785                     case AV_CODEC_ID_MPEG2VIDEO:
2786                         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER) {
2787                             ff_mpeg1_encode_slice_header(s);
2788                             ff_mpeg1_clean_buffers(s);
2789                         }
2790                     break;
2791                     case AV_CODEC_ID_H263:
2792                     case AV_CODEC_ID_H263P:
2793                         if (CONFIG_H263_ENCODER)
2794                             ff_h263_encode_gob_header(s, mb_y);
2795                     break;
2796                     }
2797
2798                     if(s->flags&CODEC_FLAG_PASS1){
2799                         int bits= put_bits_count(&s->pb);
2800                         s->misc_bits+= bits - s->last_bits;
2801                         s->last_bits= bits;
2802                     }
2803
2804                     s->ptr_lastgob += current_packet_size;
2805                     s->first_slice_line=1;
2806                     s->resync_mb_x=mb_x;
2807                     s->resync_mb_y=mb_y;
2808                 }
2809             }
2810
2811             if(  (s->resync_mb_x   == s->mb_x)
2812                && s->resync_mb_y+1 == s->mb_y){
2813                 s->first_slice_line=0;
2814             }
2815
2816             s->mb_skipped=0;
2817             s->dquant=0; //only for QP_RD
2818
2819             update_mb_info(s, 0);
2820
2821             if (mb_type & (mb_type-1) || (s->mpv_flags & FF_MPV_FLAG_QP_RD)) { // more than 1 MB type possible or FF_MPV_FLAG_QP_RD
2822                 int next_block=0;
2823                 int pb_bits_count, pb2_bits_count, tex_pb_bits_count;
2824
2825                 copy_context_before_encode(&backup_s, s, -1);
2826                 backup_s.pb= s->pb;
2827                 best_s.data_partitioning= s->data_partitioning;
2828                 best_s.partitioned_frame= s->partitioned_frame;
2829                 if(s->data_partitioning){
2830                     backup_s.pb2= s->pb2;
2831                     backup_s.tex_pb= s->tex_pb;
2832                 }
2833
2834                 if(mb_type&CANDIDATE_MB_TYPE_INTER){
2835                     s->mv_dir = MV_DIR_FORWARD;
2836                     s->mv_type = MV_TYPE_16X16;
2837                     s->mb_intra= 0;
2838                     s->mv[0][0][0] = s->p_mv_table[xy][0];
2839                     s->mv[0][0][1] = s->p_mv_table[xy][1];
2840                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER, pb, pb2, tex_pb,
2841                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2842                 }
2843                 if(mb_type&CANDIDATE_MB_TYPE_INTER_I){
2844                     s->mv_dir = MV_DIR_FORWARD;
2845                     s->mv_type = MV_TYPE_FIELD;
2846                     s->mb_intra= 0;
2847                     for(i=0; i<2; i++){
2848                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
2849                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
2850                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
2851                     }
2852                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER_I, pb, pb2, tex_pb,
2853                                  &dmin, &next_block, 0, 0);
2854                 }
2855                 if(mb_type&CANDIDATE_MB_TYPE_SKIPPED){
2856                     s->mv_dir = MV_DIR_FORWARD;
2857                     s->mv_type = MV_TYPE_16X16;
2858                     s->mb_intra= 0;
2859                     s->mv[0][0][0] = 0;
2860                     s->mv[0][0][1] = 0;
2861                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_SKIPPED, pb, pb2, tex_pb,
2862                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2863                 }
2864                 if(mb_type&CANDIDATE_MB_TYPE_INTER4V){
2865                     s->mv_dir = MV_DIR_FORWARD;
2866                     s->mv_type = MV_TYPE_8X8;
2867                     s->mb_intra= 0;
2868                     for(i=0; i<4; i++){
2869                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
2870                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
2871                     }
2872                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER4V, pb, pb2, tex_pb,
2873                                  &dmin, &next_block, 0, 0);
2874                 }
2875                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD){
2876                     s->mv_dir = MV_DIR_FORWARD;
2877                     s->mv_type = MV_TYPE_16X16;
2878                     s->mb_intra= 0;
2879                     s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
2880                     s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
2881                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD, pb, pb2, tex_pb,
2882                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2883                 }
2884                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD){
2885                     s->mv_dir = MV_DIR_BACKWARD;
2886                     s->mv_type = MV_TYPE_16X16;
2887                     s->mb_intra= 0;
2888                     s->mv[1][0][0] = s->b_back_mv_table[xy][0];
2889                     s->mv[1][0][1] = s->b_back_mv_table[xy][1];
2890                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD, pb, pb2, tex_pb,
2891                                  &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]);
2892                 }
2893                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR){
2894                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2895                     s->mv_type = MV_TYPE_16X16;
2896                     s->mb_intra= 0;
2897                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
2898                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
2899                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
2900                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
2901                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR, pb, pb2, tex_pb,
2902                                  &dmin, &next_block, 0, 0);
2903                 }
2904                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD_I){
2905                     s->mv_dir = MV_DIR_FORWARD;
2906                     s->mv_type = MV_TYPE_FIELD;
2907                     s->mb_intra= 0;
2908                     for(i=0; i<2; i++){
2909                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
2910                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
2911                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
2912                     }
2913                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD_I, pb, pb2, tex_pb,
2914                                  &dmin, &next_block, 0, 0);
2915                 }
2916                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD_I){
2917                     s->mv_dir = MV_DIR_BACKWARD;
2918                     s->mv_type = MV_TYPE_FIELD;
2919                     s->mb_intra= 0;
2920                     for(i=0; i<2; i++){
2921                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
2922                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
2923                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
2924                     }
2925                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD_I, pb, pb2, tex_pb,
2926                                  &dmin, &next_block, 0, 0);
2927                 }
2928                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR_I){
2929                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2930                     s->mv_type = MV_TYPE_FIELD;
2931                     s->mb_intra= 0;
2932                     for(dir=0; dir<2; dir++){
2933                         for(i=0; i<2; i++){
2934                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
2935                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
2936                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
2937                         }
2938                     }
2939                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR_I, pb, pb2, tex_pb,
2940                                  &dmin, &next_block, 0, 0);
2941                 }
2942                 if(mb_type&CANDIDATE_MB_TYPE_INTRA){
2943                     s->mv_dir = 0;
2944                     s->mv_type = MV_TYPE_16X16;
2945                     s->mb_intra= 1;
2946                     s->mv[0][0][0] = 0;
2947                     s->mv[0][0][1] = 0;
2948                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTRA, pb, pb2, tex_pb,
2949                                  &dmin, &next_block, 0, 0);
2950                     if(s->h263_pred || s->h263_aic){
2951                         if(best_s.mb_intra)
2952                             s->mbintra_table[mb_x + mb_y*s->mb_stride]=1;
2953                         else
2954                             ff_clean_intra_table_entries(s); //old mode?
2955                     }
2956                 }
2957
2958                 if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) && dmin < INT_MAX) {
2959                     if(best_s.mv_type==MV_TYPE_16X16){ //FIXME move 4mv after QPRD
2960                         const int last_qp= backup_s.qscale;
2961                         int qpi, qp, dc[6];
2962                         int16_t ac[6][16];
2963                         const int mvdir= (best_s.mv_dir&MV_DIR_BACKWARD) ? 1 : 0;
2964                         static const int dquant_tab[4]={-1,1,-2,2};
2965                         int storecoefs = s->mb_intra && s->dc_val[0];
2966
2967                         av_assert2(backup_s.dquant == 0);
2968
2969                         //FIXME intra
2970                         s->mv_dir= best_s.mv_dir;
2971                         s->mv_type = MV_TYPE_16X16;
2972                         s->mb_intra= best_s.mb_intra;
2973                         s->mv[0][0][0] = best_s.mv[0][0][0];
2974                         s->mv[0][0][1] = best_s.mv[0][0][1];
2975                         s->mv[1][0][0] = best_s.mv[1][0][0];
2976                         s->mv[1][0][1] = best_s.mv[1][0][1];
2977
2978                         qpi = s->pict_type == AV_PICTURE_TYPE_B ? 2 : 0;
2979                         for(; qpi<4; qpi++){
2980                             int dquant= dquant_tab[qpi];
2981                             qp= last_qp + dquant;
2982                             if(qp < s->avctx->qmin || qp > s->avctx->qmax)
2983                                 continue;
2984                             backup_s.dquant= dquant;
2985                             if(storecoefs){
2986                                 for(i=0; i<6; i++){
2987                                     dc[i]= s->dc_val[0][ s->block_index[i] ];
2988                                     memcpy(ac[i], s->ac_val[0][s->block_index[i]], sizeof(int16_t)*16);
2989                                 }
2990                             }
2991
2992                             encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
2993                                          &dmin, &next_block, s->mv[mvdir][0][0], s->mv[mvdir][0][1]);
2994                             if(best_s.qscale != qp){
2995                                 if(storecoefs){
2996                                     for(i=0; i<6; i++){
2997                                         s->dc_val[0][ s->block_index[i] ]= dc[i];
2998                                         memcpy(s->ac_val[0][s->block_index[i]], ac[i], sizeof(int16_t)*16);
2999                                     }
3000                                 }
3001                             }
3002                         }
3003                     }
3004                 }
3005                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT){
3006                     int mx= s->b_direct_mv_table[xy][0];
3007                     int my= s->b_direct_mv_table[xy][1];
3008
3009                     backup_s.dquant = 0;
3010                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
3011                     s->mb_intra= 0;
3012                     ff_mpeg4_set_direct_mv(s, mx, my);
3013                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
3014                                  &dmin, &next_block, mx, my);
3015                 }
3016                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT0){
3017                     backup_s.dquant = 0;
3018                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
3019                     s->mb_intra= 0;
3020                     ff_mpeg4_set_direct_mv(s, 0, 0);
3021                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
3022                                  &dmin, &next_block, 0, 0);
3023                 }
3024                 if (!best_s.mb_intra && s->mpv_flags & FF_MPV_FLAG_SKIP_RD) {
3025                     int coded=0;
3026                     for(i=0; i<6; i++)
3027                         coded |= s->block_last_index[i];
3028                     if(coded){
3029                         int mx,my;
3030                         memcpy(s->mv, best_s.mv, sizeof(s->mv));
3031                         if(CONFIG_MPEG4_ENCODER && best_s.mv_dir & MV_DIRECT){
3032                             mx=my=0; //FIXME find the one we actually used
3033                             ff_mpeg4_set_direct_mv(s, mx, my);
3034                         }else if(best_s.mv_dir&MV_DIR_BACKWARD){
3035                             mx= s->mv[1][0][0];
3036                             my= s->mv[1][0][1];
3037                         }else{
3038                             mx= s->mv[0][0][0];
3039                             my= s->mv[0][0][1];
3040                         }
3041
3042                         s->mv_dir= best_s.mv_dir;
3043                         s->mv_type = best_s.mv_type;
3044                         s->mb_intra= 0;
3045 /*                        s->mv[0][0][0] = best_s.mv[0][0][0];
3046                         s->mv[0][0][1] = best_s.mv[0][0][1];
3047                         s->mv[1][0][0] = best_s.mv[1][0][0];
3048                         s->mv[1][0][1] = best_s.mv[1][0][1];*/
3049                         backup_s.dquant= 0;
3050                         s->skipdct=1;
3051                         encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
3052                                         &dmin, &next_block, mx, my);
3053                         s->skipdct=0;
3054                     }
3055                 }
3056
3057                 s->current_picture.qscale_table[xy] = best_s.qscale;
3058
3059                 copy_context_after_encode(s, &best_s, -1);
3060
3061                 pb_bits_count= put_bits_count(&s->pb);
3062                 flush_put_bits(&s->pb);
3063                 avpriv_copy_bits(&backup_s.pb, bit_buf[next_block^1], pb_bits_count);
3064                 s->pb= backup_s.pb;
3065
3066                 if(s->data_partitioning){
3067                     pb2_bits_count= put_bits_count(&s->pb2);
3068                     flush_put_bits(&s->pb2);
3069                     avpriv_copy_bits(&backup_s.pb2, bit_buf2[next_block^1], pb2_bits_count);
3070                     s->pb2= backup_s.pb2;
3071
3072                     tex_pb_bits_count= put_bits_count(&s->tex_pb);
3073                     flush_put_bits(&s->tex_pb);
3074                     avpriv_copy_bits(&backup_s.tex_pb, bit_buf_tex[next_block^1], tex_pb_bits_count);
3075                     s->tex_pb= backup_s.tex_pb;
3076                 }
3077                 s->last_bits= put_bits_count(&s->pb);
3078
3079                 if (CONFIG_H263_ENCODER &&
3080                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
3081                     ff_h263_update_motion_val(s);
3082
3083                 if(next_block==0){ //FIXME 16 vs linesize16
3084                     s->hdsp.put_pixels_tab[0][0](s->dest[0], s->rd_scratchpad                     , s->linesize  ,16);
3085                     s->hdsp.put_pixels_tab[1][0](s->dest[1], s->rd_scratchpad + 16*s->linesize    , s->uvlinesize, 8);
3086                     s->hdsp.put_pixels_tab[1][0](s->dest[2], s->rd_scratchpad + 16*s->linesize + 8, s->uvlinesize, 8);
3087                 }
3088
3089                 if(s->avctx->mb_decision == FF_MB_DECISION_BITS)
3090                     ff_MPV_decode_mb(s, s->block);
3091             } else {
3092                 int motion_x = 0, motion_y = 0;
3093                 s->mv_type=MV_TYPE_16X16;
3094                 // only one MB-Type possible
3095
3096                 switch(mb_type){
3097                 case CANDIDATE_MB_TYPE_INTRA:
3098                     s->mv_dir = 0;
3099                     s->mb_intra= 1;
3100                     motion_x= s->mv[0][0][0] = 0;
3101                     motion_y= s->mv[0][0][1] = 0;
3102                     break;
3103                 case CANDIDATE_MB_TYPE_INTER:
3104                     s->mv_dir = MV_DIR_FORWARD;
3105                     s->mb_intra= 0;
3106                     motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
3107                     motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
3108                     break;
3109                 case CANDIDATE_MB_TYPE_INTER_I:
3110                     s->mv_dir = MV_DIR_FORWARD;
3111                     s->mv_type = MV_TYPE_FIELD;
3112                     s->mb_intra= 0;
3113                     for(i=0; i<2; i++){
3114                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
3115                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
3116                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
3117                     }
3118                     break;
3119                 case CANDIDATE_MB_TYPE_INTER4V:
3120                     s->mv_dir = MV_DIR_FORWARD;
3121                     s->mv_type = MV_TYPE_8X8;
3122                     s->mb_intra= 0;
3123                     for(i=0; i<4; i++){
3124                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
3125                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
3126                     }
3127                     break;
3128                 case CANDIDATE_MB_TYPE_DIRECT:
3129                     if (CONFIG_MPEG4_ENCODER) {
3130                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
3131                         s->mb_intra= 0;
3132                         motion_x=s->b_direct_mv_table[xy][0];
3133                         motion_y=s->b_direct_mv_table[xy][1];
3134                         ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
3135                     }
3136                     break;
3137                 case CANDIDATE_MB_TYPE_DIRECT0:
3138                     if (CONFIG_MPEG4_ENCODER) {
3139                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
3140                         s->mb_intra= 0;
3141                         ff_mpeg4_set_direct_mv(s, 0, 0);
3142                     }
3143                     break;
3144                 case CANDIDATE_MB_TYPE_BIDIR:
3145                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3146                     s->mb_intra= 0;
3147                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
3148                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
3149                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
3150                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
3151                     break;
3152                 case CANDIDATE_MB_TYPE_BACKWARD:
3153                     s->mv_dir = MV_DIR_BACKWARD;
3154                     s->mb_intra= 0;
3155                     motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
3156                     motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
3157                     break;
3158                 case CANDIDATE_MB_TYPE_FORWARD:
3159                     s->mv_dir = MV_DIR_FORWARD;
3160                     s->mb_intra= 0;
3161                     motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
3162                     motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
3163                     break;
3164                 case CANDIDATE_MB_TYPE_FORWARD_I:
3165                     s->mv_dir = MV_DIR_FORWARD;
3166                     s->mv_type = MV_TYPE_FIELD;
3167                     s->mb_intra= 0;
3168                     for(i=0; i<2; i++){
3169                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
3170                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
3171                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
3172                     }
3173                     break;
3174                 case CANDIDATE_MB_TYPE_BACKWARD_I:
3175                     s->mv_dir = MV_DIR_BACKWARD;
3176                     s->mv_type = MV_TYPE_FIELD;
3177                     s->mb_intra= 0;
3178                     for(i=0; i<2; i++){
3179                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
3180                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
3181                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
3182                     }
3183                     break;
3184                 case CANDIDATE_MB_TYPE_BIDIR_I:
3185                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3186                     s->mv_type = MV_TYPE_FIELD;
3187                     s->mb_intra= 0;
3188                     for(dir=0; dir<2; dir++){
3189                         for(i=0; i<2; i++){
3190                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
3191                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
3192                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
3193                         }
3194                     }
3195                     break;
3196                 default:
3197                     av_log(s->avctx, AV_LOG_ERROR, "illegal MB type\n");
3198                 }
3199
3200                 encode_mb(s, motion_x, motion_y);
3201
3202                 // RAL: Update last macroblock type
3203                 s->last_mv_dir = s->mv_dir;
3204
3205                 if (CONFIG_H263_ENCODER &&
3206                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
3207                     ff_h263_update_motion_val(s);
3208
3209                 ff_MPV_decode_mb(s, s->block);
3210             }
3211
3212             /* clean the MV table in IPS frames for direct mode in B frames */
3213             if(s->mb_intra /* && I,P,S_TYPE */){
3214                 s->p_mv_table[xy][0]=0;
3215                 s->p_mv_table[xy][1]=0;
3216             }
3217
3218             if(s->flags&CODEC_FLAG_PSNR){
3219                 int w= 16;
3220                 int h= 16;
3221
3222                 if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
3223                 if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
3224
3225                 s->current_picture.f.error[0] += sse(
3226                     s, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
3227                     s->dest[0], w, h, s->linesize);
3228                 s->current_picture.f.error[1] += sse(
3229                     s, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3230                     s->dest[1], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3231                 s->current_picture.f.error[2] += sse(
3232                     s, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3233                     s->dest[2], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3234             }
3235             if(s->loop_filter){
3236                 if(CONFIG_H263_ENCODER && s->out_format == FMT_H263)
3237                     ff_h263_loop_filter(s);
3238             }
3239             av_dlog(s->avctx, "MB %d %d bits\n",
3240                     s->mb_x + s->mb_y * s->mb_stride, put_bits_count(&s->pb));
3241         }
3242     }
3243
3244     //not beautiful here but we must write it before flushing so it has to be here
3245     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type == AV_PICTURE_TYPE_I)
3246         ff_msmpeg4_encode_ext_header(s);
3247
3248     write_slice_end(s);
3249
3250     /* Send the last GOB if RTP */
3251     if (s->avctx->rtp_callback) {
3252         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width - s->resync_mb_x;
3253         pdif = put_bits_ptr(&s->pb) - s->ptr_lastgob;
3254         /* Call the RTP callback to send the last GOB */
3255         emms_c();
3256         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, pdif, number_mb);
3257     }
3258
3259     return 0;
3260 }
3261
3262 #define MERGE(field) dst->field += src->field; src->field=0
3263 static void merge_context_after_me(MpegEncContext *dst, MpegEncContext *src){
3264     MERGE(me.scene_change_score);
3265     MERGE(me.mc_mb_var_sum_temp);
3266     MERGE(me.mb_var_sum_temp);
3267 }
3268
3269 static void merge_context_after_encode(MpegEncContext *dst, MpegEncContext *src){
3270     int i;
3271
3272     MERGE(dct_count[0]); //note, the other dct vars are not part of the context
3273     MERGE(dct_count[1]);
3274     MERGE(mv_bits);
3275     MERGE(i_tex_bits);
3276     MERGE(p_tex_bits);
3277     MERGE(i_count);
3278     MERGE(f_count);
3279     MERGE(b_count);
3280     MERGE(skip_count);
3281     MERGE(misc_bits);
3282     MERGE(er.error_count);
3283     MERGE(padding_bug_score);
3284     MERGE(current_picture.f.error[0]);
3285     MERGE(current_picture.f.error[1]);
3286     MERGE(current_picture.f.error[2]);
3287
3288     if(dst->avctx->noise_reduction){
3289         for(i=0; i<64; i++){
3290             MERGE(dct_error_sum[0][i]);
3291             MERGE(dct_error_sum[1][i]);
3292         }
3293     }
3294
3295     assert(put_bits_count(&src->pb) % 8 ==0);
3296     assert(put_bits_count(&dst->pb) % 8 ==0);
3297     avpriv_copy_bits(&dst->pb, src->pb.buf, put_bits_count(&src->pb));
3298     flush_put_bits(&dst->pb);
3299 }
3300
3301 static int estimate_qp(MpegEncContext *s, int dry_run){
3302     if (s->next_lambda){
3303         s->current_picture_ptr->f.quality =
3304         s->current_picture.f.quality = s->next_lambda;
3305         if(!dry_run) s->next_lambda= 0;
3306     } else if (!s->fixed_qscale) {
3307         s->current_picture_ptr->f.quality =
3308         s->current_picture.f.quality = ff_rate_estimate_qscale(s, dry_run);
3309         if (s->current_picture.f.quality < 0)
3310             return -1;
3311     }
3312
3313     if(s->adaptive_quant){
3314         switch(s->codec_id){
3315         case AV_CODEC_ID_MPEG4:
3316             if (CONFIG_MPEG4_ENCODER)
3317                 ff_clean_mpeg4_qscales(s);
3318             break;
3319         case AV_CODEC_ID_H263:
3320         case AV_CODEC_ID_H263P:
3321         case AV_CODEC_ID_FLV1:
3322             if (CONFIG_H263_ENCODER)
3323                 ff_clean_h263_qscales(s);
3324             break;
3325         default:
3326             ff_init_qscale_tab(s);
3327         }
3328
3329         s->lambda= s->lambda_table[0];
3330         //FIXME broken
3331     }else
3332         s->lambda = s->current_picture.f.quality;
3333     update_qscale(s);
3334     return 0;
3335 }
3336
3337 /* must be called before writing the header */
3338 static void set_frame_distances(MpegEncContext * s){
3339     assert(s->current_picture_ptr->f.pts != AV_NOPTS_VALUE);
3340     s->time = s->current_picture_ptr->f.pts * s->avctx->time_base.num;
3341
3342     if(s->pict_type==AV_PICTURE_TYPE_B){
3343         s->pb_time= s->pp_time - (s->last_non_b_time - s->time);
3344         assert(s->pb_time > 0 && s->pb_time < s->pp_time);
3345     }else{
3346         s->pp_time= s->time - s->last_non_b_time;
3347         s->last_non_b_time= s->time;
3348         assert(s->picture_number==0 || s->pp_time > 0);
3349     }
3350 }
3351
/**
 * Encode the current picture.
 *
 * Steps, in order: reset per-picture state, pick a preliminary lambda, run
 * motion estimation (or mark every macroblock intra for I pictures), merge
 * the per-context ME statistics, select f_code/b_code, estimate the final
 * quantizer, write the picture header for s->out_format, run encode_thread
 * on all slice contexts and merge their results into s.
 *
 * @param s              encoder context (slice context 0)
 * @param picture_number stored in s->picture_number and passed on to the
 *                       picture header writers
 * @return 0 on success, -1 if estimate_qp() or ff_init_me() fails, or the
 *         negative value returned by ff_update_duplicate_context()
 */
static int encode_picture(MpegEncContext *s, int picture_number)
{
    int i, ret;
    int bits;
    int context_count = s->slice_context_count;

    s->picture_number = picture_number;

    /* Reset the average MB variance */
    s->me.mb_var_sum_temp    =
    s->me.mc_mb_var_sum_temp = 0;

    /* we need to initialize some time vars before we can encode b-frames */
    // RAL: Condition added for MPEG1VIDEO
    if (s->codec_id == AV_CODEC_ID_MPEG1VIDEO || s->codec_id == AV_CODEC_ID_MPEG2VIDEO || (s->h263_pred && !s->msmpeg4_version))
        set_frame_distances(s);
    if(CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4)
        ff_set_mpeg4_time(s);

    s->me.scene_change_score=0;

//    s->lambda= s->current_picture_ptr->quality; //FIXME qscale / ... stuff for ME rate distortion

    /* Rounding control: I pictures set no_rounding to 1 for msmpeg4_version >= 3
     * and to 0 otherwise; other non-B pictures toggle it when flipflop_rounding
     * is set or the codec is H263P or MPEG4. B pictures leave it untouched. */
    if(s->pict_type==AV_PICTURE_TYPE_I){
        if(s->msmpeg4_version >= 3) s->no_rounding=1;
        else                        s->no_rounding=0;
    }else if(s->pict_type!=AV_PICTURE_TYPE_B){
        if(s->flipflop_rounding || s->codec_id == AV_CODEC_ID_H263P || s->codec_id == AV_CODEC_ID_MPEG4)
            s->no_rounding ^= 1;
    }

    /* Preliminary lambda, set before motion estimation runs: in pass 2 do a
     * dry-run quantizer estimate; otherwise, unless CODEC_FLAG_QSCALE is set,
     * reuse the last lambda recorded for the matching picture type. */
    if(s->flags & CODEC_FLAG_PASS2){
        if (estimate_qp(s,1) < 0)
            return -1;
        ff_get_2pass_fcode(s);
    }else if(!(s->flags & CODEC_FLAG_QSCALE)){
        if(s->pict_type==AV_PICTURE_TYPE_B)
            s->lambda= s->last_lambda_for[s->pict_type];
        else
            s->lambda= s->last_lambda_for[s->last_non_b_pict_type];
        update_qscale(s);
    }

    /* For every codec except AMV and MJPEG the chroma intra quant matrices
     * alias the luma ones; a chroma matrix that is a distinct pointer is
     * freed first. */
    if(s->codec_id != AV_CODEC_ID_AMV && s->codec_id != AV_CODEC_ID_MJPEG){
        if(s->q_chroma_intra_matrix   != s->q_intra_matrix  ) av_freep(&s->q_chroma_intra_matrix);
        if(s->q_chroma_intra_matrix16 != s->q_intra_matrix16) av_freep(&s->q_chroma_intra_matrix16);
        s->q_chroma_intra_matrix   = s->q_intra_matrix;
        s->q_chroma_intra_matrix16 = s->q_intra_matrix16;
    }

    s->mb_intra=0; //for the rate distortion & bit compare functions
    /* update the other slice contexts from s (context 0 is skipped) */
    for(i=1; i<context_count; i++){
        ret = ff_update_duplicate_context(s->thread_context[i], s);
        if (ret < 0)
            return ret;
    }

    if(ff_init_me(s)<0)
        return -1;

    /* Estimate motion for every MB */
    if(s->pict_type != AV_PICTURE_TYPE_I){
        /* scale lambda/lambda2 by me_penalty_compensation/256, rounded */
        s->lambda = (s->lambda * s->avctx->me_penalty_compensation + 128)>>8;
        s->lambda2= (s->lambda2* (int64_t)s->avctx->me_penalty_compensation + 128)>>8;
        if (s->pict_type != AV_PICTURE_TYPE_B) {
            /* pre-pass: always when pre_me == 2, otherwise only when pre_me
             * is set and the previous non-B picture was an I picture */
            if((s->avctx->pre_me && s->last_non_b_pict_type==AV_PICTURE_TYPE_I) || s->avctx->pre_me==2){
                s->avctx->execute(s->avctx, pre_estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
            }
        }

        s->avctx->execute(s->avctx, estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
    }else /* if(s->pict_type == AV_PICTURE_TYPE_I) */{
        /* I-Frame */
        for(i=0; i<s->mb_stride*s->mb_height; i++)
            s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;

        if(!s->fixed_qscale){
            /* finding spatial complexity for I-frame rate control */
            s->avctx->execute(s->avctx, mb_var_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
        }
    }
    /* collect the ME statistics of the other slice contexts in s */
    for(i=1; i<context_count; i++){
        merge_context_after_me(s, s->thread_context[i]);
    }
    s->current_picture.mc_mb_var_sum= s->current_picture_ptr->mc_mb_var_sum= s->me.mc_mb_var_sum_temp;
    s->current_picture.   mb_var_sum= s->current_picture_ptr->   mb_var_sum= s->me.   mb_var_sum_temp;
    emms_c();

    /* A P picture whose scene change score exceeds the threshold is
     * re-typed as an I picture with all macroblocks marked intra. */
    if(s->me.scene_change_score > s->avctx->scenechange_threshold && s->pict_type == AV_PICTURE_TYPE_P){
        s->pict_type= AV_PICTURE_TYPE_I;
        for(i=0; i<s->mb_stride*s->mb_height; i++)
            s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
        if(s->msmpeg4_version >= 3)
            s->no_rounding=1;
        av_dlog(s, "Scene change detected, encoding as I Frame %d %d\n",
                s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
    }

    /* Select f_code/b_code from the motion vector tables, then pass every
     * table to ff_fix_long_mvs() with the chosen code (presumably to deal
     * with vectors that do not fit -- confirm in its definition).
     * Skipped entirely when umvplus is set. */
    if(!s->umvplus){
        if(s->pict_type==AV_PICTURE_TYPE_P || s->pict_type==AV_PICTURE_TYPE_S) {
            s->f_code= ff_get_best_fcode(s, s->p_mv_table, CANDIDATE_MB_TYPE_INTER);

            if(s->flags & CODEC_FLAG_INTERLACED_ME){
                int a,b;
                a= ff_get_best_fcode(s, s->p_field_mv_table[0][0], CANDIDATE_MB_TYPE_INTER_I); //FIXME field_select
                b= ff_get_best_fcode(s, s->p_field_mv_table[1][1], CANDIDATE_MB_TYPE_INTER_I);
                s->f_code= FFMAX3(s->f_code, a, b);
            }

            ff_fix_long_p_mvs(s);
            ff_fix_long_mvs(s, NULL, 0, s->p_mv_table, s->f_code, CANDIDATE_MB_TYPE_INTER, 0);
            if(s->flags & CODEC_FLAG_INTERLACED_ME){
                int j;
                for(i=0; i<2; i++){
                    for(j=0; j<2; j++)
                        ff_fix_long_mvs(s, s->p_field_select_table[i], j,
                                        s->p_field_mv_table[i][j], s->f_code, CANDIDATE_MB_TYPE_INTER_I, 0);
                }
            }
        }

        if(s->pict_type==AV_PICTURE_TYPE_B){
            int a, b;

            /* f_code covers the forward tables, b_code the backward ones */
            a = ff_get_best_fcode(s, s->b_forw_mv_table, CANDIDATE_MB_TYPE_FORWARD);
            b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, CANDIDATE_MB_TYPE_BIDIR);
            s->f_code = FFMAX(a, b);

            a = ff_get_best_fcode(s, s->b_back_mv_table, CANDIDATE_MB_TYPE_BACKWARD);
            b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, CANDIDATE_MB_TYPE_BIDIR);
            s->b_code = FFMAX(a, b);

            ff_fix_long_mvs(s, NULL, 0, s->b_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_FORWARD, 1);
            ff_fix_long_mvs(s, NULL, 0, s->b_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BACKWARD, 1);
            ff_fix_long_mvs(s, NULL, 0, s->b_bidir_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_BIDIR, 1);
            ff_fix_long_mvs(s, NULL, 0, s->b_bidir_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BIDIR, 1);
            if(s->flags & CODEC_FLAG_INTERLACED_ME){
                int dir, j;
                for(dir=0; dir<2; dir++){
                    for(i=0; i<2; i++){
                        for(j=0; j<2; j++){
                            int type= dir ? (CANDIDATE_MB_TYPE_BACKWARD_I|CANDIDATE_MB_TYPE_BIDIR_I)
                                          : (CANDIDATE_MB_TYPE_FORWARD_I |CANDIDATE_MB_TYPE_BIDIR_I);
                            ff_fix_long_mvs(s, s->b_field_select_table[dir][i], j,
                                            s->b_field_mv_table[dir][i][j], dir ? s->b_code : s->f_code, type, 1);
                        }
                    }
                }
            }
        }
    }

    /* final (non dry-run) quantizer estimate for this picture */
    if (estimate_qp(s, 0) < 0)
        return -1;

    if(s->qscale < 3 && s->max_qcoeff<=128 && s->pict_type==AV_PICTURE_TYPE_I && !(s->flags & CODEC_FLAG_QSCALE))
        s->qscale= 3; //reduce clipping problems

    if (s->out_format == FMT_MJPEG) {
        /* default to the MPEG-1 intra matrix; a user intra_matrix replaces
         * both, a user chroma_intra_matrix replaces only the chroma one */
        const uint16_t *  luma_matrix = ff_mpeg1_default_intra_matrix;
        const uint16_t *chroma_matrix = ff_mpeg1_default_intra_matrix;

        if (s->avctx->intra_matrix) {
            chroma_matrix =
            luma_matrix = s->avctx->intra_matrix;
        }
        if (s->avctx->chroma_intra_matrix)
            chroma_matrix = s->avctx->chroma_intra_matrix;

        /* for mjpeg, we do include qscale in the matrix */
        for(i=1;i<64;i++){
            int j= s->dsp.idct_permutation[i];

            s->chroma_intra_matrix[j] = av_clip_uint8((chroma_matrix[i] * s->qscale) >> 3);
            s->       intra_matrix[j] = av_clip_uint8((  luma_matrix[i] * s->qscale) >> 3);
        }
        s->y_dc_scale_table=
        s->c_dc_scale_table= ff_mpeg2_dc_scale_table[s->intra_dc_precision];
        s->chroma_intra_matrix[0] =
        s->intra_matrix[0] = ff_mpeg2_dc_scale_table[s->intra_dc_precision][8];
        ff_convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
                       s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
        ff_convert_matrix(&s->dsp, s->q_chroma_intra_matrix, s->q_chroma_intra_matrix16,
                       s->chroma_intra_matrix, s->intra_quant_bias, 8, 8, 1);
        /* qscale is already folded into the matrices above */
        s->qscale= 8;
    }
    if(s->codec_id == AV_CODEC_ID_AMV){
        /* AMV: constant DC scale tables (13 for luma, 14 for chroma) and
         * AC entries taken from rows 10 and 11 of sp5x_quant_table */
        static const uint8_t y[32]={13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13};
        static const uint8_t c[32]={14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14};
        for(i=1;i<64;i++){
            int j= s->dsp.idct_permutation[ff_zigzag_direct[i]];

            s->intra_matrix[j] = sp5x_quant_table[5*2+0][i];
            s->chroma_intra_matrix[j] = sp5x_quant_table[5*2+1][i];
        }
        s->y_dc_scale_table= y;
        s->c_dc_scale_table= c;
        s->intra_matrix[0] = 13;
        s->chroma_intra_matrix[0] = 14;
        ff_convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
                       s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
        ff_convert_matrix(&s->dsp, s->q_chroma_intra_matrix, s->q_chroma_intra_matrix16,
                       s->chroma_intra_matrix, s->intra_quant_bias, 8, 8, 1);
        s->qscale= 8;
    }

    //FIXME var duplication
    s->current_picture_ptr->f.key_frame =
    s->current_picture.f.key_frame = s->pict_type == AV_PICTURE_TYPE_I; //FIXME pic_ptr
    s->current_picture_ptr->f.pict_type =
    s->current_picture.f.pict_type = s->pict_type;

    if (s->current_picture.f.key_frame)
        s->picture_in_gop_number=0;

    /* write the picture header of the active output format; last_bits
     * remembers the bit position before the header */
    s->mb_x = s->mb_y = 0;
    s->last_bits= put_bits_count(&s->pb);
    switch(s->out_format) {
    case FMT_MJPEG:
        if (CONFIG_MJPEG_ENCODER)
            ff_mjpeg_encode_picture_header(s->avctx, &s->pb, &s->intra_scantable,
                                           s->intra_matrix, s->chroma_intra_matrix);
        break;
    case FMT_H261:
        if (CONFIG_H261_ENCODER)
            ff_h261_encode_picture_header(s, picture_number);
        break;
    case FMT_H263:
        /* several codecs share FMT_H263; the first matching writer wins */
        if (CONFIG_WMV2_ENCODER && s->codec_id == AV_CODEC_ID_WMV2)
            ff_wmv2_encode_picture_header(s, picture_number);
        else if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
            ff_msmpeg4_encode_picture_header(s, picture_number);
        else if (CONFIG_MPEG4_ENCODER && s->h263_pred)
            ff_mpeg4_encode_picture_header(s, picture_number);
        else if (CONFIG_RV10_ENCODER && s->codec_id == AV_CODEC_ID_RV10)
            ff_rv10_encode_picture_header(s, picture_number);
        else if (CONFIG_RV20_ENCODER && s->codec_id == AV_CODEC_ID_RV20)
            ff_rv20_encode_picture_header(s, picture_number);
        else if (CONFIG_FLV_ENCODER && s->codec_id == AV_CODEC_ID_FLV1)
            ff_flv_encode_picture_header(s, picture_number);
        else if (CONFIG_H263_ENCODER)
            ff_h263_encode_picture_header(s, picture_number);
        break;
    case FMT_MPEG1:
        if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
            ff_mpeg1_encode_picture_header(s, picture_number);
        break;
    default:
        av_assert0(0);
    }
    /* number of bits the header occupied */
    bits= put_bits_count(&s->pb);
    s->header_bits= bits - s->last_bits;

    /* update the other slice contexts, encode all slices, then merge the
     * results of contexts 1..context_count-1 back into s */
    for(i=1; i<context_count; i++){
        update_duplicate_context_after_me(s->thread_context[i], s);
    }
    s->avctx->execute(s->avctx, encode_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
    for(i=1; i<context_count; i++){
        merge_context_after_encode(s, s->thread_context[i]);
    }
    emms_c();
    return 0;
}
3615
3616 static void denoise_dct_c(MpegEncContext *s, int16_t *block){
3617     const int intra= s->mb_intra;
3618     int i;
3619
3620     s->dct_count[intra]++;
3621
3622     for(i=0; i<64; i++){
3623         int level= block[i];
3624
3625         if(level){
3626             if(level>0){
3627                 s->dct_error_sum[intra][i] += level;
3628                 level -= s->dct_offset[intra][i];
3629                 if(level<0) level=0;
3630             }else{
3631                 s->dct_error_sum[intra][i] -= level;
3632                 level += s->dct_offset[intra][i];
3633                 if(level>0) level=0;
3634             }
3635             block[i]= level;
3636         }
3637     }
3638 }
3639
3640 static int dct_quantize_trellis_c(MpegEncContext *s,
3641                                   int16_t *block, int n,
3642                                   int qscale, int *overflow){
3643     const int *qmat;
3644     const uint8_t *scantable= s->intra_scantable.scantable;
3645     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3646     int max=0;
3647     unsigned int threshold1, threshold2;
3648     int bias=0;
3649     int run_tab[65];
3650     int level_tab[65];
3651     int score_tab[65];
3652     int survivor[65];
3653     int survivor_count;
3654     int last_run=0;
3655     int last_level=0;
3656     int last_score= 0;
3657     int last_i;
3658     int coeff[2][64];
3659     int coeff_count[64];
3660     int qmul, qadd, start_i, last_non_zero, i, dc;
3661     const int esc_length= s->ac_esc_length;
3662     uint8_t * length;
3663     uint8_t * last_length;
3664     const int lambda= s->lambda2 >> (FF_LAMBDA_SHIFT - 6);
3665
3666     s->dsp.fdct (block);
3667
3668     if(s->dct_error_sum)
3669         s->denoise_dct(s, block);
3670     qmul= qscale*16;
3671     qadd= ((qscale-1)|1)*8;
3672
3673     if (s->mb_intra) {
3674         int q;
3675         if (!s->h263_aic) {
3676             if (n < 4)
3677                 q = s->y_dc_scale;
3678             else
3679                 q = s->c_dc_scale;
3680             q = q << 3;
3681         } else{
3682             /* For AIC we skip quant/dequant of INTRADC */
3683             q = 1 << 3;
3684             qadd=0;
3685         }
3686
3687         /* note: block[0] is assumed to be positive */
3688         block[0] = (block[0] + (q >> 1)) / q;
3689         start_i = 1;
3690         last_non_zero = 0;
3691         qmat = n < 4 ? s->q_intra_matrix[qscale] : s->q_chroma_intra_matrix[qscale];
3692         if(s->mpeg_quant || s->out_format == FMT_MPEG1)
3693             bias= 1<<(QMAT_SHIFT-1);
3694         length     = s->intra_ac_vlc_length;
3695         last_length= s->intra_ac_vlc_last_length;
3696     } else {
3697         start_i = 0;
3698         last_non_zero = -1;
3699         qmat = s->q_inter_matrix[qscale];
3700         length     = s->inter_ac_vlc_length;
3701         last_length= s->inter_ac_vlc_last_length;
3702     }
3703     last_i= start_i;
3704
3705     threshold1= (1<<QMAT_SHIFT) - bias - 1;
3706     threshold2= (threshold1<<1);
3707
3708     for(i=63; i>=start_i; i--) {
3709         const int j = scantable[i];
3710         int level = block[j] * qmat[j];
3711
3712         if(((unsigned)(level+threshold1))>threshold2){
3713             last_non_zero = i;
3714             break;
3715         }
3716     }
3717
3718     for(i=start_i; i<=last_non_zero; i++) {
3719         const int j = scantable[i];
3720         int level = block[j] * qmat[j];
3721
3722 //        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
3723 //           || bias-level >= (1<<(QMAT_SHIFT - 3))){
3724         if(((unsigned)(level+threshold1))>threshold2){
3725             if(level>0){
3726                 level= (bias + level)>>QMAT_SHIFT;
3727                 coeff[0][i]= level;
3728                 coeff[1][i]= level-1;
3729 //                coeff[2][k]= level-2;
3730             }else{
3731                 level= (bias - level)>>QMAT_SHIFT;
3732                 coeff[0][i]= -level;
3733                 coeff[1][i]= -level+1;
3734 //                coeff[2][k]= -level+2;
3735             }
3736             coeff_count[i]= FFMIN(level, 2);
3737             av_assert2(coeff_count[i]);
3738             max |=level;
3739         }else{
3740             coeff[0][i]= (level>>31)|1;
3741             coeff_count[i]= 1;
3742         }
3743     }
3744
3745     *overflow= s->max_qcoeff < max; //overflow might have happened
3746
3747     if(last_non_zero < start_i){
3748         memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
3749         return last_non_zero;
3750     }
3751
3752     score_tab[start_i]= 0;
3753     survivor[0]= start_i;
3754     survivor_count= 1;
3755
3756     for(i=start_i; i<=last_non_zero; i++){
3757         int level_index, j, zero_distortion;
3758         int dct_coeff= FFABS(block[ scantable[i] ]);
3759         int best_score=256*256*256*120;
3760
3761         if (s->dsp.fdct == ff_fdct_ifast)
3762             dct_coeff= (dct_coeff*ff_inv_aanscales[ scantable[i] ]) >> 12;
3763         zero_distortion= dct_coeff*dct_coeff;
3764
3765         for(level_index=0; level_index < coeff_count[i]; level_index++){
3766             int distortion;
3767             int level= coeff[level_index][i];
3768             const int alevel= FFABS(level);
3769             int unquant_coeff;
3770
3771             av_assert2(level);
3772
3773             if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
3774                 unquant_coeff= alevel*qmul + qadd;
3775             }else{ //MPEG1
3776                 j= s->dsp.idct_permutation[ scantable[i] ]; //FIXME optimize
3777                 if(s->mb_intra){
3778                         unquant_coeff = (int)(  alevel  * qscale * s->intra_matrix[j]) >> 3;
3779                         unquant_coeff =   (unquant_coeff - 1) | 1;
3780                 }else{
3781                         unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[j])) >> 4;
3782                         unquant_coeff =   (unquant_coeff - 1) | 1;
3783                 }
3784                 unquant_coeff<<= 3;
3785             }
3786
3787             distortion= (unquant_coeff - dct_coeff) * (unquant_coeff - dct_coeff) - zero_distortion;
3788             level+=64;
3789             if((level&(~127)) == 0){
3790                 for(j=survivor_count-1; j>=0; j--){
3791                     int run= i - survivor[j];
3792                     int score= distortion + length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3793                     score += score_tab[i-run];
3794
3795                     if(score < best_score){
3796                         best_score= score;
3797                         run_tab[i+1]= run;
3798                         level_tab[i+1]= level-64;
3799                     }
3800                 }
3801
3802                 if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
3803                     for(j=survivor_count-1; j>=0; j--){
3804                         int run= i - survivor[j];
3805                         int score= distortion + last_length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3806                         score += score_tab[i-run];
3807                         if(score < last_score){
3808                             last_score= score;
3809                             last_run= run;
3810                             last_level= level-64;
3811                             last_i= i+1;
3812                         }
3813                     }
3814                 }
3815             }else{
3816                 distortion += esc_length*lambda;
3817                 for(j=survivor_count-1; j>=0; j--){
3818                     int run= i - survivor[j];
3819                     int score= distortion + score_tab[i-run];
3820
3821                     if(score < best_score){
3822                         best_score= score;
3823                         run_tab[i+1]= run;
3824                         level_tab[i+1]= level-64;
3825                     }
3826                 }
3827
3828                 if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
3829                   for(j=survivor_count-1; j>=0; j--){
3830                         int run= i - survivor[j];
3831                         int score= distortion + score_tab[i-run];
3832                         if(score < last_score){
3833                             last_score= score;
3834                             last_run= run;
3835                             last_level= level-64;
3836                             last_i= i+1;
3837                         }
3838                     }
3839                 }
3840             }
3841         }
3842
3843         score_tab[i+1]= best_score;
3844
3845         //Note: there is a vlc code in mpeg4 which is 1 bit shorter then another one with a shorter run and the same level
3846         if(last_non_zero <= 27){
3847             for(; survivor_count; survivor_count--){
3848                 if(score_tab[ survivor[survivor_count-1] ] <= best_score)
3849                     break;
3850             }
3851         }else{
3852             for(; survivor_count; survivor_count--){
3853                 if(score_tab[ survivor[survivor_count-1] ] <= best_score + lambda)
3854                     break;
3855             }
3856         }
3857
3858         survivor[ survivor_count++ ]= i+1;
3859     }
3860
3861     if(s->out_format != FMT_H263 && s->out_format != FMT_H261){
3862         last_score= 256*256*256*120;
3863         for(i= survivor[0]; i<=last_non_zero + 1; i++){
3864             int score= score_tab[i];
3865             if(i) score += lambda*2; //FIXME exacter?
3866
3867             if(score < last_score){
3868                 last_score= score;
3869                 last_i= i;
3870                 last_level= level_tab[i];
3871                 last_run= run_tab[i];
3872             }
3873         }
3874     }
3875
3876     s->coded_score[n] = last_score;
3877
3878     dc= FFABS(block[0]);
3879     last_non_zero= last_i - 1;
3880     memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
3881
3882     if(last_non_zero < start_i)
3883         return last_non_zero;
3884
3885     if(last_non_zero == 0 && start_i == 0){
3886         int best_level= 0;
3887         int best_score= dc * dc;
3888
3889         for(i=0; i<coeff_count[0]; i++){
3890             int level= coeff[i][0];
3891             int alevel= FFABS(level);
3892             int unquant_coeff, score, distortion;
3893
3894             if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
3895                     unquant_coeff= (alevel*qmul + qadd)>>3;
3896             }else{ //MPEG1
3897                     unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[0])) >> 4;
3898                     unquant_coeff =   (unquant_coeff - 1) | 1;
3899             }
3900             unquant_coeff = (unquant_coeff + 4) >> 3;
3901             unquant_coeff<<= 3 + 3;
3902
3903             distortion= (unquant_coeff - dc) * (unquant_coeff - dc);
3904             level+=64;
3905             if((level&(~127)) == 0) score= distortion + last_length[UNI_AC_ENC_INDEX(0, level)]*lambda;
3906             else                    score= distortion + esc_length*lambda;
3907
3908             if(score < best_score){
3909                 best_score= score;
3910                 best_level= level - 64;
3911             }
3912         }
3913         block[0]= best_level;
3914         s->coded_score[n] = best_score - dc*dc;
3915         if(best_level == 0) return -1;
3916         else                return last_non_zero;
3917     }
3918
3919     i= last_i;
3920     av_assert2(last_level);
3921
3922     block[ perm_scantable[last_non_zero] ]= last_level;
3923     i -= last_run + 1;
3924
3925     for(; i>start_i; i -= run_tab[i] + 1){
3926         block[ perm_scantable[i-1] ]= level_tab[i];
3927     }
3928
3929     return last_non_zero;
3930 }
3931
3932 //#define REFINE_STATS 1
3933 static int16_t basis[64][64];
3934
3935 static void build_basis(uint8_t *perm){
3936     int i, j, x, y;
3937     emms_c();
3938     for(i=0; i<8; i++){
3939         for(j=0; j<8; j++){
3940             for(y=0; y<8; y++){
3941                 for(x=0; x<8; x++){
3942                     double s= 0.25*(1<<BASIS_SHIFT);
3943                     int index= 8*i + j;
3944                     int perm_index= perm[index];
3945                     if(i==0) s*= sqrt(0.5);
3946                     if(j==0) s*= sqrt(0.5);
3947                     basis[perm_index][8*x + y]= lrintf(s * cos((M_PI/8.0)*i*(x+0.5)) * cos((M_PI/8.0)*j*(y+0.5)));
3948                 }
3949             }
3950         }
3951     }
3952 }
3953
3954 static int dct_quantize_refine(MpegEncContext *s, //FIXME breaks denoise?
3955                         int16_t *block, int16_t *weight, int16_t *orig,
3956                         int n, int qscale){
3957     int16_t rem[64];
3958     LOCAL_ALIGNED_16(int16_t, d1, [64]);
3959     const uint8_t *scantable= s->intra_scantable.scantable;
3960     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3961 //    unsigned int threshold1, threshold2;
3962 //    int bias=0;
3963     int run_tab[65];
3964     int prev_run=0;
3965     int prev_level=0;
3966     int qmul, qadd, start_i, last_non_zero, i, dc;
3967     uint8_t * length;
3968     uint8_t * last_length;
3969     int lambda;
3970     int rle_index, run, q = 1, sum; //q is only used when s->mb_intra is true
3971 #ifdef REFINE_STATS
3972 static int count=0;
3973 static int after_last=0;
3974 static int to_zero=0;
3975 static int from_zero=0;
3976 static int raise=0;
3977 static int lower=0;
3978 static int messed_sign=0;
3979 #endif
3980
3981     if(basis[0][0] == 0)
3982         build_basis(s->dsp.idct_permutation);
3983
3984     qmul= qscale*2;
3985     qadd= (qscale-1)|1;
3986     if (s->mb_intra) {
3987         if (!s->h263_aic) {
3988             if (n < 4)
3989                 q = s->y_dc_scale;
3990             else
3991                 q = s->c_dc_scale;
3992         } else{
3993             /* For AIC we skip quant/dequant of INTRADC */
3994             q = 1;
3995             qadd=0;
3996         }
3997         q <<= RECON_SHIFT-3;
3998         /* note: block[0] is assumed to be positive */
3999         dc= block[0]*q;
4000 //        block[0] = (block[0] + (q >> 1)) / q;
4001         start_i = 1;
4002 //        if(s->mpeg_quant || s->out_format == FMT_MPEG1)
4003 //            bias= 1<<(QMAT_SHIFT-1);
4004         length     = s->intra_ac_vlc_length;
4005         last_length= s->intra_ac_vlc_last_length;
4006     } else {
4007         dc= 0;
4008         start_i = 0;
4009         length     = s->inter_ac_vlc_length;
4010         last_length= s->inter_ac_vlc_last_length;
4011     }
4012     last_non_zero = s->block_last_index[n];
4013
4014 #ifdef REFINE_STATS
4015 {START_TIMER
4016 #endif
4017     dc += (1<<(RECON_SHIFT-1));
4018     for(i=0; i<64; i++){
4019         rem[i]= dc - (orig[i]<<RECON_SHIFT); //FIXME  use orig dirrectly instead of copying to rem[]
4020     }
4021 #ifdef REFINE_STATS
4022 STOP_TIMER("memset rem[]")}
4023 #endif
4024     sum=0;
4025     for(i=0; i<64; i++){
4026         int one= 36;
4027         int qns=4;
4028         int w;
4029
4030         w= FFABS(weight[i]) + qns*one;
4031         w= 15 + (48*qns*one + w/2)/w; // 16 .. 63
4032
4033         weight[i] = w;
4034 //        w=weight[i] = (63*qns + (w/2)) / w;
4035
4036         av_assert2(w>0);
4037         av_assert2(w<(1<<6));
4038         sum += w*w;
4039     }
4040     lambda= sum*(uint64_t)s->lambda2 >> (FF_LAMBDA_SHIFT - 6 + 6 + 6 + 6);
4041 #ifdef REFINE_STATS
4042 {START_TIMER
4043 #endif
4044     run=0;
4045     rle_index=0;
4046     for(i=start_i; i<=last_non_zero; i++){
4047         int j= perm_scantable[i];
4048         const int level= block[j];
4049         int coeff;
4050
4051         if(level){
4052             if(level<0) coeff= qmul*level - qadd;
4053             else        coeff= qmul*level + qadd;
4054             run_tab[rle_index++]=run;
4055             run=0;
4056
4057             s->dsp.add_8x8basis(rem, basis[j], coeff);
4058         }else{
4059             run++;
4060         }
4061     }
4062 #ifdef REFINE_STATS
4063 if(last_non_zero>0){
4064 STOP_TIMER("init rem[]")
4065 }
4066 }
4067
4068 {START_TIMER
4069 #endif
4070     for(;;){
4071         int best_score=s->dsp.try_8x8basis(rem, weight, basis[0], 0);
4072         int best_coeff=0;
4073         int best_change=0;
4074         int run2, best_unquant_change=0, analyze_gradient;
4075 #ifdef REFINE_STATS
4076 {START_TIMER
4077 #endif
4078         analyze_gradient = last_non_zero > 2 || s->quantizer_noise_shaping >= 3;
4079
4080         if(analyze_gradient){
4081 #ifdef REFINE_STATS
4082 {START_TIMER
4083 #endif
4084             for(i=0; i<64; i++){
4085                 int w= weight[i];
4086
4087                 d1[i] = (rem[i]*w*w + (1<<(RECON_SHIFT+12-1)))>>(RECON_SHIFT+12);
4088             }
4089 #ifdef REFINE_STATS
4090 STOP_TIMER("rem*w*w")}
4091 {START_TIMER
4092 #endif
4093             s->dsp.fdct(d1);
4094 #ifdef REFINE_STATS
4095 STOP_TIMER("dct")}
4096 #endif
4097         }
4098
4099         if(start_i){
4100             const int level= block[0];
4101             int change, old_coeff;
4102
4103             av_assert2(s->mb_intra);
4104
4105             old_coeff= q*level;
4106
4107             for(change=-1; change<=1; change+=2){
4108                 int new_level= level + change;
4109                 int score, new_coeff;
4110
4111                 new_coeff= q*new_level;
4112                 if(new_coeff >= 2048 || new_coeff < 0)
4113                     continue;
4114
4115                 score= s->dsp.try_8x8basis(rem, weight, basis[0], new_coeff - old_coeff);
4116                 if(score<best_score){
4117                     best_score= score;
4118                     best_coeff= 0;
4119                     best_change= change;
4120                     best_unquant_change= new_coeff - old_coeff;
4121                 }
4122             }
4123         }
4124
4125         run=0;
4126         rle_index=0;
4127         run2= run_tab[rle_index++];
4128         prev_level=0;
4129         prev_run=0;
4130
4131         for(i=start_i; i<64; i++){
4132             int j= perm_scantable[i];
4133             const int level= block[j];
4134             int change, old_coeff;
4135
4136             if(s->quantizer_noise_shaping < 3 && i > last_non_zero + 1)
4137                 break;
4138
4139             if(level){
4140                 if(level<0) old_coeff= qmul*level - qadd;
4141                 else        old_coeff= qmul*level + qadd;
4142                 run2= run_tab[rle_index++]; //FIXME ! maybe after last
4143             }else{
4144                 old_coeff=0;
4145                 run2--;
4146                 av_assert2(run2>=0 || i >= last_non_zero );
4147             }
4148
4149             for(change=-1; change<=1; change+=2){
4150                 int new_level= level + change;
4151                 int score, new_coeff, unquant_change;
4152
4153                 score=0;
4154                 if(s->quantizer_noise_shaping < 2 && FFABS(new_level) > FFABS(level))
4155                    continue;
4156
4157                 if(new_level){
4158                     if(new_level<0) new_coeff= qmul*new_level - qadd;
4159                     else            new_coeff= qmul*new_level + qadd;
4160                     if(new_coeff >= 2048 || new_coeff <= -2048)
4161                         continue;
4162                     //FIXME check for overflow
4163
4164                     if(level){
4165                         if(level < 63 && level > -63){
4166                             if(i < last_non_zero)
4167                                 score +=   length[UNI_AC_ENC_INDEX(run, new_level+64)]
4168                                          - length[UNI_AC_ENC_INDEX(run, level+64)];
4169                             else
4170                                 score +=   last_length[UNI_AC_ENC_INDEX(run, new_level+64)]
4171                                          - last_length[UNI_AC_ENC_INDEX(run, level+64)];
4172                         }
4173                     }else{
4174                         av_assert2(FFABS(new_level)==1);
4175
4176                         if(analyze_gradient){
4177                             int g= d1[ scantable[i] ];
4178                             if(g && (g^new_level) >= 0)
4179                                 continue;
4180                         }
4181
4182                         if(i < last_non_zero){
4183                             int next_i= i + run2 + 1;
4184                             int next_level= block[ perm_scantable[next_i] ] + 64;
4185
4186                             if(next_level&(~127))
4187                                 next_level= 0;
4188
4189                             if(next_i < last_non_zero)
4190                                 score +=   length[UNI_AC_ENC_INDEX(run, 65)]
4191                                          + length[UNI_AC_ENC_INDEX(run2, next_level)]
4192                                          - length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
4193                             else
4194                                 score +=  length[UNI_AC_ENC_INDEX(run, 65)]
4195                                         + last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4196                                         - last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
4197                         }else{
4198                             score += last_length[UNI_AC_ENC_INDEX(run, 65)];
4199                             if(prev_level){
4200                                 score +=  length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4201                                         - last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4202                             }
4203                         }
4204                     }
4205                 }else{
4206                     new_coeff=0;
4207                     av_assert2(FFABS(level)==1);
4208
4209                     if(i < last_non_zero){
4210                         int next_i= i + run2 + 1;
4211                         int next_level= block[ perm_scantable[next_i] ] + 64;
4212
4213                         if(next_level&(~127))
4214                             next_level= 0;
4215
4216                         if(next_i < last_non_zero)
4217                             score +=   length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4218                                      - length[UNI_AC_ENC_INDEX(run2, next_level)]
4219                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4220                         else
4221                             score +=   last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4222                                      - last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4223                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4224                     }else{
4225                         score += -last_length[UNI_AC_ENC_INDEX(run, 65)];
4226                         if(prev_level){
4227                             score +=  last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4228                                     - length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4229                         }
4230                     }
4231                 }
4232
4233                 score *= lambda;
4234
4235                 unquant_change= new_coeff - old_coeff;
4236                 av_assert2((score < 100*lambda && score > -100*lambda) || lambda==0);
4237
4238                 score+= s->dsp.try_8x8basis(rem, weight, basis[j], unquant_change);
4239                 if(score<best_score){
4240                     best_score= score;
4241                     best_coeff= i;
4242                     best_change= change;
4243                     best_unquant_change= unquant_change;
4244                 }
4245             }
4246             if(level){
4247                 prev_level= level + 64;
4248                 if(prev_level&(~127))
4249                     prev_level= 0;
4250                 prev_run= run;
4251                 run=0;
4252             }else{
4253                 run++;
4254             }
4255         }
4256 #ifdef REFINE_STATS
4257 STOP_TIMER("iterative step")}
4258 #endif
4259
4260         if(best_change){
4261             int j= perm_scantable[ best_coeff ];
4262
4263             block[j] += best_change;
4264
4265             if(best_coeff > last_non_zero){
4266                 last_non_zero= best_coeff;
4267                 av_assert2(block[j]);
4268 #ifdef REFINE_STATS
4269 after_last++;
4270 #endif
4271             }else{
4272 #ifdef REFINE_STATS
4273 if(block[j]){
4274     if(block[j] - best_change){
4275         if(FFABS(block[j]) > FFABS(block[j] - best_change)){
4276             raise++;
4277         }else{
4278             lower++;
4279         }
4280     }else{
4281         from_zero++;
4282     }
4283 }else{
4284     to_zero++;
4285 }
4286 #endif
4287                 for(; last_non_zero>=start_i; last_non_zero--){
4288                     if(block[perm_scantable[last_non_zero]])
4289                         break;
4290                 }
4291             }
4292 #ifdef REFINE_STATS
4293 count++;
4294 if(256*256*256*64 % count == 0){
4295     av_log(s->avctx, AV_LOG_DEBUG, "after_last:%d to_zero:%d from_zero:%d raise:%d lower:%d sign:%d xyp:%d/%d/%d\n", after_last, to_zero, from_zero, raise, lower, messed_sign, s->mb_x, s->mb_y, s->picture_number);
4296 }
4297 #endif
4298             run=0;
4299             rle_index=0;
4300             for(i=start_i; i<=last_non_zero; i++){
4301                 int j= perm_scantable[i];
4302                 const int level= block[j];
4303
4304                  if(level){
4305                      run_tab[rle_index++]=run;
4306                      run=0;
4307                  }else{
4308                      run++;
4309                  }
4310             }
4311
4312             s->dsp.add_8x8basis(rem, basis[j], best_unquant_change);
4313         }else{
4314             break;
4315         }
4316     }
4317 #ifdef REFINE_STATS
4318 if(last_non_zero>0){
4319 STOP_TIMER("iterative search")
4320 }
4321 }
4322 #endif
4323
4324     return last_non_zero;
4325 }
4326
4327 int ff_dct_quantize_c(MpegEncContext *s,
4328                         int16_t *block, int n,
4329                         int qscale, int *overflow)
4330 {
4331     int i, j, level, last_non_zero, q, start_i;
4332     const int *qmat;
4333     const uint8_t *scantable= s->intra_scantable.scantable;
4334     int bias;
4335     int max=0;
4336     unsigned int threshold1, threshold2;
4337
4338     s->dsp.fdct (block);
4339
4340     if(s->dct_error_sum)
4341         s->denoise_dct(s, block);
4342
4343     if (s->mb_intra) {
4344         if (!s->h263_aic) {
4345             if (n < 4)
4346                 q = s->y_dc_scale;
4347             else
4348                 q = s->c_dc_scale;
4349             q = q << 3;
4350         } else
4351             /* For AIC we skip quant/dequant of INTRADC */
4352             q = 1 << 3;
4353
4354         /* note: block[0] is assumed to be positive */
4355         block[0] = (block[0] + (q >> 1)) / q;
4356         start_i = 1;
4357         last_non_zero = 0;
4358         qmat = n < 4 ? s->q_intra_matrix[qscale] : s->q_chroma_intra_matrix[qscale];
4359         bias= s->intra_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4360     } else {
4361         start_i = 0;
4362         last_non_zero = -1;
4363         qmat = s->q_inter_matrix[qscale];
4364         bias= s->inter_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4365     }
4366     threshold1= (1<<QMAT_SHIFT) - bias - 1;
4367     threshold2= (threshold1<<1);
4368     for(i=63;i>=start_i;i--) {
4369         j = scantable[i];
4370         level = block[j] * qmat[j];
4371
4372         if(((unsigned)(level+threshold1))>threshold2){
4373             last_non_zero = i;
4374             break;
4375         }else{
4376             block[j]=0;
4377         }
4378     }
4379     for(i=start_i; i<=last_non_zero; i++) {
4380         j = scantable[i];
4381         level = block[j] * qmat[j];
4382
4383 //        if(   bias+level >= (1<<QMAT_SHIFT)
4384 //           || bias-level >= (1<<QMAT_SHIFT)){
4385         if(((unsigned)(level+threshold1))>threshold2){
4386             if(level>0){
4387                 level= (bias + level)>>QMAT_SHIFT;
4388                 block[j]= level;
4389             }else{
4390                 level= (bias - level)>>QMAT_SHIFT;
4391                 block[j]= -level;
4392             }
4393             max |=level;
4394         }else{
4395             block[j]=0;
4396         }
4397     }
4398     *overflow= s->max_qcoeff < max; //overflow might have happened
4399
4400     /* we need this permutation so that we correct the IDCT, we only permute the !=0 elements */
4401     if (s->dsp.idct_permutation_type != FF_NO_IDCT_PERM)
4402         ff_block_permute(block, s->dsp.idct_permutation, scantable, last_non_zero);
4403
4404     return last_non_zero;
4405 }
4406
/* Byte offset of a MpegEncContext member, for use in AVOption tables. */
#define OFFSET(x) offsetof(MpegEncContext, x)
/* Flag set shared by the encoder options in this file. The replacement list
 * is parenthesized so the macro always expands to a single operand. */
#define VE (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM)
4409 static const AVOption h263_options[] = {
4410     { "obmc",         "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4411     { "structured_slices","Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4412     { "mb_info",      "emit macroblock info for RFC 2190 packetization, the parameter value is the maximum payload size", OFFSET(mb_info), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VE },
4413     FF_MPV_COMMON_OPTS
4414     { NULL },
4415 };
4416
4417 static const AVClass h263_class = {
4418     .class_name = "H.263 encoder",
4419     .item_name  = av_default_item_name,
4420     .option     = h263_options,
4421     .version    = LIBAVUTIL_VERSION_INT,
4422 };
4423
4424 AVCodec ff_h263_encoder = {
4425     .name           = "h263",
4426     .long_name      = NULL_IF_CONFIG_SMALL("H.263 / H.263-1996"),
4427     .type           = AVMEDIA_TYPE_VIDEO,
4428     .id             = AV_CODEC_ID_H263,
4429     .priv_data_size = sizeof(MpegEncContext),
4430     .init           = ff_MPV_encode_init,
4431     .encode2        = ff_MPV_encode_picture,
4432     .close          = ff_MPV_encode_end,
4433     .pix_fmts= (const enum AVPixelFormat[]){AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE},
4434     .priv_class     = &h263_class,
4435 };
4436
4437 static const AVOption h263p_options[] = {
4438     { "umv",        "Use unlimited motion vectors.",    OFFSET(umvplus), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4439     { "aiv",        "Use alternative inter VLC.",       OFFSET(alt_inter_vlc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4440     { "obmc",       "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4441     { "structured_slices", "Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4442     FF_MPV_COMMON_OPTS
4443     { NULL },
4444 };
4445 static const AVClass h263p_class = {
4446     .class_name = "H.263p encoder",
4447     .item_name  = av_default_item_name,
4448     .option     = h263p_options,
4449     .version    = LIBAVUTIL_VERSION_INT,
4450 };
4451
4452 AVCodec ff_h263p_encoder = {
4453     .name           = "h263p",
4454     .long_name      = NULL_IF_CONFIG_SMALL("H.263+ / H.263-1998 / H.263 version 2"),
4455     .type           = AVMEDIA_TYPE_VIDEO,
4456     .id             = AV_CODEC_ID_H263P,
4457     .priv_data_size = sizeof(MpegEncContext),
4458     .init           = ff_MPV_encode_init,
4459     .encode2        = ff_MPV_encode_picture,
4460     .close          = ff_MPV_encode_end,
4461     .capabilities   = CODEC_CAP_SLICE_THREADS,
4462     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4463     .priv_class     = &h263p_class,
4464 };
4465
4466 FF_MPV_GENERIC_CLASS(msmpeg4v2)
4467
4468 AVCodec ff_msmpeg4v2_encoder = {
4469     .name           = "msmpeg4v2",
4470     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 2"),
4471     .type           = AVMEDIA_TYPE_VIDEO,
4472     .id             = AV_CODEC_ID_MSMPEG4V2,
4473     .priv_data_size = sizeof(MpegEncContext),
4474     .init           = ff_MPV_encode_init,
4475     .encode2        = ff_MPV_encode_picture,
4476     .close          = ff_MPV_encode_end,
4477     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4478     .priv_class     = &msmpeg4v2_class,
4479 };
4480
4481 FF_MPV_GENERIC_CLASS(msmpeg4v3)
4482
4483 AVCodec ff_msmpeg4v3_encoder = {
4484     .name           = "msmpeg4",
4485     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 3"),
4486     .type           = AVMEDIA_TYPE_VIDEO,
4487     .id             = AV_CODEC_ID_MSMPEG4V3,
4488     .priv_data_size = sizeof(MpegEncContext),
4489     .init           = ff_MPV_encode_init,
4490     .encode2        = ff_MPV_encode_picture,
4491     .close          = ff_MPV_encode_end,
4492     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4493     .priv_class     = &msmpeg4v3_class,
4494 };
4495
4496 FF_MPV_GENERIC_CLASS(wmv1)
4497
4498 AVCodec ff_wmv1_encoder = {
4499     .name           = "wmv1",
4500     .long_name      = NULL_IF_CONFIG_SMALL("Windows Media Video 7"),
4501     .type           = AVMEDIA_TYPE_VIDEO,
4502     .id             = AV_CODEC_ID_WMV1,
4503     .priv_data_size = sizeof(MpegEncContext),
4504     .init           = ff_MPV_encode_init,
4505     .encode2        = ff_MPV_encode_picture,
4506     .close          = ff_MPV_encode_end,
4507     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4508     .priv_class     = &wmv1_class,
4509 };