]> git.sesse.net Git - ffmpeg/blob - libavcodec/mpegvideo_enc.c
Merge commit 'c0329748b04e1f175dad8c9c2ebf22a5e2dc5b72'
[ffmpeg] / libavcodec / mpegvideo_enc.c
1 /*
2  * The simplest mpeg encoder (well, it was the simplest!)
3  * Copyright (c) 2000,2001 Fabrice Bellard
4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5  *
6  * 4MV & hq & B-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
7  *
8  * This file is part of FFmpeg.
9  *
10  * FFmpeg is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * FFmpeg is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with FFmpeg; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23  */
24
25 /**
26  * @file
27  * The simplest mpeg encoder (well, it was the simplest!).
28  */
29
30 #include "libavutil/intmath.h"
31 #include "libavutil/mathematics.h"
32 #include "libavutil/opt.h"
33 #include "avcodec.h"
34 #include "dsputil.h"
35 #include "mpegvideo.h"
36 #include "h263.h"
37 #include "mathops.h"
38 #include "mjpegenc.h"
39 #include "msmpeg4.h"
40 #include "faandct.h"
41 #include "thread.h"
42 #include "aandcttab.h"
43 #include "flv.h"
44 #include "mpeg4video.h"
45 #include "internal.h"
46 #include "bytestream.h"
47 #include <limits.h>
48 #include "sp5x.h"
49
50 //#undef NDEBUG
51 //#include <assert.h>
52
53 static int encode_picture(MpegEncContext *s, int picture_number);
54 static int dct_quantize_refine(MpegEncContext *s, DCTELEM *block, int16_t *weight, DCTELEM *orig, int n, int qscale);
55 static int sse_mb(MpegEncContext *s);
56 static void denoise_dct_c(MpegEncContext *s, DCTELEM *block);
57 static int dct_quantize_trellis_c(MpegEncContext *s, DCTELEM *block, int n, int qscale, int *overflow);
58
59 /* enable all paranoid tests for rounding, overflows, etc... */
60 //#define PARANOID
61
62 //#define DEBUG
63
/* Default tables shared by all encoder instances; MPV_encode_defaults()
 * installs them as s->me.mv_penalty and s->fcode_tab. */
static uint8_t default_mv_penalty[MAX_FCODE + 1][MAX_MV * 2 + 1];
static uint8_t default_fcode_tab[MAX_MV * 2 + 1];

/* Option table made of the entries in FF_MPV_COMMON_OPTS, closed by a
 * NULL entry. */
const AVOption ff_mpv_generic_options[] = {
    FF_MPV_COMMON_OPTS
    { NULL },
};
71
72 void ff_convert_matrix(DSPContext *dsp, int (*qmat)[64],
73                        uint16_t (*qmat16)[2][64],
74                        const uint16_t *quant_matrix,
75                        int bias, int qmin, int qmax, int intra)
76 {
77     int qscale;
78     int shift = 0;
79
80     for (qscale = qmin; qscale <= qmax; qscale++) {
81         int i;
82         if (dsp->fdct == ff_jpeg_fdct_islow_8 ||
83             dsp->fdct == ff_jpeg_fdct_islow_10 ||
84             dsp->fdct == ff_faandct) {
85             for (i = 0; i < 64; i++) {
86                 const int j = dsp->idct_permutation[i];
87                 /* 16 <= qscale * quant_matrix[i] <= 7905
88                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
89                  *             19952 <=              x  <= 249205026
90                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
91                  *           3444240 >= (1 << 36) / (x) >= 275 */
92
93                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
94                                         (qscale * quant_matrix[j]));
95             }
96         } else if (dsp->fdct == ff_fdct_ifast) {
97             for (i = 0; i < 64; i++) {
98                 const int j = dsp->idct_permutation[i];
99                 /* 16 <= qscale * quant_matrix[i] <= 7905
100                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
101                  *             19952 <=              x  <= 249205026
102                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
103                  *           3444240 >= (1 << 36) / (x) >= 275 */
104
105                 qmat[qscale][i] = (int)((UINT64_C(1) << (QMAT_SHIFT + 14)) /
106                                         (ff_aanscales[i] * (int64_t)qscale * quant_matrix[j]));
107             }
108         } else {
109             for (i = 0; i < 64; i++) {
110                 const int j = dsp->idct_permutation[i];
111                 /* We can safely suppose that 16 <= quant_matrix[i] <= 255
112                  * Assume x = qscale * quant_matrix[i]
113                  * So             16 <=              x  <= 7905
114                  * so (1 << 19) / 16 >= (1 << 19) / (x) >= (1 << 19) / 7905
115                  * so          32768 >= (1 << 19) / (x) >= 67 */
116                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
117                                         (qscale * quant_matrix[j]));
118                 //qmat  [qscale][i] = (1 << QMAT_SHIFT_MMX) /
119                 //                    (qscale * quant_matrix[i]);
120                 qmat16[qscale][0][i] = (1 << QMAT_SHIFT_MMX) /
121                                        (qscale * quant_matrix[j]);
122
123                 if (qmat16[qscale][0][i] == 0 ||
124                     qmat16[qscale][0][i] == 128 * 256)
125                     qmat16[qscale][0][i] = 128 * 256 - 1;
126                 qmat16[qscale][1][i] =
127                     ROUNDED_DIV(bias << (16 - QUANT_BIAS_SHIFT),
128                                 qmat16[qscale][0][i]);
129             }
130         }
131
132         for (i = intra; i < 64; i++) {
133             int64_t max = 8191;
134             if (dsp->fdct == ff_fdct_ifast) {
135                 max = (8191LL * ff_aanscales[i]) >> 14;
136             }
137             while (((max * qmat[qscale][i]) >> shift) > INT_MAX) {
138                 shift++;
139             }
140         }
141     }
142     if (shift) {
143         av_log(NULL, AV_LOG_INFO,
144                "Warning, QMAT_SHIFT is larger than %d, overflows possible\n",
145                QMAT_SHIFT - shift);
146     }
147 }
148
149 static inline void update_qscale(MpegEncContext *s)
150 {
151     s->qscale = (s->lambda * 139 + FF_LAMBDA_SCALE * 64) >>
152                 (FF_LAMBDA_SHIFT + 7);
153     s->qscale = av_clip(s->qscale, s->avctx->qmin, s->avctx->qmax);
154
155     s->lambda2 = (s->lambda * s->lambda + FF_LAMBDA_SCALE / 2) >>
156                  FF_LAMBDA_SHIFT;
157 }
158
159 void ff_write_quant_matrix(PutBitContext *pb, uint16_t *matrix)
160 {
161     int i;
162
163     if (matrix) {
164         put_bits(pb, 1, 1);
165         for (i = 0; i < 64; i++) {
166             put_bits(pb, 8, matrix[ff_zigzag_direct[i]]);
167         }
168     } else
169         put_bits(pb, 1, 0);
170 }
171
172 /**
173  * init s->current_picture.qscale_table from s->lambda_table
174  */
175 void ff_init_qscale_tab(MpegEncContext *s)
176 {
177     int8_t * const qscale_table = s->current_picture.f.qscale_table;
178     int i;
179
180     for (i = 0; i < s->mb_num; i++) {
181         unsigned int lam = s->lambda_table[s->mb_index2xy[i]];
182         int qp = (lam * 139 + FF_LAMBDA_SCALE * 64) >> (FF_LAMBDA_SHIFT + 7);
183         qscale_table[s->mb_index2xy[i]] = av_clip(qp, s->avctx->qmin,
184                                                   s->avctx->qmax);
185     }
186 }
187
188 static void copy_picture_attributes(MpegEncContext *s,
189                                     AVFrame *dst,
190                                     AVFrame *src)
191 {
192     int i;
193
194     dst->pict_type              = src->pict_type;
195     dst->quality                = src->quality;
196     dst->coded_picture_number   = src->coded_picture_number;
197     dst->display_picture_number = src->display_picture_number;
198     //dst->reference              = src->reference;
199     dst->pts                    = src->pts;
200     dst->interlaced_frame       = src->interlaced_frame;
201     dst->top_field_first        = src->top_field_first;
202
203     if (s->avctx->me_threshold) {
204         if (!src->motion_val[0])
205             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.motion_val not set!\n");
206         if (!src->mb_type)
207             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.mb_type not set!\n");
208         if (!src->ref_index[0])
209             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.ref_index not set!\n");
210         if (src->motion_subsample_log2 != dst->motion_subsample_log2)
211             av_log(s->avctx, AV_LOG_ERROR,
212                    "AVFrame.motion_subsample_log2 doesn't match! (%d!=%d)\n",
213                    src->motion_subsample_log2, dst->motion_subsample_log2);
214
215         memcpy(dst->mb_type, src->mb_type,
216                s->mb_stride * s->mb_height * sizeof(dst->mb_type[0]));
217
218         for (i = 0; i < 2; i++) {
219             int stride = ((16 * s->mb_width ) >>
220                           src->motion_subsample_log2) + 1;
221             int height = ((16 * s->mb_height) >> src->motion_subsample_log2);
222
223             if (src->motion_val[i] &&
224                 src->motion_val[i] != dst->motion_val[i]) {
225                 memcpy(dst->motion_val[i], src->motion_val[i],
226                        2 * stride * height * sizeof(int16_t));
227             }
228             if (src->ref_index[i] && src->ref_index[i] != dst->ref_index[i]) {
229                 memcpy(dst->ref_index[i], src->ref_index[i],
230                        s->mb_stride * 4 * s->mb_height * sizeof(int8_t));
231             }
232         }
233     }
234 }
235
236 static void update_duplicate_context_after_me(MpegEncContext *dst,
237                                               MpegEncContext *src)
238 {
239 #define COPY(a) dst->a= src->a
240     COPY(pict_type);
241     COPY(current_picture);
242     COPY(f_code);
243     COPY(b_code);
244     COPY(qscale);
245     COPY(lambda);
246     COPY(lambda2);
247     COPY(picture_in_gop_number);
248     COPY(gop_picture_number);
249     COPY(frame_pred_frame_dct); // FIXME don't set in encode_header
250     COPY(progressive_frame);    // FIXME don't set in encode_header
251     COPY(partitioned_frame);    // FIXME don't set in encode_header
252 #undef COPY
253 }
254
255 /**
256  * Set the given MpegEncContext to defaults for encoding.
257  * the changed fields will not depend upon the prior state of the MpegEncContext.
258  */
259 static void MPV_encode_defaults(MpegEncContext *s)
260 {
261     int i;
262     ff_MPV_common_defaults(s);
263
264     for (i = -16; i < 16; i++) {
265         default_fcode_tab[i + MAX_MV] = 1;
266     }
267     s->me.mv_penalty = default_mv_penalty;
268     s->fcode_tab     = default_fcode_tab;
269 }
270
271 av_cold int ff_dct_encode_init(MpegEncContext *s) {
272     if (ARCH_X86)
273         ff_dct_encode_init_x86(s);
274
275     if (!s->dct_quantize)
276         s->dct_quantize = ff_dct_quantize_c;
277     if (!s->denoise_dct)
278         s->denoise_dct  = denoise_dct_c;
279     s->fast_dct_quantize = s->dct_quantize;
280     if (s->avctx->trellis)
281         s->dct_quantize  = dct_quantize_trellis_c;
282
283     return 0;
284 }
285
286 /* init video encoder */
287 av_cold int ff_MPV_encode_init(AVCodecContext *avctx)
288 {
289     MpegEncContext *s = avctx->priv_data;
290     int i;
291     int chroma_h_shift, chroma_v_shift;
292
293     MPV_encode_defaults(s);
294
295     switch (avctx->codec_id) {
296     case AV_CODEC_ID_MPEG2VIDEO:
297         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P &&
298             avctx->pix_fmt != AV_PIX_FMT_YUV422P) {
299             av_log(avctx, AV_LOG_ERROR,
300                    "only YUV420 and YUV422 are supported\n");
301             return -1;
302         }
303         break;
304     case AV_CODEC_ID_LJPEG:
305         if (avctx->pix_fmt != AV_PIX_FMT_YUVJ420P &&
306             avctx->pix_fmt != AV_PIX_FMT_YUVJ422P &&
307             avctx->pix_fmt != AV_PIX_FMT_YUVJ444P &&
308             avctx->pix_fmt != AV_PIX_FMT_BGR0     &&
309             avctx->pix_fmt != AV_PIX_FMT_BGRA     &&
310             avctx->pix_fmt != AV_PIX_FMT_BGR24    &&
311             ((avctx->pix_fmt != AV_PIX_FMT_YUV420P &&
312               avctx->pix_fmt != AV_PIX_FMT_YUV422P &&
313               avctx->pix_fmt != AV_PIX_FMT_YUV444P) ||
314              avctx->strict_std_compliance > FF_COMPLIANCE_UNOFFICIAL)) {
315             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in LJPEG\n");
316             return -1;
317         }
318         break;
319     case AV_CODEC_ID_MJPEG:
320     case AV_CODEC_ID_AMV:
321         if (avctx->pix_fmt != AV_PIX_FMT_YUVJ420P &&
322             avctx->pix_fmt != AV_PIX_FMT_YUVJ422P &&
323             ((avctx->pix_fmt != AV_PIX_FMT_YUV420P &&
324               avctx->pix_fmt != AV_PIX_FMT_YUV422P) ||
325              avctx->strict_std_compliance > FF_COMPLIANCE_UNOFFICIAL)) {
326             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in jpeg\n");
327             return -1;
328         }
329         break;
330     default:
331         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P) {
332             av_log(avctx, AV_LOG_ERROR, "only YUV420 is supported\n");
333             return -1;
334         }
335     }
336
337     switch (avctx->pix_fmt) {
338     case AV_PIX_FMT_YUVJ422P:
339     case AV_PIX_FMT_YUV422P:
340         s->chroma_format = CHROMA_422;
341         break;
342     case AV_PIX_FMT_YUVJ420P:
343     case AV_PIX_FMT_YUV420P:
344     default:
345         s->chroma_format = CHROMA_420;
346         break;
347     }
348
349     s->bit_rate = avctx->bit_rate;
350     s->width    = avctx->width;
351     s->height   = avctx->height;
352     if (avctx->gop_size > 600 &&
353         avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL) {
354         av_log(avctx, AV_LOG_WARNING,
355                "keyframe interval too large!, reducing it from %d to %d\n",
356                avctx->gop_size, 600);
357         avctx->gop_size = 600;
358     }
359     s->gop_size     = avctx->gop_size;
360     s->avctx        = avctx;
361     s->flags        = avctx->flags;
362     s->flags2       = avctx->flags2;
363     s->max_b_frames = avctx->max_b_frames;
364     s->codec_id     = avctx->codec->id;
365 #if FF_API_MPV_GLOBAL_OPTS
366     if (avctx->luma_elim_threshold)
367         s->luma_elim_threshold   = avctx->luma_elim_threshold;
368     if (avctx->chroma_elim_threshold)
369         s->chroma_elim_threshold = avctx->chroma_elim_threshold;
370 #endif
371     s->strict_std_compliance = avctx->strict_std_compliance;
372     s->quarter_sample     = (avctx->flags & CODEC_FLAG_QPEL) != 0;
373     s->mpeg_quant         = avctx->mpeg_quant;
374     s->rtp_mode           = !!avctx->rtp_payload_size;
375     s->intra_dc_precision = avctx->intra_dc_precision;
376     s->user_specified_pts = AV_NOPTS_VALUE;
377
378     if (s->gop_size <= 1) {
379         s->intra_only = 1;
380         s->gop_size   = 12;
381     } else {
382         s->intra_only = 0;
383     }
384
385     s->me_method = avctx->me_method;
386
387     /* Fixed QSCALE */
388     s->fixed_qscale = !!(avctx->flags & CODEC_FLAG_QSCALE);
389
390 #if FF_API_MPV_GLOBAL_OPTS
391     if (s->flags & CODEC_FLAG_QP_RD)
392         s->mpv_flags |= FF_MPV_FLAG_QP_RD;
393 #endif
394
395     s->adaptive_quant = (s->avctx->lumi_masking ||
396                          s->avctx->dark_masking ||
397                          s->avctx->temporal_cplx_masking ||
398                          s->avctx->spatial_cplx_masking  ||
399                          s->avctx->p_masking      ||
400                          s->avctx->border_masking ||
401                          (s->mpv_flags & FF_MPV_FLAG_QP_RD)) &&
402                         !s->fixed_qscale;
403
404     s->loop_filter      = !!(s->flags & CODEC_FLAG_LOOP_FILTER);
405
406     if ((!avctx->rc_max_rate) != (!avctx->rc_buffer_size)) {
407         av_log(avctx, AV_LOG_ERROR, "Either both buffer size and max rate or neither must be specified\n");
408         if (avctx->rc_max_rate && !avctx->rc_buffer_size)
409             return -1;
410     }
411
412     if (avctx->rc_min_rate && avctx->rc_max_rate != avctx->rc_min_rate) {
413         av_log(avctx, AV_LOG_INFO,
414                "Warning min_rate > 0 but min_rate != max_rate isn't recommended!\n");
415     }
416
417     if (avctx->rc_min_rate && avctx->rc_min_rate > avctx->bit_rate) {
418         av_log(avctx, AV_LOG_ERROR, "bitrate below min bitrate\n");
419         return -1;
420     }
421
422     if (avctx->rc_max_rate && avctx->rc_max_rate < avctx->bit_rate) {
423         av_log(avctx, AV_LOG_ERROR, "bitrate above max bitrate\n");
424         return -1;
425     }
426
427     if (avctx->rc_max_rate &&
428         avctx->rc_max_rate == avctx->bit_rate &&
429         avctx->rc_max_rate != avctx->rc_min_rate) {
430         av_log(avctx, AV_LOG_INFO,
431                "impossible bitrate constraints, this will fail\n");
432     }
433
434     if (avctx->rc_buffer_size &&
435         avctx->bit_rate * (int64_t)avctx->time_base.num >
436             avctx->rc_buffer_size * (int64_t)avctx->time_base.den) {
437         av_log(avctx, AV_LOG_ERROR, "VBV buffer too small for bitrate\n");
438         return -1;
439     }
440
441     if (!s->fixed_qscale &&
442         avctx->bit_rate * av_q2d(avctx->time_base) >
443             avctx->bit_rate_tolerance) {
444         av_log(avctx, AV_LOG_ERROR,
445                "bitrate tolerance too small for bitrate\n");
446         return -1;
447     }
448
449     if (s->avctx->rc_max_rate &&
450         s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
451         (s->codec_id == AV_CODEC_ID_MPEG1VIDEO ||
452          s->codec_id == AV_CODEC_ID_MPEG2VIDEO) &&
453         90000LL * (avctx->rc_buffer_size - 1) >
454             s->avctx->rc_max_rate * 0xFFFFLL) {
455         av_log(avctx, AV_LOG_INFO,
456                "Warning vbv_delay will be set to 0xFFFF (=VBR) as the "
457                "specified vbv buffer is too large for the given bitrate!\n");
458     }
459
460     if ((s->flags & CODEC_FLAG_4MV)  && s->codec_id != AV_CODEC_ID_MPEG4 &&
461         s->codec_id != AV_CODEC_ID_H263 && s->codec_id != AV_CODEC_ID_H263P &&
462         s->codec_id != AV_CODEC_ID_FLV1) {
463         av_log(avctx, AV_LOG_ERROR, "4MV not supported by codec\n");
464         return -1;
465     }
466
467     if (s->obmc && s->avctx->mb_decision != FF_MB_DECISION_SIMPLE) {
468         av_log(avctx, AV_LOG_ERROR,
469                "OBMC is only supported with simple mb decision\n");
470         return -1;
471     }
472
473     if (s->quarter_sample && s->codec_id != AV_CODEC_ID_MPEG4) {
474         av_log(avctx, AV_LOG_ERROR, "qpel not supported by codec\n");
475         return -1;
476     }
477
478     if (s->max_b_frames                    &&
479         s->codec_id != AV_CODEC_ID_MPEG4      &&
480         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
481         s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
482         av_log(avctx, AV_LOG_ERROR, "b frames not supported by codec\n");
483         return -1;
484     }
485
486     if ((s->codec_id == AV_CODEC_ID_MPEG4 ||
487          s->codec_id == AV_CODEC_ID_H263  ||
488          s->codec_id == AV_CODEC_ID_H263P) &&
489         (avctx->sample_aspect_ratio.num > 255 ||
490          avctx->sample_aspect_ratio.den > 255)) {
491         av_log(avctx, AV_LOG_WARNING,
492                "Invalid pixel aspect ratio %i/%i, limit is 255/255 reducing\n",
493                avctx->sample_aspect_ratio.num, avctx->sample_aspect_ratio.den);
494         av_reduce(&avctx->sample_aspect_ratio.num, &avctx->sample_aspect_ratio.den,
495                    avctx->sample_aspect_ratio.num,  avctx->sample_aspect_ratio.den, 255);
496     }
497
498     if ((s->codec_id == AV_CODEC_ID_H263  ||
499          s->codec_id == AV_CODEC_ID_H263P) &&
500         (avctx->width  > 2048 ||
501          avctx->height > 1152 )) {
502         av_log(avctx, AV_LOG_ERROR, "H.263 does not support resolutions above 2048x1152\n");
503         return -1;
504     }
505     if ((s->codec_id == AV_CODEC_ID_H263  ||
506          s->codec_id == AV_CODEC_ID_H263P) &&
507         ((avctx->width &3) ||
508          (avctx->height&3) )) {
509         av_log(avctx, AV_LOG_ERROR, "w/h must be a multiple of 4\n");
510         return -1;
511     }
512
513     if (s->codec_id == AV_CODEC_ID_MPEG1VIDEO &&
514         (avctx->width  > 4095 ||
515          avctx->height > 4095 )) {
516         av_log(avctx, AV_LOG_ERROR, "MPEG-1 does not support resolutions above 4095x4095\n");
517         return -1;
518     }
519
520     if (s->codec_id == AV_CODEC_ID_MPEG2VIDEO &&
521         (avctx->width  > 16383 ||
522          avctx->height > 16383 )) {
523         av_log(avctx, AV_LOG_ERROR, "MPEG-2 does not support resolutions above 16383x16383\n");
524         return -1;
525     }
526
527     if ((s->codec_id == AV_CODEC_ID_WMV1 ||
528          s->codec_id == AV_CODEC_ID_WMV2) &&
529          avctx->width & 1) {
530          av_log(avctx, AV_LOG_ERROR, "width must be multiple of 2\n");
531          return -1;
532     }
533
534     if ((s->flags & (CODEC_FLAG_INTERLACED_DCT | CODEC_FLAG_INTERLACED_ME)) &&
535         s->codec_id != AV_CODEC_ID_MPEG4 && s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
536         av_log(avctx, AV_LOG_ERROR, "interlacing not supported by codec\n");
537         return -1;
538     }
539
540     // FIXME mpeg2 uses that too
541     if (s->mpeg_quant && s->codec_id != AV_CODEC_ID_MPEG4) {
542         av_log(avctx, AV_LOG_ERROR,
543                "mpeg2 style quantization not supported by codec\n");
544         return -1;
545     }
546
547 #if FF_API_MPV_GLOBAL_OPTS
548     if (s->flags & CODEC_FLAG_CBP_RD)
549         s->mpv_flags |= FF_MPV_FLAG_CBP_RD;
550 #endif
551
552     if ((s->mpv_flags & FF_MPV_FLAG_CBP_RD) && !avctx->trellis) {
553         av_log(avctx, AV_LOG_ERROR, "CBP RD needs trellis quant\n");
554         return -1;
555     }
556
557     if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) &&
558         s->avctx->mb_decision != FF_MB_DECISION_RD) {
559         av_log(avctx, AV_LOG_ERROR, "QP RD needs mbd=2\n");
560         return -1;
561     }
562
563     if (s->avctx->scenechange_threshold < 1000000000 &&
564         (s->flags & CODEC_FLAG_CLOSED_GOP)) {
565         av_log(avctx, AV_LOG_ERROR,
566                "closed gop with scene change detection are not supported yet, "
567                "set threshold to 1000000000\n");
568         return -1;
569     }
570
571     if (s->flags & CODEC_FLAG_LOW_DELAY) {
572         if (s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
573             av_log(avctx, AV_LOG_ERROR,
574                   "low delay forcing is only available for mpeg2\n");
575             return -1;
576         }
577         if (s->max_b_frames != 0) {
578             av_log(avctx, AV_LOG_ERROR,
579                    "b frames cannot be used with low delay\n");
580             return -1;
581         }
582     }
583
584     if (s->q_scale_type == 1) {
585         if (avctx->qmax > 12) {
586             av_log(avctx, AV_LOG_ERROR,
587                    "non linear quant only supports qmax <= 12 currently\n");
588             return -1;
589         }
590     }
591
592     if (s->avctx->thread_count > 1         &&
593         s->codec_id != AV_CODEC_ID_MPEG4      &&
594         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
595         s->codec_id != AV_CODEC_ID_MPEG2VIDEO &&
596         s->codec_id != AV_CODEC_ID_MJPEG      &&
597         (s->codec_id != AV_CODEC_ID_H263P)) {
598         av_log(avctx, AV_LOG_ERROR,
599                "multi threaded encoding not supported by codec\n");
600         return -1;
601     }
602
603     if (s->avctx->thread_count < 1) {
604         av_log(avctx, AV_LOG_ERROR,
605                "automatic thread number detection not supported by codec, "
606                "patch welcome\n");
607         return -1;
608     }
609
610     if (s->avctx->thread_count > 1)
611         s->rtp_mode = 1;
612
613     if (s->avctx->thread_count > 1 && s->codec_id == AV_CODEC_ID_H263P)
614         s->h263_slice_structured = 1;
615
616     if (!avctx->time_base.den || !avctx->time_base.num) {
617         av_log(avctx, AV_LOG_ERROR, "framerate not set\n");
618         return -1;
619     }
620
621     i = (INT_MAX / 2 + 128) >> 8;
622     if (avctx->me_threshold >= i) {
623         av_log(avctx, AV_LOG_ERROR, "me_threshold too large, max is %d\n",
624                i - 1);
625         return -1;
626     }
627     if (avctx->mb_threshold >= i) {
628         av_log(avctx, AV_LOG_ERROR, "mb_threshold too large, max is %d\n",
629                i - 1);
630         return -1;
631     }
632
633     if (avctx->b_frame_strategy && (avctx->flags & CODEC_FLAG_PASS2)) {
634         av_log(avctx, AV_LOG_INFO,
635                "notice: b_frame_strategy only affects the first pass\n");
636         avctx->b_frame_strategy = 0;
637     }
638
639     i = av_gcd(avctx->time_base.den, avctx->time_base.num);
640     if (i > 1) {
641         av_log(avctx, AV_LOG_INFO, "removing common factors from framerate\n");
642         avctx->time_base.den /= i;
643         avctx->time_base.num /= i;
644         //return -1;
645     }
646
647     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG1VIDEO || s->codec_id == AV_CODEC_ID_MPEG2VIDEO || s->codec_id == AV_CODEC_ID_MJPEG || s->codec_id==AV_CODEC_ID_AMV) {
648         // (a + x * 3 / 8) / x
649         s->intra_quant_bias = 3 << (QUANT_BIAS_SHIFT - 3);
650         s->inter_quant_bias = 0;
651     } else {
652         s->intra_quant_bias = 0;
653         // (a - x / 4) / x
654         s->inter_quant_bias = -(1 << (QUANT_BIAS_SHIFT - 2));
655     }
656
657     if (avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
658         s->intra_quant_bias = avctx->intra_quant_bias;
659     if (avctx->inter_quant_bias != FF_DEFAULT_QUANT_BIAS)
660         s->inter_quant_bias = avctx->inter_quant_bias;
661
662     av_log(avctx, AV_LOG_DEBUG, "intra_quant_bias = %d inter_quant_bias = %d\n",s->intra_quant_bias,s->inter_quant_bias);
663
664     avcodec_get_chroma_sub_sample(avctx->pix_fmt, &chroma_h_shift,
665                                   &chroma_v_shift);
666
667     if (avctx->codec_id == AV_CODEC_ID_MPEG4 &&
668         s->avctx->time_base.den > (1 << 16) - 1) {
669         av_log(avctx, AV_LOG_ERROR,
670                "timebase %d/%d not supported by MPEG 4 standard, "
671                "the maximum admitted value for the timebase denominator "
672                "is %d\n", s->avctx->time_base.num, s->avctx->time_base.den,
673                (1 << 16) - 1);
674         return -1;
675     }
676     s->time_increment_bits = av_log2(s->avctx->time_base.den - 1) + 1;
677
678 #if FF_API_MPV_GLOBAL_OPTS
679     if (avctx->flags2 & CODEC_FLAG2_SKIP_RD)
680         s->mpv_flags |= FF_MPV_FLAG_SKIP_RD;
681     if (avctx->flags2 & CODEC_FLAG2_STRICT_GOP)
682         s->mpv_flags |= FF_MPV_FLAG_STRICT_GOP;
683     if (avctx->quantizer_noise_shaping)
684         s->quantizer_noise_shaping = avctx->quantizer_noise_shaping;
685 #endif
686
687     switch (avctx->codec->id) {
688     case AV_CODEC_ID_MPEG1VIDEO:
689         s->out_format = FMT_MPEG1;
690         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
691         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
692         break;
693     case AV_CODEC_ID_MPEG2VIDEO:
694         s->out_format = FMT_MPEG1;
695         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
696         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
697         s->rtp_mode   = 1;
698         break;
699     case AV_CODEC_ID_LJPEG:
700     case AV_CODEC_ID_MJPEG:
701     case AV_CODEC_ID_AMV:
702         s->out_format = FMT_MJPEG;
703         s->intra_only = 1; /* force intra only for jpeg */
704         if (avctx->codec->id == AV_CODEC_ID_LJPEG &&
705             (avctx->pix_fmt == AV_PIX_FMT_BGR0
706              || s->avctx->pix_fmt == AV_PIX_FMT_BGRA
707              || s->avctx->pix_fmt == AV_PIX_FMT_BGR24)) {
708             s->mjpeg_vsample[0] = s->mjpeg_hsample[0] =
709             s->mjpeg_vsample[1] = s->mjpeg_hsample[1] =
710             s->mjpeg_vsample[2] = s->mjpeg_hsample[2] = 1;
711         } else {
712             s->mjpeg_vsample[0] = 2;
713             s->mjpeg_vsample[1] = 2 >> chroma_v_shift;
714             s->mjpeg_vsample[2] = 2 >> chroma_v_shift;
715             s->mjpeg_hsample[0] = 2;
716             s->mjpeg_hsample[1] = 2 >> chroma_h_shift;
717             s->mjpeg_hsample[2] = 2 >> chroma_h_shift;
718         }
719         if (!(CONFIG_MJPEG_ENCODER || CONFIG_LJPEG_ENCODER) ||
720             ff_mjpeg_encode_init(s) < 0)
721             return -1;
722         avctx->delay = 0;
723         s->low_delay = 1;
724         break;
725     case AV_CODEC_ID_H261:
726         if (!CONFIG_H261_ENCODER)
727             return -1;
728         if (ff_h261_get_picture_format(s->width, s->height) < 0) {
729             av_log(avctx, AV_LOG_ERROR,
730                    "The specified picture size of %dx%d is not valid for the "
731                    "H.261 codec.\nValid sizes are 176x144, 352x288\n",
732                     s->width, s->height);
733             return -1;
734         }
735         s->out_format = FMT_H261;
736         avctx->delay  = 0;
737         s->low_delay  = 1;
738         break;
739     case AV_CODEC_ID_H263:
740         if (!CONFIG_H263_ENCODER)
741             return -1;
742         if (ff_match_2uint16(ff_h263_format, FF_ARRAY_ELEMS(ff_h263_format),
743                              s->width, s->height) == 8) {
744             av_log(avctx, AV_LOG_ERROR,
745                    "The specified picture size of %dx%d is not valid for "
746                    "the H.263 codec.\nValid sizes are 128x96, 176x144, "
747                    "352x288, 704x576, and 1408x1152. "
748                    "Try H.263+.\n", s->width, s->height);
749             return -1;
750         }
751         s->out_format = FMT_H263;
752         avctx->delay  = 0;
753         s->low_delay  = 1;
754         break;
755     case AV_CODEC_ID_H263P:
756         s->out_format = FMT_H263;
757         s->h263_plus  = 1;
758         /* Fx */
759         s->h263_aic        = (avctx->flags & CODEC_FLAG_AC_PRED) ? 1 : 0;
760         s->modified_quant  = s->h263_aic;
761         s->loop_filter     = (avctx->flags & CODEC_FLAG_LOOP_FILTER) ? 1 : 0;
762         s->unrestricted_mv = s->obmc || s->loop_filter || s->umvplus;
763
764         /* /Fx */
765         /* These are just to be sure */
766         avctx->delay = 0;
767         s->low_delay = 1;
768         break;
769     case AV_CODEC_ID_FLV1:
770         s->out_format      = FMT_H263;
771         s->h263_flv        = 2; /* format = 1; 11-bit codes */
772         s->unrestricted_mv = 1;
773         s->rtp_mode  = 0; /* don't allow GOB */
774         avctx->delay = 0;
775         s->low_delay = 1;
776         break;
777     case AV_CODEC_ID_RV10:
778         s->out_format = FMT_H263;
779         avctx->delay  = 0;
780         s->low_delay  = 1;
781         break;
782     case AV_CODEC_ID_RV20:
783         s->out_format      = FMT_H263;
784         avctx->delay       = 0;
785         s->low_delay       = 1;
786         s->modified_quant  = 1;
787         s->h263_aic        = 1;
788         s->h263_plus       = 1;
789         s->loop_filter     = 1;
790         s->unrestricted_mv = 0;
791         break;
792     case AV_CODEC_ID_MPEG4:
793         s->out_format      = FMT_H263;
794         s->h263_pred       = 1;
795         s->unrestricted_mv = 1;
796         s->low_delay       = s->max_b_frames ? 0 : 1;
797         avctx->delay       = s->low_delay ? 0 : (s->max_b_frames + 1);
798         break;
799     case AV_CODEC_ID_MSMPEG4V2:
800         s->out_format      = FMT_H263;
801         s->h263_pred       = 1;
802         s->unrestricted_mv = 1;
803         s->msmpeg4_version = 2;
804         avctx->delay       = 0;
805         s->low_delay       = 1;
806         break;
807     case AV_CODEC_ID_MSMPEG4V3:
808         s->out_format        = FMT_H263;
809         s->h263_pred         = 1;
810         s->unrestricted_mv   = 1;
811         s->msmpeg4_version   = 3;
812         s->flipflop_rounding = 1;
813         avctx->delay         = 0;
814         s->low_delay         = 1;
815         break;
816     case AV_CODEC_ID_WMV1:
817         s->out_format        = FMT_H263;
818         s->h263_pred         = 1;
819         s->unrestricted_mv   = 1;
820         s->msmpeg4_version   = 4;
821         s->flipflop_rounding = 1;
822         avctx->delay         = 0;
823         s->low_delay         = 1;
824         break;
825     case AV_CODEC_ID_WMV2:
826         s->out_format        = FMT_H263;
827         s->h263_pred         = 1;
828         s->unrestricted_mv   = 1;
829         s->msmpeg4_version   = 5;
830         s->flipflop_rounding = 1;
831         avctx->delay         = 0;
832         s->low_delay         = 1;
833         break;
834     default:
835         return -1;
836     }
837
838     avctx->has_b_frames = !s->low_delay;
839
840     s->encoding = 1;
841
842     s->progressive_frame    =
843     s->progressive_sequence = !(avctx->flags & (CODEC_FLAG_INTERLACED_DCT |
844                                                 CODEC_FLAG_INTERLACED_ME) ||
845                                 s->alternate_scan);
846
847     /* init */
848     if (ff_MPV_common_init(s) < 0)
849         return -1;
850
851     ff_dct_encode_init(s);
852
853     if ((CONFIG_H263P_ENCODER || CONFIG_RV20_ENCODER) && s->modified_quant)
854         s->chroma_qscale_table = ff_h263_chroma_qscale_table;
855
856     s->quant_precision = 5;
857
858     ff_set_cmp(&s->dsp, s->dsp.ildct_cmp, s->avctx->ildct_cmp);
859     ff_set_cmp(&s->dsp, s->dsp.frame_skip_cmp, s->avctx->frame_skip_cmp);
860
861     if (CONFIG_H261_ENCODER && s->out_format == FMT_H261)
862         ff_h261_encode_init(s);
863     if (CONFIG_H263_ENCODER && s->out_format == FMT_H263)
864         ff_h263_encode_init(s);
865     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
866         ff_msmpeg4_encode_init(s);
867     if ((CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
868         && s->out_format == FMT_MPEG1)
869         ff_mpeg1_encode_init(s);
870
871     /* init q matrix */
872     for (i = 0; i < 64; i++) {
873         int j = s->dsp.idct_permutation[i];
874         if (CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4 &&
875             s->mpeg_quant) {
876             s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
877             s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
878         } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
879             s->intra_matrix[j] =
880             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
881         } else {
882             /* mpeg1/2 */
883             s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
884             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
885         }
886         if (s->avctx->intra_matrix)
887             s->intra_matrix[j] = s->avctx->intra_matrix[i];
888         if (s->avctx->inter_matrix)
889             s->inter_matrix[j] = s->avctx->inter_matrix[i];
890     }
891
892     /* precompute matrix */
893     /* for mjpeg, we do include qscale in the matrix */
894     if (s->out_format != FMT_MJPEG) {
895         ff_convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
896                           s->intra_matrix, s->intra_quant_bias, avctx->qmin,
897                           31, 1);
898         ff_convert_matrix(&s->dsp, s->q_inter_matrix, s->q_inter_matrix16,
899                           s->inter_matrix, s->inter_quant_bias, avctx->qmin,
900                           31, 0);
901     }
902
903     if (ff_rate_control_init(s) < 0)
904         return -1;
905
906     return 0;
907 }
908
909 av_cold int ff_MPV_encode_end(AVCodecContext *avctx)
910 {
911     MpegEncContext *s = avctx->priv_data;
912
913     ff_rate_control_uninit(s);
914
915     ff_MPV_common_end(s);
916     if ((CONFIG_MJPEG_ENCODER || CONFIG_LJPEG_ENCODER) &&
917         s->out_format == FMT_MJPEG)
918         ff_mjpeg_encode_close(s);
919
920     av_freep(&avctx->extradata);
921
922     return 0;
923 }
924
/**
 * Sum of absolute errors of a 16x16 block against a constant value.
 *
 * @param src    top-left sample of the block
 * @param ref    value every sample is compared with
 * @param stride distance in bytes between two consecutive rows of src
 * @return sum over the 256 samples of |sample - ref|
 */
static int get_sae(uint8_t *src, int ref, int stride)
{
    int total = 0;
    int row, col;

    for (row = 0; row < 16; row++) {
        const uint8_t *line = src + row * stride;

        for (col = 0; col < 16; col++) {
            const int diff = line[col] - ref;

            total += diff >= 0 ? diff : -diff;
        }
    }

    return total;
}
938
939 static int get_intra_count(MpegEncContext *s, uint8_t *src,
940                            uint8_t *ref, int stride)
941 {
942     int x, y, w, h;
943     int acc = 0;
944
945     w = s->width  & ~15;
946     h = s->height & ~15;
947
948     for (y = 0; y < h; y += 16) {
949         for (x = 0; x < w; x += 16) {
950             int offset = x + y * stride;
951             int sad  = s->dsp.sad[0](NULL, src + offset, ref + offset, stride,
952                                      16);
953             int mean = (s->dsp.pix_sum(src + offset, stride) + 128) >> 8;
954             int sae  = get_sae(src + offset, mean, stride);
955
956             acc += sae + 500 < sad;
957         }
958     }
959     return acc;
960 }
961
962
963 static int load_input_picture(MpegEncContext *s, AVFrame *pic_arg)
964 {
965     AVFrame *pic = NULL;
966     int64_t pts;
967     int i;
968     const int encoding_delay = s->max_b_frames ? s->max_b_frames :
969                                                  (s->low_delay ? 0 : 1);
970     int direct = 1;
971
972     if (pic_arg) {
973         pts = pic_arg->pts;
974         pic_arg->display_picture_number = s->input_picture_number++;
975
976         if (pts != AV_NOPTS_VALUE) {
977             if (s->user_specified_pts != AV_NOPTS_VALUE) {
978                 int64_t time = pts;
979                 int64_t last = s->user_specified_pts;
980
981                 if (time <= last) {
982                     av_log(s->avctx, AV_LOG_ERROR,
983                            "Error, Invalid timestamp=%"PRId64", "
984                            "last=%"PRId64"\n", pts, s->user_specified_pts);
985                     return -1;
986                 }
987
988                 if (!s->low_delay && pic_arg->display_picture_number == 1)
989                     s->dts_delta = time - last;
990             }
991             s->user_specified_pts = pts;
992         } else {
993             if (s->user_specified_pts != AV_NOPTS_VALUE) {
994                 s->user_specified_pts =
995                 pts = s->user_specified_pts + 1;
996                 av_log(s->avctx, AV_LOG_INFO,
997                        "Warning: AVFrame.pts=? trying to guess (%"PRId64")\n",
998                        pts);
999             } else {
1000                 pts = pic_arg->display_picture_number;
1001             }
1002         }
1003     }
1004
1005   if (pic_arg) {
1006     if (encoding_delay && !(s->flags & CODEC_FLAG_INPUT_PRESERVED))
1007         direct = 0;
1008     if (pic_arg->linesize[0] != s->linesize)
1009         direct = 0;
1010     if (pic_arg->linesize[1] != s->uvlinesize)
1011         direct = 0;
1012     if (pic_arg->linesize[2] != s->uvlinesize)
1013         direct = 0;
1014
1015     av_dlog(s->avctx, "%d %d %d %d\n", pic_arg->linesize[0],
1016             pic_arg->linesize[1], s->linesize, s->uvlinesize);
1017
1018     if (direct) {
1019         i = ff_find_unused_picture(s, 1);
1020         if (i < 0)
1021             return i;
1022
1023         pic = &s->picture[i].f;
1024         pic->reference = 3;
1025
1026         for (i = 0; i < 4; i++) {
1027             pic->data[i]     = pic_arg->data[i];
1028             pic->linesize[i] = pic_arg->linesize[i];
1029         }
1030         if (ff_alloc_picture(s, (Picture *) pic, 1) < 0) {
1031             return -1;
1032         }
1033     } else {
1034         i = ff_find_unused_picture(s, 0);
1035         if (i < 0)
1036             return i;
1037
1038         pic = &s->picture[i].f;
1039         pic->reference = 3;
1040
1041         if (ff_alloc_picture(s, (Picture *) pic, 0) < 0) {
1042             return -1;
1043         }
1044
1045         if (pic->data[0] + INPLACE_OFFSET == pic_arg->data[0] &&
1046             pic->data[1] + INPLACE_OFFSET == pic_arg->data[1] &&
1047             pic->data[2] + INPLACE_OFFSET == pic_arg->data[2]) {
1048             // empty
1049         } else {
1050             int h_chroma_shift, v_chroma_shift;
1051             avcodec_get_chroma_sub_sample(s->avctx->pix_fmt, &h_chroma_shift,
1052                                           &v_chroma_shift);
1053
1054             for (i = 0; i < 3; i++) {
1055                 int src_stride = pic_arg->linesize[i];
1056                 int dst_stride = i ? s->uvlinesize : s->linesize;
1057                 int h_shift = i ? h_chroma_shift : 0;
1058                 int v_shift = i ? v_chroma_shift : 0;
1059                 int w = s->width  >> h_shift;
1060                 int h = s->height >> v_shift;
1061                 uint8_t *src = pic_arg->data[i];
1062                 uint8_t *dst = pic->data[i];
1063
1064                 if(s->codec_id == AV_CODEC_ID_AMV && !(s->avctx->flags & CODEC_FLAG_EMU_EDGE)){
1065                     h= ((s->height+15)/16*16)>>v_shift;
1066                 }
1067
1068                 if (!s->avctx->rc_buffer_size)
1069                     dst += INPLACE_OFFSET;
1070
1071                 if (src_stride == dst_stride)
1072                     memcpy(dst, src, src_stride * h);
1073                 else {
1074                     while (h--) {
1075                         memcpy(dst, src, w);
1076                         dst += dst_stride;
1077                         src += src_stride;
1078                     }
1079                 }
1080             }
1081         }
1082     }
1083     copy_picture_attributes(s, pic, pic_arg);
1084     pic->pts = pts; // we set this here to avoid modifiying pic_arg
1085   }
1086
1087     /* shift buffer entries */
1088     for (i = 1; i < MAX_PICTURE_COUNT /*s->encoding_delay + 1*/; i++)
1089         s->input_picture[i - 1] = s->input_picture[i];
1090
1091     s->input_picture[encoding_delay] = (Picture*) pic;
1092
1093     return 0;
1094 }
1095
1096 static int skip_check(MpegEncContext *s, Picture *p, Picture *ref)
1097 {
1098     int x, y, plane;
1099     int score = 0;
1100     int64_t score64 = 0;
1101
1102     for (plane = 0; plane < 3; plane++) {
1103         const int stride = p->f.linesize[plane];
1104         const int bw = plane ? 1 : 2;
1105         for (y = 0; y < s->mb_height * bw; y++) {
1106             for (x = 0; x < s->mb_width * bw; x++) {
1107                 int off = p->f.type == FF_BUFFER_TYPE_SHARED ? 0 : 16;
1108                 uint8_t *dptr = p->f.data[plane] + 8 * (x + y * stride) + off;
1109                 uint8_t *rptr = ref->f.data[plane] + 8 * (x + y * stride);
1110                 int v   = s->dsp.frame_skip_cmp[1](s, dptr, rptr, stride, 8);
1111
1112                 switch (s->avctx->frame_skip_exp) {
1113                 case 0: score    =  FFMAX(score, v);          break;
1114                 case 1: score   += FFABS(v);                  break;
1115                 case 2: score   += v * v;                     break;
1116                 case 3: score64 += FFABS(v * v * (int64_t)v); break;
1117                 case 4: score64 += v * v * (int64_t)(v * v);  break;
1118                 }
1119             }
1120         }
1121     }
1122
1123     if (score)
1124         score64 = score;
1125
1126     if (score64 < s->avctx->frame_skip_threshold)
1127         return 1;
1128     if (score64 < ((s->avctx->frame_skip_factor * (int64_t)s->lambda) >> 8))
1129         return 1;
1130     return 0;
1131 }
1132
1133 static int encode_frame(AVCodecContext *c, AVFrame *frame)
1134 {
1135     AVPacket pkt = { 0 };
1136     int ret, got_output;
1137
1138     av_init_packet(&pkt);
1139     ret = avcodec_encode_video2(c, &pkt, frame, &got_output);
1140     if (ret < 0)
1141         return ret;
1142
1143     ret = pkt.size;
1144     av_free_packet(&pkt);
1145     return ret;
1146 }
1147
1148 static int estimate_best_b_count(MpegEncContext *s)
1149 {
1150     AVCodec *codec    = avcodec_find_encoder(s->avctx->codec_id);
1151     AVCodecContext *c = avcodec_alloc_context3(NULL);
1152     AVFrame input[FF_MAX_B_FRAMES + 2];
1153     const int scale = s->avctx->brd_scale;
1154     int i, j, out_size, p_lambda, b_lambda, lambda2;
1155     int64_t best_rd  = INT64_MAX;
1156     int best_b_count = -1;
1157
1158     av_assert0(scale >= 0 && scale <= 3);
1159
1160     //emms_c();
1161     //s->next_picture_ptr->quality;
1162     p_lambda = s->last_lambda_for[AV_PICTURE_TYPE_P];
1163     //p_lambda * FFABS(s->avctx->b_quant_factor) + s->avctx->b_quant_offset;
1164     b_lambda = s->last_lambda_for[AV_PICTURE_TYPE_B];
1165     if (!b_lambda) // FIXME we should do this somewhere else
1166         b_lambda = p_lambda;
1167     lambda2  = (b_lambda * b_lambda + (1 << FF_LAMBDA_SHIFT) / 2) >>
1168                FF_LAMBDA_SHIFT;
1169
1170     c->width        = s->width  >> scale;
1171     c->height       = s->height >> scale;
1172     c->flags        = CODEC_FLAG_QSCALE | CODEC_FLAG_PSNR |
1173                       CODEC_FLAG_INPUT_PRESERVED /*| CODEC_FLAG_EMU_EDGE*/;
1174     c->flags       |= s->avctx->flags & CODEC_FLAG_QPEL;
1175     c->mb_decision  = s->avctx->mb_decision;
1176     c->me_cmp       = s->avctx->me_cmp;
1177     c->mb_cmp       = s->avctx->mb_cmp;
1178     c->me_sub_cmp   = s->avctx->me_sub_cmp;
1179     c->pix_fmt      = AV_PIX_FMT_YUV420P;
1180     c->time_base    = s->avctx->time_base;
1181     c->max_b_frames = s->max_b_frames;
1182
1183     if (avcodec_open2(c, codec, NULL) < 0)
1184         return -1;
1185
1186     for (i = 0; i < s->max_b_frames + 2; i++) {
1187         int ysize = c->width * c->height;
1188         int csize = (c->width / 2) * (c->height / 2);
1189         Picture pre_input, *pre_input_ptr = i ? s->input_picture[i - 1] :
1190                                                 s->next_picture_ptr;
1191
1192         avcodec_get_frame_defaults(&input[i]);
1193         input[i].data[0]     = av_malloc(ysize + 2 * csize);
1194         input[i].data[1]     = input[i].data[0] + ysize;
1195         input[i].data[2]     = input[i].data[1] + csize;
1196         input[i].linesize[0] = c->width;
1197         input[i].linesize[1] =
1198         input[i].linesize[2] = c->width / 2;
1199
1200         if (pre_input_ptr && (!i || s->input_picture[i - 1])) {
1201             pre_input = *pre_input_ptr;
1202
1203             if (pre_input.f.type != FF_BUFFER_TYPE_SHARED && i) {
1204                 pre_input.f.data[0] += INPLACE_OFFSET;
1205                 pre_input.f.data[1] += INPLACE_OFFSET;
1206                 pre_input.f.data[2] += INPLACE_OFFSET;
1207             }
1208
1209             s->dsp.shrink[scale](input[i].data[0], input[i].linesize[0],
1210                                  pre_input.f.data[0], pre_input.f.linesize[0],
1211                                  c->width,      c->height);
1212             s->dsp.shrink[scale](input[i].data[1], input[i].linesize[1],
1213                                  pre_input.f.data[1], pre_input.f.linesize[1],
1214                                  c->width >> 1, c->height >> 1);
1215             s->dsp.shrink[scale](input[i].data[2], input[i].linesize[2],
1216                                  pre_input.f.data[2], pre_input.f.linesize[2],
1217                                  c->width >> 1, c->height >> 1);
1218         }
1219     }
1220
1221     for (j = 0; j < s->max_b_frames + 1; j++) {
1222         int64_t rd = 0;
1223
1224         if (!s->input_picture[j])
1225             break;
1226
1227         c->error[0] = c->error[1] = c->error[2] = 0;
1228
1229         input[0].pict_type = AV_PICTURE_TYPE_I;
1230         input[0].quality   = 1 * FF_QP2LAMBDA;
1231
1232         out_size = encode_frame(c, &input[0]);
1233
1234         //rd += (out_size * lambda2) >> FF_LAMBDA_SHIFT;
1235
1236         for (i = 0; i < s->max_b_frames + 1; i++) {
1237             int is_p = i % (j + 1) == j || i == s->max_b_frames;
1238
1239             input[i + 1].pict_type = is_p ?
1240                                      AV_PICTURE_TYPE_P : AV_PICTURE_TYPE_B;
1241             input[i + 1].quality   = is_p ? p_lambda : b_lambda;
1242
1243             out_size = encode_frame(c, &input[i + 1]);
1244
1245             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1246         }
1247
1248         /* get the delayed frames */
1249         while (out_size) {
1250             out_size = encode_frame(c, NULL);
1251             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1252         }
1253
1254         rd += c->error[0] + c->error[1] + c->error[2];
1255
1256         if (rd < best_rd) {
1257             best_rd = rd;
1258             best_b_count = j;
1259         }
1260     }
1261
1262     avcodec_close(c);
1263     av_freep(&c);
1264
1265     for (i = 0; i < s->max_b_frames + 2; i++) {
1266         av_freep(&input[i].data[0]);
1267     }
1268
1269     return best_b_count;
1270 }
1271
1272 static int select_input_picture(MpegEncContext *s)
1273 {
1274     int i;
1275
1276     for (i = 1; i < MAX_PICTURE_COUNT; i++)
1277         s->reordered_input_picture[i - 1] = s->reordered_input_picture[i];
1278     s->reordered_input_picture[MAX_PICTURE_COUNT - 1] = NULL;
1279
1280     /* set next picture type & ordering */
1281     if (s->reordered_input_picture[0] == NULL && s->input_picture[0]) {
1282         if (/*s->picture_in_gop_number >= s->gop_size ||*/
1283             s->next_picture_ptr == NULL || s->intra_only) {
1284             s->reordered_input_picture[0] = s->input_picture[0];
1285             s->reordered_input_picture[0]->f.pict_type = AV_PICTURE_TYPE_I;
1286             s->reordered_input_picture[0]->f.coded_picture_number =
1287                 s->coded_picture_number++;
1288         } else {
1289             int b_frames;
1290
1291             if (s->avctx->frame_skip_threshold || s->avctx->frame_skip_factor) {
1292                 if (s->picture_in_gop_number < s->gop_size &&
1293                     skip_check(s, s->input_picture[0], s->next_picture_ptr)) {
1294                     // FIXME check that te gop check above is +-1 correct
1295                     if (s->input_picture[0]->f.type == FF_BUFFER_TYPE_SHARED) {
1296                         for (i = 0; i < 4; i++)
1297                             s->input_picture[0]->f.data[i] = NULL;
1298                         s->input_picture[0]->f.type = 0;
1299                     } else {
1300                         assert(s->input_picture[0]->f.type == FF_BUFFER_TYPE_USER ||
1301                                s->input_picture[0]->f.type == FF_BUFFER_TYPE_INTERNAL);
1302
1303                         s->avctx->release_buffer(s->avctx,
1304                                                  &s->input_picture[0]->f);
1305                     }
1306
1307                     emms_c();
1308                     ff_vbv_update(s, 0);
1309
1310                     goto no_output_pic;
1311                 }
1312             }
1313
1314             if (s->flags & CODEC_FLAG_PASS2) {
1315                 for (i = 0; i < s->max_b_frames + 1; i++) {
1316                     int pict_num = s->input_picture[0]->f.display_picture_number + i;
1317
1318                     if (pict_num >= s->rc_context.num_entries)
1319                         break;
1320                     if (!s->input_picture[i]) {
1321                         s->rc_context.entry[pict_num - 1].new_pict_type = AV_PICTURE_TYPE_P;
1322                         break;
1323                     }
1324
1325                     s->input_picture[i]->f.pict_type =
1326                         s->rc_context.entry[pict_num].new_pict_type;
1327                 }
1328             }
1329
1330             if (s->avctx->b_frame_strategy == 0) {
1331                 b_frames = s->max_b_frames;
1332                 while (b_frames && !s->input_picture[b_frames])
1333                     b_frames--;
1334             } else if (s->avctx->b_frame_strategy == 1) {
1335                 for (i = 1; i < s->max_b_frames + 1; i++) {
1336                     if (s->input_picture[i] &&
1337                         s->input_picture[i]->b_frame_score == 0) {
1338                         s->input_picture[i]->b_frame_score =
1339                             get_intra_count(s,
1340                                             s->input_picture[i    ]->f.data[0],
1341                                             s->input_picture[i - 1]->f.data[0],
1342                                             s->linesize) + 1;
1343                     }
1344                 }
1345                 for (i = 0; i < s->max_b_frames + 1; i++) {
1346                     if (s->input_picture[i] == NULL ||
1347                         s->input_picture[i]->b_frame_score - 1 >
1348                             s->mb_num / s->avctx->b_sensitivity)
1349                         break;
1350                 }
1351
1352                 b_frames = FFMAX(0, i - 1);
1353
1354                 /* reset scores */
1355                 for (i = 0; i < b_frames + 1; i++) {
1356                     s->input_picture[i]->b_frame_score = 0;
1357                 }
1358             } else if (s->avctx->b_frame_strategy == 2) {
1359                 b_frames = estimate_best_b_count(s);
1360             } else {
1361                 av_log(s->avctx, AV_LOG_ERROR, "illegal b frame strategy\n");
1362                 b_frames = 0;
1363             }
1364
1365             emms_c();
1366
1367             for (i = b_frames - 1; i >= 0; i--) {
1368                 int type = s->input_picture[i]->f.pict_type;
1369                 if (type && type != AV_PICTURE_TYPE_B)
1370                     b_frames = i;
1371             }
1372             if (s->input_picture[b_frames]->f.pict_type == AV_PICTURE_TYPE_B &&
1373                 b_frames == s->max_b_frames) {
1374                 av_log(s->avctx, AV_LOG_ERROR,
1375                        "warning, too many b frames in a row\n");
1376             }
1377
1378             if (s->picture_in_gop_number + b_frames >= s->gop_size) {
1379                 if ((s->mpv_flags & FF_MPV_FLAG_STRICT_GOP) &&
1380                     s->gop_size > s->picture_in_gop_number) {
1381                     b_frames = s->gop_size - s->picture_in_gop_number - 1;
1382                 } else {
1383                     if (s->flags & CODEC_FLAG_CLOSED_GOP)
1384                         b_frames = 0;
1385                     s->input_picture[b_frames]->f.pict_type = AV_PICTURE_TYPE_I;
1386                 }
1387             }
1388
1389             if ((s->flags & CODEC_FLAG_CLOSED_GOP) && b_frames &&
1390                 s->input_picture[b_frames]->f.pict_type == AV_PICTURE_TYPE_I)
1391                 b_frames--;
1392
1393             s->reordered_input_picture[0] = s->input_picture[b_frames];
1394             if (s->reordered_input_picture[0]->f.pict_type != AV_PICTURE_TYPE_I)
1395                 s->reordered_input_picture[0]->f.pict_type = AV_PICTURE_TYPE_P;
1396             s->reordered_input_picture[0]->f.coded_picture_number =
1397                 s->coded_picture_number++;
1398             for (i = 0; i < b_frames; i++) {
1399                 s->reordered_input_picture[i + 1] = s->input_picture[i];
1400                 s->reordered_input_picture[i + 1]->f.pict_type =
1401                     AV_PICTURE_TYPE_B;
1402                 s->reordered_input_picture[i + 1]->f.coded_picture_number =
1403                     s->coded_picture_number++;
1404             }
1405         }
1406     }
1407 no_output_pic:
1408     if (s->reordered_input_picture[0]) {
1409         s->reordered_input_picture[0]->f.reference =
1410            s->reordered_input_picture[0]->f.pict_type !=
1411                AV_PICTURE_TYPE_B ? 3 : 0;
1412
1413         ff_copy_picture(&s->new_picture, s->reordered_input_picture[0]);
1414
1415         if (s->reordered_input_picture[0]->f.type == FF_BUFFER_TYPE_SHARED ||
1416             s->avctx->rc_buffer_size) {
1417             // input is a shared pix, so we can't modifiy it -> alloc a new
1418             // one & ensure that the shared one is reuseable
1419
1420             Picture *pic;
1421             int i = ff_find_unused_picture(s, 0);
1422             if (i < 0)
1423                 return i;
1424             pic = &s->picture[i];
1425
1426             pic->f.reference = s->reordered_input_picture[0]->f.reference;
1427             if (ff_alloc_picture(s, pic, 0) < 0) {
1428                 return -1;
1429             }
1430
1431             /* mark us unused / free shared pic */
1432             if (s->reordered_input_picture[0]->f.type == FF_BUFFER_TYPE_INTERNAL)
1433                 s->avctx->release_buffer(s->avctx,
1434                                          &s->reordered_input_picture[0]->f);
1435             for (i = 0; i < 4; i++)
1436                 s->reordered_input_picture[0]->f.data[i] = NULL;
1437             s->reordered_input_picture[0]->f.type = 0;
1438
1439             copy_picture_attributes(s, &pic->f,
1440                                     &s->reordered_input_picture[0]->f);
1441
1442             s->current_picture_ptr = pic;
1443         } else {
1444             // input is not a shared pix -> reuse buffer for current_pix
1445
1446             assert(s->reordered_input_picture[0]->f.type ==
1447                        FF_BUFFER_TYPE_USER ||
1448                    s->reordered_input_picture[0]->f.type ==
1449                        FF_BUFFER_TYPE_INTERNAL);
1450
1451             s->current_picture_ptr = s->reordered_input_picture[0];
1452             for (i = 0; i < 4; i++) {
1453                 s->new_picture.f.data[i] += INPLACE_OFFSET;
1454             }
1455         }
1456         ff_copy_picture(&s->current_picture, s->current_picture_ptr);
1457
1458         s->picture_number = s->new_picture.f.display_picture_number;
1459     } else {
1460         memset(&s->new_picture, 0, sizeof(Picture));
1461     }
1462     return 0;
1463 }
1464
1465 int ff_MPV_encode_picture(AVCodecContext *avctx, AVPacket *pkt,
1466                           AVFrame *pic_arg, int *got_packet)
1467 {
1468     MpegEncContext *s = avctx->priv_data;
1469     int i, stuffing_count, ret;
1470     int context_count = s->slice_context_count;
1471
1472     s->picture_in_gop_number++;
1473
1474     if (load_input_picture(s, pic_arg) < 0)
1475         return -1;
1476
1477     if (select_input_picture(s) < 0) {
1478         return -1;
1479     }
1480
1481     /* output? */
1482     if (s->new_picture.f.data[0]) {
1483         if ((ret = ff_alloc_packet2(avctx, pkt, s->mb_width*s->mb_height*(MAX_MB_BYTES+100)+10000)) < 0)
1484             return ret;
1485         if (s->mb_info) {
1486             s->mb_info_ptr = av_packet_new_side_data(pkt,
1487                                  AV_PKT_DATA_H263_MB_INFO,
1488                                  s->mb_width*s->mb_height*12);
1489             s->prev_mb_info = s->last_mb_info = s->mb_info_size = 0;
1490         }
1491
1492         for (i = 0; i < context_count; i++) {
1493             int start_y = s->thread_context[i]->start_mb_y;
1494             int   end_y = s->thread_context[i]->  end_mb_y;
1495             int h       = s->mb_height;
1496             uint8_t *start = pkt->data + (size_t)(((int64_t) pkt->size) * start_y / h);
1497             uint8_t *end   = pkt->data + (size_t)(((int64_t) pkt->size) *   end_y / h);
1498
1499             init_put_bits(&s->thread_context[i]->pb, start, end - start);
1500         }
1501
1502         s->pict_type = s->new_picture.f.pict_type;
1503         //emms_c();
1504         if (ff_MPV_frame_start(s, avctx) < 0)
1505             return -1;
1506 vbv_retry:
1507         if (encode_picture(s, s->picture_number) < 0)
1508             return -1;
1509
1510         avctx->header_bits = s->header_bits;
1511         avctx->mv_bits     = s->mv_bits;
1512         avctx->misc_bits   = s->misc_bits;
1513         avctx->i_tex_bits  = s->i_tex_bits;
1514         avctx->p_tex_bits  = s->p_tex_bits;
1515         avctx->i_count     = s->i_count;
1516         // FIXME f/b_count in avctx
1517         avctx->p_count     = s->mb_num - s->i_count - s->skip_count;
1518         avctx->skip_count  = s->skip_count;
1519
1520         ff_MPV_frame_end(s);
1521
1522         if (CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG)
1523             ff_mjpeg_encode_picture_trailer(s);
1524
1525         if (avctx->rc_buffer_size) {
1526             RateControlContext *rcc = &s->rc_context;
1527             int max_size = rcc->buffer_index * avctx->rc_max_available_vbv_use;
1528
1529             if (put_bits_count(&s->pb) > max_size &&
1530                 s->lambda < s->avctx->lmax) {
1531                 s->next_lambda = FFMAX(s->lambda + 1, s->lambda *
1532                                        (s->qscale + 1) / s->qscale);
1533                 if (s->adaptive_quant) {
1534                     int i;
1535                     for (i = 0; i < s->mb_height * s->mb_stride; i++)
1536                         s->lambda_table[i] =
1537                             FFMAX(s->lambda_table[i] + 1,
1538                                   s->lambda_table[i] * (s->qscale + 1) /
1539                                   s->qscale);
1540                 }
1541                 s->mb_skipped = 0;        // done in MPV_frame_start()
1542                 // done in encode_picture() so we must undo it
1543                 if (s->pict_type == AV_PICTURE_TYPE_P) {
1544                     if (s->flipflop_rounding          ||
1545                         s->codec_id == AV_CODEC_ID_H263P ||
1546                         s->codec_id == AV_CODEC_ID_MPEG4)
1547                         s->no_rounding ^= 1;
1548                 }
1549                 if (s->pict_type != AV_PICTURE_TYPE_B) {
1550                     s->time_base       = s->last_time_base;
1551                     s->last_non_b_time = s->time - s->pp_time;
1552                 }
1553                 for (i = 0; i < context_count; i++) {
1554                     PutBitContext *pb = &s->thread_context[i]->pb;
1555                     init_put_bits(pb, pb->buf, pb->buf_end - pb->buf);
1556                 }
1557                 goto vbv_retry;
1558             }
1559
1560             assert(s->avctx->rc_max_rate);
1561         }
1562
1563         if (s->flags & CODEC_FLAG_PASS1)
1564             ff_write_pass1_stats(s);
1565
1566         for (i = 0; i < 4; i++) {
1567             s->current_picture_ptr->f.error[i] = s->current_picture.f.error[i];
1568             avctx->error[i] += s->current_picture_ptr->f.error[i];
1569         }
1570
1571         if (s->flags & CODEC_FLAG_PASS1)
1572             assert(avctx->header_bits + avctx->mv_bits + avctx->misc_bits +
1573                    avctx->i_tex_bits + avctx->p_tex_bits ==
1574                        put_bits_count(&s->pb));
1575         flush_put_bits(&s->pb);
1576         s->frame_bits  = put_bits_count(&s->pb);
1577
1578         stuffing_count = ff_vbv_update(s, s->frame_bits);
1579         s->stuffing_bits = 8*stuffing_count;
1580         if (stuffing_count) {
1581             if (s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb) >> 3) <
1582                     stuffing_count + 50) {
1583                 av_log(s->avctx, AV_LOG_ERROR, "stuffing too large\n");
1584                 return -1;
1585             }
1586
1587             switch (s->codec_id) {
1588             case AV_CODEC_ID_MPEG1VIDEO:
1589             case AV_CODEC_ID_MPEG2VIDEO:
1590                 while (stuffing_count--) {
1591                     put_bits(&s->pb, 8, 0);
1592                 }
1593             break;
1594             case AV_CODEC_ID_MPEG4:
1595                 put_bits(&s->pb, 16, 0);
1596                 put_bits(&s->pb, 16, 0x1C3);
1597                 stuffing_count -= 4;
1598                 while (stuffing_count--) {
1599                     put_bits(&s->pb, 8, 0xFF);
1600                 }
1601             break;
1602             default:
1603                 av_log(s->avctx, AV_LOG_ERROR, "vbv buffer overflow\n");
1604             }
1605             flush_put_bits(&s->pb);
1606             s->frame_bits  = put_bits_count(&s->pb);
1607         }
1608
1609         /* update mpeg1/2 vbv_delay for CBR */
1610         if (s->avctx->rc_max_rate                          &&
1611             s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
1612             s->out_format == FMT_MPEG1                     &&
1613             90000LL * (avctx->rc_buffer_size - 1) <=
1614                 s->avctx->rc_max_rate * 0xFFFFLL) {
1615             int vbv_delay, min_delay;
1616             double inbits  = s->avctx->rc_max_rate *
1617                              av_q2d(s->avctx->time_base);
1618             int    minbits = s->frame_bits - 8 *
1619                              (s->vbv_delay_ptr - s->pb.buf - 1);
1620             double bits    = s->rc_context.buffer_index + minbits - inbits;
1621
1622             if (bits < 0)
1623                 av_log(s->avctx, AV_LOG_ERROR,
1624                        "Internal error, negative bits\n");
1625
1626             assert(s->repeat_first_field == 0);
1627
1628             vbv_delay = bits * 90000 / s->avctx->rc_max_rate;
1629             min_delay = (minbits * 90000LL + s->avctx->rc_max_rate - 1) /
1630                         s->avctx->rc_max_rate;
1631
1632             vbv_delay = FFMAX(vbv_delay, min_delay);
1633
1634             av_assert0(vbv_delay < 0xFFFF);
1635
1636             s->vbv_delay_ptr[0] &= 0xF8;
1637             s->vbv_delay_ptr[0] |= vbv_delay >> 13;
1638             s->vbv_delay_ptr[1]  = vbv_delay >> 5;
1639             s->vbv_delay_ptr[2] &= 0x07;
1640             s->vbv_delay_ptr[2] |= vbv_delay << 3;
1641             avctx->vbv_delay     = vbv_delay * 300;
1642         }
1643         s->total_bits     += s->frame_bits;
1644         avctx->frame_bits  = s->frame_bits;
1645
1646         pkt->pts = s->current_picture.f.pts;
1647         if (!s->low_delay && s->pict_type != AV_PICTURE_TYPE_B) {
1648             if (!s->current_picture.f.coded_picture_number)
1649                 pkt->dts = pkt->pts - s->dts_delta;
1650             else
1651                 pkt->dts = s->reordered_pts;
1652             s->reordered_pts = pkt->pts;
1653         } else
1654             pkt->dts = pkt->pts;
1655         if (s->current_picture.f.key_frame)
1656             pkt->flags |= AV_PKT_FLAG_KEY;
1657         if (s->mb_info)
1658             av_packet_shrink_side_data(pkt, AV_PKT_DATA_H263_MB_INFO, s->mb_info_size);
1659     } else {
1660         s->frame_bits = 0;
1661     }
1662     assert((s->frame_bits & 7) == 0);
1663
1664     pkt->size = s->frame_bits / 8;
1665     *got_packet = !!pkt->size;
1666     return 0;
1667 }
1668
1669 static inline void dct_single_coeff_elimination(MpegEncContext *s,
1670                                                 int n, int threshold)
1671 {
1672     static const char tab[64] = {
1673         3, 2, 2, 1, 1, 1, 1, 1,
1674         1, 1, 1, 1, 1, 1, 1, 1,
1675         1, 1, 1, 1, 1, 1, 1, 1,
1676         0, 0, 0, 0, 0, 0, 0, 0,
1677         0, 0, 0, 0, 0, 0, 0, 0,
1678         0, 0, 0, 0, 0, 0, 0, 0,
1679         0, 0, 0, 0, 0, 0, 0, 0,
1680         0, 0, 0, 0, 0, 0, 0, 0
1681     };
1682     int score = 0;
1683     int run = 0;
1684     int i;
1685     DCTELEM *block = s->block[n];
1686     const int last_index = s->block_last_index[n];
1687     int skip_dc;
1688
1689     if (threshold < 0) {
1690         skip_dc = 0;
1691         threshold = -threshold;
1692     } else
1693         skip_dc = 1;
1694
1695     /* Are all we could set to zero already zero? */
1696     if (last_index <= skip_dc - 1)
1697         return;
1698
1699     for (i = 0; i <= last_index; i++) {
1700         const int j = s->intra_scantable.permutated[i];
1701         const int level = FFABS(block[j]);
1702         if (level == 1) {
1703             if (skip_dc && i == 0)
1704                 continue;
1705             score += tab[run];
1706             run = 0;
1707         } else if (level > 1) {
1708             return;
1709         } else {
1710             run++;
1711         }
1712     }
1713     if (score >= threshold)
1714         return;
1715     for (i = skip_dc; i <= last_index; i++) {
1716         const int j = s->intra_scantable.permutated[i];
1717         block[j] = 0;
1718     }
1719     if (block[0])
1720         s->block_last_index[n] = 0;
1721     else
1722         s->block_last_index[n] = -1;
1723 }
1724
1725 static inline void clip_coeffs(MpegEncContext *s, DCTELEM *block,
1726                                int last_index)
1727 {
1728     int i;
1729     const int maxlevel = s->max_qcoeff;
1730     const int minlevel = s->min_qcoeff;
1731     int overflow = 0;
1732
1733     if (s->mb_intra) {
1734         i = 1; // skip clipping of intra dc
1735     } else
1736         i = 0;
1737
1738     for (; i <= last_index; i++) {
1739         const int j = s->intra_scantable.permutated[i];
1740         int level = block[j];
1741
1742         if (level > maxlevel) {
1743             level = maxlevel;
1744             overflow++;
1745         } else if (level < minlevel) {
1746             level = minlevel;
1747             overflow++;
1748         }
1749
1750         block[j] = level;
1751     }
1752
1753     if (overflow && s->avctx->mb_decision == FF_MB_DECISION_SIMPLE)
1754         av_log(s->avctx, AV_LOG_INFO,
1755                "warning, clipping %d dct coefficients to %d..%d\n",
1756                overflow, minlevel, maxlevel);
1757 }
1758
/**
 * Fill an 8x8 weight table from the local activity of an 8x8 pixel block.
 *
 * For every pixel the up-to-3x3 neighbourhood (cut off at the block border)
 * is gathered and the weight is
 *     36 * sqrt(count * sum(v^2) - sum(v)^2) / count
 * with count the number of neighbourhood samples.
 *
 * @param weight output, 64 entries in row-major order
 * @param ptr    top-left pixel of the 8x8 block
 * @param stride distance in bytes between two pixel rows
 */
static void get_visual_weight(int16_t *weight, uint8_t *ptr, int stride)
{
    int row, col;
    // FIXME optimize
    for (row = 0; row < 8; row++) {
        const int top    = FFMAX(row - 1, 0);
        const int bottom = FFMIN(8, row + 2);

        for (col = 0; col < 8; col++) {
            const int left  = FFMAX(col - 1, 0);
            const int right = FFMIN(8, col + 2);
            int total   = 0;
            int squares = 0;
            int samples = 0;
            int r, c;

            for (r = top; r < bottom; r++) {
                for (c = left; c < right; c++) {
                    const int v = ptr[c + r * stride];

                    total   += v;
                    squares += v * v;
                    samples++;
                }
            }
            weight[col + 8 * row] =
                (36 * ff_sqrt(samples * squares - total * total)) / samples;
        }
    }
}
1782
1783 static av_always_inline void encode_mb_internal(MpegEncContext *s,
1784                                                 int motion_x, int motion_y,
1785                                                 int mb_block_height,
1786                                                 int mb_block_count)
1787 {
1788     int16_t weight[8][64];
1789     DCTELEM orig[8][64];
1790     const int mb_x = s->mb_x;
1791     const int mb_y = s->mb_y;
1792     int i;
1793     int skip_dct[8];
1794     int dct_offset = s->linesize * 8; // default for progressive frames
1795     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
1796     int wrap_y, wrap_c;
1797
1798     for (i = 0; i < mb_block_count; i++)
1799         skip_dct[i] = s->skipdct;
1800
1801     if (s->adaptive_quant) {
1802         const int last_qp = s->qscale;
1803         const int mb_xy = mb_x + mb_y * s->mb_stride;
1804
1805         s->lambda = s->lambda_table[mb_xy];
1806         update_qscale(s);
1807
1808         if (!(s->mpv_flags & FF_MPV_FLAG_QP_RD)) {
1809             s->qscale = s->current_picture_ptr->f.qscale_table[mb_xy];
1810             s->dquant = s->qscale - last_qp;
1811
1812             if (s->out_format == FMT_H263) {
1813                 s->dquant = av_clip(s->dquant, -2, 2);
1814
1815                 if (s->codec_id == AV_CODEC_ID_MPEG4) {
1816                     if (!s->mb_intra) {
1817                         if (s->pict_type == AV_PICTURE_TYPE_B) {
1818                             if (s->dquant & 1 || s->mv_dir & MV_DIRECT)
1819                                 s->dquant = 0;
1820                         }
1821                         if (s->mv_type == MV_TYPE_8X8)
1822                             s->dquant = 0;
1823                     }
1824                 }
1825             }
1826         }
1827         ff_set_qscale(s, last_qp + s->dquant);
1828     } else if (s->mpv_flags & FF_MPV_FLAG_QP_RD)
1829         ff_set_qscale(s, s->qscale + s->dquant);
1830
1831     wrap_y = s->linesize;
1832     wrap_c = s->uvlinesize;
1833     ptr_y  = s->new_picture.f.data[0] +
1834              (mb_y * 16 * wrap_y)              + mb_x * 16;
1835     ptr_cb = s->new_picture.f.data[1] +
1836              (mb_y * mb_block_height * wrap_c) + mb_x * 8;
1837     ptr_cr = s->new_picture.f.data[2] +
1838              (mb_y * mb_block_height * wrap_c) + mb_x * 8;
1839
1840     if((mb_x*16+16 > s->width || mb_y*16+16 > s->height) && s->codec_id != AV_CODEC_ID_AMV){
1841         uint8_t *ebuf = s->edge_emu_buffer + 32;
1842         s->dsp.emulated_edge_mc(ebuf, ptr_y, wrap_y, 16, 16, mb_x * 16,
1843                                 mb_y * 16, s->width, s->height);
1844         ptr_y = ebuf;
1845         s->dsp.emulated_edge_mc(ebuf + 18 * wrap_y, ptr_cb, wrap_c, 8,
1846                                 mb_block_height, mb_x * 8, mb_y * 8,
1847                                 (s->width+1) >> 1, (s->height+1) >> 1);
1848         ptr_cb = ebuf + 18 * wrap_y;
1849         s->dsp.emulated_edge_mc(ebuf + 18 * wrap_y + 8, ptr_cr, wrap_c, 8,
1850                                 mb_block_height, mb_x * 8, mb_y * 8,
1851                                 (s->width+1) >> 1, (s->height+1) >> 1);
1852         ptr_cr = ebuf + 18 * wrap_y + 8;
1853     }
1854
1855     if (s->mb_intra) {
1856         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
1857             int progressive_score, interlaced_score;
1858
1859             s->interlaced_dct = 0;
1860             progressive_score = s->dsp.ildct_cmp[4](s, ptr_y,
1861                                                     NULL, wrap_y, 8) +
1862                                 s->dsp.ildct_cmp[4](s, ptr_y + wrap_y * 8,
1863                                                     NULL, wrap_y, 8) - 400;
1864
1865             if (progressive_score > 0) {
1866                 interlaced_score = s->dsp.ildct_cmp[4](s, ptr_y,
1867                                                        NULL, wrap_y * 2, 8) +
1868                                    s->dsp.ildct_cmp[4](s, ptr_y + wrap_y,
1869                                                        NULL, wrap_y * 2, 8);
1870                 if (progressive_score > interlaced_score) {
1871                     s->interlaced_dct = 1;
1872
1873                     dct_offset = wrap_y;
1874                     wrap_y <<= 1;
1875                     if (s->chroma_format == CHROMA_422)
1876                         wrap_c <<= 1;
1877                 }
1878             }
1879         }
1880
1881         s->dsp.get_pixels(s->block[0], ptr_y                  , wrap_y);
1882         s->dsp.get_pixels(s->block[1], ptr_y              + 8 , wrap_y);
1883         s->dsp.get_pixels(s->block[2], ptr_y + dct_offset     , wrap_y);
1884         s->dsp.get_pixels(s->block[3], ptr_y + dct_offset + 8 , wrap_y);
1885
1886         if (s->flags & CODEC_FLAG_GRAY) {
1887             skip_dct[4] = 1;
1888             skip_dct[5] = 1;
1889         } else {
1890             s->dsp.get_pixels(s->block[4], ptr_cb, wrap_c);
1891             s->dsp.get_pixels(s->block[5], ptr_cr, wrap_c);
1892             if (!s->chroma_y_shift) { /* 422 */
1893                 s->dsp.get_pixels(s->block[6],
1894                                   ptr_cb + (dct_offset >> 1), wrap_c);
1895                 s->dsp.get_pixels(s->block[7],
1896                                   ptr_cr + (dct_offset >> 1), wrap_c);
1897             }
1898         }
1899     } else {
1900         op_pixels_func (*op_pix)[4];
1901         qpel_mc_func (*op_qpix)[16];
1902         uint8_t *dest_y, *dest_cb, *dest_cr;
1903
1904         dest_y  = s->dest[0];
1905         dest_cb = s->dest[1];
1906         dest_cr = s->dest[2];
1907
1908         if ((!s->no_rounding) || s->pict_type == AV_PICTURE_TYPE_B) {
1909             op_pix  = s->dsp.put_pixels_tab;
1910             op_qpix = s->dsp.put_qpel_pixels_tab;
1911         } else {
1912             op_pix  = s->dsp.put_no_rnd_pixels_tab;
1913             op_qpix = s->dsp.put_no_rnd_qpel_pixels_tab;
1914         }
1915
1916         if (s->mv_dir & MV_DIR_FORWARD) {
1917             ff_MPV_motion(s, dest_y, dest_cb, dest_cr, 0,
1918                           s->last_picture.f.data,
1919                           op_pix, op_qpix);
1920             op_pix  = s->dsp.avg_pixels_tab;
1921             op_qpix = s->dsp.avg_qpel_pixels_tab;
1922         }
1923         if (s->mv_dir & MV_DIR_BACKWARD) {
1924             ff_MPV_motion(s, dest_y, dest_cb, dest_cr, 1,
1925                           s->next_picture.f.data,
1926                           op_pix, op_qpix);
1927         }
1928
1929         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
1930             int progressive_score, interlaced_score;
1931
1932             s->interlaced_dct = 0;
1933             progressive_score = s->dsp.ildct_cmp[0](s, dest_y,
1934                                                     ptr_y,              wrap_y,
1935                                                     8) +
1936                                 s->dsp.ildct_cmp[0](s, dest_y + wrap_y * 8,
1937                                                     ptr_y + wrap_y * 8, wrap_y,
1938                                                     8) - 400;
1939
1940             if (s->avctx->ildct_cmp == FF_CMP_VSSE)
1941                 progressive_score -= 400;
1942
1943             if (progressive_score > 0) {
1944                 interlaced_score = s->dsp.ildct_cmp[0](s, dest_y,
1945                                                        ptr_y,
1946                                                        wrap_y * 2, 8) +
1947                                    s->dsp.ildct_cmp[0](s, dest_y + wrap_y,
1948                                                        ptr_y + wrap_y,
1949                                                        wrap_y * 2, 8);
1950
1951                 if (progressive_score > interlaced_score) {
1952                     s->interlaced_dct = 1;
1953
1954                     dct_offset = wrap_y;
1955                     wrap_y <<= 1;
1956                     if (s->chroma_format == CHROMA_422)
1957                         wrap_c <<= 1;
1958                 }
1959             }
1960         }
1961
1962         s->dsp.diff_pixels(s->block[0], ptr_y, dest_y, wrap_y);
1963         s->dsp.diff_pixels(s->block[1], ptr_y + 8, dest_y + 8, wrap_y);
1964         s->dsp.diff_pixels(s->block[2], ptr_y + dct_offset,
1965                            dest_y + dct_offset, wrap_y);
1966         s->dsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8,
1967                            dest_y + dct_offset + 8, wrap_y);
1968
1969         if (s->flags & CODEC_FLAG_GRAY) {
1970             skip_dct[4] = 1;
1971             skip_dct[5] = 1;
1972         } else {
1973             s->dsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
1974             s->dsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
1975             if (!s->chroma_y_shift) { /* 422 */
1976                 s->dsp.diff_pixels(s->block[6], ptr_cb + (dct_offset >> 1),
1977                                    dest_cb + (dct_offset >> 1), wrap_c);
1978                 s->dsp.diff_pixels(s->block[7], ptr_cr + (dct_offset >> 1),
1979                                    dest_cr + (dct_offset >> 1), wrap_c);
1980             }
1981         }
1982         /* pre quantization */
1983         if (s->current_picture.mc_mb_var[s->mb_stride * mb_y + mb_x] <
1984                 2 * s->qscale * s->qscale) {
1985             // FIXME optimize
1986             if (s->dsp.sad[1](NULL, ptr_y , dest_y,
1987                               wrap_y, 8) < 20 * s->qscale)
1988                 skip_dct[0] = 1;
1989             if (s->dsp.sad[1](NULL, ptr_y + 8,
1990                               dest_y + 8, wrap_y, 8) < 20 * s->qscale)
1991                 skip_dct[1] = 1;
1992             if (s->dsp.sad[1](NULL, ptr_y + dct_offset,
1993                               dest_y + dct_offset, wrap_y, 8) < 20 * s->qscale)
1994                 skip_dct[2] = 1;
1995             if (s->dsp.sad[1](NULL, ptr_y + dct_offset + 8,
1996                               dest_y + dct_offset + 8,
1997                               wrap_y, 8) < 20 * s->qscale)
1998                 skip_dct[3] = 1;
1999             if (s->dsp.sad[1](NULL, ptr_cb, dest_cb,
2000                               wrap_c, 8) < 20 * s->qscale)
2001                 skip_dct[4] = 1;
2002             if (s->dsp.sad[1](NULL, ptr_cr, dest_cr,
2003                               wrap_c, 8) < 20 * s->qscale)
2004                 skip_dct[5] = 1;
2005             if (!s->chroma_y_shift) { /* 422 */
2006                 if (s->dsp.sad[1](NULL, ptr_cb + (dct_offset >> 1),
2007                                   dest_cb + (dct_offset >> 1),
2008                                   wrap_c, 8) < 20 * s->qscale)
2009                     skip_dct[6] = 1;
2010                 if (s->dsp.sad[1](NULL, ptr_cr + (dct_offset >> 1),
2011                                   dest_cr + (dct_offset >> 1),
2012                                   wrap_c, 8) < 20 * s->qscale)
2013                     skip_dct[7] = 1;
2014             }
2015         }
2016     }
2017
2018     if (s->quantizer_noise_shaping) {
2019         if (!skip_dct[0])
2020             get_visual_weight(weight[0], ptr_y                 , wrap_y);
2021         if (!skip_dct[1])
2022             get_visual_weight(weight[1], ptr_y              + 8, wrap_y);
2023         if (!skip_dct[2])
2024             get_visual_weight(weight[2], ptr_y + dct_offset    , wrap_y);
2025         if (!skip_dct[3])
2026             get_visual_weight(weight[3], ptr_y + dct_offset + 8, wrap_y);
2027         if (!skip_dct[4])
2028             get_visual_weight(weight[4], ptr_cb                , wrap_c);
2029         if (!skip_dct[5])
2030             get_visual_weight(weight[5], ptr_cr                , wrap_c);
2031         if (!s->chroma_y_shift) { /* 422 */
2032             if (!skip_dct[6])
2033                 get_visual_weight(weight[6], ptr_cb + (dct_offset >> 1),
2034                                   wrap_c);
2035             if (!skip_dct[7])
2036                 get_visual_weight(weight[7], ptr_cr + (dct_offset >> 1),
2037                                   wrap_c);
2038         }
2039         memcpy(orig[0], s->block[0], sizeof(DCTELEM) * 64 * mb_block_count);
2040     }
2041
2042     /* DCT & quantize */
2043     av_assert2(s->out_format != FMT_MJPEG || s->qscale == 8);
2044     {
2045         for (i = 0; i < mb_block_count; i++) {
2046             if (!skip_dct[i]) {
2047                 int overflow;
2048                 s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow);
2049                 // FIXME we could decide to change to quantizer instead of
2050                 // clipping
2051                 // JS: I don't think that would be a good idea it could lower
2052                 //     quality instead of improve it. Just INTRADC clipping
2053                 //     deserves changes in quantizer
2054                 if (overflow)
2055                     clip_coeffs(s, s->block[i], s->block_last_index[i]);
2056             } else
2057                 s->block_last_index[i] = -1;
2058         }
2059         if (s->quantizer_noise_shaping) {
2060             for (i = 0; i < mb_block_count; i++) {
2061                 if (!skip_dct[i]) {
2062                     s->block_last_index[i] =
2063                         dct_quantize_refine(s, s->block[i], weight[i],
2064                                             orig[i], i, s->qscale);
2065                 }
2066             }
2067         }
2068
2069         if (s->luma_elim_threshold && !s->mb_intra)
2070             for (i = 0; i < 4; i++)
2071                 dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
2072         if (s->chroma_elim_threshold && !s->mb_intra)
2073             for (i = 4; i < mb_block_count; i++)
2074                 dct_single_coeff_elimination(s, i, s->chroma_elim_threshold);
2075
2076         if (s->mpv_flags & FF_MPV_FLAG_CBP_RD) {
2077             for (i = 0; i < mb_block_count; i++) {
2078                 if (s->block_last_index[i] == -1)
2079                     s->coded_score[i] = INT_MAX / 256;
2080             }
2081         }
2082     }
2083
2084     if ((s->flags & CODEC_FLAG_GRAY) && s->mb_intra) {
2085         s->block_last_index[4] =
2086         s->block_last_index[5] = 0;
2087         s->block[4][0] =
2088         s->block[5][0] = (1024 + s->c_dc_scale / 2) / s->c_dc_scale;
2089     }
2090
2091     // non c quantize code returns incorrect block_last_index FIXME
2092     if (s->alternate_scan && s->dct_quantize != ff_dct_quantize_c) {
2093         for (i = 0; i < mb_block_count; i++) {
2094             int j;
2095             if (s->block_last_index[i] > 0) {
2096                 for (j = 63; j > 0; j--) {
2097                     if (s->block[i][s->intra_scantable.permutated[j]])
2098                         break;
2099                 }
2100                 s->block_last_index[i] = j;
2101             }
2102         }
2103     }
2104
2105     /* huffman encode */
2106     switch(s->codec_id){ //FIXME funct ptr could be slightly faster
2107     case AV_CODEC_ID_MPEG1VIDEO:
2108     case AV_CODEC_ID_MPEG2VIDEO:
2109         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
2110             ff_mpeg1_encode_mb(s, s->block, motion_x, motion_y);
2111         break;
2112     case AV_CODEC_ID_MPEG4:
2113         if (CONFIG_MPEG4_ENCODER)
2114             ff_mpeg4_encode_mb(s, s->block, motion_x, motion_y);
2115         break;
2116     case AV_CODEC_ID_MSMPEG4V2:
2117     case AV_CODEC_ID_MSMPEG4V3:
2118     case AV_CODEC_ID_WMV1:
2119         if (CONFIG_MSMPEG4_ENCODER)
2120             ff_msmpeg4_encode_mb(s, s->block, motion_x, motion_y);
2121         break;
2122     case AV_CODEC_ID_WMV2:
2123         if (CONFIG_WMV2_ENCODER)
2124             ff_wmv2_encode_mb(s, s->block, motion_x, motion_y);
2125         break;
2126     case AV_CODEC_ID_H261:
2127         if (CONFIG_H261_ENCODER)
2128             ff_h261_encode_mb(s, s->block, motion_x, motion_y);
2129         break;
2130     case AV_CODEC_ID_H263:
2131     case AV_CODEC_ID_H263P:
2132     case AV_CODEC_ID_FLV1:
2133     case AV_CODEC_ID_RV10:
2134     case AV_CODEC_ID_RV20:
2135         if (CONFIG_H263_ENCODER)
2136             ff_h263_encode_mb(s, s->block, motion_x, motion_y);
2137         break;
2138     case AV_CODEC_ID_MJPEG:
2139     case AV_CODEC_ID_AMV:
2140         if (CONFIG_MJPEG_ENCODER)
2141             ff_mjpeg_encode_mb(s, s->block);
2142         break;
2143     default:
2144         av_assert1(0);
2145     }
2146 }
2147
2148 static av_always_inline void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
2149 {
2150     if (s->chroma_format == CHROMA_420) encode_mb_internal(s, motion_x, motion_y,  8, 6);
2151     else                                encode_mb_internal(s, motion_x, motion_y, 16, 8);
2152 }
2153
2154 static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int type){
2155     int i;
2156
2157     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2158
2159     /* mpeg1 */
2160     d->mb_skip_run= s->mb_skip_run;
2161     for(i=0; i<3; i++)
2162         d->last_dc[i] = s->last_dc[i];
2163
2164     /* statistics */
2165     d->mv_bits= s->mv_bits;
2166     d->i_tex_bits= s->i_tex_bits;
2167     d->p_tex_bits= s->p_tex_bits;
2168     d->i_count= s->i_count;
2169     d->f_count= s->f_count;
2170     d->b_count= s->b_count;
2171     d->skip_count= s->skip_count;
2172     d->misc_bits= s->misc_bits;
2173     d->last_bits= 0;
2174
2175     d->mb_skipped= 0;
2176     d->qscale= s->qscale;
2177     d->dquant= s->dquant;
2178
2179     d->esc3_level_length= s->esc3_level_length;
2180 }
2181
2182 static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){
2183     int i;
2184
2185     memcpy(d->mv, s->mv, 2*4*2*sizeof(int));
2186     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2187
2188     /* mpeg1 */
2189     d->mb_skip_run= s->mb_skip_run;
2190     for(i=0; i<3; i++)
2191         d->last_dc[i] = s->last_dc[i];
2192
2193     /* statistics */
2194     d->mv_bits= s->mv_bits;
2195     d->i_tex_bits= s->i_tex_bits;
2196     d->p_tex_bits= s->p_tex_bits;
2197     d->i_count= s->i_count;
2198     d->f_count= s->f_count;
2199     d->b_count= s->b_count;
2200     d->skip_count= s->skip_count;
2201     d->misc_bits= s->misc_bits;
2202
2203     d->mb_intra= s->mb_intra;
2204     d->mb_skipped= s->mb_skipped;
2205     d->mv_type= s->mv_type;
2206     d->mv_dir= s->mv_dir;
2207     d->pb= s->pb;
2208     if(s->data_partitioning){
2209         d->pb2= s->pb2;
2210         d->tex_pb= s->tex_pb;
2211     }
2212     d->block= s->block;
2213     for(i=0; i<8; i++)
2214         d->block_last_index[i]= s->block_last_index[i];
2215     d->interlaced_dct= s->interlaced_dct;
2216     d->qscale= s->qscale;
2217
2218     d->esc3_level_length= s->esc3_level_length;
2219 }
2220
2221 static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type,
2222                            PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2],
2223                            int *dmin, int *next_block, int motion_x, int motion_y)
2224 {
2225     int score;
2226     uint8_t *dest_backup[3];
2227
2228     copy_context_before_encode(s, backup, type);
2229
2230     s->block= s->blocks[*next_block];
2231     s->pb= pb[*next_block];
2232     if(s->data_partitioning){
2233         s->pb2   = pb2   [*next_block];
2234         s->tex_pb= tex_pb[*next_block];
2235     }
2236
2237     if(*next_block){
2238         memcpy(dest_backup, s->dest, sizeof(s->dest));
2239         s->dest[0] = s->rd_scratchpad;
2240         s->dest[1] = s->rd_scratchpad + 16*s->linesize;
2241         s->dest[2] = s->rd_scratchpad + 16*s->linesize + 8;
2242         assert(s->linesize >= 32); //FIXME
2243     }
2244
2245     encode_mb(s, motion_x, motion_y);
2246
2247     score= put_bits_count(&s->pb);
2248     if(s->data_partitioning){
2249         score+= put_bits_count(&s->pb2);
2250         score+= put_bits_count(&s->tex_pb);
2251     }
2252
2253     if(s->avctx->mb_decision == FF_MB_DECISION_RD){
2254         ff_MPV_decode_mb(s, s->block);
2255
2256         score *= s->lambda2;
2257         score += sse_mb(s) << FF_LAMBDA_SHIFT;
2258     }
2259
2260     if(*next_block){
2261         memcpy(s->dest, dest_backup, sizeof(s->dest));
2262     }
2263
2264     if(score<*dmin){
2265         *dmin= score;
2266         *next_block^=1;
2267
2268         copy_context_after_encode(best, s, type);
2269     }
2270 }
2271
2272 static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride){
2273     uint32_t *sq = ff_squareTbl + 256;
2274     int acc=0;
2275     int x,y;
2276
2277     if(w==16 && h==16)
2278         return s->dsp.sse[0](NULL, src1, src2, stride, 16);
2279     else if(w==8 && h==8)
2280         return s->dsp.sse[1](NULL, src1, src2, stride, 8);
2281
2282     for(y=0; y<h; y++){
2283         for(x=0; x<w; x++){
2284             acc+= sq[src1[x + y*stride] - src2[x + y*stride]];
2285         }
2286     }
2287
2288     av_assert2(acc>=0);
2289
2290     return acc;
2291 }
2292
2293 static int sse_mb(MpegEncContext *s){
2294     int w= 16;
2295     int h= 16;
2296
2297     if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
2298     if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
2299
2300     if(w==16 && h==16)
2301       if(s->avctx->mb_cmp == FF_CMP_NSSE){
2302         return  s->dsp.nsse[0](s, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
2303                +s->dsp.nsse[1](s, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
2304                +s->dsp.nsse[1](s, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
2305       }else{
2306         return  s->dsp.sse[0](NULL, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
2307                +s->dsp.sse[1](NULL, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
2308                +s->dsp.sse[1](NULL, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
2309       }
2310     else
2311         return  sse(s, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], w, h, s->linesize)
2312                +sse(s, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize)
2313                +sse(s, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize);
2314 }
2315
2316 static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){
2317     MpegEncContext *s= *(void**)arg;
2318
2319
2320     s->me.pre_pass=1;
2321     s->me.dia_size= s->avctx->pre_dia_size;
2322     s->first_slice_line=1;
2323     for(s->mb_y= s->end_mb_y-1; s->mb_y >= s->start_mb_y; s->mb_y--) {
2324         for(s->mb_x=s->mb_width-1; s->mb_x >=0 ;s->mb_x--) {
2325             ff_pre_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2326         }
2327         s->first_slice_line=0;
2328     }
2329
2330     s->me.pre_pass=0;
2331
2332     return 0;
2333 }
2334
2335 static int estimate_motion_thread(AVCodecContext *c, void *arg){
2336     MpegEncContext *s= *(void**)arg;
2337
2338     ff_check_alignment();
2339
2340     s->me.dia_size= s->avctx->dia_size;
2341     s->first_slice_line=1;
2342     for(s->mb_y= s->start_mb_y; s->mb_y < s->end_mb_y; s->mb_y++) {
2343         s->mb_x=0; //for block init below
2344         ff_init_block_index(s);
2345         for(s->mb_x=0; s->mb_x < s->mb_width; s->mb_x++) {
2346             s->block_index[0]+=2;
2347             s->block_index[1]+=2;
2348             s->block_index[2]+=2;
2349             s->block_index[3]+=2;
2350
2351             /* compute motion vector & mb_type and store in context */
2352             if(s->pict_type==AV_PICTURE_TYPE_B)
2353                 ff_estimate_b_frame_motion(s, s->mb_x, s->mb_y);
2354             else
2355                 ff_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2356         }
2357         s->first_slice_line=0;
2358     }
2359     return 0;
2360 }
2361
2362 static int mb_var_thread(AVCodecContext *c, void *arg){
2363     MpegEncContext *s= *(void**)arg;
2364     int mb_x, mb_y;
2365
2366     ff_check_alignment();
2367
2368     for(mb_y=s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2369         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2370             int xx = mb_x * 16;
2371             int yy = mb_y * 16;
2372             uint8_t *pix = s->new_picture.f.data[0] + (yy * s->linesize) + xx;
2373             int varc;
2374             int sum = s->dsp.pix_sum(pix, s->linesize);
2375
2376             varc = (s->dsp.pix_norm1(pix, s->linesize) - (((unsigned)sum*sum)>>8) + 500 + 128)>>8;
2377
2378             s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc;
2379             s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
2380             s->me.mb_var_sum_temp    += varc;
2381         }
2382     }
2383     return 0;
2384 }
2385
2386 static void write_slice_end(MpegEncContext *s){
2387     if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4){
2388         if(s->partitioned_frame){
2389             ff_mpeg4_merge_partitions(s);
2390         }
2391
2392         ff_mpeg4_stuffing(&s->pb);
2393     }else if(CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG){
2394         ff_mjpeg_encode_stuffing(s);
2395     }
2396
2397     avpriv_align_put_bits(&s->pb);
2398     flush_put_bits(&s->pb);
2399
2400     if((s->flags&CODEC_FLAG_PASS1) && !s->partitioned_frame)
2401         s->misc_bits+= get_bits_diff(s);
2402 }
2403
2404 static void write_mb_info(MpegEncContext *s)
2405 {
2406     uint8_t *ptr = s->mb_info_ptr + s->mb_info_size - 12;
2407     int offset = put_bits_count(&s->pb);
2408     int mba  = s->mb_x + s->mb_width * (s->mb_y % s->gob_index);
2409     int gobn = s->mb_y / s->gob_index;
2410     int pred_x, pred_y;
2411     if (CONFIG_H263_ENCODER)
2412         ff_h263_pred_motion(s, 0, 0, &pred_x, &pred_y);
2413     bytestream_put_le32(&ptr, offset);
2414     bytestream_put_byte(&ptr, s->qscale);
2415     bytestream_put_byte(&ptr, gobn);
2416     bytestream_put_le16(&ptr, mba);
2417     bytestream_put_byte(&ptr, pred_x); /* hmv1 */
2418     bytestream_put_byte(&ptr, pred_y); /* vmv1 */
2419     /* 4MV not implemented */
2420     bytestream_put_byte(&ptr, 0); /* hmv2 */
2421     bytestream_put_byte(&ptr, 0); /* vmv2 */
2422 }
2423
2424 static void update_mb_info(MpegEncContext *s, int startcode)
2425 {
2426     if (!s->mb_info)
2427         return;
2428     if (put_bits_count(&s->pb) - s->prev_mb_info*8 >= s->mb_info*8) {
2429         s->mb_info_size += 12;
2430         s->prev_mb_info = s->last_mb_info;
2431     }
2432     if (startcode) {
2433         s->prev_mb_info = put_bits_count(&s->pb)/8;
2434         /* This might have incremented mb_info_size above, and we return without
2435          * actually writing any info into that slot yet. But in that case,
2436          * this will be called again at the start of the after writing the
2437          * start code, actually writing the mb info. */
2438         return;
2439     }
2440
2441     s->last_mb_info = put_bits_count(&s->pb)/8;
2442     if (!s->mb_info_size)
2443         s->mb_info_size += 12;
2444     write_mb_info(s);
2445 }
2446
2447 static int encode_thread(AVCodecContext *c, void *arg){
2448     MpegEncContext *s= *(void**)arg;
2449     int mb_x, mb_y, pdif = 0;
2450     int chr_h= 16>>s->chroma_y_shift;
2451     int i, j;
2452     MpegEncContext best_s, backup_s;
2453     uint8_t bit_buf[2][MAX_MB_BYTES];
2454     uint8_t bit_buf2[2][MAX_MB_BYTES];
2455     uint8_t bit_buf_tex[2][MAX_MB_BYTES];
2456     PutBitContext pb[2], pb2[2], tex_pb[2];
2457
2458     ff_check_alignment();
2459
2460     for(i=0; i<2; i++){
2461         init_put_bits(&pb    [i], bit_buf    [i], MAX_MB_BYTES);
2462         init_put_bits(&pb2   [i], bit_buf2   [i], MAX_MB_BYTES);
2463         init_put_bits(&tex_pb[i], bit_buf_tex[i], MAX_MB_BYTES);
2464     }
2465
2466     s->last_bits= put_bits_count(&s->pb);
2467     s->mv_bits=0;
2468     s->misc_bits=0;
2469     s->i_tex_bits=0;
2470     s->p_tex_bits=0;
2471     s->i_count=0;
2472     s->f_count=0;
2473     s->b_count=0;
2474     s->skip_count=0;
2475
2476     for(i=0; i<3; i++){
2477         /* init last dc values */
2478         /* note: quant matrix value (8) is implied here */
2479         s->last_dc[i] = 128 << s->intra_dc_precision;
2480
2481         s->current_picture.f.error[i] = 0;
2482     }
2483     if(s->codec_id==AV_CODEC_ID_AMV){
2484         s->last_dc[0] = 128*8/13;
2485         s->last_dc[1] = 128*8/14;
2486         s->last_dc[2] = 128*8/14;
2487     }
2488     s->mb_skip_run = 0;
2489     memset(s->last_mv, 0, sizeof(s->last_mv));
2490
2491     s->last_mv_dir = 0;
2492
2493     switch(s->codec_id){
2494     case AV_CODEC_ID_H263:
2495     case AV_CODEC_ID_H263P:
2496     case AV_CODEC_ID_FLV1:
2497         if (CONFIG_H263_ENCODER)
2498             s->gob_index = ff_h263_get_gob_height(s);
2499         break;
2500     case AV_CODEC_ID_MPEG4:
2501         if(CONFIG_MPEG4_ENCODER && s->partitioned_frame)
2502             ff_mpeg4_init_partitions(s);
2503         break;
2504     }
2505
2506     s->resync_mb_x=0;
2507     s->resync_mb_y=0;
2508     s->first_slice_line = 1;
2509     s->ptr_lastgob = s->pb.buf;
2510     for(mb_y= s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2511         s->mb_x=0;
2512         s->mb_y= mb_y;
2513
2514         ff_set_qscale(s, s->qscale);
2515         ff_init_block_index(s);
2516
2517         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2518             int xy= mb_y*s->mb_stride + mb_x; // removed const, H261 needs to adjust this
2519             int mb_type= s->mb_type[xy];
2520 //            int d;
2521             int dmin= INT_MAX;
2522             int dir;
2523
2524             if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < MAX_MB_BYTES){
2525                 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2526                 return -1;
2527             }
2528             if(s->data_partitioning){
2529                 if(   s->pb2   .buf_end - s->pb2   .buf - (put_bits_count(&s->    pb2)>>3) < MAX_MB_BYTES
2530                    || s->tex_pb.buf_end - s->tex_pb.buf - (put_bits_count(&s->tex_pb )>>3) < MAX_MB_BYTES){
2531                     av_log(s->avctx, AV_LOG_ERROR, "encoded partitioned frame too large\n");
2532                     return -1;
2533                 }
2534             }
2535
2536             s->mb_x = mb_x;
2537             s->mb_y = mb_y;  // moved into loop, can get changed by H.261
2538             ff_update_block_index(s);
2539
2540             if(CONFIG_H261_ENCODER && s->codec_id == AV_CODEC_ID_H261){
2541                 ff_h261_reorder_mb_index(s);
2542                 xy= s->mb_y*s->mb_stride + s->mb_x;
2543                 mb_type= s->mb_type[xy];
2544             }
2545
2546             /* write gob / video packet header  */
2547             if(s->rtp_mode){
2548                 int current_packet_size, is_gob_start;
2549
2550                 current_packet_size= ((put_bits_count(&s->pb)+7)>>3) - (s->ptr_lastgob - s->pb.buf);
2551
2552                 is_gob_start= s->avctx->rtp_payload_size && current_packet_size >= s->avctx->rtp_payload_size && mb_y + mb_x>0;
2553
2554                 if(s->start_mb_y == mb_y && mb_y > 0 && mb_x==0) is_gob_start=1;
2555
2556                 switch(s->codec_id){
2557                 case AV_CODEC_ID_H263:
2558                 case AV_CODEC_ID_H263P:
2559                     if(!s->h263_slice_structured)
2560                         if(s->mb_x || s->mb_y%s->gob_index) is_gob_start=0;
2561                     break;
2562                 case AV_CODEC_ID_MPEG2VIDEO:
2563                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
2564                 case AV_CODEC_ID_MPEG1VIDEO:
2565                     if(s->mb_skip_run) is_gob_start=0;
2566                     break;
2567                 case AV_CODEC_ID_MJPEG:
2568                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
2569                     break;
2570                 }
2571
2572                 if(is_gob_start){
2573                     if(s->start_mb_y != mb_y || mb_x!=0){
2574                         write_slice_end(s);
2575                         if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4 && s->partitioned_frame){
2576                             ff_mpeg4_init_partitions(s);
2577                         }
2578                     }
2579
2580                     av_assert2((put_bits_count(&s->pb)&7) == 0);
2581                     current_packet_size= put_bits_ptr(&s->pb) - s->ptr_lastgob;
2582
2583                     if(s->avctx->error_rate && s->resync_mb_x + s->resync_mb_y > 0){
2584                         int r= put_bits_count(&s->pb)/8 + s->picture_number + 16 + s->mb_x + s->mb_y;
2585                         int d= 100 / s->avctx->error_rate;
2586                         if(r % d == 0){
2587                             current_packet_size=0;
2588                             s->pb.buf_ptr= s->ptr_lastgob;
2589                             assert(put_bits_ptr(&s->pb) == s->ptr_lastgob);
2590                         }
2591                     }
2592
2593                     if (s->avctx->rtp_callback){
2594                         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width + mb_x - s->resync_mb_x;
2595                         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, current_packet_size, number_mb);
2596                     }
2597                     update_mb_info(s, 1);
2598
2599                     switch(s->codec_id){
2600                     case AV_CODEC_ID_MPEG4:
2601                         if (CONFIG_MPEG4_ENCODER) {
2602                             ff_mpeg4_encode_video_packet_header(s);
2603                             ff_mpeg4_clean_buffers(s);
2604                         }
2605                     break;
2606                     case AV_CODEC_ID_MPEG1VIDEO:
2607                     case AV_CODEC_ID_MPEG2VIDEO:
2608                         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER) {
2609                             ff_mpeg1_encode_slice_header(s);
2610                             ff_mpeg1_clean_buffers(s);
2611                         }
2612                     break;
2613                     case AV_CODEC_ID_H263:
2614                     case AV_CODEC_ID_H263P:
2615                         if (CONFIG_H263_ENCODER)
2616                             ff_h263_encode_gob_header(s, mb_y);
2617                     break;
2618                     }
2619
2620                     if(s->flags&CODEC_FLAG_PASS1){
2621                         int bits= put_bits_count(&s->pb);
2622                         s->misc_bits+= bits - s->last_bits;
2623                         s->last_bits= bits;
2624                     }
2625
2626                     s->ptr_lastgob += current_packet_size;
2627                     s->first_slice_line=1;
2628                     s->resync_mb_x=mb_x;
2629                     s->resync_mb_y=mb_y;
2630                 }
2631             }
2632
2633             if(  (s->resync_mb_x   == s->mb_x)
2634                && s->resync_mb_y+1 == s->mb_y){
2635                 s->first_slice_line=0;
2636             }
2637
2638             s->mb_skipped=0;
2639             s->dquant=0; //only for QP_RD
2640
2641             update_mb_info(s, 0);
2642
2643             if (mb_type & (mb_type-1) || (s->mpv_flags & FF_MPV_FLAG_QP_RD)) { // more than 1 MB type possible or FF_MPV_FLAG_QP_RD
2644                 int next_block=0;
2645                 int pb_bits_count, pb2_bits_count, tex_pb_bits_count;
2646
2647                 copy_context_before_encode(&backup_s, s, -1);
2648                 backup_s.pb= s->pb;
2649                 best_s.data_partitioning= s->data_partitioning;
2650                 best_s.partitioned_frame= s->partitioned_frame;
2651                 if(s->data_partitioning){
2652                     backup_s.pb2= s->pb2;
2653                     backup_s.tex_pb= s->tex_pb;
2654                 }
2655
2656                 if(mb_type&CANDIDATE_MB_TYPE_INTER){
2657                     s->mv_dir = MV_DIR_FORWARD;
2658                     s->mv_type = MV_TYPE_16X16;
2659                     s->mb_intra= 0;
2660                     s->mv[0][0][0] = s->p_mv_table[xy][0];
2661                     s->mv[0][0][1] = s->p_mv_table[xy][1];
2662                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER, pb, pb2, tex_pb,
2663                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2664                 }
2665                 if(mb_type&CANDIDATE_MB_TYPE_INTER_I){
2666                     s->mv_dir = MV_DIR_FORWARD;
2667                     s->mv_type = MV_TYPE_FIELD;
2668                     s->mb_intra= 0;
2669                     for(i=0; i<2; i++){
2670                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
2671                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
2672                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
2673                     }
2674                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER_I, pb, pb2, tex_pb,
2675                                  &dmin, &next_block, 0, 0);
2676                 }
2677                 if(mb_type&CANDIDATE_MB_TYPE_SKIPPED){
2678                     s->mv_dir = MV_DIR_FORWARD;
2679                     s->mv_type = MV_TYPE_16X16;
2680                     s->mb_intra= 0;
2681                     s->mv[0][0][0] = 0;
2682                     s->mv[0][0][1] = 0;
2683                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_SKIPPED, pb, pb2, tex_pb,
2684                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2685                 }
2686                 if(mb_type&CANDIDATE_MB_TYPE_INTER4V){
2687                     s->mv_dir = MV_DIR_FORWARD;
2688                     s->mv_type = MV_TYPE_8X8;
2689                     s->mb_intra= 0;
2690                     for(i=0; i<4; i++){
2691                         s->mv[0][i][0] = s->current_picture.f.motion_val[0][s->block_index[i]][0];
2692                         s->mv[0][i][1] = s->current_picture.f.motion_val[0][s->block_index[i]][1];
2693                     }
2694                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER4V, pb, pb2, tex_pb,
2695                                  &dmin, &next_block, 0, 0);
2696                 }
2697                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD){
2698                     s->mv_dir = MV_DIR_FORWARD;
2699                     s->mv_type = MV_TYPE_16X16;
2700                     s->mb_intra= 0;
2701                     s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
2702                     s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
2703                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD, pb, pb2, tex_pb,
2704                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2705                 }
2706                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD){
2707                     s->mv_dir = MV_DIR_BACKWARD;
2708                     s->mv_type = MV_TYPE_16X16;
2709                     s->mb_intra= 0;
2710                     s->mv[1][0][0] = s->b_back_mv_table[xy][0];
2711                     s->mv[1][0][1] = s->b_back_mv_table[xy][1];
2712                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD, pb, pb2, tex_pb,
2713                                  &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]);
2714                 }
2715                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR){
2716                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2717                     s->mv_type = MV_TYPE_16X16;
2718                     s->mb_intra= 0;
2719                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
2720                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
2721                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
2722                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
2723                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR, pb, pb2, tex_pb,
2724                                  &dmin, &next_block, 0, 0);
2725                 }
2726                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD_I){
2727                     s->mv_dir = MV_DIR_FORWARD;
2728                     s->mv_type = MV_TYPE_FIELD;
2729                     s->mb_intra= 0;
2730                     for(i=0; i<2; i++){
2731                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
2732                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
2733                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
2734                     }
2735                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD_I, pb, pb2, tex_pb,
2736                                  &dmin, &next_block, 0, 0);
2737                 }
2738                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD_I){
2739                     s->mv_dir = MV_DIR_BACKWARD;
2740                     s->mv_type = MV_TYPE_FIELD;
2741                     s->mb_intra= 0;
2742                     for(i=0; i<2; i++){
2743                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
2744                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
2745                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
2746                     }
2747                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD_I, pb, pb2, tex_pb,
2748                                  &dmin, &next_block, 0, 0);
2749                 }
2750                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR_I){
2751                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2752                     s->mv_type = MV_TYPE_FIELD;
2753                     s->mb_intra= 0;
2754                     for(dir=0; dir<2; dir++){
2755                         for(i=0; i<2; i++){
2756                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
2757                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
2758                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
2759                         }
2760                     }
2761                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR_I, pb, pb2, tex_pb,
2762                                  &dmin, &next_block, 0, 0);
2763                 }
2764                 if(mb_type&CANDIDATE_MB_TYPE_INTRA){
2765                     s->mv_dir = 0;
2766                     s->mv_type = MV_TYPE_16X16;
2767                     s->mb_intra= 1;
2768                     s->mv[0][0][0] = 0;
2769                     s->mv[0][0][1] = 0;
2770                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTRA, pb, pb2, tex_pb,
2771                                  &dmin, &next_block, 0, 0);
2772                     if(s->h263_pred || s->h263_aic){
2773                         if(best_s.mb_intra)
2774                             s->mbintra_table[mb_x + mb_y*s->mb_stride]=1;
2775                         else
2776                             ff_clean_intra_table_entries(s); //old mode?
2777                     }
2778                 }
2779
2780                 if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) && dmin < INT_MAX) {
2781                     if(best_s.mv_type==MV_TYPE_16X16){ //FIXME move 4mv after QPRD
2782                         const int last_qp= backup_s.qscale;
2783                         int qpi, qp, dc[6];
2784                         DCTELEM ac[6][16];
2785                         const int mvdir= (best_s.mv_dir&MV_DIR_BACKWARD) ? 1 : 0;
2786                         static const int dquant_tab[4]={-1,1,-2,2};
2787
2788                         av_assert2(backup_s.dquant == 0);
2789
2790                         //FIXME intra
2791                         s->mv_dir= best_s.mv_dir;
2792                         s->mv_type = MV_TYPE_16X16;
2793                         s->mb_intra= best_s.mb_intra;
2794                         s->mv[0][0][0] = best_s.mv[0][0][0];
2795                         s->mv[0][0][1] = best_s.mv[0][0][1];
2796                         s->mv[1][0][0] = best_s.mv[1][0][0];
2797                         s->mv[1][0][1] = best_s.mv[1][0][1];
2798
2799                         qpi = s->pict_type == AV_PICTURE_TYPE_B ? 2 : 0;
2800                         for(; qpi<4; qpi++){
2801                             int dquant= dquant_tab[qpi];
2802                             qp= last_qp + dquant;
2803                             if(qp < s->avctx->qmin || qp > s->avctx->qmax)
2804                                 continue;
2805                             backup_s.dquant= dquant;
2806                             if(s->mb_intra && s->dc_val[0]){
2807                                 for(i=0; i<6; i++){
2808                                     dc[i]= s->dc_val[0][ s->block_index[i] ];
2809                                     memcpy(ac[i], s->ac_val[0][s->block_index[i]], sizeof(DCTELEM)*16);
2810                                 }
2811                             }
2812
2813                             encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
2814                                          &dmin, &next_block, s->mv[mvdir][0][0], s->mv[mvdir][0][1]);
2815                             if(best_s.qscale != qp){
2816                                 if(s->mb_intra && s->dc_val[0]){
2817                                     for(i=0; i<6; i++){
2818                                         s->dc_val[0][ s->block_index[i] ]= dc[i];
2819                                         memcpy(s->ac_val[0][s->block_index[i]], ac[i], sizeof(DCTELEM)*16);
2820                                     }
2821                                 }
2822                             }
2823                         }
2824                     }
2825                 }
2826                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT){
2827                     int mx= s->b_direct_mv_table[xy][0];
2828                     int my= s->b_direct_mv_table[xy][1];
2829
2830                     backup_s.dquant = 0;
2831                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
2832                     s->mb_intra= 0;
2833                     ff_mpeg4_set_direct_mv(s, mx, my);
2834                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
2835                                  &dmin, &next_block, mx, my);
2836                 }
2837                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT0){
2838                     backup_s.dquant = 0;
2839                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
2840                     s->mb_intra= 0;
2841                     ff_mpeg4_set_direct_mv(s, 0, 0);
2842                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
2843                                  &dmin, &next_block, 0, 0);
2844                 }
2845                 if (!best_s.mb_intra && s->mpv_flags & FF_MPV_FLAG_SKIP_RD) {
2846                     int coded=0;
2847                     for(i=0; i<6; i++)
2848                         coded |= s->block_last_index[i];
2849                     if(coded){
2850                         int mx,my;
2851                         memcpy(s->mv, best_s.mv, sizeof(s->mv));
2852                         if(CONFIG_MPEG4_ENCODER && best_s.mv_dir & MV_DIRECT){
2853                             mx=my=0; //FIXME find the one we actually used
2854                             ff_mpeg4_set_direct_mv(s, mx, my);
2855                         }else if(best_s.mv_dir&MV_DIR_BACKWARD){
2856                             mx= s->mv[1][0][0];
2857                             my= s->mv[1][0][1];
2858                         }else{
2859                             mx= s->mv[0][0][0];
2860                             my= s->mv[0][0][1];
2861                         }
2862
2863                         s->mv_dir= best_s.mv_dir;
2864                         s->mv_type = best_s.mv_type;
2865                         s->mb_intra= 0;
2866 /*                        s->mv[0][0][0] = best_s.mv[0][0][0];
2867                         s->mv[0][0][1] = best_s.mv[0][0][1];
2868                         s->mv[1][0][0] = best_s.mv[1][0][0];
2869                         s->mv[1][0][1] = best_s.mv[1][0][1];*/
2870                         backup_s.dquant= 0;
2871                         s->skipdct=1;
2872                         encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
2873                                         &dmin, &next_block, mx, my);
2874                         s->skipdct=0;
2875                     }
2876                 }
2877
2878                 s->current_picture.f.qscale_table[xy] = best_s.qscale;
2879
2880                 copy_context_after_encode(s, &best_s, -1);
2881
2882                 pb_bits_count= put_bits_count(&s->pb);
2883                 flush_put_bits(&s->pb);
2884                 avpriv_copy_bits(&backup_s.pb, bit_buf[next_block^1], pb_bits_count);
2885                 s->pb= backup_s.pb;
2886
2887                 if(s->data_partitioning){
2888                     pb2_bits_count= put_bits_count(&s->pb2);
2889                     flush_put_bits(&s->pb2);
2890                     avpriv_copy_bits(&backup_s.pb2, bit_buf2[next_block^1], pb2_bits_count);
2891                     s->pb2= backup_s.pb2;
2892
2893                     tex_pb_bits_count= put_bits_count(&s->tex_pb);
2894                     flush_put_bits(&s->tex_pb);
2895                     avpriv_copy_bits(&backup_s.tex_pb, bit_buf_tex[next_block^1], tex_pb_bits_count);
2896                     s->tex_pb= backup_s.tex_pb;
2897                 }
2898                 s->last_bits= put_bits_count(&s->pb);
2899
2900                 if (CONFIG_H263_ENCODER &&
2901                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
2902                     ff_h263_update_motion_val(s);
2903
2904                 if(next_block==0){ //FIXME 16 vs linesize16
2905                     s->dsp.put_pixels_tab[0][0](s->dest[0], s->rd_scratchpad                     , s->linesize  ,16);
2906                     s->dsp.put_pixels_tab[1][0](s->dest[1], s->rd_scratchpad + 16*s->linesize    , s->uvlinesize, 8);
2907                     s->dsp.put_pixels_tab[1][0](s->dest[2], s->rd_scratchpad + 16*s->linesize + 8, s->uvlinesize, 8);
2908                 }
2909
2910                 if(s->avctx->mb_decision == FF_MB_DECISION_BITS)
2911                     ff_MPV_decode_mb(s, s->block);
2912             } else {
2913                 int motion_x = 0, motion_y = 0;
2914                 s->mv_type=MV_TYPE_16X16;
2915                 // only one MB-Type possible
2916
2917                 switch(mb_type){
2918                 case CANDIDATE_MB_TYPE_INTRA:
2919                     s->mv_dir = 0;
2920                     s->mb_intra= 1;
2921                     motion_x= s->mv[0][0][0] = 0;
2922                     motion_y= s->mv[0][0][1] = 0;
2923                     break;
2924                 case CANDIDATE_MB_TYPE_INTER:
2925                     s->mv_dir = MV_DIR_FORWARD;
2926                     s->mb_intra= 0;
2927                     motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
2928                     motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
2929                     break;
2930                 case CANDIDATE_MB_TYPE_INTER_I:
2931                     s->mv_dir = MV_DIR_FORWARD;
2932                     s->mv_type = MV_TYPE_FIELD;
2933                     s->mb_intra= 0;
2934                     for(i=0; i<2; i++){
2935                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
2936                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
2937                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
2938                     }
2939                     break;
2940                 case CANDIDATE_MB_TYPE_INTER4V:
2941                     s->mv_dir = MV_DIR_FORWARD;
2942                     s->mv_type = MV_TYPE_8X8;
2943                     s->mb_intra= 0;
2944                     for(i=0; i<4; i++){
2945                         s->mv[0][i][0] = s->current_picture.f.motion_val[0][s->block_index[i]][0];
2946                         s->mv[0][i][1] = s->current_picture.f.motion_val[0][s->block_index[i]][1];
2947                     }
2948                     break;
2949                 case CANDIDATE_MB_TYPE_DIRECT:
2950                     if (CONFIG_MPEG4_ENCODER) {
2951                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
2952                         s->mb_intra= 0;
2953                         motion_x=s->b_direct_mv_table[xy][0];
2954                         motion_y=s->b_direct_mv_table[xy][1];
2955                         ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
2956                     }
2957                     break;
2958                 case CANDIDATE_MB_TYPE_DIRECT0:
2959                     if (CONFIG_MPEG4_ENCODER) {
2960                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
2961                         s->mb_intra= 0;
2962                         ff_mpeg4_set_direct_mv(s, 0, 0);
2963                     }
2964                     break;
2965                 case CANDIDATE_MB_TYPE_BIDIR:
2966                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2967                     s->mb_intra= 0;
2968                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
2969                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
2970                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
2971                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
2972                     break;
2973                 case CANDIDATE_MB_TYPE_BACKWARD:
2974                     s->mv_dir = MV_DIR_BACKWARD;
2975                     s->mb_intra= 0;
2976                     motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
2977                     motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
2978                     break;
2979                 case CANDIDATE_MB_TYPE_FORWARD:
2980                     s->mv_dir = MV_DIR_FORWARD;
2981                     s->mb_intra= 0;
2982                     motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
2983                     motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
2984                     break;
2985                 case CANDIDATE_MB_TYPE_FORWARD_I:
2986                     s->mv_dir = MV_DIR_FORWARD;
2987                     s->mv_type = MV_TYPE_FIELD;
2988                     s->mb_intra= 0;
2989                     for(i=0; i<2; i++){
2990                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
2991                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
2992                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
2993                     }
2994                     break;
2995                 case CANDIDATE_MB_TYPE_BACKWARD_I:
2996                     s->mv_dir = MV_DIR_BACKWARD;
2997                     s->mv_type = MV_TYPE_FIELD;
2998                     s->mb_intra= 0;
2999                     for(i=0; i<2; i++){
3000                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
3001                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
3002                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
3003                     }
3004                     break;
3005                 case CANDIDATE_MB_TYPE_BIDIR_I:
3006                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3007                     s->mv_type = MV_TYPE_FIELD;
3008                     s->mb_intra= 0;
3009                     for(dir=0; dir<2; dir++){
3010                         for(i=0; i<2; i++){
3011                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
3012                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
3013                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
3014                         }
3015                     }
3016                     break;
3017                 default:
3018                     av_log(s->avctx, AV_LOG_ERROR, "illegal MB type\n");
3019                 }
3020
3021                 encode_mb(s, motion_x, motion_y);
3022
3023                 // RAL: Update last macroblock type
3024                 s->last_mv_dir = s->mv_dir;
3025
3026                 if (CONFIG_H263_ENCODER &&
3027                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
3028                     ff_h263_update_motion_val(s);
3029
3030                 ff_MPV_decode_mb(s, s->block);
3031             }
3032
3033             /* clean the MV table in IPS frames for direct mode in B frames */
3034             if(s->mb_intra /* && I,P,S_TYPE */){
3035                 s->p_mv_table[xy][0]=0;
3036                 s->p_mv_table[xy][1]=0;
3037             }
3038
3039             if(s->flags&CODEC_FLAG_PSNR){
3040                 int w= 16;
3041                 int h= 16;
3042
3043                 if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
3044                 if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
3045
3046                 s->current_picture.f.error[0] += sse(
3047                     s, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
3048                     s->dest[0], w, h, s->linesize);
3049                 s->current_picture.f.error[1] += sse(
3050                     s, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3051                     s->dest[1], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3052                 s->current_picture.f.error[2] += sse(
3053                     s, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3054                     s->dest[2], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3055             }
3056             if(s->loop_filter){
3057                 if(CONFIG_H263_ENCODER && s->out_format == FMT_H263)
3058                     ff_h263_loop_filter(s);
3059             }
3060             av_dlog(s->avctx, "MB %d %d bits\n",
3061                     s->mb_x + s->mb_y * s->mb_stride, put_bits_count(&s->pb));
3062         }
3063     }
3064
3065     //not beautiful here but we must write it before flushing so it has to be here
3066     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type == AV_PICTURE_TYPE_I)
3067         ff_msmpeg4_encode_ext_header(s);
3068
3069     write_slice_end(s);
3070
3071     /* Send the last GOB if RTP */
3072     if (s->avctx->rtp_callback) {
3073         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width - s->resync_mb_x;
3074         pdif = put_bits_ptr(&s->pb) - s->ptr_lastgob;
3075         /* Call the RTP callback to send the last GOB */
3076         emms_c();
3077         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, pdif, number_mb);
3078     }
3079
3080     return 0;
3081 }
3082
3083 #define MERGE(field) dst->field += src->field; src->field=0
3084 static void merge_context_after_me(MpegEncContext *dst, MpegEncContext *src){
3085     MERGE(me.scene_change_score);
3086     MERGE(me.mc_mb_var_sum_temp);
3087     MERGE(me.mb_var_sum_temp);
3088 }
3089
3090 static void merge_context_after_encode(MpegEncContext *dst, MpegEncContext *src){
3091     int i;
3092
3093     MERGE(dct_count[0]); //note, the other dct vars are not part of the context
3094     MERGE(dct_count[1]);
3095     MERGE(mv_bits);
3096     MERGE(i_tex_bits);
3097     MERGE(p_tex_bits);
3098     MERGE(i_count);
3099     MERGE(f_count);
3100     MERGE(b_count);
3101     MERGE(skip_count);
3102     MERGE(misc_bits);
3103     MERGE(error_count);
3104     MERGE(padding_bug_score);
3105     MERGE(current_picture.f.error[0]);
3106     MERGE(current_picture.f.error[1]);
3107     MERGE(current_picture.f.error[2]);
3108
3109     if(dst->avctx->noise_reduction){
3110         for(i=0; i<64; i++){
3111             MERGE(dct_error_sum[0][i]);
3112             MERGE(dct_error_sum[1][i]);
3113         }
3114     }
3115
3116     assert(put_bits_count(&src->pb) % 8 ==0);
3117     assert(put_bits_count(&dst->pb) % 8 ==0);
3118     avpriv_copy_bits(&dst->pb, src->pb.buf, put_bits_count(&src->pb));
3119     flush_put_bits(&dst->pb);
3120 }
3121
3122 static int estimate_qp(MpegEncContext *s, int dry_run){
3123     if (s->next_lambda){
3124         s->current_picture_ptr->f.quality =
3125         s->current_picture.f.quality = s->next_lambda;
3126         if(!dry_run) s->next_lambda= 0;
3127     } else if (!s->fixed_qscale) {
3128         s->current_picture_ptr->f.quality =
3129         s->current_picture.f.quality = ff_rate_estimate_qscale(s, dry_run);
3130         if (s->current_picture.f.quality < 0)
3131             return -1;
3132     }
3133
3134     if(s->adaptive_quant){
3135         switch(s->codec_id){
3136         case AV_CODEC_ID_MPEG4:
3137             if (CONFIG_MPEG4_ENCODER)
3138                 ff_clean_mpeg4_qscales(s);
3139             break;
3140         case AV_CODEC_ID_H263:
3141         case AV_CODEC_ID_H263P:
3142         case AV_CODEC_ID_FLV1:
3143             if (CONFIG_H263_ENCODER)
3144                 ff_clean_h263_qscales(s);
3145             break;
3146         default:
3147             ff_init_qscale_tab(s);
3148         }
3149
3150         s->lambda= s->lambda_table[0];
3151         //FIXME broken
3152     }else
3153         s->lambda = s->current_picture.f.quality;
3154     update_qscale(s);
3155     return 0;
3156 }
3157
3158 /* must be called before writing the header */
3159 static void set_frame_distances(MpegEncContext * s){
3160     assert(s->current_picture_ptr->f.pts != AV_NOPTS_VALUE);
3161     s->time = s->current_picture_ptr->f.pts * s->avctx->time_base.num;
3162
3163     if(s->pict_type==AV_PICTURE_TYPE_B){
3164         s->pb_time= s->pp_time - (s->last_non_b_time - s->time);
3165         assert(s->pb_time > 0 && s->pb_time < s->pp_time);
3166     }else{
3167         s->pp_time= s->time - s->last_non_b_time;
3168         s->last_non_b_time= s->time;
3169         assert(s->picture_number==0 || s->pp_time > 0);
3170     }
3171 }
3172
/**
 * Encode the current picture into s->pb.
 *
 * Steps, in order: initialise timing and rounding state, pick a provisional
 * lambda, run motion estimation (or mark every macroblock intra for I
 * pictures) across all slice contexts, optionally turn a P picture into an
 * I picture on a scene change, choose f_code/b_code, run the final rate
 * control (estimate_qp), set up the MJPEG/AMV quantisation matrices, write
 * the picture header for the output format and finally run encode_thread
 * on every slice context and merge the results.
 *
 * @param s              encoder context
 * @param picture_number stored in s->picture_number and handed to the
 *                       picture header writers
 * @return 0 on success, -1 if estimate_qp() or ff_init_me() fails
 */
3173 static int encode_picture(MpegEncContext *s, int picture_number)
3174 {
3175     int i;
3176     int bits;
3177     int context_count = s->slice_context_count;
3178
3179     s->picture_number = picture_number;
3180
3181     /* Reset the average MB variance */
3182     s->me.mb_var_sum_temp    =
3183     s->me.mc_mb_var_sum_temp = 0;
3184
3185     /* we need to initialize some time vars before we can encode b-frames */
3186     // RAL: Condition added for MPEG1VIDEO
3187     if (s->codec_id == AV_CODEC_ID_MPEG1VIDEO || s->codec_id == AV_CODEC_ID_MPEG2VIDEO || (s->h263_pred && !s->msmpeg4_version))
3188         set_frame_distances(s);
3189     if(CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4)
3190         ff_set_mpeg4_time(s);
3191
3192     s->me.scene_change_score=0;
3193
3194 //    s->lambda= s->current_picture_ptr->quality; //FIXME qscale / ... stuff for ME rate distortion
3195
         /* I pictures reset no_rounding (1 only for msmpeg4 version >= 3);
          * other non-B pictures toggle it when flipflop rounding is enabled
          * or the codec is H263P/MPEG4; B pictures leave it unchanged */
3196     if(s->pict_type==AV_PICTURE_TYPE_I){
3197         if(s->msmpeg4_version >= 3) s->no_rounding=1;
3198         else                        s->no_rounding=0;
3199     }else if(s->pict_type!=AV_PICTURE_TYPE_B){
3200         if(s->flipflop_rounding || s->codec_id == AV_CODEC_ID_H263P || s->codec_id == AV_CODEC_ID_MPEG4)
3201             s->no_rounding ^= 1;
3202     }
3203
         /* provisional lambda for motion estimation: a dry-run rate control
          * in pass 2, otherwise (unless the qscale flag is set) the last
          * lambda used for this kind of picture */
3204     if(s->flags & CODEC_FLAG_PASS2){
3205         if (estimate_qp(s,1) < 0)
3206             return -1;
3207         ff_get_2pass_fcode(s);
3208     }else if(!(s->flags & CODEC_FLAG_QSCALE)){
3209         if(s->pict_type==AV_PICTURE_TYPE_B)
3210             s->lambda= s->last_lambda_for[s->pict_type];
3211         else
3212             s->lambda= s->last_lambda_for[s->last_non_b_pict_type];
3213         update_qscale(s);
3214     }
3215
         /* every codec except AMV uses the luma intra quant tables for chroma
          * too; chroma tables that are distinct from the luma ones are freed
          * before the pointers are aliased */
3216     if(s->codec_id != AV_CODEC_ID_AMV){
3217         if(s->q_chroma_intra_matrix   != s->q_intra_matrix  ) av_freep(&s->q_chroma_intra_matrix);
3218         if(s->q_chroma_intra_matrix16 != s->q_intra_matrix16) av_freep(&s->q_chroma_intra_matrix16);
3219         s->q_chroma_intra_matrix   = s->q_intra_matrix;
3220         s->q_chroma_intra_matrix16 = s->q_intra_matrix16;
3221     }
3222
3223     s->mb_intra=0; //for the rate distortion & bit compare functions
         /* context 0 is not updated here; only contexts 1.. are refreshed from s */
3224     for(i=1; i<context_count; i++){
3225         ff_update_duplicate_context(s->thread_context[i], s);
3226     }
3227
3228     if(ff_init_me(s)<0)
3229         return -1;
3230
3231     /* Estimate motion for every MB */
3232     if(s->pict_type != AV_PICTURE_TYPE_I){
             /* scale lambda and lambda2 by me_penalty_compensation/256, rounded */
3233         s->lambda = (s->lambda * s->avctx->me_penalty_compensation + 128)>>8;
3234         s->lambda2= (s->lambda2* (int64_t)s->avctx->me_penalty_compensation + 128)>>8;
             /* optional pre-pass: for non-B pictures without an ME threshold,
              * run when pre_me is set and the last non-B picture was an I
              * picture, or always when pre_me == 2 */
3235         if(s->pict_type != AV_PICTURE_TYPE_B && s->avctx->me_threshold==0){
3236             if((s->avctx->pre_me && s->last_non_b_pict_type==AV_PICTURE_TYPE_I) || s->avctx->pre_me==2){
3237                 s->avctx->execute(s->avctx, pre_estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3238             }
3239         }
3240
3241         s->avctx->execute(s->avctx, estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3242     }else /* if(s->pict_type == AV_PICTURE_TYPE_I) */{
3243         /* I-Frame */
3244         for(i=0; i<s->mb_stride*s->mb_height; i++)
3245             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3246
3247         if(!s->fixed_qscale){
3248             /* finding spatial complexity for I-frame rate control */
3249             s->avctx->execute(s->avctx, mb_var_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3250         }
3251     }
         /* accumulate the ME statistics of contexts 1.. into s */
3252     for(i=1; i<context_count; i++){
3253         merge_context_after_me(s, s->thread_context[i]);
3254     }
3255     s->current_picture.mc_mb_var_sum= s->current_picture_ptr->mc_mb_var_sum= s->me.mc_mb_var_sum_temp;
3256     s->current_picture.   mb_var_sum= s->current_picture_ptr->   mb_var_sum= s->me.   mb_var_sum_temp;
3257     emms_c();
3258
         /* scene change: a P picture whose score exceeds the threshold is
          * re-typed as an I picture and all macroblocks are marked intra */
3259     if(s->me.scene_change_score > s->avctx->scenechange_threshold && s->pict_type == AV_PICTURE_TYPE_P){
3260         s->pict_type= AV_PICTURE_TYPE_I;
3261         for(i=0; i<s->mb_stride*s->mb_height; i++)
3262             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3263         if(s->msmpeg4_version >= 3)
3264             s->no_rounding=1;
             /* NOTE(review): logs with s as context while the av_dlog call in
              * the macroblock loop of this file uses s->avctx - confirm */
3265         av_dlog(s, "Scene change detected, encoding as I Frame %d %d\n",
3266                 s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
3267     }
3268
         /* unless umvplus is set: pick f_code/b_code from the motion vector
          * tables and let ff_fix_long_mvs() handle vectors that do not fit
          * (presumably by clipping or changing the MB type - see its definition) */
3269     if(!s->umvplus){
3270         if(s->pict_type==AV_PICTURE_TYPE_P || s->pict_type==AV_PICTURE_TYPE_S) {
3271             s->f_code= ff_get_best_fcode(s, s->p_mv_table, CANDIDATE_MB_TYPE_INTER);
3272
3273             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3274                 int a,b;
3275                 a= ff_get_best_fcode(s, s->p_field_mv_table[0][0], CANDIDATE_MB_TYPE_INTER_I); //FIXME field_select
3276                 b= ff_get_best_fcode(s, s->p_field_mv_table[1][1], CANDIDATE_MB_TYPE_INTER_I);
3277                 s->f_code= FFMAX3(s->f_code, a, b);
3278             }
3279
3280             ff_fix_long_p_mvs(s);
3281             ff_fix_long_mvs(s, NULL, 0, s->p_mv_table, s->f_code, CANDIDATE_MB_TYPE_INTER, 0);
3282             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3283                 int j;
3284                 for(i=0; i<2; i++){
3285                     for(j=0; j<2; j++)
3286                         ff_fix_long_mvs(s, s->p_field_select_table[i], j,
3287                                         s->p_field_mv_table[i][j], s->f_code, CANDIDATE_MB_TYPE_INTER_I, 0);
3288                 }
3289             }
3290         }
3291
3292         if(s->pict_type==AV_PICTURE_TYPE_B){
3293             int a, b;
3294
                 /* f_code covers the forward tables, b_code the backward ones */
3295             a = ff_get_best_fcode(s, s->b_forw_mv_table, CANDIDATE_MB_TYPE_FORWARD);
3296             b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3297             s->f_code = FFMAX(a, b);
3298
3299             a = ff_get_best_fcode(s, s->b_back_mv_table, CANDIDATE_MB_TYPE_BACKWARD);
3300             b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3301             s->b_code = FFMAX(a, b);
3302
3303             ff_fix_long_mvs(s, NULL, 0, s->b_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_FORWARD, 1);
3304             ff_fix_long_mvs(s, NULL, 0, s->b_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BACKWARD, 1);
3305             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3306             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3307             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3308                 int dir, j;
3309                 for(dir=0; dir<2; dir++){
3310                     for(i=0; i<2; i++){
3311                         for(j=0; j<2; j++){
3312                             int type= dir ? (CANDIDATE_MB_TYPE_BACKWARD_I|CANDIDATE_MB_TYPE_BIDIR_I)
3313                                           : (CANDIDATE_MB_TYPE_FORWARD_I |CANDIDATE_MB_TYPE_BIDIR_I);
3314                             ff_fix_long_mvs(s, s->b_field_select_table[dir][i], j,
3315                                             s->b_field_mv_table[dir][i][j], dir ? s->b_code : s->f_code, type, 1);
3316                         }
3317                     }
3318                 }
3319             }
3320         }
3321     }
3322
         /* final rate control decision for this picture (not a dry run) */
3323     if (estimate_qp(s, 0) < 0)
3324         return -1;
3325
3326     if(s->qscale < 3 && s->max_qcoeff<=128 && s->pict_type==AV_PICTURE_TYPE_I && !(s->flags & CODEC_FLAG_QSCALE))
3327         s->qscale= 3; //reduce clipping problems
3328
3329     if (s->out_format == FMT_MJPEG) {
3330         /* for mjpeg, we do include qscale in the matrix */
3331         for(i=1;i<64;i++){
3332             int j= s->dsp.idct_permutation[i];
3333
3334             s->intra_matrix[j] = av_clip_uint8((ff_mpeg1_default_intra_matrix[i] * s->qscale) >> 3);
3335         }
3336         s->y_dc_scale_table=
3337         s->c_dc_scale_table= ff_mpeg2_dc_scale_table[s->intra_dc_precision];
3338         s->intra_matrix[0] = ff_mpeg2_dc_scale_table[s->intra_dc_precision][8];
3339         ff_convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
3340                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
             /* qscale is already folded into the matrix, so it is pinned to 8 */
3341         s->qscale= 8;
3342     }
3343     if(s->codec_id == AV_CODEC_ID_AMV){
             /* AMV: constant DC scale tables (13 for y, 14 for c) and AC
              * matrices taken from sp5x_quant_table rows 5*2+0 and 5*2+1 */
3344         static const uint8_t y[32]={13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13};
3345         static const uint8_t c[32]={14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14};
3346         for(i=1;i<64;i++){
3347             int j= s->dsp.idct_permutation[ff_zigzag_direct[i]];
3348
3349             s->intra_matrix[j] = sp5x_quant_table[5*2+0][i];
3350             s->chroma_intra_matrix[j] = sp5x_quant_table[5*2+1][i];
3351         }
3352         s->y_dc_scale_table= y;
3353         s->c_dc_scale_table= c;
3354         s->intra_matrix[0] = 13;
3355         s->chroma_intra_matrix[0] = 14;
3356         ff_convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
3357                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
3358         ff_convert_matrix(&s->dsp, s->q_chroma_intra_matrix, s->q_chroma_intra_matrix16,
3359                        s->chroma_intra_matrix, s->intra_quant_bias, 8, 8, 1);
3360         s->qscale= 8;
3361     }
3362
3363     //FIXME var duplication
3364     s->current_picture_ptr->f.key_frame =
3365     s->current_picture.f.key_frame = s->pict_type == AV_PICTURE_TYPE_I; //FIXME pic_ptr
3366     s->current_picture_ptr->f.pict_type =
3367     s->current_picture.f.pict_type = s->pict_type;
3368
3369     if (s->current_picture.f.key_frame)
3370         s->picture_in_gop_number=0;
3371
3372     s->mb_x = s->mb_y = 0;
         /* remember the bit position so the header size can be measured below */
3373     s->last_bits= put_bits_count(&s->pb);
         /* write the picture header that matches the output format / codec */
3374     switch(s->out_format) {
3375     case FMT_MJPEG:
3376         if (CONFIG_MJPEG_ENCODER)
3377             ff_mjpeg_encode_picture_header(s);
3378         break;
3379     case FMT_H261:
3380         if (CONFIG_H261_ENCODER)
3381             ff_h261_encode_picture_header(s, picture_number);
3382         break;
3383     case FMT_H263:
3384         if (CONFIG_WMV2_ENCODER && s->codec_id == AV_CODEC_ID_WMV2)
3385             ff_wmv2_encode_picture_header(s, picture_number);
3386         else if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
3387             ff_msmpeg4_encode_picture_header(s, picture_number);
3388         else if (CONFIG_MPEG4_ENCODER && s->h263_pred)
3389             ff_mpeg4_encode_picture_header(s, picture_number);
3390         else if (CONFIG_RV10_ENCODER && s->codec_id == AV_CODEC_ID_RV10)
3391             ff_rv10_encode_picture_header(s, picture_number);
3392         else if (CONFIG_RV20_ENCODER && s->codec_id == AV_CODEC_ID_RV20)
3393             ff_rv20_encode_picture_header(s, picture_number);
3394         else if (CONFIG_FLV_ENCODER && s->codec_id == AV_CODEC_ID_FLV1)
3395             ff_flv_encode_picture_header(s, picture_number);
3396         else if (CONFIG_H263_ENCODER)
3397             ff_h263_encode_picture_header(s, picture_number);
3398         break;
3399     case FMT_MPEG1:
3400         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
3401             ff_mpeg1_encode_picture_header(s, picture_number);
3402         break;
3403     case FMT_H264:
3404         break;
3405     default:
3406         av_assert0(0);
3407     }
3408     bits= put_bits_count(&s->pb);
3409     s->header_bits= bits - s->last_bits;
3410
         /* propagate the post-ME state to contexts 1.., encode all slice
          * contexts, then merge statistics and bitstreams back into s */
3411     for(i=1; i<context_count; i++){
3412         update_duplicate_context_after_me(s->thread_context[i], s);
3413     }
3414     s->avctx->execute(s->avctx, encode_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3415     for(i=1; i<context_count; i++){
3416         merge_context_after_encode(s, s->thread_context[i]);
3417     }
3418     emms_c();
3419     return 0;
3420 }
3421
3422 static void denoise_dct_c(MpegEncContext *s, DCTELEM *block){
3423     const int intra= s->mb_intra;
3424     int i;
3425
3426     s->dct_count[intra]++;
3427
3428     for(i=0; i<64; i++){
3429         int level= block[i];
3430
3431         if(level){
3432             if(level>0){
3433                 s->dct_error_sum[intra][i] += level;
3434                 level -= s->dct_offset[intra][i];
3435                 if(level<0) level=0;
3436             }else{
3437                 s->dct_error_sum[intra][i] -= level;
3438                 level += s->dct_offset[intra][i];
3439                 if(level>0) level=0;
3440             }
3441             block[i]= level;
3442         }
3443     }
3444 }
3445
/**
 * Forward-transform and quantize one block, choosing the levels by a
 * dynamic-programming search over score = distortion + bits * lambda.
 *
 * The block is transformed in place with s->dsp.fdct and, when
 * s->dct_error_sum is set, passed through s->denoise_dct. For intra blocks
 * the DC coefficient is quantized directly with the DC scale (y_dc_scale for
 * n < 4, c_dc_scale otherwise) and the search starts at scan position 1.
 * Bit costs come from the intra/inter AC VLC length tables, or from
 * s->ac_esc_length for levels outside the table range.
 *
 * @param s        encoder context; s->coded_score[n] is written
 * @param block    in: spatial samples, out: quantized coefficients
 * @param n        block index inside the macroblock
 * @param qscale   quantizer scale used to index the quant matrices
 * @param overflow set to nonzero when s->max_qcoeff is smaller than the
 *                 bitwise OR of all rounded levels, i.e. an overflow might
 *                 have happened
 * @return scan index of the last nonzero coefficient; a value below the
 *         first searched position (0 for intra, -1 for inter) means no
 *         coefficient was kept by the search
 */
3446 static int dct_quantize_trellis_c(MpegEncContext *s,
3447                                   DCTELEM *block, int n,
3448                                   int qscale, int *overflow){
3449     const int *qmat;
3450     const uint8_t *scantable= s->intra_scantable.scantable;
3451     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3452     int max=0;
3453     unsigned int threshold1, threshold2;
3454     int bias=0;
         /* run_tab/level_tab/score_tab are indexed by "scan position + 1" */
3455     int run_tab[65];
3456     int level_tab[65];
3457     int score_tab[65];
3458     int survivor[65];
3459     int survivor_count;
3460     int last_run=0;
3461     int last_level=0;
3462     int last_score= 0;
3463     int last_i;
         /* up to two candidate levels per scan position */
3464     int coeff[2][64];
3465     int coeff_count[64];
3466     int qmul, qadd, start_i, last_non_zero, i, dc;
3467     const int esc_length= s->ac_esc_length;
3468     uint8_t * length;
3469     uint8_t * last_length;
3470     const int lambda= s->lambda2 >> (FF_LAMBDA_SHIFT - 6);
3471
3472     s->dsp.fdct (block);
3473
3474     if(s->dct_error_sum)
3475         s->denoise_dct(s, block);
         /* H.263 style dequantization constants, scaled by 8 to match the
          * scale that the other (non-H.263) branch reaches via "<<= 3" */
3476     qmul= qscale*16;
3477     qadd= ((qscale-1)|1)*8;
3478
3479     if (s->mb_intra) {
3480         int q;
3481         if (!s->h263_aic) {
3482             if (n < 4)
3483                 q = s->y_dc_scale;
3484             else
3485                 q = s->c_dc_scale;
3486             q = q << 3;
3487         } else{
3488             /* For AIC we skip quant/dequant of INTRADC */
3489             q = 1 << 3;
3490             qadd=0;
3491         }
3492
3493         /* note: block[0] is assumed to be positive */
3494         block[0] = (block[0] + (q >> 1)) / q;
3495         start_i = 1;
3496         last_non_zero = 0;
3497         qmat = n < 4 ? s->q_intra_matrix[qscale] : s->q_chroma_intra_matrix[qscale];
3498         if(s->mpeg_quant || s->out_format == FMT_MPEG1)
3499             bias= 1<<(QMAT_SHIFT-1);
3500         length     = s->intra_ac_vlc_length;
3501         last_length= s->intra_ac_vlc_last_length;
3502     } else {
3503         start_i = 0;
3504         last_non_zero = -1;
3505         qmat = s->q_inter_matrix[qscale];
3506         length     = s->inter_ac_vlc_length;
3507         last_length= s->inter_ac_vlc_last_length;
3508     }
3509     last_i= start_i;
3510
         /* (unsigned)(level+threshold1) > threshold2 tests
          * level < -threshold1 || level > threshold1 with one comparison */
3511     threshold1= (1<<QMAT_SHIFT) - bias - 1;
3512     threshold2= (threshold1<<1);
3513
         /* find the last scan position whose product exceeds the threshold */
3514     for(i=63; i>=start_i; i--) {
3515         const int j = scantable[i];
3516         int level = block[j] * qmat[j];
3517
3518         if(((unsigned)(level+threshold1))>threshold2){
3519             last_non_zero = i;
3520             break;
3521         }
3522     }
3523
         /* build the candidate levels for every position up to last_non_zero:
          * the rounded level and the one a step closer to zero; positions
          * below the threshold get a single candidate */
3524     for(i=start_i; i<=last_non_zero; i++) {
3525         const int j = scantable[i];
3526         int level = block[j] * qmat[j];
3527
3528 //        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
3529 //           || bias-level >= (1<<(QMAT_SHIFT - 3))){
3530         if(((unsigned)(level+threshold1))>threshold2){
3531             if(level>0){
3532                 level= (bias + level)>>QMAT_SHIFT;
3533                 coeff[0][i]= level;
3534                 coeff[1][i]= level-1;
3535 //                coeff[2][k]= level-2;
3536             }else{
3537                 level= (bias - level)>>QMAT_SHIFT;
3538                 coeff[0][i]= -level;
3539                 coeff[1][i]= -level+1;
3540 //                coeff[2][k]= -level+2;
3541             }
                 /* magnitude 1 leaves one candidate, since the second would be 0 */
3542             coeff_count[i]= FFMIN(level, 2);
3543             av_assert2(coeff_count[i]);
3544             max |=level;
3545         }else{
                 /* -1 or +1 by sign of the product - assumes a 32-bit int and
                  * an arithmetic right shift, TODO confirm */
3546             coeff[0][i]= (level>>31)|1;
3547             coeff_count[i]= 1;
3548         }
3549     }
3550
3551     *overflow= s->max_qcoeff < max; //overflow might have happened
3552
         /* nothing above the threshold: clear everything from start_i on */
3553     if(last_non_zero < start_i){
3554         memset(block + start_i, 0, (64-start_i)*sizeof(DCTELEM));
3555         return last_non_zero;
3556     }
3557
3558     score_tab[start_i]= 0;
3559     survivor[0]= start_i;
3560     survivor_count= 1;
3561
         /* main search: for each position i and each candidate level, try
          * every surviving predecessor position and keep the cheapest path */
3562     for(i=start_i; i<=last_non_zero; i++){
3563         int level_index, j, zero_distortion;
3564         int dct_coeff= FFABS(block[ scantable[i] ]);
3565         int best_score=256*256*256*120;
3566
             /* ff_fdct_ifast output is rescaled with ff_inv_aanscales first */
3567         if (s->dsp.fdct == ff_fdct_ifast)
3568             dct_coeff= (dct_coeff*ff_inv_aanscales[ scantable[i] ]) >> 12;
3569         zero_distortion= dct_coeff*dct_coeff;
3570
3571         for(level_index=0; level_index < coeff_count[i]; level_index++){
3572             int distortion;
3573             int level= coeff[level_index][i];
3574             const int alevel= FFABS(level);
3575             int unquant_coeff;
3576
3577             av_assert2(level);
3578
                 /* reconstruct the magnitude this level would dequantize to */
3579             if(s->out_format == FMT_H263){
3580                 unquant_coeff= alevel*qmul + qadd;
3581             }else{ //MPEG1
3582                 j= s->dsp.idct_permutation[ scantable[i] ]; //FIXME optimize
3583                 if(s->mb_intra){
3584                         unquant_coeff = (int)(  alevel  * qscale * s->intra_matrix[j]) >> 3;
3585                         unquant_coeff =   (unquant_coeff - 1) | 1;
3586                 }else{
3587                         unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[j])) >> 4;
3588                         unquant_coeff =   (unquant_coeff - 1) | 1;
3589                 }
3590                 unquant_coeff<<= 3;
3591             }
3592
                 /* squared error relative to coding this coefficient as zero */
3593             distortion= (unquant_coeff - dct_coeff) * (unquant_coeff - dct_coeff) - zero_distortion;
                 /* offset by 64: levels in -64..63 use the VLC length tables,
                  * everything else is charged the escape length */
3594             level+=64;
3595             if((level&(~127)) == 0){
3596                 for(j=survivor_count-1; j>=0; j--){
3597                     int run= i - survivor[j];
3598                     int score= distortion + length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3599                     score += score_tab[i-run];
3600
3601                     if(score < best_score){
3602                         best_score= score;
3603                         run_tab[i+1]= run;
3604                         level_tab[i+1]= level-64;
3605                     }
3606                 }
3607
                     /* for H.263 output the "last coefficient" choice is
                      * scored here with the separate last_length table */
3608                 if(s->out_format == FMT_H263){
3609                     for(j=survivor_count-1; j>=0; j--){
3610                         int run= i - survivor[j];
3611                         int score= distortion + last_length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3612                         score += score_tab[i-run];
3613                         if(score < last_score){
3614                             last_score= score;
3615                             last_run= run;
3616                             last_level= level-64;
3617                             last_i= i+1;
3618                         }
3619                     }
3620                 }
3621             }else{
3622                 distortion += esc_length*lambda;
3623                 for(j=survivor_count-1; j>=0; j--){
3624                     int run= i - survivor[j];
3625                     int score= distortion + score_tab[i-run];
3626
3627                     if(score < best_score){
3628                         best_score= score;
3629                         run_tab[i+1]= run;
3630                         level_tab[i+1]= level-64;
3631                     }
3632                 }
3633
3634                 if(s->out_format == FMT_H263){
3635                   for(j=survivor_count-1; j>=0; j--){
3636                         int run= i - survivor[j];
3637                         int score= distortion + score_tab[i-run];
3638                         if(score < last_score){
3639                             last_score= score;
3640                             last_run= run;
3641                             last_level= level-64;
3642                             last_i= i+1;
3643                         }
3644                     }
3645                 }
3646             }
3647         }
3648
3649         score_tab[i+1]= best_score;
3650
             /* drop survivors that score worse than the new best; for longer
              * blocks a slack of lambda is allowed (see the note below) */
3651         //Note: there is a vlc code in mpeg4 which is 1 bit shorter than another one with a shorter run and the same level
3652         if(last_non_zero <= 27){
3653             for(; survivor_count; survivor_count--){
3654                 if(score_tab[ survivor[survivor_count-1] ] <= best_score)
3655                     break;
3656             }
3657         }else{
3658             for(; survivor_count; survivor_count--){
3659                 if(score_tab[ survivor[survivor_count-1] ] <= best_score + lambda)
3660                     break;
3661             }
3662         }
3663
3664         survivor[ survivor_count++ ]= i+1;
3665     }
3666
         /* for other output formats the end position is chosen afterwards:
          * the cheapest score_tab entry, plus 2*lambda for every i != 0 */
3667     if(s->out_format != FMT_H263){
3668         last_score= 256*256*256*120;
3669         for(i= survivor[0]; i<=last_non_zero + 1; i++){
3670             int score= score_tab[i];
3671             if(i) score += lambda*2; //FIXME exacter?
3672
3673             if(score < last_score){
3674                 last_score= score;
3675                 last_i= i;
3676                 last_level= level_tab[i];
3677                 last_run= run_tab[i];
3678             }
3679         }
3680     }
3681
3682     s->coded_score[n] = last_score;
3683
         /* keep |block[0]| before the coefficients are cleared and rewritten */
3684     dc= FFABS(block[0]);
3685     last_non_zero= last_i - 1;
3686     memset(block + start_i, 0, (64-start_i)*sizeof(DCTELEM));
3687
3688     if(last_non_zero < start_i)
3689         return last_non_zero;
3690
         /* special case (start_i == 0, so not intra): only position 0 is
          * left; pick its level again, comparing against dropping it */
3691     if(last_non_zero == 0 && start_i == 0){
3692         int best_level= 0;
3693         int best_score= dc * dc;
3694
3695         for(i=0; i<coeff_count[0]; i++){
3696             int level= coeff[i][0];
3697             int alevel= FFABS(level);
3698             int unquant_coeff, score, distortion;
3699
3700             if(s->out_format == FMT_H263){
3701                     unquant_coeff= (alevel*qmul + qadd)>>3;
3702             }else{ //MPEG1
3703                     unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[0])) >> 4;
3704                     unquant_coeff =   (unquant_coeff - 1) | 1;
3705             }
3706             unquant_coeff = (unquant_coeff + 4) >> 3;
3707             unquant_coeff<<= 3 + 3;
3708
3709             distortion= (unquant_coeff - dc) * (unquant_coeff - dc);
3710             level+=64;
3711             if((level&(~127)) == 0) score= distortion + last_length[UNI_AC_ENC_INDEX(0, level)]*lambda;
3712             else                    score= distortion + esc_length*lambda;
3713
3714             if(score < best_score){
3715                 best_score= score;
3716                 best_level= level - 64;
3717             }
3718         }
3719         block[0]= best_level;
3720         s->coded_score[n] = best_score - dc*dc;
3721         if(best_level == 0) return -1;
3722         else                return last_non_zero;
3723     }
3724
         /* walk the chosen path backwards via run_tab and store its levels */
3725     i= last_i;
3726     av_assert2(last_level);
3727
3728     block[ perm_scantable[last_non_zero] ]= last_level;
3729     i -= last_run + 1;
3730
3731     for(; i>start_i; i -= run_tab[i] + 1){
3732         block[ perm_scantable[i-1] ]= level_tab[i];
3733     }
3734
3735     return last_non_zero;
3736 }
3737
3738 //#define REFINE_STATS 1
3739 static int16_t basis[64][64];
3740
3741 static void build_basis(uint8_t *perm){
3742     int i, j, x, y;
3743     emms_c();
3744     for(i=0; i<8; i++){
3745         for(j=0; j<8; j++){
3746             for(y=0; y<8; y++){
3747                 for(x=0; x<8; x++){
3748                     double s= 0.25*(1<<BASIS_SHIFT);
3749                     int index= 8*i + j;
3750                     int perm_index= perm[index];
3751                     if(i==0) s*= sqrt(0.5);
3752                     if(j==0) s*= sqrt(0.5);
3753                     basis[perm_index][8*x + y]= lrintf(s * cos((M_PI/8.0)*i*(x+0.5)) * cos((M_PI/8.0)*j*(y+0.5)));
3754                 }
3755             }
3756         }
3757     }
3758 }
3759
3760 static int dct_quantize_refine(MpegEncContext *s, //FIXME breaks denoise?
3761                         DCTELEM *block, int16_t *weight, DCTELEM *orig,
3762                         int n, int qscale){
3763     int16_t rem[64];
3764     LOCAL_ALIGNED_16(DCTELEM, d1, [64]);
3765     const uint8_t *scantable= s->intra_scantable.scantable;
3766     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3767 //    unsigned int threshold1, threshold2;
3768 //    int bias=0;
3769     int run_tab[65];
3770     int prev_run=0;
3771     int prev_level=0;
3772     int qmul, qadd, start_i, last_non_zero, i, dc;
3773     uint8_t * length;
3774     uint8_t * last_length;
3775     int lambda;
3776     int rle_index, run, q = 1, sum; //q is only used when s->mb_intra is true
3777 #ifdef REFINE_STATS
3778 static int count=0;
3779 static int after_last=0;
3780 static int to_zero=0;
3781 static int from_zero=0;
3782 static int raise=0;
3783 static int lower=0;
3784 static int messed_sign=0;
3785 #endif
3786
3787     if(basis[0][0] == 0)
3788         build_basis(s->dsp.idct_permutation);
3789
3790     qmul= qscale*2;
3791     qadd= (qscale-1)|1;
3792     if (s->mb_intra) {
3793         if (!s->h263_aic) {
3794             if (n < 4)
3795                 q = s->y_dc_scale;
3796             else
3797                 q = s->c_dc_scale;
3798         } else{
3799             /* For AIC we skip quant/dequant of INTRADC */
3800             q = 1;
3801             qadd=0;
3802         }
3803         q <<= RECON_SHIFT-3;
3804         /* note: block[0] is assumed to be positive */
3805         dc= block[0]*q;
3806 //        block[0] = (block[0] + (q >> 1)) / q;
3807         start_i = 1;
3808 //        if(s->mpeg_quant || s->out_format == FMT_MPEG1)
3809 //            bias= 1<<(QMAT_SHIFT-1);
3810         length     = s->intra_ac_vlc_length;
3811         last_length= s->intra_ac_vlc_last_length;
3812     } else {
3813         dc= 0;
3814         start_i = 0;
3815         length     = s->inter_ac_vlc_length;
3816         last_length= s->inter_ac_vlc_last_length;
3817     }
3818     last_non_zero = s->block_last_index[n];
3819
3820 #ifdef REFINE_STATS
3821 {START_TIMER
3822 #endif
3823     dc += (1<<(RECON_SHIFT-1));
3824     for(i=0; i<64; i++){
3825         rem[i]= dc - (orig[i]<<RECON_SHIFT); //FIXME  use orig dirrectly instead of copying to rem[]
3826     }
3827 #ifdef REFINE_STATS
3828 STOP_TIMER("memset rem[]")}
3829 #endif
3830     sum=0;
3831     for(i=0; i<64; i++){
3832         int one= 36;
3833         int qns=4;
3834         int w;
3835
3836         w= FFABS(weight[i]) + qns*one;
3837         w= 15 + (48*qns*one + w/2)/w; // 16 .. 63
3838
3839         weight[i] = w;
3840 //        w=weight[i] = (63*qns + (w/2)) / w;
3841
3842         av_assert2(w>0);
3843         av_assert2(w<(1<<6));
3844         sum += w*w;
3845     }
3846     lambda= sum*(uint64_t)s->lambda2 >> (FF_LAMBDA_SHIFT - 6 + 6 + 6 + 6);
3847 #ifdef REFINE_STATS
3848 {START_TIMER
3849 #endif
3850     run=0;
3851     rle_index=0;
3852     for(i=start_i; i<=last_non_zero; i++){
3853         int j= perm_scantable[i];
3854         const int level= block[j];
3855         int coeff;
3856
3857         if(level){
3858             if(level<0) coeff= qmul*level - qadd;
3859             else        coeff= qmul*level + qadd;
3860             run_tab[rle_index++]=run;
3861             run=0;
3862
3863             s->dsp.add_8x8basis(rem, basis[j], coeff);
3864         }else{
3865             run++;
3866         }
3867     }
3868 #ifdef REFINE_STATS
3869 if(last_non_zero>0){
3870 STOP_TIMER("init rem[]")
3871 }
3872 }
3873
3874 {START_TIMER
3875 #endif
3876     for(;;){
3877         int best_score=s->dsp.try_8x8basis(rem, weight, basis[0], 0);
3878         int best_coeff=0;
3879         int best_change=0;
3880         int run2, best_unquant_change=0, analyze_gradient;
3881 #ifdef REFINE_STATS
3882 {START_TIMER
3883 #endif
3884         analyze_gradient = last_non_zero > 2 || s->quantizer_noise_shaping >= 3;
3885
3886         if(analyze_gradient){
3887 #ifdef REFINE_STATS
3888 {START_TIMER
3889 #endif
3890             for(i=0; i<64; i++){
3891                 int w= weight[i];
3892
3893                 d1[i] = (rem[i]*w*w + (1<<(RECON_SHIFT+12-1)))>>(RECON_SHIFT+12);
3894             }
3895 #ifdef REFINE_STATS
3896 STOP_TIMER("rem*w*w")}
3897 {START_TIMER
3898 #endif
3899             s->dsp.fdct(d1);
3900 #ifdef REFINE_STATS
3901 STOP_TIMER("dct")}
3902 #endif
3903         }
3904
3905         if(start_i){
3906             const int level= block[0];
3907             int change, old_coeff;
3908
3909             av_assert2(s->mb_intra);
3910
3911             old_coeff= q*level;
3912
3913             for(change=-1; change<=1; change+=2){
3914                 int new_level= level + change;
3915                 int score, new_coeff;
3916
3917                 new_coeff= q*new_level;
3918                 if(new_coeff >= 2048 || new_coeff < 0)
3919                     continue;
3920
3921                 score= s->dsp.try_8x8basis(rem, weight, basis[0], new_coeff - old_coeff);
3922                 if(score<best_score){
3923                     best_score= score;
3924                     best_coeff= 0;
3925                     best_change= change;
3926                     best_unquant_change= new_coeff - old_coeff;
3927                 }
3928             }
3929         }
3930
3931         run=0;
3932         rle_index=0;
3933         run2= run_tab[rle_index++];
3934         prev_level=0;
3935         prev_run=0;
3936
3937         for(i=start_i; i<64; i++){
3938             int j= perm_scantable[i];
3939             const int level= block[j];
3940             int change, old_coeff;
3941
3942             if(s->quantizer_noise_shaping < 3 && i > last_non_zero + 1)
3943                 break;
3944
3945             if(level){
3946                 if(level<0) old_coeff= qmul*level - qadd;
3947                 else        old_coeff= qmul*level + qadd;
3948                 run2= run_tab[rle_index++]; //FIXME ! maybe after last
3949             }else{
3950                 old_coeff=0;
3951                 run2--;
3952                 av_assert2(run2>=0 || i >= last_non_zero );
3953             }
3954
3955             for(change=-1; change<=1; change+=2){
3956                 int new_level= level + change;
3957                 int score, new_coeff, unquant_change;
3958
3959                 score=0;
3960                 if(s->quantizer_noise_shaping < 2 && FFABS(new_level) > FFABS(level))
3961                    continue;
3962
3963                 if(new_level){
3964                     if(new_level<0) new_coeff= qmul*new_level - qadd;
3965                     else            new_coeff= qmul*new_level + qadd;
3966                     if(new_coeff >= 2048 || new_coeff <= -2048)
3967                         continue;
3968                     //FIXME check for overflow
3969
3970                     if(level){
3971                         if(level < 63 && level > -63){
3972                             if(i < last_non_zero)
3973                                 score +=   length[UNI_AC_ENC_INDEX(run, new_level+64)]
3974                                          - length[UNI_AC_ENC_INDEX(run, level+64)];
3975                             else
3976                                 score +=   last_length[UNI_AC_ENC_INDEX(run, new_level+64)]
3977                                          - last_length[UNI_AC_ENC_INDEX(run, level+64)];
3978                         }
3979                     }else{
3980                         av_assert2(FFABS(new_level)==1);
3981
3982                         if(analyze_gradient){
3983                             int g= d1[ scantable[i] ];
3984                             if(g && (g^new_level) >= 0)
3985                                 continue;
3986                         }
3987
3988                         if(i < last_non_zero){
3989                             int next_i= i + run2 + 1;
3990                             int next_level= block[ perm_scantable[next_i] ] + 64;
3991
3992                             if(next_level&(~127))
3993                                 next_level= 0;
3994
3995                             if(next_i < last_non_zero)
3996                                 score +=   length[UNI_AC_ENC_INDEX(run, 65)]
3997                                          + length[UNI_AC_ENC_INDEX(run2, next_level)]
3998                                          - length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
3999                             else
4000                                 score +=  length[UNI_AC_ENC_INDEX(run, 65)]
4001                                         + last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4002                                         - last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
4003                         }else{
4004                             score += last_length[UNI_AC_ENC_INDEX(run, 65)];
4005                             if(prev_level){
4006                                 score +=  length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4007                                         - last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4008                             }
4009                         }
4010                     }
4011                 }else{
4012                     new_coeff=0;
4013                     av_assert2(FFABS(level)==1);
4014
4015                     if(i < last_non_zero){
4016                         int next_i= i + run2 + 1;
4017                         int next_level= block[ perm_scantable[next_i] ] + 64;
4018
4019                         if(next_level&(~127))
4020                             next_level= 0;
4021
4022                         if(next_i < last_non_zero)
4023                             score +=   length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4024                                      - length[UNI_AC_ENC_INDEX(run2, next_level)]
4025                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4026                         else
4027                             score +=   last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4028                                      - last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4029                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4030                     }else{
4031                         score += -last_length[UNI_AC_ENC_INDEX(run, 65)];
4032                         if(prev_level){
4033                             score +=  last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4034                                     - length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4035                         }
4036                     }
4037                 }
4038
4039                 score *= lambda;
4040
4041                 unquant_change= new_coeff - old_coeff;
4042                 av_assert2((score < 100*lambda && score > -100*lambda) || lambda==0);
4043
4044                 score+= s->dsp.try_8x8basis(rem, weight, basis[j], unquant_change);
4045                 if(score<best_score){
4046                     best_score= score;
4047                     best_coeff= i;
4048                     best_change= change;
4049                     best_unquant_change= unquant_change;
4050                 }
4051             }
4052             if(level){
4053                 prev_level= level + 64;
4054                 if(prev_level&(~127))
4055                     prev_level= 0;
4056                 prev_run= run;
4057                 run=0;
4058             }else{
4059                 run++;
4060             }
4061         }
4062 #ifdef REFINE_STATS
4063 STOP_TIMER("iterative step")}
4064 #endif
4065
4066         if(best_change){
4067             int j= perm_scantable[ best_coeff ];
4068
4069             block[j] += best_change;
4070
4071             if(best_coeff > last_non_zero){
4072                 last_non_zero= best_coeff;
4073                 av_assert2(block[j]);
4074 #ifdef REFINE_STATS
4075 after_last++;
4076 #endif
4077             }else{
4078 #ifdef REFINE_STATS
4079 if(block[j]){
4080     if(block[j] - best_change){
4081         if(FFABS(block[j]) > FFABS(block[j] - best_change)){
4082             raise++;
4083         }else{
4084             lower++;
4085         }
4086     }else{
4087         from_zero++;
4088     }
4089 }else{
4090     to_zero++;
4091 }
4092 #endif
4093                 for(; last_non_zero>=start_i; last_non_zero--){
4094                     if(block[perm_scantable[last_non_zero]])
4095                         break;
4096                 }
4097             }
4098 #ifdef REFINE_STATS
4099 count++;
4100 if(256*256*256*64 % count == 0){
4101     printf("after_last:%d to_zero:%d from_zero:%d raise:%d lower:%d sign:%d xyp:%d/%d/%d\n", after_last, to_zero, from_zero, raise, lower, messed_sign, s->mb_x, s->mb_y, s->picture_number);
4102 }
4103 #endif
4104             run=0;
4105             rle_index=0;
4106             for(i=start_i; i<=last_non_zero; i++){
4107                 int j= perm_scantable[i];
4108                 const int level= block[j];
4109
4110                  if(level){
4111                      run_tab[rle_index++]=run;
4112                      run=0;
4113                  }else{
4114                      run++;
4115                  }
4116             }
4117
4118             s->dsp.add_8x8basis(rem, basis[j], best_unquant_change);
4119         }else{
4120             break;
4121         }
4122     }
4123 #ifdef REFINE_STATS
4124 if(last_non_zero>0){
4125 STOP_TIMER("iterative search")
4126 }
4127 }
4128 #endif
4129
4130     return last_non_zero;
4131 }
4132
4133 int ff_dct_quantize_c(MpegEncContext *s,
4134                         DCTELEM *block, int n,
4135                         int qscale, int *overflow)
4136 {
4137     int i, j, level, last_non_zero, q, start_i;
4138     const int *qmat;
4139     const uint8_t *scantable= s->intra_scantable.scantable;
4140     int bias;
4141     int max=0;
4142     unsigned int threshold1, threshold2;
4143
4144     s->dsp.fdct (block);
4145
4146     if(s->dct_error_sum)
4147         s->denoise_dct(s, block);
4148
4149     if (s->mb_intra) {
4150         if (!s->h263_aic) {
4151             if (n < 4)
4152                 q = s->y_dc_scale;
4153             else
4154                 q = s->c_dc_scale;
4155             q = q << 3;
4156         } else
4157             /* For AIC we skip quant/dequant of INTRADC */
4158             q = 1 << 3;
4159
4160         /* note: block[0] is assumed to be positive */
4161         block[0] = (block[0] + (q >> 1)) / q;
4162         start_i = 1;
4163         last_non_zero = 0;
4164         qmat = n < 4 ? s->q_intra_matrix[qscale] : s->q_chroma_intra_matrix[qscale];
4165         bias= s->intra_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4166     } else {
4167         start_i = 0;
4168         last_non_zero = -1;
4169         qmat = s->q_inter_matrix[qscale];
4170         bias= s->inter_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4171     }
4172     threshold1= (1<<QMAT_SHIFT) - bias - 1;
4173     threshold2= (threshold1<<1);
4174     for(i=63;i>=start_i;i--) {
4175         j = scantable[i];
4176         level = block[j] * qmat[j];
4177
4178         if(((unsigned)(level+threshold1))>threshold2){
4179             last_non_zero = i;
4180             break;
4181         }else{
4182             block[j]=0;
4183         }
4184     }
4185     for(i=start_i; i<=last_non_zero; i++) {
4186         j = scantable[i];
4187         level = block[j] * qmat[j];
4188
4189 //        if(   bias+level >= (1<<QMAT_SHIFT)
4190 //           || bias-level >= (1<<QMAT_SHIFT)){
4191         if(((unsigned)(level+threshold1))>threshold2){
4192             if(level>0){
4193                 level= (bias + level)>>QMAT_SHIFT;
4194                 block[j]= level;
4195             }else{
4196                 level= (bias - level)>>QMAT_SHIFT;
4197                 block[j]= -level;
4198             }
4199             max |=level;
4200         }else{
4201             block[j]=0;
4202         }
4203     }
4204     *overflow= s->max_qcoeff < max; //overflow might have happened
4205
4206     /* we need this permutation so that we correct the IDCT, we only permute the !=0 elements */
4207     if (s->dsp.idct_permutation_type != FF_NO_IDCT_PERM)
4208         ff_block_permute(block, s->dsp.idct_permutation, scantable, last_non_zero);
4209
4210     return last_non_zero;
4211 }
4212
/* Byte offset of a MpegEncContext field; binds an option entry to its field. */
#define OFFSET(x) offsetof(MpegEncContext, x)
/* Flag combination used by the option tables in this file. Parenthesized so
 * the expansion stays a single operand wherever the macro is used. */
#define VE (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM)
4215 static const AVOption h263_options[] = {
4216     { "obmc",         "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4217     { "structured_slices","Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4218     { "mb_info",      "emit macroblock info for RFC 2190 packetization, the parameter value is the maximum payload size", OFFSET(mb_info), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VE },
4219     FF_MPV_COMMON_OPTS
4220     { NULL },
4221 };
4222
4223 static const AVClass h263_class = {
4224     .class_name = "H.263 encoder",
4225     .item_name  = av_default_item_name,
4226     .option     = h263_options,
4227     .version    = LIBAVUTIL_VERSION_INT,
4228 };
4229
4230 AVCodec ff_h263_encoder = {
4231     .name           = "h263",
4232     .type           = AVMEDIA_TYPE_VIDEO,
4233     .id             = AV_CODEC_ID_H263,
4234     .priv_data_size = sizeof(MpegEncContext),
4235     .init           = ff_MPV_encode_init,
4236     .encode2        = ff_MPV_encode_picture,
4237     .close          = ff_MPV_encode_end,
4238     .pix_fmts= (const enum AVPixelFormat[]){AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE},
4239     .long_name= NULL_IF_CONFIG_SMALL("H.263 / H.263-1996"),
4240     .priv_class     = &h263_class,
4241 };
4242
4243 static const AVOption h263p_options[] = {
4244     { "umv",        "Use unlimited motion vectors.",    OFFSET(umvplus), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4245     { "aiv",        "Use alternative inter VLC.",       OFFSET(alt_inter_vlc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4246     { "obmc",       "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4247     { "structured_slices", "Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4248     FF_MPV_COMMON_OPTS
4249     { NULL },
4250 };
4251 static const AVClass h263p_class = {
4252     .class_name = "H.263p encoder",
4253     .item_name  = av_default_item_name,
4254     .option     = h263p_options,
4255     .version    = LIBAVUTIL_VERSION_INT,
4256 };
4257
4258 AVCodec ff_h263p_encoder = {
4259     .name           = "h263p",
4260     .type           = AVMEDIA_TYPE_VIDEO,
4261     .id             = AV_CODEC_ID_H263P,
4262     .priv_data_size = sizeof(MpegEncContext),
4263     .init           = ff_MPV_encode_init,
4264     .encode2        = ff_MPV_encode_picture,
4265     .close          = ff_MPV_encode_end,
4266     .capabilities   = CODEC_CAP_SLICE_THREADS,
4267     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4268     .long_name      = NULL_IF_CONFIG_SMALL("H.263+ / H.263-1998 / H.263 version 2"),
4269     .priv_class     = &h263p_class,
4270 };
4271
4272 FF_MPV_GENERIC_CLASS(msmpeg4v2)
4273
4274 AVCodec ff_msmpeg4v2_encoder = {
4275     .name           = "msmpeg4v2",
4276     .type           = AVMEDIA_TYPE_VIDEO,
4277     .id             = AV_CODEC_ID_MSMPEG4V2,
4278     .priv_data_size = sizeof(MpegEncContext),
4279     .init           = ff_MPV_encode_init,
4280     .encode2        = ff_MPV_encode_picture,
4281     .close          = ff_MPV_encode_end,
4282     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4283     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 2"),
4284     .priv_class     = &msmpeg4v2_class,
4285 };
4286
4287 FF_MPV_GENERIC_CLASS(msmpeg4v3)
4288
4289 AVCodec ff_msmpeg4v3_encoder = {
4290     .name           = "msmpeg4",
4291     .type           = AVMEDIA_TYPE_VIDEO,
4292     .id             = AV_CODEC_ID_MSMPEG4V3,
4293     .priv_data_size = sizeof(MpegEncContext),
4294     .init           = ff_MPV_encode_init,
4295     .encode2        = ff_MPV_encode_picture,
4296     .close          = ff_MPV_encode_end,
4297     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4298     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 3"),
4299     .priv_class     = &msmpeg4v3_class,
4300 };
4301
4302 FF_MPV_GENERIC_CLASS(wmv1)
4303
4304 AVCodec ff_wmv1_encoder = {
4305     .name           = "wmv1",
4306     .type           = AVMEDIA_TYPE_VIDEO,
4307     .id             = AV_CODEC_ID_WMV1,
4308     .priv_data_size = sizeof(MpegEncContext),
4309     .init           = ff_MPV_encode_init,
4310     .encode2        = ff_MPV_encode_picture,
4311     .close          = ff_MPV_encode_end,
4312     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4313     .long_name      = NULL_IF_CONFIG_SMALL("Windows Media Video 7"),
4314     .priv_class     = &wmv1_class,
4315 };