]> git.sesse.net Git - ffmpeg/blob - libavcodec/mpegvideo_enc.c
mpegvideo: remove FMT_H264
[ffmpeg] / libavcodec / mpegvideo_enc.c
1 /*
2  * The simplest mpeg encoder (well, it was the simplest!)
3  * Copyright (c) 2000,2001 Fabrice Bellard
4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5  *
6  * 4MV & hq & B-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
7  *
8  * This file is part of Libav.
9  *
10  * Libav is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * Libav is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with Libav; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23  */
24
25 /**
26  * @file
27  * The simplest mpeg encoder (well, it was the simplest!).
28  */
29
30 #include "libavutil/internal.h"
31 #include "libavutil/intmath.h"
32 #include "libavutil/mathematics.h"
33 #include "libavutil/pixdesc.h"
34 #include "libavutil/opt.h"
35 #include "avcodec.h"
36 #include "dct.h"
37 #include "dsputil.h"
38 #include "mpegvideo.h"
39 #include "h263.h"
40 #include "mathops.h"
41 #include "mjpegenc.h"
42 #include "msmpeg4.h"
43 #include "faandct.h"
44 #include "thread.h"
45 #include "aandcttab.h"
46 #include "flv.h"
47 #include "mpeg4video.h"
48 #include "internal.h"
49 #include "bytestream.h"
50 #include <limits.h>
51
52 //#undef NDEBUG
53 //#include <assert.h>
54
55 static int encode_picture(MpegEncContext *s, int picture_number);
56 static int dct_quantize_refine(MpegEncContext *s, int16_t *block, int16_t *weight, int16_t *orig, int n, int qscale);
57 static int sse_mb(MpegEncContext *s);
58 static void denoise_dct_c(MpegEncContext *s, int16_t *block);
59 static int dct_quantize_trellis_c(MpegEncContext *s, int16_t *block, int n, int qscale, int *overflow);
60
61 /* enable all paranoid tests for rounding, overflows, etc... */
62 //#define PARANOID
63
64 //#define DEBUG
65
66 static uint8_t default_mv_penalty[MAX_FCODE + 1][MAX_MV * 2 + 1];
67 static uint8_t default_fcode_tab[MAX_MV * 2 + 1];
68
69 const AVOption ff_mpv_generic_options[] = {
70     FF_MPV_COMMON_OPTS
71     { NULL },
72 };
73
74 void ff_convert_matrix(DSPContext *dsp, int (*qmat)[64],
75                        uint16_t (*qmat16)[2][64],
76                        const uint16_t *quant_matrix,
77                        int bias, int qmin, int qmax, int intra)
78 {
79     int qscale;
80     int shift = 0;
81
82     for (qscale = qmin; qscale <= qmax; qscale++) {
83         int i;
84         if (dsp->fdct == ff_jpeg_fdct_islow_8 ||
85             dsp->fdct == ff_jpeg_fdct_islow_10 ||
86             dsp->fdct == ff_faandct) {
87             for (i = 0; i < 64; i++) {
88                 const int j = dsp->idct_permutation[i];
89                 /* 16 <= qscale * quant_matrix[i] <= 7905
90                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
91                  *             19952 <=              x  <= 249205026
92                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
93                  *           3444240 >= (1 << 36) / (x) >= 275 */
94
95                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
96                                         (qscale * quant_matrix[j]));
97             }
98         } else if (dsp->fdct == ff_fdct_ifast) {
99             for (i = 0; i < 64; i++) {
100                 const int j = dsp->idct_permutation[i];
101                 /* 16 <= qscale * quant_matrix[i] <= 7905
102                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
103                  *             19952 <=              x  <= 249205026
104                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
105                  *           3444240 >= (1 << 36) / (x) >= 275 */
106
107                 qmat[qscale][i] = (int)((UINT64_C(1) << (QMAT_SHIFT + 14)) /
108                                         (ff_aanscales[i] * qscale *
109                                          quant_matrix[j]));
110             }
111         } else {
112             for (i = 0; i < 64; i++) {
113                 const int j = dsp->idct_permutation[i];
114                 /* We can safely suppose that 16 <= quant_matrix[i] <= 255
115                  * Assume x = qscale * quant_matrix[i]
116                  * So             16 <=              x  <= 7905
117                  * so (1 << 19) / 16 >= (1 << 19) / (x) >= (1 << 19) / 7905
118                  * so          32768 >= (1 << 19) / (x) >= 67 */
119                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
120                                         (qscale * quant_matrix[j]));
121                 //qmat  [qscale][i] = (1 << QMAT_SHIFT_MMX) /
122                 //                    (qscale * quant_matrix[i]);
123                 qmat16[qscale][0][i] = (1 << QMAT_SHIFT_MMX) /
124                                        (qscale * quant_matrix[j]);
125
126                 if (qmat16[qscale][0][i] == 0 ||
127                     qmat16[qscale][0][i] == 128 * 256)
128                     qmat16[qscale][0][i] = 128 * 256 - 1;
129                 qmat16[qscale][1][i] =
130                     ROUNDED_DIV(bias << (16 - QUANT_BIAS_SHIFT),
131                                 qmat16[qscale][0][i]);
132             }
133         }
134
135         for (i = intra; i < 64; i++) {
136             int64_t max = 8191;
137             if (dsp->fdct == ff_fdct_ifast) {
138                 max = (8191LL * ff_aanscales[i]) >> 14;
139             }
140             while (((max * qmat[qscale][i]) >> shift) > INT_MAX) {
141                 shift++;
142             }
143         }
144     }
145     if (shift) {
146         av_log(NULL, AV_LOG_INFO,
147                "Warning, QMAT_SHIFT is larger than %d, overflows possible\n",
148                QMAT_SHIFT - shift);
149     }
150 }
151
152 static inline void update_qscale(MpegEncContext *s)
153 {
154     s->qscale = (s->lambda * 139 + FF_LAMBDA_SCALE * 64) >>
155                 (FF_LAMBDA_SHIFT + 7);
156     s->qscale = av_clip(s->qscale, s->avctx->qmin, s->avctx->qmax);
157
158     s->lambda2 = (s->lambda * s->lambda + FF_LAMBDA_SCALE / 2) >>
159                  FF_LAMBDA_SHIFT;
160 }
161
162 void ff_write_quant_matrix(PutBitContext *pb, uint16_t *matrix)
163 {
164     int i;
165
166     if (matrix) {
167         put_bits(pb, 1, 1);
168         for (i = 0; i < 64; i++) {
169             put_bits(pb, 8, matrix[ff_zigzag_direct[i]]);
170         }
171     } else
172         put_bits(pb, 1, 0);
173 }
174
175 /**
176  * init s->current_picture.qscale_table from s->lambda_table
177  */
178 void ff_init_qscale_tab(MpegEncContext *s)
179 {
180     int8_t * const qscale_table = s->current_picture.qscale_table;
181     int i;
182
183     for (i = 0; i < s->mb_num; i++) {
184         unsigned int lam = s->lambda_table[s->mb_index2xy[i]];
185         int qp = (lam * 139 + FF_LAMBDA_SCALE * 64) >> (FF_LAMBDA_SHIFT + 7);
186         qscale_table[s->mb_index2xy[i]] = av_clip(qp, s->avctx->qmin,
187                                                   s->avctx->qmax);
188     }
189 }
190
191 static void copy_picture_attributes(MpegEncContext *s, AVFrame *dst,
192                                     const AVFrame *src)
193 {
194     dst->pict_type              = src->pict_type;
195     dst->quality                = src->quality;
196     dst->coded_picture_number   = src->coded_picture_number;
197     dst->display_picture_number = src->display_picture_number;
198     //dst->reference              = src->reference;
199     dst->pts                    = src->pts;
200     dst->interlaced_frame       = src->interlaced_frame;
201     dst->top_field_first        = src->top_field_first;
202 }
203
204 static void update_duplicate_context_after_me(MpegEncContext *dst,
205                                               MpegEncContext *src)
206 {
207 #define COPY(a) dst->a= src->a
208     COPY(pict_type);
209     COPY(current_picture);
210     COPY(f_code);
211     COPY(b_code);
212     COPY(qscale);
213     COPY(lambda);
214     COPY(lambda2);
215     COPY(picture_in_gop_number);
216     COPY(gop_picture_number);
217     COPY(frame_pred_frame_dct); // FIXME don't set in encode_header
218     COPY(progressive_frame);    // FIXME don't set in encode_header
219     COPY(partitioned_frame);    // FIXME don't set in encode_header
220 #undef COPY
221 }
222
223 /**
224  * Set the given MpegEncContext to defaults for encoding.
225  * the changed fields will not depend upon the prior state of the MpegEncContext.
226  */
227 static void MPV_encode_defaults(MpegEncContext *s)
228 {
229     int i;
230     ff_MPV_common_defaults(s);
231
232     for (i = -16; i < 16; i++) {
233         default_fcode_tab[i + MAX_MV] = 1;
234     }
235     s->me.mv_penalty = default_mv_penalty;
236     s->fcode_tab     = default_fcode_tab;
237 }
238
239 /* init video encoder */
240 av_cold int ff_MPV_encode_init(AVCodecContext *avctx)
241 {
242     MpegEncContext *s = avctx->priv_data;
243     int i;
244     int chroma_h_shift, chroma_v_shift;
245
246     MPV_encode_defaults(s);
247
248     switch (avctx->codec_id) {
249     case AV_CODEC_ID_MPEG2VIDEO:
250         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P &&
251             avctx->pix_fmt != AV_PIX_FMT_YUV422P) {
252             av_log(avctx, AV_LOG_ERROR,
253                    "only YUV420 and YUV422 are supported\n");
254             return -1;
255         }
256         break;
257     case AV_CODEC_ID_LJPEG:
258         if (avctx->pix_fmt != AV_PIX_FMT_YUVJ420P &&
259             avctx->pix_fmt != AV_PIX_FMT_YUVJ422P &&
260             avctx->pix_fmt != AV_PIX_FMT_YUVJ444P &&
261             avctx->pix_fmt != AV_PIX_FMT_BGRA     &&
262             ((avctx->pix_fmt != AV_PIX_FMT_YUV420P &&
263               avctx->pix_fmt != AV_PIX_FMT_YUV422P &&
264               avctx->pix_fmt != AV_PIX_FMT_YUV444P) ||
265              avctx->strict_std_compliance > FF_COMPLIANCE_UNOFFICIAL)) {
266             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in LJPEG\n");
267             return -1;
268         }
269         break;
270     case AV_CODEC_ID_MJPEG:
271         if (avctx->pix_fmt != AV_PIX_FMT_YUVJ420P &&
272             avctx->pix_fmt != AV_PIX_FMT_YUVJ422P &&
273             ((avctx->pix_fmt != AV_PIX_FMT_YUV420P &&
274               avctx->pix_fmt != AV_PIX_FMT_YUV422P) ||
275              avctx->strict_std_compliance > FF_COMPLIANCE_UNOFFICIAL)) {
276             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in jpeg\n");
277             return -1;
278         }
279         break;
280     default:
281         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P) {
282             av_log(avctx, AV_LOG_ERROR, "only YUV420 is supported\n");
283             return -1;
284         }
285     }
286
287     switch (avctx->pix_fmt) {
288     case AV_PIX_FMT_YUVJ422P:
289     case AV_PIX_FMT_YUV422P:
290         s->chroma_format = CHROMA_422;
291         break;
292     case AV_PIX_FMT_YUVJ420P:
293     case AV_PIX_FMT_YUV420P:
294     default:
295         s->chroma_format = CHROMA_420;
296         break;
297     }
298
299     s->bit_rate = avctx->bit_rate;
300     s->width    = avctx->width;
301     s->height   = avctx->height;
302     if (avctx->gop_size > 600 &&
303         avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL) {
304         av_log(avctx, AV_LOG_ERROR,
305                "Warning keyframe interval too large! reducing it ...\n");
306         avctx->gop_size = 600;
307     }
308     s->gop_size     = avctx->gop_size;
309     s->avctx        = avctx;
310     s->flags        = avctx->flags;
311     s->flags2       = avctx->flags2;
312     s->max_b_frames = avctx->max_b_frames;
313     s->codec_id     = avctx->codec->id;
314     s->strict_std_compliance = avctx->strict_std_compliance;
315     s->quarter_sample     = (avctx->flags & CODEC_FLAG_QPEL) != 0;
316     s->mpeg_quant         = avctx->mpeg_quant;
317     s->rtp_mode           = !!avctx->rtp_payload_size;
318     s->intra_dc_precision = avctx->intra_dc_precision;
319     s->user_specified_pts = AV_NOPTS_VALUE;
320
321     if (s->gop_size <= 1) {
322         s->intra_only = 1;
323         s->gop_size   = 12;
324     } else {
325         s->intra_only = 0;
326     }
327
328     s->me_method = avctx->me_method;
329
330     /* Fixed QSCALE */
331     s->fixed_qscale = !!(avctx->flags & CODEC_FLAG_QSCALE);
332
333     s->adaptive_quant = (s->avctx->lumi_masking ||
334                          s->avctx->dark_masking ||
335                          s->avctx->temporal_cplx_masking ||
336                          s->avctx->spatial_cplx_masking  ||
337                          s->avctx->p_masking      ||
338                          s->avctx->border_masking ||
339                          (s->mpv_flags & FF_MPV_FLAG_QP_RD)) &&
340                         !s->fixed_qscale;
341
342     s->loop_filter      = !!(s->flags & CODEC_FLAG_LOOP_FILTER);
343
344     if (avctx->rc_max_rate && !avctx->rc_buffer_size) {
345         av_log(avctx, AV_LOG_ERROR,
346                "a vbv buffer size is needed, "
347                "for encoding with a maximum bitrate\n");
348         return -1;
349     }
350
351     if (avctx->rc_min_rate && avctx->rc_max_rate != avctx->rc_min_rate) {
352         av_log(avctx, AV_LOG_INFO,
353                "Warning min_rate > 0 but min_rate != max_rate isn't recommended!\n");
354     }
355
356     if (avctx->rc_min_rate && avctx->rc_min_rate > avctx->bit_rate) {
357         av_log(avctx, AV_LOG_ERROR, "bitrate below min bitrate\n");
358         return -1;
359     }
360
361     if (avctx->rc_max_rate && avctx->rc_max_rate < avctx->bit_rate) {
362         av_log(avctx, AV_LOG_INFO, "bitrate above max bitrate\n");
363         return -1;
364     }
365
366     if (avctx->rc_max_rate &&
367         avctx->rc_max_rate == avctx->bit_rate &&
368         avctx->rc_max_rate != avctx->rc_min_rate) {
369         av_log(avctx, AV_LOG_INFO,
370                "impossible bitrate constraints, this will fail\n");
371     }
372
373     if (avctx->rc_buffer_size &&
374         avctx->bit_rate * (int64_t)avctx->time_base.num >
375             avctx->rc_buffer_size * (int64_t)avctx->time_base.den) {
376         av_log(avctx, AV_LOG_ERROR, "VBV buffer too small for bitrate\n");
377         return -1;
378     }
379
380     if (!s->fixed_qscale &&
381         avctx->bit_rate * av_q2d(avctx->time_base) >
382             avctx->bit_rate_tolerance) {
383         av_log(avctx, AV_LOG_ERROR,
384                "bitrate tolerance too small for bitrate\n");
385         return -1;
386     }
387
388     if (s->avctx->rc_max_rate &&
389         s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
390         (s->codec_id == AV_CODEC_ID_MPEG1VIDEO ||
391          s->codec_id == AV_CODEC_ID_MPEG2VIDEO) &&
392         90000LL * (avctx->rc_buffer_size - 1) >
393             s->avctx->rc_max_rate * 0xFFFFLL) {
394         av_log(avctx, AV_LOG_INFO,
395                "Warning vbv_delay will be set to 0xFFFF (=VBR) as the "
396                "specified vbv buffer is too large for the given bitrate!\n");
397     }
398
399     if ((s->flags & CODEC_FLAG_4MV)  && s->codec_id != AV_CODEC_ID_MPEG4 &&
400         s->codec_id != AV_CODEC_ID_H263 && s->codec_id != AV_CODEC_ID_H263P &&
401         s->codec_id != AV_CODEC_ID_FLV1) {
402         av_log(avctx, AV_LOG_ERROR, "4MV not supported by codec\n");
403         return -1;
404     }
405
406     if (s->obmc && s->avctx->mb_decision != FF_MB_DECISION_SIMPLE) {
407         av_log(avctx, AV_LOG_ERROR,
408                "OBMC is only supported with simple mb decision\n");
409         return -1;
410     }
411
412     if (s->quarter_sample && s->codec_id != AV_CODEC_ID_MPEG4) {
413         av_log(avctx, AV_LOG_ERROR, "qpel not supported by codec\n");
414         return -1;
415     }
416
417     if (s->max_b_frames                    &&
418         s->codec_id != AV_CODEC_ID_MPEG4      &&
419         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
420         s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
421         av_log(avctx, AV_LOG_ERROR, "b frames not supported by codec\n");
422         return -1;
423     }
424
425     if ((s->codec_id == AV_CODEC_ID_MPEG4 ||
426          s->codec_id == AV_CODEC_ID_H263  ||
427          s->codec_id == AV_CODEC_ID_H263P) &&
428         (avctx->sample_aspect_ratio.num > 255 ||
429          avctx->sample_aspect_ratio.den > 255)) {
430         av_log(avctx, AV_LOG_ERROR,
431                "Invalid pixel aspect ratio %i/%i, limit is 255/255\n",
432                avctx->sample_aspect_ratio.num, avctx->sample_aspect_ratio.den);
433         return -1;
434     }
435
436     if ((s->flags & (CODEC_FLAG_INTERLACED_DCT | CODEC_FLAG_INTERLACED_ME)) &&
437         s->codec_id != AV_CODEC_ID_MPEG4 && s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
438         av_log(avctx, AV_LOG_ERROR, "interlacing not supported by codec\n");
439         return -1;
440     }
441
442     // FIXME mpeg2 uses that too
443     if (s->mpeg_quant && s->codec_id != AV_CODEC_ID_MPEG4) {
444         av_log(avctx, AV_LOG_ERROR,
445                "mpeg2 style quantization not supported by codec\n");
446         return -1;
447     }
448
449     if ((s->mpv_flags & FF_MPV_FLAG_CBP_RD) && !avctx->trellis) {
450         av_log(avctx, AV_LOG_ERROR, "CBP RD needs trellis quant\n");
451         return -1;
452     }
453
454     if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) &&
455         s->avctx->mb_decision != FF_MB_DECISION_RD) {
456         av_log(avctx, AV_LOG_ERROR, "QP RD needs mbd=2\n");
457         return -1;
458     }
459
460     if (s->avctx->scenechange_threshold < 1000000000 &&
461         (s->flags & CODEC_FLAG_CLOSED_GOP)) {
462         av_log(avctx, AV_LOG_ERROR,
463                "closed gop with scene change detection are not supported yet, "
464                "set threshold to 1000000000\n");
465         return -1;
466     }
467
468     if (s->flags & CODEC_FLAG_LOW_DELAY) {
469         if (s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
470             av_log(avctx, AV_LOG_ERROR,
471                   "low delay forcing is only available for mpeg2\n");
472             return -1;
473         }
474         if (s->max_b_frames != 0) {
475             av_log(avctx, AV_LOG_ERROR,
476                    "b frames cannot be used with low delay\n");
477             return -1;
478         }
479     }
480
481     if (s->q_scale_type == 1) {
482         if (avctx->qmax > 12) {
483             av_log(avctx, AV_LOG_ERROR,
484                    "non linear quant only supports qmax <= 12 currently\n");
485             return -1;
486         }
487     }
488
489     if (s->avctx->thread_count > 1         &&
490         s->codec_id != AV_CODEC_ID_MPEG4      &&
491         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
492         s->codec_id != AV_CODEC_ID_MPEG2VIDEO &&
493         (s->codec_id != AV_CODEC_ID_H263P)) {
494         av_log(avctx, AV_LOG_ERROR,
495                "multi threaded encoding not supported by codec\n");
496         return -1;
497     }
498
499     if (s->avctx->thread_count < 1) {
500         av_log(avctx, AV_LOG_ERROR,
501                "automatic thread number detection not supported by codec,"
502                "patch welcome\n");
503         return -1;
504     }
505
506     if (s->avctx->thread_count > 1)
507         s->rtp_mode = 1;
508
509     if (!avctx->time_base.den || !avctx->time_base.num) {
510         av_log(avctx, AV_LOG_ERROR, "framerate not set\n");
511         return -1;
512     }
513
514     i = (INT_MAX / 2 + 128) >> 8;
515     if (avctx->mb_threshold >= i) {
516         av_log(avctx, AV_LOG_ERROR, "mb_threshold too large, max is %d\n",
517                i - 1);
518         return -1;
519     }
520
521     if (avctx->b_frame_strategy && (avctx->flags & CODEC_FLAG_PASS2)) {
522         av_log(avctx, AV_LOG_INFO,
523                "notice: b_frame_strategy only affects the first pass\n");
524         avctx->b_frame_strategy = 0;
525     }
526
527     i = av_gcd(avctx->time_base.den, avctx->time_base.num);
528     if (i > 1) {
529         av_log(avctx, AV_LOG_INFO, "removing common factors from framerate\n");
530         avctx->time_base.den /= i;
531         avctx->time_base.num /= i;
532         //return -1;
533     }
534
535     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG1VIDEO ||
536         s->codec_id == AV_CODEC_ID_MPEG2VIDEO || s->codec_id == AV_CODEC_ID_MJPEG) {
537         // (a + x * 3 / 8) / x
538         s->intra_quant_bias = 3 << (QUANT_BIAS_SHIFT - 3);
539         s->inter_quant_bias = 0;
540     } else {
541         s->intra_quant_bias = 0;
542         // (a - x / 4) / x
543         s->inter_quant_bias = -(1 << (QUANT_BIAS_SHIFT - 2));
544     }
545
546     if (avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
547         s->intra_quant_bias = avctx->intra_quant_bias;
548     if (avctx->inter_quant_bias != FF_DEFAULT_QUANT_BIAS)
549         s->inter_quant_bias = avctx->inter_quant_bias;
550
551     av_pix_fmt_get_chroma_sub_sample(avctx->pix_fmt, &chroma_h_shift,
552                                      &chroma_v_shift);
553
554     if (avctx->codec_id == AV_CODEC_ID_MPEG4 &&
555         s->avctx->time_base.den > (1 << 16) - 1) {
556         av_log(avctx, AV_LOG_ERROR,
557                "timebase %d/%d not supported by MPEG 4 standard, "
558                "the maximum admitted value for the timebase denominator "
559                "is %d\n", s->avctx->time_base.num, s->avctx->time_base.den,
560                (1 << 16) - 1);
561         return -1;
562     }
563     s->time_increment_bits = av_log2(s->avctx->time_base.den - 1) + 1;
564
565     switch (avctx->codec->id) {
566     case AV_CODEC_ID_MPEG1VIDEO:
567         s->out_format = FMT_MPEG1;
568         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
569         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
570         break;
571     case AV_CODEC_ID_MPEG2VIDEO:
572         s->out_format = FMT_MPEG1;
573         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
574         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
575         s->rtp_mode   = 1;
576         break;
577     case AV_CODEC_ID_LJPEG:
578     case AV_CODEC_ID_MJPEG:
579         s->out_format = FMT_MJPEG;
580         s->intra_only = 1; /* force intra only for jpeg */
581         if (avctx->codec->id == AV_CODEC_ID_LJPEG &&
582             avctx->pix_fmt   == AV_PIX_FMT_BGRA) {
583             s->mjpeg_vsample[0] = s->mjpeg_hsample[0] =
584             s->mjpeg_vsample[1] = s->mjpeg_hsample[1] =
585             s->mjpeg_vsample[2] = s->mjpeg_hsample[2] = 1;
586         } else {
587             s->mjpeg_vsample[0] = 2;
588             s->mjpeg_vsample[1] = 2 >> chroma_v_shift;
589             s->mjpeg_vsample[2] = 2 >> chroma_v_shift;
590             s->mjpeg_hsample[0] = 2;
591             s->mjpeg_hsample[1] = 2 >> chroma_h_shift;
592             s->mjpeg_hsample[2] = 2 >> chroma_h_shift;
593         }
594         if (!(CONFIG_MJPEG_ENCODER || CONFIG_LJPEG_ENCODER) ||
595             ff_mjpeg_encode_init(s) < 0)
596             return -1;
597         avctx->delay = 0;
598         s->low_delay = 1;
599         break;
600     case AV_CODEC_ID_H261:
601         if (!CONFIG_H261_ENCODER)
602             return -1;
603         if (ff_h261_get_picture_format(s->width, s->height) < 0) {
604             av_log(avctx, AV_LOG_ERROR,
605                    "The specified picture size of %dx%d is not valid for the "
606                    "H.261 codec.\nValid sizes are 176x144, 352x288\n",
607                     s->width, s->height);
608             return -1;
609         }
610         s->out_format = FMT_H261;
611         avctx->delay  = 0;
612         s->low_delay  = 1;
613         break;
614     case AV_CODEC_ID_H263:
615         if (!CONFIG_H263_ENCODER)
616         return -1;
617         if (ff_match_2uint16(ff_h263_format, FF_ARRAY_ELEMS(ff_h263_format),
618                              s->width, s->height) == 8) {
619             av_log(avctx, AV_LOG_INFO,
620                    "The specified picture size of %dx%d is not valid for "
621                    "the H.263 codec.\nValid sizes are 128x96, 176x144, "
622                    "352x288, 704x576, and 1408x1152."
623                    "Try H.263+.\n", s->width, s->height);
624             return -1;
625         }
626         s->out_format = FMT_H263;
627         avctx->delay  = 0;
628         s->low_delay  = 1;
629         break;
630     case AV_CODEC_ID_H263P:
631         s->out_format = FMT_H263;
632         s->h263_plus  = 1;
633         /* Fx */
634         s->h263_aic        = (avctx->flags & CODEC_FLAG_AC_PRED) ? 1 : 0;
635         s->modified_quant  = s->h263_aic;
636         s->loop_filter     = (avctx->flags & CODEC_FLAG_LOOP_FILTER) ? 1 : 0;
637         s->unrestricted_mv = s->obmc || s->loop_filter || s->umvplus;
638
639         /* /Fx */
640         /* These are just to be sure */
641         avctx->delay = 0;
642         s->low_delay = 1;
643         break;
644     case AV_CODEC_ID_FLV1:
645         s->out_format      = FMT_H263;
646         s->h263_flv        = 2; /* format = 1; 11-bit codes */
647         s->unrestricted_mv = 1;
648         s->rtp_mode  = 0; /* don't allow GOB */
649         avctx->delay = 0;
650         s->low_delay = 1;
651         break;
652     case AV_CODEC_ID_RV10:
653         s->out_format = FMT_H263;
654         avctx->delay  = 0;
655         s->low_delay  = 1;
656         break;
657     case AV_CODEC_ID_RV20:
658         s->out_format      = FMT_H263;
659         avctx->delay       = 0;
660         s->low_delay       = 1;
661         s->modified_quant  = 1;
662         s->h263_aic        = 1;
663         s->h263_plus       = 1;
664         s->loop_filter     = 1;
665         s->unrestricted_mv = 0;
666         break;
667     case AV_CODEC_ID_MPEG4:
668         s->out_format      = FMT_H263;
669         s->h263_pred       = 1;
670         s->unrestricted_mv = 1;
671         s->low_delay       = s->max_b_frames ? 0 : 1;
672         avctx->delay       = s->low_delay ? 0 : (s->max_b_frames + 1);
673         break;
674     case AV_CODEC_ID_MSMPEG4V2:
675         s->out_format      = FMT_H263;
676         s->h263_pred       = 1;
677         s->unrestricted_mv = 1;
678         s->msmpeg4_version = 2;
679         avctx->delay       = 0;
680         s->low_delay       = 1;
681         break;
682     case AV_CODEC_ID_MSMPEG4V3:
683         s->out_format        = FMT_H263;
684         s->h263_pred         = 1;
685         s->unrestricted_mv   = 1;
686         s->msmpeg4_version   = 3;
687         s->flipflop_rounding = 1;
688         avctx->delay         = 0;
689         s->low_delay         = 1;
690         break;
691     case AV_CODEC_ID_WMV1:
692         s->out_format        = FMT_H263;
693         s->h263_pred         = 1;
694         s->unrestricted_mv   = 1;
695         s->msmpeg4_version   = 4;
696         s->flipflop_rounding = 1;
697         avctx->delay         = 0;
698         s->low_delay         = 1;
699         break;
700     case AV_CODEC_ID_WMV2:
701         s->out_format        = FMT_H263;
702         s->h263_pred         = 1;
703         s->unrestricted_mv   = 1;
704         s->msmpeg4_version   = 5;
705         s->flipflop_rounding = 1;
706         avctx->delay         = 0;
707         s->low_delay         = 1;
708         break;
709     default:
710         return -1;
711     }
712
713     avctx->has_b_frames = !s->low_delay;
714
715     s->encoding = 1;
716
717     s->progressive_frame    =
718     s->progressive_sequence = !(avctx->flags & (CODEC_FLAG_INTERLACED_DCT |
719                                                 CODEC_FLAG_INTERLACED_ME) ||
720                                 s->alternate_scan);
721
722     /* init */
723     if (ff_MPV_common_init(s) < 0)
724         return -1;
725
726     if (ARCH_X86)
727         ff_MPV_encode_init_x86(s);
728
729     if (!s->dct_quantize)
730         s->dct_quantize = ff_dct_quantize_c;
731     if (!s->denoise_dct)
732         s->denoise_dct  = denoise_dct_c;
733     s->fast_dct_quantize = s->dct_quantize;
734     if (avctx->trellis)
735         s->dct_quantize  = dct_quantize_trellis_c;
736
737     if ((CONFIG_H263P_ENCODER || CONFIG_RV20_ENCODER) && s->modified_quant)
738         s->chroma_qscale_table = ff_h263_chroma_qscale_table;
739
740     s->quant_precision = 5;
741
742     ff_set_cmp(&s->dsp, s->dsp.ildct_cmp, s->avctx->ildct_cmp);
743     ff_set_cmp(&s->dsp, s->dsp.frame_skip_cmp, s->avctx->frame_skip_cmp);
744
745     if (CONFIG_H261_ENCODER && s->out_format == FMT_H261)
746         ff_h261_encode_init(s);
747     if (CONFIG_H263_ENCODER && s->out_format == FMT_H263)
748         ff_h263_encode_init(s);
749     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
750         ff_msmpeg4_encode_init(s);
751     if ((CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
752         && s->out_format == FMT_MPEG1)
753         ff_mpeg1_encode_init(s);
754
755     /* init q matrix */
756     for (i = 0; i < 64; i++) {
757         int j = s->dsp.idct_permutation[i];
758         if (CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4 &&
759             s->mpeg_quant) {
760             s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
761             s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
762         } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
763             s->intra_matrix[j] =
764             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
765         } else {
766             /* mpeg1/2 */
767             s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
768             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
769         }
770         if (s->avctx->intra_matrix)
771             s->intra_matrix[j] = s->avctx->intra_matrix[i];
772         if (s->avctx->inter_matrix)
773             s->inter_matrix[j] = s->avctx->inter_matrix[i];
774     }
775
776     /* precompute matrix */
777     /* for mjpeg, we do include qscale in the matrix */
778     if (s->out_format != FMT_MJPEG) {
779         ff_convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
780                           s->intra_matrix, s->intra_quant_bias, avctx->qmin,
781                           31, 1);
782         ff_convert_matrix(&s->dsp, s->q_inter_matrix, s->q_inter_matrix16,
783                           s->inter_matrix, s->inter_quant_bias, avctx->qmin,
784                           31, 0);
785     }
786
787     if (ff_rate_control_init(s) < 0)
788         return -1;
789
790     return 0;
791 }
792
793 av_cold int ff_MPV_encode_end(AVCodecContext *avctx)
794 {
795     MpegEncContext *s = avctx->priv_data;
796
797     ff_rate_control_uninit(s);
798
799     ff_MPV_common_end(s);
800     if ((CONFIG_MJPEG_ENCODER || CONFIG_LJPEG_ENCODER) &&
801         s->out_format == FMT_MJPEG)
802         ff_mjpeg_encode_close(s);
803
804     av_freep(&avctx->extradata);
805
806     return 0;
807 }
808
/**
 * Sum of absolute differences between a 16x16 block and a constant.
 *
 * @param src    top-left sample of the block
 * @param ref    value every sample is compared against
 * @param stride distance between the starts of consecutive rows of src
 * @return sum over the 256 samples of |sample - ref|
 */
static int get_sae(uint8_t *src, int ref, int stride)
{
    int sum = 0;
    int row, col;

    for (row = 0; row < 16; row++) {
        const uint8_t *line = src + row * stride;

        for (col = 0; col < 16; col++)
            sum += FFABS(line[col] - ref);
    }

    return sum;
}
822
823 static int get_intra_count(MpegEncContext *s, uint8_t *src,
824                            uint8_t *ref, int stride)
825 {
826     int x, y, w, h;
827     int acc = 0;
828
829     w = s->width  & ~15;
830     h = s->height & ~15;
831
832     for (y = 0; y < h; y += 16) {
833         for (x = 0; x < w; x += 16) {
834             int offset = x + y * stride;
835             int sad  = s->dsp.sad[0](NULL, src + offset, ref + offset, stride,
836                                      16);
837             int mean = (s->dsp.pix_sum(src + offset, stride) + 128) >> 8;
838             int sae  = get_sae(src + offset, mean, stride);
839
840             acc += sae + 500 < sad;
841         }
842     }
843     return acc;
844 }
845
846
847 static int load_input_picture(MpegEncContext *s, const AVFrame *pic_arg)
848 {
849     Picture *pic = NULL;
850     int64_t pts;
851     int i, display_picture_number = 0, ret;
852     const int encoding_delay = s->max_b_frames ? s->max_b_frames :
853                                                  (s->low_delay ? 0 : 1);
854     int direct = 1;
855
856     if (pic_arg) {
857         pts = pic_arg->pts;
858         display_picture_number = s->input_picture_number++;
859
860         if (pts != AV_NOPTS_VALUE) {
861             if (s->user_specified_pts != AV_NOPTS_VALUE) {
862                 int64_t time = pts;
863                 int64_t last = s->user_specified_pts;
864
865                 if (time <= last) {
866                     av_log(s->avctx, AV_LOG_ERROR,
867                            "Error, Invalid timestamp=%"PRId64", "
868                            "last=%"PRId64"\n", pts, s->user_specified_pts);
869                     return -1;
870                 }
871
872                 if (!s->low_delay && display_picture_number == 1)
873                     s->dts_delta = time - last;
874             }
875             s->user_specified_pts = pts;
876         } else {
877             if (s->user_specified_pts != AV_NOPTS_VALUE) {
878                 s->user_specified_pts =
879                 pts = s->user_specified_pts + 1;
880                 av_log(s->avctx, AV_LOG_INFO,
881                        "Warning: AVFrame.pts=? trying to guess (%"PRId64")\n",
882                        pts);
883             } else {
884                 pts = display_picture_number;
885             }
886         }
887     }
888
889     if (pic_arg) {
890         if (!pic_arg->buf[0]);
891             direct = 0;
892         if (pic_arg->linesize[0] != s->linesize)
893             direct = 0;
894         if (pic_arg->linesize[1] != s->uvlinesize)
895             direct = 0;
896         if (pic_arg->linesize[2] != s->uvlinesize)
897             direct = 0;
898
899         av_dlog(s->avctx, "%d %d %d %d\n", pic_arg->linesize[0],
900                 pic_arg->linesize[1], s->linesize, s->uvlinesize);
901
902         if (direct) {
903             i = ff_find_unused_picture(s, 1);
904             if (i < 0)
905                 return i;
906
907             pic = &s->picture[i];
908             pic->reference = 3;
909
910             if ((ret = av_frame_ref(&pic->f, pic_arg)) < 0)
911                 return ret;
912             if (ff_alloc_picture(s, pic, 1) < 0) {
913                 return -1;
914             }
915         } else {
916             i = ff_find_unused_picture(s, 0);
917             if (i < 0)
918                 return i;
919
920             pic = &s->picture[i];
921             pic->reference = 3;
922
923             if (ff_alloc_picture(s, pic, 0) < 0) {
924                 return -1;
925             }
926
927             if (pic->f.data[0] + INPLACE_OFFSET == pic_arg->data[0] &&
928                 pic->f.data[1] + INPLACE_OFFSET == pic_arg->data[1] &&
929                 pic->f.data[2] + INPLACE_OFFSET == pic_arg->data[2]) {
930                 // empty
931             } else {
932                 int h_chroma_shift, v_chroma_shift;
933                 av_pix_fmt_get_chroma_sub_sample(s->avctx->pix_fmt,
934                                                  &h_chroma_shift,
935                                                  &v_chroma_shift);
936
937                 for (i = 0; i < 3; i++) {
938                     int src_stride = pic_arg->linesize[i];
939                     int dst_stride = i ? s->uvlinesize : s->linesize;
940                     int h_shift = i ? h_chroma_shift : 0;
941                     int v_shift = i ? v_chroma_shift : 0;
942                     int w = s->width  >> h_shift;
943                     int h = s->height >> v_shift;
944                     uint8_t *src = pic_arg->data[i];
945                     uint8_t *dst = pic->f.data[i];
946
947                     if (!s->avctx->rc_buffer_size)
948                         dst += INPLACE_OFFSET;
949
950                     if (src_stride == dst_stride)
951                         memcpy(dst, src, src_stride * h);
952                     else {
953                         while (h--) {
954                             memcpy(dst, src, w);
955                             dst += dst_stride;
956                             src += src_stride;
957                         }
958                     }
959                 }
960             }
961         }
962         copy_picture_attributes(s, &pic->f, pic_arg);
963         pic->f.display_picture_number = display_picture_number;
964         pic->f.pts = pts; // we set this here to avoid modifiying pic_arg
965     }
966
967     /* shift buffer entries */
968     for (i = 1; i < MAX_PICTURE_COUNT /*s->encoding_delay + 1*/; i++)
969         s->input_picture[i - 1] = s->input_picture[i];
970
971     s->input_picture[encoding_delay] = (Picture*) pic;
972
973     return 0;
974 }
975
976 static int skip_check(MpegEncContext *s, Picture *p, Picture *ref)
977 {
978     int x, y, plane;
979     int score = 0;
980     int64_t score64 = 0;
981
982     for (plane = 0; plane < 3; plane++) {
983         const int stride = p->f.linesize[plane];
984         const int bw = plane ? 1 : 2;
985         for (y = 0; y < s->mb_height * bw; y++) {
986             for (x = 0; x < s->mb_width * bw; x++) {
987                 int off = p->shared ? 0 : 16;
988                 uint8_t *dptr = p->f.data[plane] + 8 * (x + y * stride) + off;
989                 uint8_t *rptr = ref->f.data[plane] + 8 * (x + y * stride);
990                 int v   = s->dsp.frame_skip_cmp[1](s, dptr, rptr, stride, 8);
991
992                 switch (s->avctx->frame_skip_exp) {
993                 case 0: score    =  FFMAX(score, v);          break;
994                 case 1: score   += FFABS(v);                  break;
995                 case 2: score   += v * v;                     break;
996                 case 3: score64 += FFABS(v * v * (int64_t)v); break;
997                 case 4: score64 += v * v * (int64_t)(v * v);  break;
998                 }
999             }
1000         }
1001     }
1002
1003     if (score)
1004         score64 = score;
1005
1006     if (score64 < s->avctx->frame_skip_threshold)
1007         return 1;
1008     if (score64 < ((s->avctx->frame_skip_factor * (int64_t)s->lambda) >> 8))
1009         return 1;
1010     return 0;
1011 }
1012
1013 static int encode_frame(AVCodecContext *c, AVFrame *frame)
1014 {
1015     AVPacket pkt = { 0 };
1016     int ret, got_output;
1017
1018     av_init_packet(&pkt);
1019     ret = avcodec_encode_video2(c, &pkt, frame, &got_output);
1020     if (ret < 0)
1021         return ret;
1022
1023     ret = pkt.size;
1024     av_free_packet(&pkt);
1025     return ret;
1026 }
1027
1028 static int estimate_best_b_count(MpegEncContext *s)
1029 {
1030     AVCodec *codec    = avcodec_find_encoder(s->avctx->codec_id);
1031     AVCodecContext *c = avcodec_alloc_context3(NULL);
1032     AVFrame input[FF_MAX_B_FRAMES + 2];
1033     const int scale = s->avctx->brd_scale;
1034     int i, j, out_size, p_lambda, b_lambda, lambda2;
1035     int64_t best_rd  = INT64_MAX;
1036     int best_b_count = -1;
1037
1038     assert(scale >= 0 && scale <= 3);
1039
1040     //emms_c();
1041     //s->next_picture_ptr->quality;
1042     p_lambda = s->last_lambda_for[AV_PICTURE_TYPE_P];
1043     //p_lambda * FFABS(s->avctx->b_quant_factor) + s->avctx->b_quant_offset;
1044     b_lambda = s->last_lambda_for[AV_PICTURE_TYPE_B];
1045     if (!b_lambda) // FIXME we should do this somewhere else
1046         b_lambda = p_lambda;
1047     lambda2  = (b_lambda * b_lambda + (1 << FF_LAMBDA_SHIFT) / 2) >>
1048                FF_LAMBDA_SHIFT;
1049
1050     c->width        = s->width  >> scale;
1051     c->height       = s->height >> scale;
1052     c->flags        = CODEC_FLAG_QSCALE | CODEC_FLAG_PSNR |
1053                       CODEC_FLAG_INPUT_PRESERVED /*| CODEC_FLAG_EMU_EDGE*/;
1054     c->flags       |= s->avctx->flags & CODEC_FLAG_QPEL;
1055     c->mb_decision  = s->avctx->mb_decision;
1056     c->me_cmp       = s->avctx->me_cmp;
1057     c->mb_cmp       = s->avctx->mb_cmp;
1058     c->me_sub_cmp   = s->avctx->me_sub_cmp;
1059     c->pix_fmt      = AV_PIX_FMT_YUV420P;
1060     c->time_base    = s->avctx->time_base;
1061     c->max_b_frames = s->max_b_frames;
1062
1063     if (avcodec_open2(c, codec, NULL) < 0)
1064         return -1;
1065
1066     for (i = 0; i < s->max_b_frames + 2; i++) {
1067         int ysize = c->width * c->height;
1068         int csize = (c->width / 2) * (c->height / 2);
1069         Picture pre_input, *pre_input_ptr = i ? s->input_picture[i - 1] :
1070                                                 s->next_picture_ptr;
1071
1072         avcodec_get_frame_defaults(&input[i]);
1073         input[i].data[0]     = av_malloc(ysize + 2 * csize);
1074         input[i].data[1]     = input[i].data[0] + ysize;
1075         input[i].data[2]     = input[i].data[1] + csize;
1076         input[i].linesize[0] = c->width;
1077         input[i].linesize[1] =
1078         input[i].linesize[2] = c->width / 2;
1079
1080         if (pre_input_ptr && (!i || s->input_picture[i - 1])) {
1081             pre_input = *pre_input_ptr;
1082
1083             if (!pre_input.shared && i) {
1084                 pre_input.f.data[0] += INPLACE_OFFSET;
1085                 pre_input.f.data[1] += INPLACE_OFFSET;
1086                 pre_input.f.data[2] += INPLACE_OFFSET;
1087             }
1088
1089             s->dsp.shrink[scale](input[i].data[0], input[i].linesize[0],
1090                                  pre_input.f.data[0], pre_input.f.linesize[0],
1091                                  c->width,      c->height);
1092             s->dsp.shrink[scale](input[i].data[1], input[i].linesize[1],
1093                                  pre_input.f.data[1], pre_input.f.linesize[1],
1094                                  c->width >> 1, c->height >> 1);
1095             s->dsp.shrink[scale](input[i].data[2], input[i].linesize[2],
1096                                  pre_input.f.data[2], pre_input.f.linesize[2],
1097                                  c->width >> 1, c->height >> 1);
1098         }
1099     }
1100
1101     for (j = 0; j < s->max_b_frames + 1; j++) {
1102         int64_t rd = 0;
1103
1104         if (!s->input_picture[j])
1105             break;
1106
1107         c->error[0] = c->error[1] = c->error[2] = 0;
1108
1109         input[0].pict_type = AV_PICTURE_TYPE_I;
1110         input[0].quality   = 1 * FF_QP2LAMBDA;
1111
1112         out_size = encode_frame(c, &input[0]);
1113
1114         //rd += (out_size * lambda2) >> FF_LAMBDA_SHIFT;
1115
1116         for (i = 0; i < s->max_b_frames + 1; i++) {
1117             int is_p = i % (j + 1) == j || i == s->max_b_frames;
1118
1119             input[i + 1].pict_type = is_p ?
1120                                      AV_PICTURE_TYPE_P : AV_PICTURE_TYPE_B;
1121             input[i + 1].quality   = is_p ? p_lambda : b_lambda;
1122
1123             out_size = encode_frame(c, &input[i + 1]);
1124
1125             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1126         }
1127
1128         /* get the delayed frames */
1129         while (out_size) {
1130             out_size = encode_frame(c, NULL);
1131             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1132         }
1133
1134         rd += c->error[0] + c->error[1] + c->error[2];
1135
1136         if (rd < best_rd) {
1137             best_rd = rd;
1138             best_b_count = j;
1139         }
1140     }
1141
1142     avcodec_close(c);
1143     av_freep(&c);
1144
1145     for (i = 0; i < s->max_b_frames + 2; i++) {
1146         av_freep(&input[i].data[0]);
1147     }
1148
1149     return best_b_count;
1150 }
1151
1152 static int select_input_picture(MpegEncContext *s)
1153 {
1154     int i, ret;
1155
1156     for (i = 1; i < MAX_PICTURE_COUNT; i++)
1157         s->reordered_input_picture[i - 1] = s->reordered_input_picture[i];
1158     s->reordered_input_picture[MAX_PICTURE_COUNT - 1] = NULL;
1159
1160     /* set next picture type & ordering */
1161     if (s->reordered_input_picture[0] == NULL && s->input_picture[0]) {
1162         if (/*s->picture_in_gop_number >= s->gop_size ||*/
1163             s->next_picture_ptr == NULL || s->intra_only) {
1164             s->reordered_input_picture[0] = s->input_picture[0];
1165             s->reordered_input_picture[0]->f.pict_type = AV_PICTURE_TYPE_I;
1166             s->reordered_input_picture[0]->f.coded_picture_number =
1167                 s->coded_picture_number++;
1168         } else {
1169             int b_frames;
1170
1171             if (s->avctx->frame_skip_threshold || s->avctx->frame_skip_factor) {
1172                 if (s->picture_in_gop_number < s->gop_size &&
1173                     skip_check(s, s->input_picture[0], s->next_picture_ptr)) {
1174                     // FIXME check that te gop check above is +-1 correct
1175                     av_frame_unref(&s->input_picture[0]->f);
1176
1177                     emms_c();
1178                     ff_vbv_update(s, 0);
1179
1180                     goto no_output_pic;
1181                 }
1182             }
1183
1184             if (s->flags & CODEC_FLAG_PASS2) {
1185                 for (i = 0; i < s->max_b_frames + 1; i++) {
1186                     int pict_num = s->input_picture[0]->f.display_picture_number + i;
1187
1188                     if (pict_num >= s->rc_context.num_entries)
1189                         break;
1190                     if (!s->input_picture[i]) {
1191                         s->rc_context.entry[pict_num - 1].new_pict_type = AV_PICTURE_TYPE_P;
1192                         break;
1193                     }
1194
1195                     s->input_picture[i]->f.pict_type =
1196                         s->rc_context.entry[pict_num].new_pict_type;
1197                 }
1198             }
1199
1200             if (s->avctx->b_frame_strategy == 0) {
1201                 b_frames = s->max_b_frames;
1202                 while (b_frames && !s->input_picture[b_frames])
1203                     b_frames--;
1204             } else if (s->avctx->b_frame_strategy == 1) {
1205                 for (i = 1; i < s->max_b_frames + 1; i++) {
1206                     if (s->input_picture[i] &&
1207                         s->input_picture[i]->b_frame_score == 0) {
1208                         s->input_picture[i]->b_frame_score =
1209                             get_intra_count(s,
1210                                             s->input_picture[i    ]->f.data[0],
1211                                             s->input_picture[i - 1]->f.data[0],
1212                                             s->linesize) + 1;
1213                     }
1214                 }
1215                 for (i = 0; i < s->max_b_frames + 1; i++) {
1216                     if (s->input_picture[i] == NULL ||
1217                         s->input_picture[i]->b_frame_score - 1 >
1218                             s->mb_num / s->avctx->b_sensitivity)
1219                         break;
1220                 }
1221
1222                 b_frames = FFMAX(0, i - 1);
1223
1224                 /* reset scores */
1225                 for (i = 0; i < b_frames + 1; i++) {
1226                     s->input_picture[i]->b_frame_score = 0;
1227                 }
1228             } else if (s->avctx->b_frame_strategy == 2) {
1229                 b_frames = estimate_best_b_count(s);
1230             } else {
1231                 av_log(s->avctx, AV_LOG_ERROR, "illegal b frame strategy\n");
1232                 b_frames = 0;
1233             }
1234
1235             emms_c();
1236
1237             for (i = b_frames - 1; i >= 0; i--) {
1238                 int type = s->input_picture[i]->f.pict_type;
1239                 if (type && type != AV_PICTURE_TYPE_B)
1240                     b_frames = i;
1241             }
1242             if (s->input_picture[b_frames]->f.pict_type == AV_PICTURE_TYPE_B &&
1243                 b_frames == s->max_b_frames) {
1244                 av_log(s->avctx, AV_LOG_ERROR,
1245                        "warning, too many b frames in a row\n");
1246             }
1247
1248             if (s->picture_in_gop_number + b_frames >= s->gop_size) {
1249                 if ((s->mpv_flags & FF_MPV_FLAG_STRICT_GOP) &&
1250                     s->gop_size > s->picture_in_gop_number) {
1251                     b_frames = s->gop_size - s->picture_in_gop_number - 1;
1252                 } else {
1253                     if (s->flags & CODEC_FLAG_CLOSED_GOP)
1254                         b_frames = 0;
1255                     s->input_picture[b_frames]->f.pict_type = AV_PICTURE_TYPE_I;
1256                 }
1257             }
1258
1259             if ((s->flags & CODEC_FLAG_CLOSED_GOP) && b_frames &&
1260                 s->input_picture[b_frames]->f.pict_type == AV_PICTURE_TYPE_I)
1261                 b_frames--;
1262
1263             s->reordered_input_picture[0] = s->input_picture[b_frames];
1264             if (s->reordered_input_picture[0]->f.pict_type != AV_PICTURE_TYPE_I)
1265                 s->reordered_input_picture[0]->f.pict_type = AV_PICTURE_TYPE_P;
1266             s->reordered_input_picture[0]->f.coded_picture_number =
1267                 s->coded_picture_number++;
1268             for (i = 0; i < b_frames; i++) {
1269                 s->reordered_input_picture[i + 1] = s->input_picture[i];
1270                 s->reordered_input_picture[i + 1]->f.pict_type =
1271                     AV_PICTURE_TYPE_B;
1272                 s->reordered_input_picture[i + 1]->f.coded_picture_number =
1273                     s->coded_picture_number++;
1274             }
1275         }
1276     }
1277 no_output_pic:
1278     if (s->reordered_input_picture[0]) {
1279         s->reordered_input_picture[0]->reference =
1280            s->reordered_input_picture[0]->f.pict_type !=
1281                AV_PICTURE_TYPE_B ? 3 : 0;
1282
1283         ff_mpeg_unref_picture(s, &s->new_picture);
1284         if ((ret = ff_mpeg_ref_picture(s, &s->new_picture, s->reordered_input_picture[0])))
1285             return ret;
1286
1287         if (s->reordered_input_picture[0]->shared || s->avctx->rc_buffer_size) {
1288             // input is a shared pix, so we can't modifiy it -> alloc a new
1289             // one & ensure that the shared one is reuseable
1290
1291             Picture *pic;
1292             int i = ff_find_unused_picture(s, 0);
1293             if (i < 0)
1294                 return i;
1295             pic = &s->picture[i];
1296
1297             pic->reference = s->reordered_input_picture[0]->reference;
1298             if (ff_alloc_picture(s, pic, 0) < 0) {
1299                 return -1;
1300             }
1301
1302             copy_picture_attributes(s, &pic->f,
1303                                     &s->reordered_input_picture[0]->f);
1304
1305             /* mark us unused / free shared pic */
1306             av_frame_unref(&s->reordered_input_picture[0]->f);
1307             s->reordered_input_picture[0]->shared = 0;
1308
1309             s->current_picture_ptr = pic;
1310         } else {
1311             // input is not a shared pix -> reuse buffer for current_pix
1312             s->current_picture_ptr = s->reordered_input_picture[0];
1313             for (i = 0; i < 4; i++) {
1314                 s->new_picture.f.data[i] += INPLACE_OFFSET;
1315             }
1316         }
1317         ff_mpeg_unref_picture(s, &s->current_picture);
1318         if ((ret = ff_mpeg_ref_picture(s, &s->current_picture,
1319                                        s->current_picture_ptr)) < 0)
1320             return ret;
1321
1322         s->picture_number = s->new_picture.f.display_picture_number;
1323     } else {
1324         ff_mpeg_unref_picture(s, &s->new_picture);
1325     }
1326     return 0;
1327 }
1328
1329 int ff_MPV_encode_picture(AVCodecContext *avctx, AVPacket *pkt,
1330                           const AVFrame *pic_arg, int *got_packet)
1331 {
1332     MpegEncContext *s = avctx->priv_data;
1333     int i, stuffing_count, ret;
1334     int context_count = s->slice_context_count;
1335
1336     s->picture_in_gop_number++;
1337
1338     if (load_input_picture(s, pic_arg) < 0)
1339         return -1;
1340
1341     if (select_input_picture(s) < 0) {
1342         return -1;
1343     }
1344
1345     /* output? */
1346     if (s->new_picture.f.data[0]) {
1347         if (!pkt->data &&
1348             (ret = ff_alloc_packet(pkt, s->mb_width*s->mb_height*MAX_MB_BYTES)) < 0)
1349             return ret;
1350         if (s->mb_info) {
1351             s->mb_info_ptr = av_packet_new_side_data(pkt,
1352                                  AV_PKT_DATA_H263_MB_INFO,
1353                                  s->mb_width*s->mb_height*12);
1354             s->prev_mb_info = s->last_mb_info = s->mb_info_size = 0;
1355         }
1356
1357         for (i = 0; i < context_count; i++) {
1358             int start_y = s->thread_context[i]->start_mb_y;
1359             int   end_y = s->thread_context[i]->  end_mb_y;
1360             int h       = s->mb_height;
1361             uint8_t *start = pkt->data + (size_t)(((int64_t) pkt->size) * start_y / h);
1362             uint8_t *end   = pkt->data + (size_t)(((int64_t) pkt->size) *   end_y / h);
1363
1364             init_put_bits(&s->thread_context[i]->pb, start, end - start);
1365         }
1366
1367         s->pict_type = s->new_picture.f.pict_type;
1368         //emms_c();
1369         ff_MPV_frame_start(s, avctx);
1370 vbv_retry:
1371         if (encode_picture(s, s->picture_number) < 0)
1372             return -1;
1373
1374         avctx->header_bits = s->header_bits;
1375         avctx->mv_bits     = s->mv_bits;
1376         avctx->misc_bits   = s->misc_bits;
1377         avctx->i_tex_bits  = s->i_tex_bits;
1378         avctx->p_tex_bits  = s->p_tex_bits;
1379         avctx->i_count     = s->i_count;
1380         // FIXME f/b_count in avctx
1381         avctx->p_count     = s->mb_num - s->i_count - s->skip_count;
1382         avctx->skip_count  = s->skip_count;
1383
1384         ff_MPV_frame_end(s);
1385
1386         if (CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG)
1387             ff_mjpeg_encode_picture_trailer(s);
1388
1389         if (avctx->rc_buffer_size) {
1390             RateControlContext *rcc = &s->rc_context;
1391             int max_size = rcc->buffer_index * avctx->rc_max_available_vbv_use;
1392
1393             if (put_bits_count(&s->pb) > max_size &&
1394                 s->lambda < s->avctx->lmax) {
1395                 s->next_lambda = FFMAX(s->lambda + 1, s->lambda *
1396                                        (s->qscale + 1) / s->qscale);
1397                 if (s->adaptive_quant) {
1398                     int i;
1399                     for (i = 0; i < s->mb_height * s->mb_stride; i++)
1400                         s->lambda_table[i] =
1401                             FFMAX(s->lambda_table[i] + 1,
1402                                   s->lambda_table[i] * (s->qscale + 1) /
1403                                   s->qscale);
1404                 }
1405                 s->mb_skipped = 0;        // done in MPV_frame_start()
1406                 // done in encode_picture() so we must undo it
1407                 if (s->pict_type == AV_PICTURE_TYPE_P) {
1408                     if (s->flipflop_rounding          ||
1409                         s->codec_id == AV_CODEC_ID_H263P ||
1410                         s->codec_id == AV_CODEC_ID_MPEG4)
1411                         s->no_rounding ^= 1;
1412                 }
1413                 if (s->pict_type != AV_PICTURE_TYPE_B) {
1414                     s->time_base       = s->last_time_base;
1415                     s->last_non_b_time = s->time - s->pp_time;
1416                 }
1417                 for (i = 0; i < context_count; i++) {
1418                     PutBitContext *pb = &s->thread_context[i]->pb;
1419                     init_put_bits(pb, pb->buf, pb->buf_end - pb->buf);
1420                 }
1421                 goto vbv_retry;
1422             }
1423
1424             assert(s->avctx->rc_max_rate);
1425         }
1426
1427         if (s->flags & CODEC_FLAG_PASS1)
1428             ff_write_pass1_stats(s);
1429
1430         for (i = 0; i < 4; i++) {
1431             s->current_picture_ptr->f.error[i] = s->current_picture.f.error[i];
1432             avctx->error[i] += s->current_picture_ptr->f.error[i];
1433         }
1434
1435         if (s->flags & CODEC_FLAG_PASS1)
1436             assert(avctx->header_bits + avctx->mv_bits + avctx->misc_bits +
1437                    avctx->i_tex_bits + avctx->p_tex_bits ==
1438                        put_bits_count(&s->pb));
1439         flush_put_bits(&s->pb);
1440         s->frame_bits  = put_bits_count(&s->pb);
1441
1442         stuffing_count = ff_vbv_update(s, s->frame_bits);
1443         if (stuffing_count) {
1444             if (s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb) >> 3) <
1445                     stuffing_count + 50) {
1446                 av_log(s->avctx, AV_LOG_ERROR, "stuffing too large\n");
1447                 return -1;
1448             }
1449
1450             switch (s->codec_id) {
1451             case AV_CODEC_ID_MPEG1VIDEO:
1452             case AV_CODEC_ID_MPEG2VIDEO:
1453                 while (stuffing_count--) {
1454                     put_bits(&s->pb, 8, 0);
1455                 }
1456             break;
1457             case AV_CODEC_ID_MPEG4:
1458                 put_bits(&s->pb, 16, 0);
1459                 put_bits(&s->pb, 16, 0x1C3);
1460                 stuffing_count -= 4;
1461                 while (stuffing_count--) {
1462                     put_bits(&s->pb, 8, 0xFF);
1463                 }
1464             break;
1465             default:
1466                 av_log(s->avctx, AV_LOG_ERROR, "vbv buffer overflow\n");
1467             }
1468             flush_put_bits(&s->pb);
1469             s->frame_bits  = put_bits_count(&s->pb);
1470         }
1471
1472         /* update mpeg1/2 vbv_delay for CBR */
1473         if (s->avctx->rc_max_rate                          &&
1474             s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
1475             s->out_format == FMT_MPEG1                     &&
1476             90000LL * (avctx->rc_buffer_size - 1) <=
1477                 s->avctx->rc_max_rate * 0xFFFFLL) {
1478             int vbv_delay, min_delay;
1479             double inbits  = s->avctx->rc_max_rate *
1480                              av_q2d(s->avctx->time_base);
1481             int    minbits = s->frame_bits - 8 *
1482                              (s->vbv_delay_ptr - s->pb.buf - 1);
1483             double bits    = s->rc_context.buffer_index + minbits - inbits;
1484
1485             if (bits < 0)
1486                 av_log(s->avctx, AV_LOG_ERROR,
1487                        "Internal error, negative bits\n");
1488
1489             assert(s->repeat_first_field == 0);
1490
1491             vbv_delay = bits * 90000 / s->avctx->rc_max_rate;
1492             min_delay = (minbits * 90000LL + s->avctx->rc_max_rate - 1) /
1493                         s->avctx->rc_max_rate;
1494
1495             vbv_delay = FFMAX(vbv_delay, min_delay);
1496
1497             assert(vbv_delay < 0xFFFF);
1498
1499             s->vbv_delay_ptr[0] &= 0xF8;
1500             s->vbv_delay_ptr[0] |= vbv_delay >> 13;
1501             s->vbv_delay_ptr[1]  = vbv_delay >> 5;
1502             s->vbv_delay_ptr[2] &= 0x07;
1503             s->vbv_delay_ptr[2] |= vbv_delay << 3;
1504             avctx->vbv_delay     = vbv_delay * 300;
1505         }
1506         s->total_bits     += s->frame_bits;
1507         avctx->frame_bits  = s->frame_bits;
1508
1509         pkt->pts = s->current_picture.f.pts;
1510         if (!s->low_delay) {
1511             if (!s->current_picture.f.coded_picture_number)
1512                 pkt->dts = pkt->pts - s->dts_delta;
1513             else
1514                 pkt->dts = s->reordered_pts;
1515             s->reordered_pts = s->input_picture[0]->f.pts;
1516         } else
1517             pkt->dts = pkt->pts;
1518         if (s->current_picture.f.key_frame)
1519             pkt->flags |= AV_PKT_FLAG_KEY;
1520         if (s->mb_info)
1521             av_packet_shrink_side_data(pkt, AV_PKT_DATA_H263_MB_INFO, s->mb_info_size);
1522     } else {
1523         s->frame_bits = 0;
1524     }
1525     assert((s->frame_bits & 7) == 0);
1526
1527     pkt->size = s->frame_bits / 8;
1528     *got_packet = !!pkt->size;
1529     return 0;
1530 }
1531
1532 static inline void dct_single_coeff_elimination(MpegEncContext *s,
1533                                                 int n, int threshold)
1534 {
1535     static const char tab[64] = {
1536         3, 2, 2, 1, 1, 1, 1, 1,
1537         1, 1, 1, 1, 1, 1, 1, 1,
1538         1, 1, 1, 1, 1, 1, 1, 1,
1539         0, 0, 0, 0, 0, 0, 0, 0,
1540         0, 0, 0, 0, 0, 0, 0, 0,
1541         0, 0, 0, 0, 0, 0, 0, 0,
1542         0, 0, 0, 0, 0, 0, 0, 0,
1543         0, 0, 0, 0, 0, 0, 0, 0
1544     };
1545     int score = 0;
1546     int run = 0;
1547     int i;
1548     int16_t *block = s->block[n];
1549     const int last_index = s->block_last_index[n];
1550     int skip_dc;
1551
1552     if (threshold < 0) {
1553         skip_dc = 0;
1554         threshold = -threshold;
1555     } else
1556         skip_dc = 1;
1557
1558     /* Are all we could set to zero already zero? */
1559     if (last_index <= skip_dc - 1)
1560         return;
1561
1562     for (i = 0; i <= last_index; i++) {
1563         const int j = s->intra_scantable.permutated[i];
1564         const int level = FFABS(block[j]);
1565         if (level == 1) {
1566             if (skip_dc && i == 0)
1567                 continue;
1568             score += tab[run];
1569             run = 0;
1570         } else if (level > 1) {
1571             return;
1572         } else {
1573             run++;
1574         }
1575     }
1576     if (score >= threshold)
1577         return;
1578     for (i = skip_dc; i <= last_index; i++) {
1579         const int j = s->intra_scantable.permutated[i];
1580         block[j] = 0;
1581     }
1582     if (block[0])
1583         s->block_last_index[n] = 0;
1584     else
1585         s->block_last_index[n] = -1;
1586 }
1587
1588 static inline void clip_coeffs(MpegEncContext *s, int16_t *block,
1589                                int last_index)
1590 {
1591     int i;
1592     const int maxlevel = s->max_qcoeff;
1593     const int minlevel = s->min_qcoeff;
1594     int overflow = 0;
1595
1596     if (s->mb_intra) {
1597         i = 1; // skip clipping of intra dc
1598     } else
1599         i = 0;
1600
1601     for (; i <= last_index; i++) {
1602         const int j = s->intra_scantable.permutated[i];
1603         int level = block[j];
1604
1605         if (level > maxlevel) {
1606             level = maxlevel;
1607             overflow++;
1608         } else if (level < minlevel) {
1609             level = minlevel;
1610             overflow++;
1611         }
1612
1613         block[j] = level;
1614     }
1615
1616     if (overflow && s->avctx->mb_decision == FF_MB_DECISION_SIMPLE)
1617         av_log(s->avctx, AV_LOG_INFO,
1618                "warning, clipping %d dct coefficients to %d..%d\n",
1619                overflow, minlevel, maxlevel);
1620 }
1621
/**
 * Compute a per-pixel weight for an 8x8 block.
 *
 * For every pixel the sum and the sum of squares of its neighbourhood
 * (up to 3x3, cropped at the borders of the 8x8 block) are gathered and
 * the weight is set to 36 * sqrt(count * sqr - sum * sum) / count.
 *
 * @param weight output, 64 entries stored row by row
 * @param ptr    top-left pixel of the 8x8 block
 * @param stride distance in bytes between two lines of ptr
 */
static void get_visual_weight(int16_t *weight, uint8_t *ptr, int stride)
{
    int row, col;

    // FIXME optimize
    for (row = 0; row < 8; row++) {
        const int top    = FFMAX(row - 1, 0);
        const int bottom = FFMIN(8, row + 2);

        for (col = 0; col < 8; col++) {
            const int left  = FFMAX(col - 1, 0);
            const int right = FFMIN(8, col + 2);
            /* number of pixels inside the cropped neighbourhood */
            const int count = (bottom - top) * (right - left);
            int sum = 0;
            int sqr = 0;
            int nx, ny;

            for (ny = top; ny < bottom; ny++) {
                for (nx = left; nx < right; nx++) {
                    const int v = ptr[nx + ny * stride];

                    sum += v;
                    sqr += v * v;
                }
            }
            weight[col + 8 * row] =
                (36 * ff_sqrt(count * sqr - sum * sum)) / count;
        }
    }
}
1645
1646 static av_always_inline void encode_mb_internal(MpegEncContext *s,
1647                                                 int motion_x, int motion_y,
1648                                                 int mb_block_height,
1649                                                 int mb_block_count)
1650 {
1651     int16_t weight[8][64];
1652     int16_t orig[8][64];
1653     const int mb_x = s->mb_x;
1654     const int mb_y = s->mb_y;
1655     int i;
1656     int skip_dct[8];
1657     int dct_offset = s->linesize * 8; // default for progressive frames
1658     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
1659     int wrap_y, wrap_c;
1660
1661     for (i = 0; i < mb_block_count; i++)
1662         skip_dct[i] = s->skipdct;
1663
1664     if (s->adaptive_quant) {
1665         const int last_qp = s->qscale;
1666         const int mb_xy = mb_x + mb_y * s->mb_stride;
1667
1668         s->lambda = s->lambda_table[mb_xy];
1669         update_qscale(s);
1670
1671         if (!(s->mpv_flags & FF_MPV_FLAG_QP_RD)) {
1672             s->qscale = s->current_picture_ptr->qscale_table[mb_xy];
1673             s->dquant = s->qscale - last_qp;
1674
1675             if (s->out_format == FMT_H263) {
1676                 s->dquant = av_clip(s->dquant, -2, 2);
1677
1678                 if (s->codec_id == AV_CODEC_ID_MPEG4) {
1679                     if (!s->mb_intra) {
1680                         if (s->pict_type == AV_PICTURE_TYPE_B) {
1681                             if (s->dquant & 1 || s->mv_dir & MV_DIRECT)
1682                                 s->dquant = 0;
1683                         }
1684                         if (s->mv_type == MV_TYPE_8X8)
1685                             s->dquant = 0;
1686                     }
1687                 }
1688             }
1689         }
1690         ff_set_qscale(s, last_qp + s->dquant);
1691     } else if (s->mpv_flags & FF_MPV_FLAG_QP_RD)
1692         ff_set_qscale(s, s->qscale + s->dquant);
1693
1694     wrap_y = s->linesize;
1695     wrap_c = s->uvlinesize;
1696     ptr_y  = s->new_picture.f.data[0] +
1697              (mb_y * 16 * wrap_y)              + mb_x * 16;
1698     ptr_cb = s->new_picture.f.data[1] +
1699              (mb_y * mb_block_height * wrap_c) + mb_x * 8;
1700     ptr_cr = s->new_picture.f.data[2] +
1701              (mb_y * mb_block_height * wrap_c) + mb_x * 8;
1702
1703     if (mb_x * 16 + 16 > s->width || mb_y * 16 + 16 > s->height) {
1704         uint8_t *ebuf = s->edge_emu_buffer + 32;
1705         s->vdsp.emulated_edge_mc(ebuf, ptr_y, wrap_y, 16, 16, mb_x * 16,
1706                                  mb_y * 16, s->width, s->height);
1707         ptr_y = ebuf;
1708         s->vdsp.emulated_edge_mc(ebuf + 18 * wrap_y, ptr_cb, wrap_c, 8,
1709                                  mb_block_height, mb_x * 8, mb_y * 8,
1710                                  s->width >> 1, s->height >> 1);
1711         ptr_cb = ebuf + 18 * wrap_y;
1712         s->vdsp.emulated_edge_mc(ebuf + 18 * wrap_y + 8, ptr_cr, wrap_c, 8,
1713                                  mb_block_height, mb_x * 8, mb_y * 8,
1714                                  s->width >> 1, s->height >> 1);
1715         ptr_cr = ebuf + 18 * wrap_y + 8;
1716     }
1717
1718     if (s->mb_intra) {
1719         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
1720             int progressive_score, interlaced_score;
1721
1722             s->interlaced_dct = 0;
1723             progressive_score = s->dsp.ildct_cmp[4](s, ptr_y,
1724                                                     NULL, wrap_y, 8) +
1725                                 s->dsp.ildct_cmp[4](s, ptr_y + wrap_y * 8,
1726                                                     NULL, wrap_y, 8) - 400;
1727
1728             if (progressive_score > 0) {
1729                 interlaced_score = s->dsp.ildct_cmp[4](s, ptr_y,
1730                                                        NULL, wrap_y * 2, 8) +
1731                                    s->dsp.ildct_cmp[4](s, ptr_y + wrap_y,
1732                                                        NULL, wrap_y * 2, 8);
1733                 if (progressive_score > interlaced_score) {
1734                     s->interlaced_dct = 1;
1735
1736                     dct_offset = wrap_y;
1737                     wrap_y <<= 1;
1738                     if (s->chroma_format == CHROMA_422)
1739                         wrap_c <<= 1;
1740                 }
1741             }
1742         }
1743
1744         s->dsp.get_pixels(s->block[0], ptr_y                  , wrap_y);
1745         s->dsp.get_pixels(s->block[1], ptr_y              + 8 , wrap_y);
1746         s->dsp.get_pixels(s->block[2], ptr_y + dct_offset     , wrap_y);
1747         s->dsp.get_pixels(s->block[3], ptr_y + dct_offset + 8 , wrap_y);
1748
1749         if (s->flags & CODEC_FLAG_GRAY) {
1750             skip_dct[4] = 1;
1751             skip_dct[5] = 1;
1752         } else {
1753             s->dsp.get_pixels(s->block[4], ptr_cb, wrap_c);
1754             s->dsp.get_pixels(s->block[5], ptr_cr, wrap_c);
1755             if (!s->chroma_y_shift) { /* 422 */
1756                 s->dsp.get_pixels(s->block[6],
1757                                   ptr_cb + (dct_offset >> 1), wrap_c);
1758                 s->dsp.get_pixels(s->block[7],
1759                                   ptr_cr + (dct_offset >> 1), wrap_c);
1760             }
1761         }
1762     } else {
1763         op_pixels_func (*op_pix)[4];
1764         qpel_mc_func (*op_qpix)[16];
1765         uint8_t *dest_y, *dest_cb, *dest_cr;
1766
1767         dest_y  = s->dest[0];
1768         dest_cb = s->dest[1];
1769         dest_cr = s->dest[2];
1770
1771         if ((!s->no_rounding) || s->pict_type == AV_PICTURE_TYPE_B) {
1772             op_pix  = s->dsp.put_pixels_tab;
1773             op_qpix = s->dsp.put_qpel_pixels_tab;
1774         } else {
1775             op_pix  = s->dsp.put_no_rnd_pixels_tab;
1776             op_qpix = s->dsp.put_no_rnd_qpel_pixels_tab;
1777         }
1778
1779         if (s->mv_dir & MV_DIR_FORWARD) {
1780             ff_MPV_motion(s, dest_y, dest_cb, dest_cr, 0,
1781                           s->last_picture.f.data,
1782                           op_pix, op_qpix);
1783             op_pix  = s->dsp.avg_pixels_tab;
1784             op_qpix = s->dsp.avg_qpel_pixels_tab;
1785         }
1786         if (s->mv_dir & MV_DIR_BACKWARD) {
1787             ff_MPV_motion(s, dest_y, dest_cb, dest_cr, 1,
1788                           s->next_picture.f.data,
1789                           op_pix, op_qpix);
1790         }
1791
1792         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
1793             int progressive_score, interlaced_score;
1794
1795             s->interlaced_dct = 0;
1796             progressive_score = s->dsp.ildct_cmp[0](s, dest_y,
1797                                                     ptr_y,              wrap_y,
1798                                                     8) +
1799                                 s->dsp.ildct_cmp[0](s, dest_y + wrap_y * 8,
1800                                                     ptr_y + wrap_y * 8, wrap_y,
1801                                                     8) - 400;
1802
1803             if (s->avctx->ildct_cmp == FF_CMP_VSSE)
1804                 progressive_score -= 400;
1805
1806             if (progressive_score > 0) {
1807                 interlaced_score = s->dsp.ildct_cmp[0](s, dest_y,
1808                                                        ptr_y,
1809                                                        wrap_y * 2, 8) +
1810                                    s->dsp.ildct_cmp[0](s, dest_y + wrap_y,
1811                                                        ptr_y + wrap_y,
1812                                                        wrap_y * 2, 8);
1813
1814                 if (progressive_score > interlaced_score) {
1815                     s->interlaced_dct = 1;
1816
1817                     dct_offset = wrap_y;
1818                     wrap_y <<= 1;
1819                     if (s->chroma_format == CHROMA_422)
1820                         wrap_c <<= 1;
1821                 }
1822             }
1823         }
1824
1825         s->dsp.diff_pixels(s->block[0], ptr_y, dest_y, wrap_y);
1826         s->dsp.diff_pixels(s->block[1], ptr_y + 8, dest_y + 8, wrap_y);
1827         s->dsp.diff_pixels(s->block[2], ptr_y + dct_offset,
1828                            dest_y + dct_offset, wrap_y);
1829         s->dsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8,
1830                            dest_y + dct_offset + 8, wrap_y);
1831
1832         if (s->flags & CODEC_FLAG_GRAY) {
1833             skip_dct[4] = 1;
1834             skip_dct[5] = 1;
1835         } else {
1836             s->dsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
1837             s->dsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
1838             if (!s->chroma_y_shift) { /* 422 */
1839                 s->dsp.diff_pixels(s->block[6], ptr_cb + (dct_offset >> 1),
1840                                    dest_cb + (dct_offset >> 1), wrap_c);
1841                 s->dsp.diff_pixels(s->block[7], ptr_cr + (dct_offset >> 1),
1842                                    dest_cr + (dct_offset >> 1), wrap_c);
1843             }
1844         }
1845         /* pre quantization */
1846         if (s->current_picture.mc_mb_var[s->mb_stride * mb_y + mb_x] <
1847                 2 * s->qscale * s->qscale) {
1848             // FIXME optimize
1849             if (s->dsp.sad[1](NULL, ptr_y , dest_y,
1850                               wrap_y, 8) < 20 * s->qscale)
1851                 skip_dct[0] = 1;
1852             if (s->dsp.sad[1](NULL, ptr_y + 8,
1853                               dest_y + 8, wrap_y, 8) < 20 * s->qscale)
1854                 skip_dct[1] = 1;
1855             if (s->dsp.sad[1](NULL, ptr_y + dct_offset,
1856                               dest_y + dct_offset, wrap_y, 8) < 20 * s->qscale)
1857                 skip_dct[2] = 1;
1858             if (s->dsp.sad[1](NULL, ptr_y + dct_offset + 8,
1859                               dest_y + dct_offset + 8,
1860                               wrap_y, 8) < 20 * s->qscale)
1861                 skip_dct[3] = 1;
1862             if (s->dsp.sad[1](NULL, ptr_cb, dest_cb,
1863                               wrap_c, 8) < 20 * s->qscale)
1864                 skip_dct[4] = 1;
1865             if (s->dsp.sad[1](NULL, ptr_cr, dest_cr,
1866                               wrap_c, 8) < 20 * s->qscale)
1867                 skip_dct[5] = 1;
1868             if (!s->chroma_y_shift) { /* 422 */
1869                 if (s->dsp.sad[1](NULL, ptr_cb + (dct_offset >> 1),
1870                                   dest_cb + (dct_offset >> 1),
1871                                   wrap_c, 8) < 20 * s->qscale)
1872                     skip_dct[6] = 1;
1873                 if (s->dsp.sad[1](NULL, ptr_cr + (dct_offset >> 1),
1874                                   dest_cr + (dct_offset >> 1),
1875                                   wrap_c, 8) < 20 * s->qscale)
1876                     skip_dct[7] = 1;
1877             }
1878         }
1879     }
1880
1881     if (s->quantizer_noise_shaping) {
1882         if (!skip_dct[0])
1883             get_visual_weight(weight[0], ptr_y                 , wrap_y);
1884         if (!skip_dct[1])
1885             get_visual_weight(weight[1], ptr_y              + 8, wrap_y);
1886         if (!skip_dct[2])
1887             get_visual_weight(weight[2], ptr_y + dct_offset    , wrap_y);
1888         if (!skip_dct[3])
1889             get_visual_weight(weight[3], ptr_y + dct_offset + 8, wrap_y);
1890         if (!skip_dct[4])
1891             get_visual_weight(weight[4], ptr_cb                , wrap_c);
1892         if (!skip_dct[5])
1893             get_visual_weight(weight[5], ptr_cr                , wrap_c);
1894         if (!s->chroma_y_shift) { /* 422 */
1895             if (!skip_dct[6])
1896                 get_visual_weight(weight[6], ptr_cb + (dct_offset >> 1),
1897                                   wrap_c);
1898             if (!skip_dct[7])
1899                 get_visual_weight(weight[7], ptr_cr + (dct_offset >> 1),
1900                                   wrap_c);
1901         }
1902         memcpy(orig[0], s->block[0], sizeof(int16_t) * 64 * mb_block_count);
1903     }
1904
1905     /* DCT & quantize */
1906     assert(s->out_format != FMT_MJPEG || s->qscale == 8);
1907     {
1908         for (i = 0; i < mb_block_count; i++) {
1909             if (!skip_dct[i]) {
1910                 int overflow;
1911                 s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow);
1912                 // FIXME we could decide to change to quantizer instead of
1913                 // clipping
1914                 // JS: I don't think that would be a good idea it could lower
1915                 //     quality instead of improve it. Just INTRADC clipping
1916                 //     deserves changes in quantizer
1917                 if (overflow)
1918                     clip_coeffs(s, s->block[i], s->block_last_index[i]);
1919             } else
1920                 s->block_last_index[i] = -1;
1921         }
1922         if (s->quantizer_noise_shaping) {
1923             for (i = 0; i < mb_block_count; i++) {
1924                 if (!skip_dct[i]) {
1925                     s->block_last_index[i] =
1926                         dct_quantize_refine(s, s->block[i], weight[i],
1927                                             orig[i], i, s->qscale);
1928                 }
1929             }
1930         }
1931
1932         if (s->luma_elim_threshold && !s->mb_intra)
1933             for (i = 0; i < 4; i++)
1934                 dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
1935         if (s->chroma_elim_threshold && !s->mb_intra)
1936             for (i = 4; i < mb_block_count; i++)
1937                 dct_single_coeff_elimination(s, i, s->chroma_elim_threshold);
1938
1939         if (s->mpv_flags & FF_MPV_FLAG_CBP_RD) {
1940             for (i = 0; i < mb_block_count; i++) {
1941                 if (s->block_last_index[i] == -1)
1942                     s->coded_score[i] = INT_MAX / 256;
1943             }
1944         }
1945     }
1946
1947     if ((s->flags & CODEC_FLAG_GRAY) && s->mb_intra) {
1948         s->block_last_index[4] =
1949         s->block_last_index[5] = 0;
1950         s->block[4][0] =
1951         s->block[5][0] = (1024 + s->c_dc_scale / 2) / s->c_dc_scale;
1952     }
1953
1954     // non c quantize code returns incorrect block_last_index FIXME
1955     if (s->alternate_scan && s->dct_quantize != ff_dct_quantize_c) {
1956         for (i = 0; i < mb_block_count; i++) {
1957             int j;
1958             if (s->block_last_index[i] > 0) {
1959                 for (j = 63; j > 0; j--) {
1960                     if (s->block[i][s->intra_scantable.permutated[j]])
1961                         break;
1962                 }
1963                 s->block_last_index[i] = j;
1964             }
1965         }
1966     }
1967
1968     /* huffman encode */
1969     switch(s->codec_id){ //FIXME funct ptr could be slightly faster
1970     case AV_CODEC_ID_MPEG1VIDEO:
1971     case AV_CODEC_ID_MPEG2VIDEO:
1972         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
1973             ff_mpeg1_encode_mb(s, s->block, motion_x, motion_y);
1974         break;
1975     case AV_CODEC_ID_MPEG4:
1976         if (CONFIG_MPEG4_ENCODER)
1977             ff_mpeg4_encode_mb(s, s->block, motion_x, motion_y);
1978         break;
1979     case AV_CODEC_ID_MSMPEG4V2:
1980     case AV_CODEC_ID_MSMPEG4V3:
1981     case AV_CODEC_ID_WMV1:
1982         if (CONFIG_MSMPEG4_ENCODER)
1983             ff_msmpeg4_encode_mb(s, s->block, motion_x, motion_y);
1984         break;
1985     case AV_CODEC_ID_WMV2:
1986         if (CONFIG_WMV2_ENCODER)
1987             ff_wmv2_encode_mb(s, s->block, motion_x, motion_y);
1988         break;
1989     case AV_CODEC_ID_H261:
1990         if (CONFIG_H261_ENCODER)
1991             ff_h261_encode_mb(s, s->block, motion_x, motion_y);
1992         break;
1993     case AV_CODEC_ID_H263:
1994     case AV_CODEC_ID_H263P:
1995     case AV_CODEC_ID_FLV1:
1996     case AV_CODEC_ID_RV10:
1997     case AV_CODEC_ID_RV20:
1998         if (CONFIG_H263_ENCODER)
1999             ff_h263_encode_mb(s, s->block, motion_x, motion_y);
2000         break;
2001     case AV_CODEC_ID_MJPEG:
2002         if (CONFIG_MJPEG_ENCODER)
2003             ff_mjpeg_encode_mb(s, s->block);
2004         break;
2005     default:
2006         assert(0);
2007     }
2008 }
2009
2010 static av_always_inline void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
2011 {
2012     if (s->chroma_format == CHROMA_420) encode_mb_internal(s, motion_x, motion_y,  8, 6);
2013     else                                encode_mb_internal(s, motion_x, motion_y, 16, 8);
2014 }
2015
2016 static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int type){
2017     int i;
2018
2019     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2020
2021     /* mpeg1 */
2022     d->mb_skip_run= s->mb_skip_run;
2023     for(i=0; i<3; i++)
2024         d->last_dc[i] = s->last_dc[i];
2025
2026     /* statistics */
2027     d->mv_bits= s->mv_bits;
2028     d->i_tex_bits= s->i_tex_bits;
2029     d->p_tex_bits= s->p_tex_bits;
2030     d->i_count= s->i_count;
2031     d->f_count= s->f_count;
2032     d->b_count= s->b_count;
2033     d->skip_count= s->skip_count;
2034     d->misc_bits= s->misc_bits;
2035     d->last_bits= 0;
2036
2037     d->mb_skipped= 0;
2038     d->qscale= s->qscale;
2039     d->dquant= s->dquant;
2040
2041     d->esc3_level_length= s->esc3_level_length;
2042 }
2043
2044 static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){
2045     int i;
2046
2047     memcpy(d->mv, s->mv, 2*4*2*sizeof(int));
2048     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2049
2050     /* mpeg1 */
2051     d->mb_skip_run= s->mb_skip_run;
2052     for(i=0; i<3; i++)
2053         d->last_dc[i] = s->last_dc[i];
2054
2055     /* statistics */
2056     d->mv_bits= s->mv_bits;
2057     d->i_tex_bits= s->i_tex_bits;
2058     d->p_tex_bits= s->p_tex_bits;
2059     d->i_count= s->i_count;
2060     d->f_count= s->f_count;
2061     d->b_count= s->b_count;
2062     d->skip_count= s->skip_count;
2063     d->misc_bits= s->misc_bits;
2064
2065     d->mb_intra= s->mb_intra;
2066     d->mb_skipped= s->mb_skipped;
2067     d->mv_type= s->mv_type;
2068     d->mv_dir= s->mv_dir;
2069     d->pb= s->pb;
2070     if(s->data_partitioning){
2071         d->pb2= s->pb2;
2072         d->tex_pb= s->tex_pb;
2073     }
2074     d->block= s->block;
2075     for(i=0; i<8; i++)
2076         d->block_last_index[i]= s->block_last_index[i];
2077     d->interlaced_dct= s->interlaced_dct;
2078     d->qscale= s->qscale;
2079
2080     d->esc3_level_length= s->esc3_level_length;
2081 }
2082
2083 static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type,
2084                            PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2],
2085                            int *dmin, int *next_block, int motion_x, int motion_y)
2086 {
2087     int score;
2088     uint8_t *dest_backup[3];
2089
2090     copy_context_before_encode(s, backup, type);
2091
2092     s->block= s->blocks[*next_block];
2093     s->pb= pb[*next_block];
2094     if(s->data_partitioning){
2095         s->pb2   = pb2   [*next_block];
2096         s->tex_pb= tex_pb[*next_block];
2097     }
2098
2099     if(*next_block){
2100         memcpy(dest_backup, s->dest, sizeof(s->dest));
2101         s->dest[0] = s->rd_scratchpad;
2102         s->dest[1] = s->rd_scratchpad + 16*s->linesize;
2103         s->dest[2] = s->rd_scratchpad + 16*s->linesize + 8;
2104         assert(s->linesize >= 32); //FIXME
2105     }
2106
2107     encode_mb(s, motion_x, motion_y);
2108
2109     score= put_bits_count(&s->pb);
2110     if(s->data_partitioning){
2111         score+= put_bits_count(&s->pb2);
2112         score+= put_bits_count(&s->tex_pb);
2113     }
2114
2115     if(s->avctx->mb_decision == FF_MB_DECISION_RD){
2116         ff_MPV_decode_mb(s, s->block);
2117
2118         score *= s->lambda2;
2119         score += sse_mb(s) << FF_LAMBDA_SHIFT;
2120     }
2121
2122     if(*next_block){
2123         memcpy(s->dest, dest_backup, sizeof(s->dest));
2124     }
2125
2126     if(score<*dmin){
2127         *dmin= score;
2128         *next_block^=1;
2129
2130         copy_context_after_encode(best, s, type);
2131     }
2132 }
2133
2134 static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride){
2135     uint32_t *sq = ff_squareTbl + 256;
2136     int acc=0;
2137     int x,y;
2138
2139     if(w==16 && h==16)
2140         return s->dsp.sse[0](NULL, src1, src2, stride, 16);
2141     else if(w==8 && h==8)
2142         return s->dsp.sse[1](NULL, src1, src2, stride, 8);
2143
2144     for(y=0; y<h; y++){
2145         for(x=0; x<w; x++){
2146             acc+= sq[src1[x + y*stride] - src2[x + y*stride]];
2147         }
2148     }
2149
2150     assert(acc>=0);
2151
2152     return acc;
2153 }
2154
2155 static int sse_mb(MpegEncContext *s){
2156     int w= 16;
2157     int h= 16;
2158
2159     if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
2160     if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
2161
2162     if(w==16 && h==16)
2163       if(s->avctx->mb_cmp == FF_CMP_NSSE){
2164         return  s->dsp.nsse[0](s, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
2165                +s->dsp.nsse[1](s, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
2166                +s->dsp.nsse[1](s, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
2167       }else{
2168         return  s->dsp.sse[0](NULL, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
2169                +s->dsp.sse[1](NULL, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
2170                +s->dsp.sse[1](NULL, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
2171       }
2172     else
2173         return  sse(s, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], w, h, s->linesize)
2174                +sse(s, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize)
2175                +sse(s, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize);
2176 }
2177
2178 static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){
2179     MpegEncContext *s= *(void**)arg;
2180
2181
2182     s->me.pre_pass=1;
2183     s->me.dia_size= s->avctx->pre_dia_size;
2184     s->first_slice_line=1;
2185     for(s->mb_y= s->end_mb_y-1; s->mb_y >= s->start_mb_y; s->mb_y--) {
2186         for(s->mb_x=s->mb_width-1; s->mb_x >=0 ;s->mb_x--) {
2187             ff_pre_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2188         }
2189         s->first_slice_line=0;
2190     }
2191
2192     s->me.pre_pass=0;
2193
2194     return 0;
2195 }
2196
2197 static int estimate_motion_thread(AVCodecContext *c, void *arg){
2198     MpegEncContext *s= *(void**)arg;
2199
2200     ff_check_alignment();
2201
2202     s->me.dia_size= s->avctx->dia_size;
2203     s->first_slice_line=1;
2204     for(s->mb_y= s->start_mb_y; s->mb_y < s->end_mb_y; s->mb_y++) {
2205         s->mb_x=0; //for block init below
2206         ff_init_block_index(s);
2207         for(s->mb_x=0; s->mb_x < s->mb_width; s->mb_x++) {
2208             s->block_index[0]+=2;
2209             s->block_index[1]+=2;
2210             s->block_index[2]+=2;
2211             s->block_index[3]+=2;
2212
2213             /* compute motion vector & mb_type and store in context */
2214             if(s->pict_type==AV_PICTURE_TYPE_B)
2215                 ff_estimate_b_frame_motion(s, s->mb_x, s->mb_y);
2216             else
2217                 ff_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2218         }
2219         s->first_slice_line=0;
2220     }
2221     return 0;
2222 }
2223
2224 static int mb_var_thread(AVCodecContext *c, void *arg){
2225     MpegEncContext *s= *(void**)arg;
2226     int mb_x, mb_y;
2227
2228     ff_check_alignment();
2229
2230     for(mb_y=s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2231         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2232             int xx = mb_x * 16;
2233             int yy = mb_y * 16;
2234             uint8_t *pix = s->new_picture.f.data[0] + (yy * s->linesize) + xx;
2235             int varc;
2236             int sum = s->dsp.pix_sum(pix, s->linesize);
2237
2238             varc = (s->dsp.pix_norm1(pix, s->linesize) - (((unsigned)sum*sum)>>8) + 500 + 128)>>8;
2239
2240             s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc;
2241             s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
2242             s->me.mb_var_sum_temp    += varc;
2243         }
2244     }
2245     return 0;
2246 }
2247
2248 static void write_slice_end(MpegEncContext *s){
2249     if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4){
2250         if(s->partitioned_frame){
2251             ff_mpeg4_merge_partitions(s);
2252         }
2253
2254         ff_mpeg4_stuffing(&s->pb);
2255     }else if(CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG){
2256         ff_mjpeg_encode_stuffing(&s->pb);
2257     }
2258
2259     avpriv_align_put_bits(&s->pb);
2260     flush_put_bits(&s->pb);
2261
2262     if((s->flags&CODEC_FLAG_PASS1) && !s->partitioned_frame)
2263         s->misc_bits+= get_bits_diff(s);
2264 }
2265
2266 static void write_mb_info(MpegEncContext *s)
2267 {
2268     uint8_t *ptr = s->mb_info_ptr + s->mb_info_size - 12;
2269     int offset = put_bits_count(&s->pb);
2270     int mba  = s->mb_x + s->mb_width * (s->mb_y % s->gob_index);
2271     int gobn = s->mb_y / s->gob_index;
2272     int pred_x, pred_y;
2273     if (CONFIG_H263_ENCODER)
2274         ff_h263_pred_motion(s, 0, 0, &pred_x, &pred_y);
2275     bytestream_put_le32(&ptr, offset);
2276     bytestream_put_byte(&ptr, s->qscale);
2277     bytestream_put_byte(&ptr, gobn);
2278     bytestream_put_le16(&ptr, mba);
2279     bytestream_put_byte(&ptr, pred_x); /* hmv1 */
2280     bytestream_put_byte(&ptr, pred_y); /* vmv1 */
2281     /* 4MV not implemented */
2282     bytestream_put_byte(&ptr, 0); /* hmv2 */
2283     bytestream_put_byte(&ptr, 0); /* vmv2 */
2284 }
2285
2286 static void update_mb_info(MpegEncContext *s, int startcode)
2287 {
2288     if (!s->mb_info)
2289         return;
2290     if (put_bits_count(&s->pb) - s->prev_mb_info*8 >= s->mb_info*8) {
2291         s->mb_info_size += 12;
2292         s->prev_mb_info = s->last_mb_info;
2293     }
2294     if (startcode) {
2295         s->prev_mb_info = put_bits_count(&s->pb)/8;
2296         /* This might have incremented mb_info_size above, and we return without
2297          * actually writing any info into that slot yet. But in that case,
2298          * this will be called again at the start of the after writing the
2299          * start code, actually writing the mb info. */
2300         return;
2301     }
2302
2303     s->last_mb_info = put_bits_count(&s->pb)/8;
2304     if (!s->mb_info_size)
2305         s->mb_info_size += 12;
2306     write_mb_info(s);
2307 }
2308
2309 static int encode_thread(AVCodecContext *c, void *arg){
2310     MpegEncContext *s= *(void**)arg;
2311     int mb_x, mb_y, pdif = 0;
2312     int chr_h= 16>>s->chroma_y_shift;
2313     int i, j;
2314     MpegEncContext best_s, backup_s;
2315     uint8_t bit_buf[2][MAX_MB_BYTES];
2316     uint8_t bit_buf2[2][MAX_MB_BYTES];
2317     uint8_t bit_buf_tex[2][MAX_MB_BYTES];
2318     PutBitContext pb[2], pb2[2], tex_pb[2];
2319
2320     ff_check_alignment();
2321
2322     for(i=0; i<2; i++){
2323         init_put_bits(&pb    [i], bit_buf    [i], MAX_MB_BYTES);
2324         init_put_bits(&pb2   [i], bit_buf2   [i], MAX_MB_BYTES);
2325         init_put_bits(&tex_pb[i], bit_buf_tex[i], MAX_MB_BYTES);
2326     }
2327
2328     s->last_bits= put_bits_count(&s->pb);
2329     s->mv_bits=0;
2330     s->misc_bits=0;
2331     s->i_tex_bits=0;
2332     s->p_tex_bits=0;
2333     s->i_count=0;
2334     s->f_count=0;
2335     s->b_count=0;
2336     s->skip_count=0;
2337
2338     for(i=0; i<3; i++){
2339         /* init last dc values */
2340         /* note: quant matrix value (8) is implied here */
2341         s->last_dc[i] = 128 << s->intra_dc_precision;
2342
2343         s->current_picture.f.error[i] = 0;
2344     }
2345     s->mb_skip_run = 0;
2346     memset(s->last_mv, 0, sizeof(s->last_mv));
2347
2348     s->last_mv_dir = 0;
2349
2350     switch(s->codec_id){
2351     case AV_CODEC_ID_H263:
2352     case AV_CODEC_ID_H263P:
2353     case AV_CODEC_ID_FLV1:
2354         if (CONFIG_H263_ENCODER)
2355             s->gob_index = ff_h263_get_gob_height(s);
2356         break;
2357     case AV_CODEC_ID_MPEG4:
2358         if(CONFIG_MPEG4_ENCODER && s->partitioned_frame)
2359             ff_mpeg4_init_partitions(s);
2360         break;
2361     }
2362
2363     s->resync_mb_x=0;
2364     s->resync_mb_y=0;
2365     s->first_slice_line = 1;
2366     s->ptr_lastgob = s->pb.buf;
2367     for(mb_y= s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2368         s->mb_x=0;
2369         s->mb_y= mb_y;
2370
2371         ff_set_qscale(s, s->qscale);
2372         ff_init_block_index(s);
2373
2374         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2375             int xy= mb_y*s->mb_stride + mb_x; // removed const, H261 needs to adjust this
2376             int mb_type= s->mb_type[xy];
2377 //            int d;
2378             int dmin= INT_MAX;
2379             int dir;
2380
2381             if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < MAX_MB_BYTES){
2382                 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2383                 return -1;
2384             }
2385             if(s->data_partitioning){
2386                 if(   s->pb2   .buf_end - s->pb2   .buf - (put_bits_count(&s->    pb2)>>3) < MAX_MB_BYTES
2387                    || s->tex_pb.buf_end - s->tex_pb.buf - (put_bits_count(&s->tex_pb )>>3) < MAX_MB_BYTES){
2388                     av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2389                     return -1;
2390                 }
2391             }
2392
2393             s->mb_x = mb_x;
2394             s->mb_y = mb_y;  // moved into loop, can get changed by H.261
2395             ff_update_block_index(s);
2396
2397             if(CONFIG_H261_ENCODER && s->codec_id == AV_CODEC_ID_H261){
2398                 ff_h261_reorder_mb_index(s);
2399                 xy= s->mb_y*s->mb_stride + s->mb_x;
2400                 mb_type= s->mb_type[xy];
2401             }
2402
2403             /* write gob / video packet header  */
2404             if(s->rtp_mode){
2405                 int current_packet_size, is_gob_start;
2406
2407                 current_packet_size= ((put_bits_count(&s->pb)+7)>>3) - (s->ptr_lastgob - s->pb.buf);
2408
2409                 is_gob_start= s->avctx->rtp_payload_size && current_packet_size >= s->avctx->rtp_payload_size && mb_y + mb_x>0;
2410
2411                 if(s->start_mb_y == mb_y && mb_y > 0 && mb_x==0) is_gob_start=1;
2412
2413                 switch(s->codec_id){
2414                 case AV_CODEC_ID_H263:
2415                 case AV_CODEC_ID_H263P:
2416                     if(!s->h263_slice_structured)
2417                         if(s->mb_x || s->mb_y%s->gob_index) is_gob_start=0;
2418                     break;
2419                 case AV_CODEC_ID_MPEG2VIDEO:
2420                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
2421                 case AV_CODEC_ID_MPEG1VIDEO:
2422                     if(s->mb_skip_run) is_gob_start=0;
2423                     break;
2424                 }
2425
2426                 if(is_gob_start){
2427                     if(s->start_mb_y != mb_y || mb_x!=0){
2428                         write_slice_end(s);
2429
2430                         if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4 && s->partitioned_frame){
2431                             ff_mpeg4_init_partitions(s);
2432                         }
2433                     }
2434
2435                     assert((put_bits_count(&s->pb)&7) == 0);
2436                     current_packet_size= put_bits_ptr(&s->pb) - s->ptr_lastgob;
2437
2438                     if(s->avctx->error_rate && s->resync_mb_x + s->resync_mb_y > 0){
2439                         int r= put_bits_count(&s->pb)/8 + s->picture_number + 16 + s->mb_x + s->mb_y;
2440                         int d= 100 / s->avctx->error_rate;
2441                         if(r % d == 0){
2442                             current_packet_size=0;
2443                             s->pb.buf_ptr= s->ptr_lastgob;
2444                             assert(put_bits_ptr(&s->pb) == s->ptr_lastgob);
2445                         }
2446                     }
2447
2448                     if (s->avctx->rtp_callback){
2449                         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width + mb_x - s->resync_mb_x;
2450                         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, current_packet_size, number_mb);
2451                     }
2452                     update_mb_info(s, 1);
2453
2454                     switch(s->codec_id){
2455                     case AV_CODEC_ID_MPEG4:
2456                         if (CONFIG_MPEG4_ENCODER) {
2457                             ff_mpeg4_encode_video_packet_header(s);
2458                             ff_mpeg4_clean_buffers(s);
2459                         }
2460                     break;
2461                     case AV_CODEC_ID_MPEG1VIDEO:
2462                     case AV_CODEC_ID_MPEG2VIDEO:
2463                         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER) {
2464                             ff_mpeg1_encode_slice_header(s);
2465                             ff_mpeg1_clean_buffers(s);
2466                         }
2467                     break;
2468                     case AV_CODEC_ID_H263:
2469                     case AV_CODEC_ID_H263P:
2470                         if (CONFIG_H263_ENCODER)
2471                             ff_h263_encode_gob_header(s, mb_y);
2472                     break;
2473                     }
2474
2475                     if(s->flags&CODEC_FLAG_PASS1){
2476                         int bits= put_bits_count(&s->pb);
2477                         s->misc_bits+= bits - s->last_bits;
2478                         s->last_bits= bits;
2479                     }
2480
2481                     s->ptr_lastgob += current_packet_size;
2482                     s->first_slice_line=1;
2483                     s->resync_mb_x=mb_x;
2484                     s->resync_mb_y=mb_y;
2485                 }
2486             }
2487
2488             if(  (s->resync_mb_x   == s->mb_x)
2489                && s->resync_mb_y+1 == s->mb_y){
2490                 s->first_slice_line=0;
2491             }
2492
2493             s->mb_skipped=0;
2494             s->dquant=0; //only for QP_RD
2495
2496             update_mb_info(s, 0);
2497
2498             if (mb_type & (mb_type-1) || (s->mpv_flags & FF_MPV_FLAG_QP_RD)) { // more than 1 MB type possible or FF_MPV_FLAG_QP_RD
2499                 int next_block=0;
2500                 int pb_bits_count, pb2_bits_count, tex_pb_bits_count;
2501
2502                 copy_context_before_encode(&backup_s, s, -1);
2503                 backup_s.pb= s->pb;
2504                 best_s.data_partitioning= s->data_partitioning;
2505                 best_s.partitioned_frame= s->partitioned_frame;
2506                 if(s->data_partitioning){
2507                     backup_s.pb2= s->pb2;
2508                     backup_s.tex_pb= s->tex_pb;
2509                 }
2510
2511                 if(mb_type&CANDIDATE_MB_TYPE_INTER){
2512                     s->mv_dir = MV_DIR_FORWARD;
2513                     s->mv_type = MV_TYPE_16X16;
2514                     s->mb_intra= 0;
2515                     s->mv[0][0][0] = s->p_mv_table[xy][0];
2516                     s->mv[0][0][1] = s->p_mv_table[xy][1];
2517                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER, pb, pb2, tex_pb,
2518                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2519                 }
2520                 if(mb_type&CANDIDATE_MB_TYPE_INTER_I){
2521                     s->mv_dir = MV_DIR_FORWARD;
2522                     s->mv_type = MV_TYPE_FIELD;
2523                     s->mb_intra= 0;
2524                     for(i=0; i<2; i++){
2525                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
2526                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
2527                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
2528                     }
2529                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER_I, pb, pb2, tex_pb,
2530                                  &dmin, &next_block, 0, 0);
2531                 }
2532                 if(mb_type&CANDIDATE_MB_TYPE_SKIPPED){
2533                     s->mv_dir = MV_DIR_FORWARD;
2534                     s->mv_type = MV_TYPE_16X16;
2535                     s->mb_intra= 0;
2536                     s->mv[0][0][0] = 0;
2537                     s->mv[0][0][1] = 0;
2538                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_SKIPPED, pb, pb2, tex_pb,
2539                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2540                 }
2541                 if(mb_type&CANDIDATE_MB_TYPE_INTER4V){
2542                     s->mv_dir = MV_DIR_FORWARD;
2543                     s->mv_type = MV_TYPE_8X8;
2544                     s->mb_intra= 0;
2545                     for(i=0; i<4; i++){
2546                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
2547                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
2548                     }
2549                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER4V, pb, pb2, tex_pb,
2550                                  &dmin, &next_block, 0, 0);
2551                 }
2552                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD){
2553                     s->mv_dir = MV_DIR_FORWARD;
2554                     s->mv_type = MV_TYPE_16X16;
2555                     s->mb_intra= 0;
2556                     s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
2557                     s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
2558                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD, pb, pb2, tex_pb,
2559                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2560                 }
2561                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD){
2562                     s->mv_dir = MV_DIR_BACKWARD;
2563                     s->mv_type = MV_TYPE_16X16;
2564                     s->mb_intra= 0;
2565                     s->mv[1][0][0] = s->b_back_mv_table[xy][0];
2566                     s->mv[1][0][1] = s->b_back_mv_table[xy][1];
2567                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD, pb, pb2, tex_pb,
2568                                  &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]);
2569                 }
2570                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR){
2571                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2572                     s->mv_type = MV_TYPE_16X16;
2573                     s->mb_intra= 0;
2574                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
2575                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
2576                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
2577                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
2578                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR, pb, pb2, tex_pb,
2579                                  &dmin, &next_block, 0, 0);
2580                 }
2581                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD_I){
2582                     s->mv_dir = MV_DIR_FORWARD;
2583                     s->mv_type = MV_TYPE_FIELD;
2584                     s->mb_intra= 0;
2585                     for(i=0; i<2; i++){
2586                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
2587                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
2588                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
2589                     }
2590                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD_I, pb, pb2, tex_pb,
2591                                  &dmin, &next_block, 0, 0);
2592                 }
2593                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD_I){
2594                     s->mv_dir = MV_DIR_BACKWARD;
2595                     s->mv_type = MV_TYPE_FIELD;
2596                     s->mb_intra= 0;
2597                     for(i=0; i<2; i++){
2598                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
2599                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
2600                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
2601                     }
2602                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD_I, pb, pb2, tex_pb,
2603                                  &dmin, &next_block, 0, 0);
2604                 }
2605                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR_I){
2606                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2607                     s->mv_type = MV_TYPE_FIELD;
2608                     s->mb_intra= 0;
2609                     for(dir=0; dir<2; dir++){
2610                         for(i=0; i<2; i++){
2611                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
2612                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
2613                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
2614                         }
2615                     }
2616                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR_I, pb, pb2, tex_pb,
2617                                  &dmin, &next_block, 0, 0);
2618                 }
2619                 if(mb_type&CANDIDATE_MB_TYPE_INTRA){
2620                     s->mv_dir = 0;
2621                     s->mv_type = MV_TYPE_16X16;
2622                     s->mb_intra= 1;
2623                     s->mv[0][0][0] = 0;
2624                     s->mv[0][0][1] = 0;
2625                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTRA, pb, pb2, tex_pb,
2626                                  &dmin, &next_block, 0, 0);
2627                     if(s->h263_pred || s->h263_aic){
2628                         if(best_s.mb_intra)
2629                             s->mbintra_table[mb_x + mb_y*s->mb_stride]=1;
2630                         else
2631                             ff_clean_intra_table_entries(s); //old mode?
2632                     }
2633                 }
2634
2635                 if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) && dmin < INT_MAX) {
2636                     if(best_s.mv_type==MV_TYPE_16X16){ //FIXME move 4mv after QPRD
2637                         const int last_qp= backup_s.qscale;
2638                         int qpi, qp, dc[6];
2639                         int16_t ac[6][16];
2640                         const int mvdir= (best_s.mv_dir&MV_DIR_BACKWARD) ? 1 : 0;
2641                         static const int dquant_tab[4]={-1,1,-2,2};
2642
2643                         assert(backup_s.dquant == 0);
2644
2645                         //FIXME intra
2646                         s->mv_dir= best_s.mv_dir;
2647                         s->mv_type = MV_TYPE_16X16;
2648                         s->mb_intra= best_s.mb_intra;
2649                         s->mv[0][0][0] = best_s.mv[0][0][0];
2650                         s->mv[0][0][1] = best_s.mv[0][0][1];
2651                         s->mv[1][0][0] = best_s.mv[1][0][0];
2652                         s->mv[1][0][1] = best_s.mv[1][0][1];
2653
2654                         qpi = s->pict_type == AV_PICTURE_TYPE_B ? 2 : 0;
2655                         for(; qpi<4; qpi++){
2656                             int dquant= dquant_tab[qpi];
2657                             qp= last_qp + dquant;
2658                             if(qp < s->avctx->qmin || qp > s->avctx->qmax)
2659                                 continue;
2660                             backup_s.dquant= dquant;
2661                             if(s->mb_intra && s->dc_val[0]){
2662                                 for(i=0; i<6; i++){
2663                                     dc[i]= s->dc_val[0][ s->block_index[i] ];
2664                                     memcpy(ac[i], s->ac_val[0][s->block_index[i]], sizeof(int16_t)*16);
2665                                 }
2666                             }
2667
2668                             encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
2669                                          &dmin, &next_block, s->mv[mvdir][0][0], s->mv[mvdir][0][1]);
2670                             if(best_s.qscale != qp){
2671                                 if(s->mb_intra && s->dc_val[0]){
2672                                     for(i=0; i<6; i++){
2673                                         s->dc_val[0][ s->block_index[i] ]= dc[i];
2674                                         memcpy(s->ac_val[0][s->block_index[i]], ac[i], sizeof(int16_t)*16);
2675                                     }
2676                                 }
2677                             }
2678                         }
2679                     }
2680                 }
2681                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT){
2682                     int mx= s->b_direct_mv_table[xy][0];
2683                     int my= s->b_direct_mv_table[xy][1];
2684
2685                     backup_s.dquant = 0;
2686                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
2687                     s->mb_intra= 0;
2688                     ff_mpeg4_set_direct_mv(s, mx, my);
2689                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
2690                                  &dmin, &next_block, mx, my);
2691                 }
2692                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT0){
2693                     backup_s.dquant = 0;
2694                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
2695                     s->mb_intra= 0;
2696                     ff_mpeg4_set_direct_mv(s, 0, 0);
2697                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
2698                                  &dmin, &next_block, 0, 0);
2699                 }
2700                 if (!best_s.mb_intra && s->mpv_flags & FF_MPV_FLAG_SKIP_RD) {
2701                     int coded=0;
2702                     for(i=0; i<6; i++)
2703                         coded |= s->block_last_index[i];
2704                     if(coded){
2705                         int mx,my;
2706                         memcpy(s->mv, best_s.mv, sizeof(s->mv));
2707                         if(CONFIG_MPEG4_ENCODER && best_s.mv_dir & MV_DIRECT){
2708                             mx=my=0; //FIXME find the one we actually used
2709                             ff_mpeg4_set_direct_mv(s, mx, my);
2710                         }else if(best_s.mv_dir&MV_DIR_BACKWARD){
2711                             mx= s->mv[1][0][0];
2712                             my= s->mv[1][0][1];
2713                         }else{
2714                             mx= s->mv[0][0][0];
2715                             my= s->mv[0][0][1];
2716                         }
2717
2718                         s->mv_dir= best_s.mv_dir;
2719                         s->mv_type = best_s.mv_type;
2720                         s->mb_intra= 0;
2721 /*                        s->mv[0][0][0] = best_s.mv[0][0][0];
2722                         s->mv[0][0][1] = best_s.mv[0][0][1];
2723                         s->mv[1][0][0] = best_s.mv[1][0][0];
2724                         s->mv[1][0][1] = best_s.mv[1][0][1];*/
2725                         backup_s.dquant= 0;
2726                         s->skipdct=1;
2727                         encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
2728                                         &dmin, &next_block, mx, my);
2729                         s->skipdct=0;
2730                     }
2731                 }
2732
2733                 s->current_picture.qscale_table[xy] = best_s.qscale;
2734
2735                 copy_context_after_encode(s, &best_s, -1);
2736
2737                 pb_bits_count= put_bits_count(&s->pb);
2738                 flush_put_bits(&s->pb);
2739                 avpriv_copy_bits(&backup_s.pb, bit_buf[next_block^1], pb_bits_count);
2740                 s->pb= backup_s.pb;
2741
2742                 if(s->data_partitioning){
2743                     pb2_bits_count= put_bits_count(&s->pb2);
2744                     flush_put_bits(&s->pb2);
2745                     avpriv_copy_bits(&backup_s.pb2, bit_buf2[next_block^1], pb2_bits_count);
2746                     s->pb2= backup_s.pb2;
2747
2748                     tex_pb_bits_count= put_bits_count(&s->tex_pb);
2749                     flush_put_bits(&s->tex_pb);
2750                     avpriv_copy_bits(&backup_s.tex_pb, bit_buf_tex[next_block^1], tex_pb_bits_count);
2751                     s->tex_pb= backup_s.tex_pb;
2752                 }
2753                 s->last_bits= put_bits_count(&s->pb);
2754
2755                 if (CONFIG_H263_ENCODER &&
2756                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
2757                     ff_h263_update_motion_val(s);
2758
2759                 if(next_block==0){ //FIXME 16 vs linesize16
2760                     s->dsp.put_pixels_tab[0][0](s->dest[0], s->rd_scratchpad                     , s->linesize  ,16);
2761                     s->dsp.put_pixels_tab[1][0](s->dest[1], s->rd_scratchpad + 16*s->linesize    , s->uvlinesize, 8);
2762                     s->dsp.put_pixels_tab[1][0](s->dest[2], s->rd_scratchpad + 16*s->linesize + 8, s->uvlinesize, 8);
2763                 }
2764
2765                 if(s->avctx->mb_decision == FF_MB_DECISION_BITS)
2766                     ff_MPV_decode_mb(s, s->block);
2767             } else {
2768                 int motion_x = 0, motion_y = 0;
2769                 s->mv_type=MV_TYPE_16X16;
2770                 // only one MB-Type possible
2771
2772                 switch(mb_type){
2773                 case CANDIDATE_MB_TYPE_INTRA:
2774                     s->mv_dir = 0;
2775                     s->mb_intra= 1;
2776                     motion_x= s->mv[0][0][0] = 0;
2777                     motion_y= s->mv[0][0][1] = 0;
2778                     break;
2779                 case CANDIDATE_MB_TYPE_INTER:
2780                     s->mv_dir = MV_DIR_FORWARD;
2781                     s->mb_intra= 0;
2782                     motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
2783                     motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
2784                     break;
2785                 case CANDIDATE_MB_TYPE_INTER_I:
2786                     s->mv_dir = MV_DIR_FORWARD;
2787                     s->mv_type = MV_TYPE_FIELD;
2788                     s->mb_intra= 0;
2789                     for(i=0; i<2; i++){
2790                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
2791                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
2792                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
2793                     }
2794                     break;
2795                 case CANDIDATE_MB_TYPE_INTER4V:
2796                     s->mv_dir = MV_DIR_FORWARD;
2797                     s->mv_type = MV_TYPE_8X8;
2798                     s->mb_intra= 0;
2799                     for(i=0; i<4; i++){
2800                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
2801                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
2802                     }
2803                     break;
2804                 case CANDIDATE_MB_TYPE_DIRECT:
2805                     if (CONFIG_MPEG4_ENCODER) {
2806                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
2807                         s->mb_intra= 0;
2808                         motion_x=s->b_direct_mv_table[xy][0];
2809                         motion_y=s->b_direct_mv_table[xy][1];
2810                         ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
2811                     }
2812                     break;
2813                 case CANDIDATE_MB_TYPE_DIRECT0:
2814                     if (CONFIG_MPEG4_ENCODER) {
2815                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
2816                         s->mb_intra= 0;
2817                         ff_mpeg4_set_direct_mv(s, 0, 0);
2818                     }
2819                     break;
2820                 case CANDIDATE_MB_TYPE_BIDIR:
2821                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2822                     s->mb_intra= 0;
2823                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
2824                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
2825                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
2826                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
2827                     break;
2828                 case CANDIDATE_MB_TYPE_BACKWARD:
2829                     s->mv_dir = MV_DIR_BACKWARD;
2830                     s->mb_intra= 0;
2831                     motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
2832                     motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
2833                     break;
2834                 case CANDIDATE_MB_TYPE_FORWARD:
2835                     s->mv_dir = MV_DIR_FORWARD;
2836                     s->mb_intra= 0;
2837                     motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
2838                     motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
2839                     break;
2840                 case CANDIDATE_MB_TYPE_FORWARD_I:
2841                     s->mv_dir = MV_DIR_FORWARD;
2842                     s->mv_type = MV_TYPE_FIELD;
2843                     s->mb_intra= 0;
2844                     for(i=0; i<2; i++){
2845                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
2846                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
2847                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
2848                     }
2849                     break;
2850                 case CANDIDATE_MB_TYPE_BACKWARD_I:
2851                     s->mv_dir = MV_DIR_BACKWARD;
2852                     s->mv_type = MV_TYPE_FIELD;
2853                     s->mb_intra= 0;
2854                     for(i=0; i<2; i++){
2855                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
2856                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
2857                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
2858                     }
2859                     break;
2860                 case CANDIDATE_MB_TYPE_BIDIR_I:
2861                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2862                     s->mv_type = MV_TYPE_FIELD;
2863                     s->mb_intra= 0;
2864                     for(dir=0; dir<2; dir++){
2865                         for(i=0; i<2; i++){
2866                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
2867                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
2868                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
2869                         }
2870                     }
2871                     break;
2872                 default:
2873                     av_log(s->avctx, AV_LOG_ERROR, "illegal MB type\n");
2874                 }
2875
2876                 encode_mb(s, motion_x, motion_y);
2877
2878                 // RAL: Update last macroblock type
2879                 s->last_mv_dir = s->mv_dir;
2880
2881                 if (CONFIG_H263_ENCODER &&
2882                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
2883                     ff_h263_update_motion_val(s);
2884
2885                 ff_MPV_decode_mb(s, s->block);
2886             }
2887
2888             /* clean the MV table in IPS frames for direct mode in B frames */
2889             if(s->mb_intra /* && I,P,S_TYPE */){
2890                 s->p_mv_table[xy][0]=0;
2891                 s->p_mv_table[xy][1]=0;
2892             }
2893
2894             if(s->flags&CODEC_FLAG_PSNR){
2895                 int w= 16;
2896                 int h= 16;
2897
2898                 if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
2899                 if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
2900
2901                 s->current_picture.f.error[0] += sse(
2902                     s, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
2903                     s->dest[0], w, h, s->linesize);
2904                 s->current_picture.f.error[1] += sse(
2905                     s, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
2906                     s->dest[1], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
2907                 s->current_picture.f.error[2] += sse(
2908                     s, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
2909                     s->dest[2], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
2910             }
2911             if(s->loop_filter){
2912                 if(CONFIG_H263_ENCODER && s->out_format == FMT_H263)
2913                     ff_h263_loop_filter(s);
2914             }
2915             av_dlog(s->avctx, "MB %d %d bits\n",
2916                     s->mb_x + s->mb_y * s->mb_stride, put_bits_count(&s->pb));
2917         }
2918     }
2919
2920     //not beautiful here but we must write it before flushing so it has to be here
2921     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type == AV_PICTURE_TYPE_I)
2922         ff_msmpeg4_encode_ext_header(s);
2923
2924     write_slice_end(s);
2925
2926     /* Send the last GOB if RTP */
2927     if (s->avctx->rtp_callback) {
2928         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width - s->resync_mb_x;
2929         pdif = put_bits_ptr(&s->pb) - s->ptr_lastgob;
2930         /* Call the RTP callback to send the last GOB */
2931         emms_c();
2932         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, pdif, number_mb);
2933     }
2934
2935     return 0;
2936 }
2937
2938 #define MERGE(field) dst->field += src->field; src->field=0
2939 static void merge_context_after_me(MpegEncContext *dst, MpegEncContext *src){
2940     MERGE(me.scene_change_score);
2941     MERGE(me.mc_mb_var_sum_temp);
2942     MERGE(me.mb_var_sum_temp);
2943 }
2944
2945 static void merge_context_after_encode(MpegEncContext *dst, MpegEncContext *src){
2946     int i;
2947
2948     MERGE(dct_count[0]); //note, the other dct vars are not part of the context
2949     MERGE(dct_count[1]);
2950     MERGE(mv_bits);
2951     MERGE(i_tex_bits);
2952     MERGE(p_tex_bits);
2953     MERGE(i_count);
2954     MERGE(f_count);
2955     MERGE(b_count);
2956     MERGE(skip_count);
2957     MERGE(misc_bits);
2958     MERGE(er.error_count);
2959     MERGE(padding_bug_score);
2960     MERGE(current_picture.f.error[0]);
2961     MERGE(current_picture.f.error[1]);
2962     MERGE(current_picture.f.error[2]);
2963
2964     if(dst->avctx->noise_reduction){
2965         for(i=0; i<64; i++){
2966             MERGE(dct_error_sum[0][i]);
2967             MERGE(dct_error_sum[1][i]);
2968         }
2969     }
2970
2971     assert(put_bits_count(&src->pb) % 8 ==0);
2972     assert(put_bits_count(&dst->pb) % 8 ==0);
2973     avpriv_copy_bits(&dst->pb, src->pb.buf, put_bits_count(&src->pb));
2974     flush_put_bits(&dst->pb);
2975 }
2976
2977 static int estimate_qp(MpegEncContext *s, int dry_run){
2978     if (s->next_lambda){
2979         s->current_picture_ptr->f.quality =
2980         s->current_picture.f.quality = s->next_lambda;
2981         if(!dry_run) s->next_lambda= 0;
2982     } else if (!s->fixed_qscale) {
2983         s->current_picture_ptr->f.quality =
2984         s->current_picture.f.quality = ff_rate_estimate_qscale(s, dry_run);
2985         if (s->current_picture.f.quality < 0)
2986             return -1;
2987     }
2988
2989     if(s->adaptive_quant){
2990         switch(s->codec_id){
2991         case AV_CODEC_ID_MPEG4:
2992             if (CONFIG_MPEG4_ENCODER)
2993                 ff_clean_mpeg4_qscales(s);
2994             break;
2995         case AV_CODEC_ID_H263:
2996         case AV_CODEC_ID_H263P:
2997         case AV_CODEC_ID_FLV1:
2998             if (CONFIG_H263_ENCODER)
2999                 ff_clean_h263_qscales(s);
3000             break;
3001         default:
3002             ff_init_qscale_tab(s);
3003         }
3004
3005         s->lambda= s->lambda_table[0];
3006         //FIXME broken
3007     }else
3008         s->lambda = s->current_picture.f.quality;
3009     update_qscale(s);
3010     return 0;
3011 }
3012
3013 /* must be called before writing the header */
3014 static void set_frame_distances(MpegEncContext * s){
3015     assert(s->current_picture_ptr->f.pts != AV_NOPTS_VALUE);
3016     s->time = s->current_picture_ptr->f.pts * s->avctx->time_base.num;
3017
3018     if(s->pict_type==AV_PICTURE_TYPE_B){
3019         s->pb_time= s->pp_time - (s->last_non_b_time - s->time);
3020         assert(s->pb_time > 0 && s->pb_time < s->pp_time);
3021     }else{
3022         s->pp_time= s->time - s->last_non_b_time;
3023         s->last_non_b_time= s->time;
3024         assert(s->picture_number==0 || s->pp_time > 0);
3025     }
3026 }
3027
/**
 * Encode the current picture.
 *
 * Steps, in order: initialise per-picture timing and rounding state, choose
 * the lambda used while estimating motion, run motion estimation (or, for I
 * pictures, mark every macroblock intra and optionally run mb_var_thread)
 * on all slice contexts, turn a P picture into an I picture when the scene
 * change score exceeds the threshold, select f_code/b_code and call the
 * ff_fix_long_*mvs() helpers, run the final estimate_qp(), write the
 * picture header that matches s->out_format, and finally run encode_thread
 * on every slice context and merge the results back into s.
 *
 * @param s              encoder context; s->pict_type may be changed from
 *                       P to I here
 * @param picture_number stored in s->picture_number and forwarded to the
 *                       picture header writers
 * @return 0 on success, -1 if estimate_qp() or ff_init_me() fails, or the
 *         negative value returned by ff_update_duplicate_context()
 */
3028 static int encode_picture(MpegEncContext *s, int picture_number)
3029 {
3030     int i, ret;
3031     int bits;
3032     int context_count = s->slice_context_count;
3033
3034     s->picture_number = picture_number;
3035
3036     /* Reset the average MB variance */
3037     s->me.mb_var_sum_temp    =
3038     s->me.mc_mb_var_sum_temp = 0;
3039
3040     /* we need to initialize some time vars before we can encode b-frames */
3041     // RAL: Condition added for MPEG1VIDEO
3042     if (s->codec_id == AV_CODEC_ID_MPEG1VIDEO || s->codec_id == AV_CODEC_ID_MPEG2VIDEO || (s->h263_pred && !s->msmpeg4_version))
3043         set_frame_distances(s);
3044     if(CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4)
3045         ff_set_mpeg4_time(s);
3046
3047     s->me.scene_change_score=0;
3048
3049 //    s->lambda= s->current_picture_ptr->quality; //FIXME qscale / ... stuff for ME rate distortion
3050
         /* I pictures reset no_rounding (1 for msmpeg4_version >= 3, else 0);
          * other non-B pictures toggle it when flipflop_rounding is set or the
          * codec is H263P / MPEG4. B pictures leave it untouched. */
3051     if(s->pict_type==AV_PICTURE_TYPE_I){
3052         if(s->msmpeg4_version >= 3) s->no_rounding=1;
3053         else                        s->no_rounding=0;
3054     }else if(s->pict_type!=AV_PICTURE_TYPE_B){
3055         if(s->flipflop_rounding || s->codec_id == AV_CODEC_ID_H263P || s->codec_id == AV_CODEC_ID_MPEG4)
3056             s->no_rounding ^= 1;
3057     }
3058
         /* Lambda used before the final estimate_qp(): in pass 2 a dry-run
          * estimate_qp() provides it; otherwise, unless CODEC_FLAG_QSCALE is
          * set, the last lambda stored for the matching picture type is reused. */
3059     if(s->flags & CODEC_FLAG_PASS2){
3060         if (estimate_qp(s,1) < 0)
3061             return -1;
3062         ff_get_2pass_fcode(s);
3063     }else if(!(s->flags & CODEC_FLAG_QSCALE)){
3064         if(s->pict_type==AV_PICTURE_TYPE_B)
3065             s->lambda= s->last_lambda_for[s->pict_type];
3066         else
3067             s->lambda= s->last_lambda_for[s->last_non_b_pict_type];
3068         update_qscale(s);
3069     }
3070
3071     s->mb_intra=0; //for the rate distortion & bit compare functions
         /* Context 0 is skipped: only thread contexts 1..context_count-1 are
          * refreshed from s. */
3072     for(i=1; i<context_count; i++){
3073         ret = ff_update_duplicate_context(s->thread_context[i], s);
3074         if (ret < 0)
3075             return ret;
3076     }
3077
3078     if(ff_init_me(s)<0)
3079         return -1;
3080
3081     /* Estimate motion for every MB */
         /* NOTE(review): the return values of the execute() calls in this
          * function are not inspected. */
3082     if(s->pict_type != AV_PICTURE_TYPE_I){
3083         s->lambda = (s->lambda * s->avctx->me_penalty_compensation + 128)>>8;
3084         s->lambda2= (s->lambda2* (int64_t)s->avctx->me_penalty_compensation + 128)>>8;
3085         if (s->pict_type != AV_PICTURE_TYPE_B) {
3086             if((s->avctx->pre_me && s->last_non_b_pict_type==AV_PICTURE_TYPE_I) || s->avctx->pre_me==2){
3087                 s->avctx->execute(s->avctx, pre_estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3088             }
3089         }
3090
3091         s->avctx->execute(s->avctx, estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3092     }else /* if(s->pict_type == AV_PICTURE_TYPE_I) */{
3093         /* I-Frame */
3094         for(i=0; i<s->mb_stride*s->mb_height; i++)
3095             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3096
3097         if(!s->fixed_qscale){
3098             /* finding spatial complexity for I-frame rate control */
3099             s->avctx->execute(s->avctx, mb_var_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3100         }
3101     }
3102     for(i=1; i<context_count; i++){
3103         merge_context_after_me(s, s->thread_context[i]);
3104     }
3105     s->current_picture.mc_mb_var_sum= s->current_picture_ptr->mc_mb_var_sum= s->me.mc_mb_var_sum_temp;
3106     s->current_picture.   mb_var_sum= s->current_picture_ptr->   mb_var_sum= s->me.   mb_var_sum_temp;
3107     emms_c();
3108
         /* A P picture whose scene change score exceeds the threshold is
          * re-typed as I and all its macroblocks become intra candidates. */
3109     if(s->me.scene_change_score > s->avctx->scenechange_threshold && s->pict_type == AV_PICTURE_TYPE_P){
3110         s->pict_type= AV_PICTURE_TYPE_I;
3111         for(i=0; i<s->mb_stride*s->mb_height; i++)
3112             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3113         av_dlog(s, "Scene change detected, encoding as I Frame %d %d\n",
3114                 s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
3115     }
3116
         /* Unless umvplus is set, derive f_code (and b_code for B pictures)
          * from the motion vector tables and run the ff_fix_long_*mvs()
          * helpers with the chosen codes. */
3117     if(!s->umvplus){
3118         if(s->pict_type==AV_PICTURE_TYPE_P || s->pict_type==AV_PICTURE_TYPE_S) {
3119             s->f_code= ff_get_best_fcode(s, s->p_mv_table, CANDIDATE_MB_TYPE_INTER);
3120
3121             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3122                 int a,b;
3123                 a= ff_get_best_fcode(s, s->p_field_mv_table[0][0], CANDIDATE_MB_TYPE_INTER_I); //FIXME field_select
3124                 b= ff_get_best_fcode(s, s->p_field_mv_table[1][1], CANDIDATE_MB_TYPE_INTER_I);
3125                 s->f_code= FFMAX3(s->f_code, a, b);
3126             }
3127
3128             ff_fix_long_p_mvs(s);
3129             ff_fix_long_mvs(s, NULL, 0, s->p_mv_table, s->f_code, CANDIDATE_MB_TYPE_INTER, 0);
3130             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3131                 int j;
3132                 for(i=0; i<2; i++){
3133                     for(j=0; j<2; j++)
3134                         ff_fix_long_mvs(s, s->p_field_select_table[i], j,
3135                                         s->p_field_mv_table[i][j], s->f_code, CANDIDATE_MB_TYPE_INTER_I, 0);
3136                 }
3137             }
3138         }
3139
3140         if(s->pict_type==AV_PICTURE_TYPE_B){
3141             int a, b;
3142
                 /* f_code covers the forward tables, b_code the backward ones;
                  * each is the larger of the plain and bidirectional result. */
3143             a = ff_get_best_fcode(s, s->b_forw_mv_table, CANDIDATE_MB_TYPE_FORWARD);
3144             b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3145             s->f_code = FFMAX(a, b);
3146
3147             a = ff_get_best_fcode(s, s->b_back_mv_table, CANDIDATE_MB_TYPE_BACKWARD);
3148             b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3149             s->b_code = FFMAX(a, b);
3150
3151             ff_fix_long_mvs(s, NULL, 0, s->b_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_FORWARD, 1);
3152             ff_fix_long_mvs(s, NULL, 0, s->b_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BACKWARD, 1);
3153             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3154             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3155             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3156                 int dir, j;
3157                 for(dir=0; dir<2; dir++){
3158                     for(i=0; i<2; i++){
3159                         for(j=0; j<2; j++){
3160                             int type= dir ? (CANDIDATE_MB_TYPE_BACKWARD_I|CANDIDATE_MB_TYPE_BIDIR_I)
3161                                           : (CANDIDATE_MB_TYPE_FORWARD_I |CANDIDATE_MB_TYPE_BIDIR_I);
3162                             ff_fix_long_mvs(s, s->b_field_select_table[dir][i], j,
3163                                             s->b_field_mv_table[dir][i][j], dir ? s->b_code : s->f_code, type, 1);
3164                         }
3165                     }
3166                 }
3167             }
3168         }
3169     }
3170
         /* Final (non dry-run) quantizer estimate for this picture. */
3171     if (estimate_qp(s, 0) < 0)
3172         return -1;
3173
3174     if(s->qscale < 3 && s->max_qcoeff<=128 && s->pict_type==AV_PICTURE_TYPE_I && !(s->flags & CODEC_FLAG_QSCALE))
3175         s->qscale= 3; //reduce clipping problems
3176
3177     if (s->out_format == FMT_MJPEG) {
3178         /* for mjpeg, we do include qscale in the matrix */
3179         for(i=1;i<64;i++){
3180             int j= s->dsp.idct_permutation[i];
3181
3182             s->intra_matrix[j] = av_clip_uint8((ff_mpeg1_default_intra_matrix[i] * s->qscale) >> 3);
3183         }
3184         s->y_dc_scale_table=
3185         s->c_dc_scale_table= ff_mpeg2_dc_scale_table[s->intra_dc_precision];
3186         s->intra_matrix[0] = ff_mpeg2_dc_scale_table[s->intra_dc_precision][8];
3187         ff_convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
3188                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
             /* qscale is already folded into intra_matrix above, so the
              * quantizer itself is pinned to 8. */
3189         s->qscale= 8;
3190     }
3191
3192     //FIXME var duplication
3193     s->current_picture_ptr->f.key_frame =
3194     s->current_picture.f.key_frame = s->pict_type == AV_PICTURE_TYPE_I; //FIXME pic_ptr
3195     s->current_picture_ptr->f.pict_type =
3196     s->current_picture.f.pict_type = s->pict_type;
3197
3198     if (s->current_picture.f.key_frame)
3199         s->picture_in_gop_number=0;
3200
         /* Write the picture header for the active output format; the bit
          * count before and after yields s->header_bits. */
3201     s->last_bits= put_bits_count(&s->pb);
3202     switch(s->out_format) {
3203     case FMT_MJPEG:
3204         if (CONFIG_MJPEG_ENCODER)
3205             ff_mjpeg_encode_picture_header(s);
3206         break;
3207     case FMT_H261:
3208         if (CONFIG_H261_ENCODER)
3209             ff_h261_encode_picture_header(s, picture_number);
3210         break;
3211     case FMT_H263:
3212         if (CONFIG_WMV2_ENCODER && s->codec_id == AV_CODEC_ID_WMV2)
3213             ff_wmv2_encode_picture_header(s, picture_number);
3214         else if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
3215             ff_msmpeg4_encode_picture_header(s, picture_number);
3216         else if (CONFIG_MPEG4_ENCODER && s->h263_pred)
3217             ff_mpeg4_encode_picture_header(s, picture_number);
3218         else if (CONFIG_RV10_ENCODER && s->codec_id == AV_CODEC_ID_RV10)
3219             ff_rv10_encode_picture_header(s, picture_number);
3220         else if (CONFIG_RV20_ENCODER && s->codec_id == AV_CODEC_ID_RV20)
3221             ff_rv20_encode_picture_header(s, picture_number);
3222         else if (CONFIG_FLV_ENCODER && s->codec_id == AV_CODEC_ID_FLV1)
3223             ff_flv_encode_picture_header(s, picture_number);
3224         else if (CONFIG_H263_ENCODER)
3225             ff_h263_encode_picture_header(s, picture_number);
3226         break;
3227     case FMT_MPEG1:
3228         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
3229             ff_mpeg1_encode_picture_header(s, picture_number);
3230         break;
3231     default:
3232         assert(0);
3233     }
3234     bits= put_bits_count(&s->pb);
3235     s->header_bits= bits - s->last_bits;
3236
         /* Push the post-ME state to the other thread contexts, run
          * encode_thread on all of them, then merge their results into s. */
3237     for(i=1; i<context_count; i++){
3238         update_duplicate_context_after_me(s->thread_context[i], s);
3239     }
3240     s->avctx->execute(s->avctx, encode_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3241     for(i=1; i<context_count; i++){
3242         merge_context_after_encode(s, s->thread_context[i]);
3243     }
3244     emms_c();
3245     return 0;
3246 }
3247
3248 static void denoise_dct_c(MpegEncContext *s, int16_t *block){
3249     const int intra= s->mb_intra;
3250     int i;
3251
3252     s->dct_count[intra]++;
3253
3254     for(i=0; i<64; i++){
3255         int level= block[i];
3256
3257         if(level){
3258             if(level>0){
3259                 s->dct_error_sum[intra][i] += level;
3260                 level -= s->dct_offset[intra][i];
3261                 if(level<0) level=0;
3262             }else{
3263                 s->dct_error_sum[intra][i] -= level;
3264                 level += s->dct_offset[intra][i];
3265                 if(level>0) level=0;
3266             }
3267             block[i]= level;
3268         }
3269     }
3270 }
3271
/**
 * Transform and quantize one block, choosing the coded run/level sequence
 * by a rate-distortion search instead of plain rounding.
 *
 * The block is transformed in place with s->dsp.fdct (and passed through
 * s->denoise_dct when s->dct_error_sum is set). Every scan position up to
 * the last coefficient above the threshold receives one or two candidate
 * levels. A dynamic-programming pass then picks, per position, the
 * candidate level and preceding run that minimise
 * distortion + lambda * code length, and the winning chain is written back
 * into block. s->coded_score[n] receives the score of the chosen coding.
 *
 * @param s        encoder context (quantization matrices, AC VLC length
 *                 tables, lambda2, scan tables)
 * @param block    transformed in place; on return holds the chosen levels
 * @param n        block index; n < 4 selects s->y_dc_scale, otherwise
 *                 s->c_dc_scale, for the intra DC coefficient
 * @param qscale   quantizer, also used to index q_intra_matrix/q_inter_matrix
 * @param overflow set to 1 when the OR of the quantized magnitudes exceeds
 *                 s->max_qcoeff (an overflow might have happened), else 0
 * @return scan index of the last non-zero coefficient; a value below the
 *         first searched index (1 for intra, 0 for inter blocks) means no
 *         coefficient from that index on is coded, and -1 is returned when
 *         the single remaining inter coefficient is dropped
 */
3272 static int dct_quantize_trellis_c(MpegEncContext *s,
3273                                   int16_t *block, int n,
3274                                   int qscale, int *overflow){
3275     const int *qmat;
3276     const uint8_t *scantable= s->intra_scantable.scantable;
3277     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3278     int max=0;
3279     unsigned int threshold1, threshold2;
3280     int bias=0;
3281     int run_tab[65];
3282     int level_tab[65];
3283     int score_tab[65];
3284     int survivor[65];
3285     int survivor_count;
3286     int last_run=0;
3287     int last_level=0;
3288     int last_score= 0;
3289     int last_i;
3290     int coeff[2][64];
3291     int coeff_count[64];
3292     int qmul, qadd, start_i, last_non_zero, i, dc;
3293     const int esc_length= s->ac_esc_length;
3294     uint8_t * length;
3295     uint8_t * last_length;
3296     const int lambda= s->lambda2 >> (FF_LAMBDA_SHIFT - 6);
3297
3298     s->dsp.fdct (block);
3299
3300     if(s->dct_error_sum)
3301         s->denoise_dct(s, block);
3302     qmul= qscale*16;
3303     qadd= ((qscale-1)|1)*8;
3304
3305     if (s->mb_intra) {
3306         int q;
3307         if (!s->h263_aic) {
3308             if (n < 4)
3309                 q = s->y_dc_scale;
3310             else
3311                 q = s->c_dc_scale;
3312             q = q << 3;
3313         } else{
3314             /* For AIC we skip quant/dequant of INTRADC */
3315             q = 1 << 3;
3316             qadd=0;
3317         }
3318
3319         /* note: block[0] is assumed to be positive */
3320         block[0] = (block[0] + (q >> 1)) / q;
             /* The DC coefficient is final at this point; the search below
              * starts at scan position 1. */
3321         start_i = 1;
3322         last_non_zero = 0;
3323         qmat = s->q_intra_matrix[qscale];
3324         if(s->mpeg_quant || s->out_format == FMT_MPEG1)
3325             bias= 1<<(QMAT_SHIFT-1);
3326         length     = s->intra_ac_vlc_length;
3327         last_length= s->intra_ac_vlc_last_length;
3328     } else {
3329         start_i = 0;
3330         last_non_zero = -1;
3331         qmat = s->q_inter_matrix[qscale];
3332         length     = s->inter_ac_vlc_length;
3333         last_length= s->inter_ac_vlc_last_length;
3334     }
3335     last_i= start_i;
3336
         /* With threshold2 == 2*threshold1, the unsigned comparison
          * (level + threshold1) > threshold2 used below is true exactly when
          * level lies outside [-threshold1, threshold1]. */
3337     threshold1= (1<<QMAT_SHIFT) - bias - 1;
3338     threshold2= (threshold1<<1);
3339
         /* Scan backwards for the last position that exceeds the threshold. */
3340     for(i=63; i>=start_i; i--) {
3341         const int j = scantable[i];
3342         int level = block[j] * qmat[j];
3343
3344         if(((unsigned)(level+threshold1))>threshold2){
3345             last_non_zero = i;
3346             break;
3347         }
3348     }
3349
         /* Build the candidate levels per position: above the threshold the
          * rounded level plus, when its magnitude is at least 2, the level one
          * step closer to zero; below the threshold a single candidate of
          * magnitude 1 carrying the sign of the scaled coefficient. */
3350     for(i=start_i; i<=last_non_zero; i++) {
3351         const int j = scantable[i];
3352         int level = block[j] * qmat[j];
3353
3354 //        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
3355 //           || bias-level >= (1<<(QMAT_SHIFT - 3))){
3356         if(((unsigned)(level+threshold1))>threshold2){
3357             if(level>0){
3358                 level= (bias + level)>>QMAT_SHIFT;
3359                 coeff[0][i]= level;
3360                 coeff[1][i]= level-1;
3361 //                coeff[2][k]= level-2;
3362             }else{
3363                 level= (bias - level)>>QMAT_SHIFT;
3364                 coeff[0][i]= -level;
3365                 coeff[1][i]= -level+1;
3366 //                coeff[2][k]= -level+2;
3367             }
3368             coeff_count[i]= FFMIN(level, 2);
3369             assert(coeff_count[i]);
3370             max |=level;
3371         }else{
3372             coeff[0][i]= (level>>31)|1;
3373             coeff_count[i]= 1;
3374         }
3375     }
3376
3377     *overflow= s->max_qcoeff < max; //overflow might have happened
3378
         /* Nothing exceeded the threshold: clear everything from start_i on. */
3379     if(last_non_zero < start_i){
3380         memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
3381         return last_non_zero;
3382     }
3383
3384     score_tab[start_i]= 0;
3385     survivor[0]= start_i;
3386     survivor_count= 1;
3387
         /* Dynamic programming over scan positions. score_tab[i+1] becomes the
          * best score of a coding that places a non-zero level at position i;
          * run_tab[i+1]/level_tab[i+1] remember the run and level that achieved
          * it. survivor[] lists the earlier end points still worth extending. */
3388     for(i=start_i; i<=last_non_zero; i++){
3389         int level_index, j, zero_distortion;
3390         int dct_coeff= FFABS(block[ scantable[i] ]);
3391         int best_score=256*256*256*120;
3392
3393         if (s->dsp.fdct == ff_fdct_ifast)
3394             dct_coeff= (dct_coeff*ff_inv_aanscales[ scantable[i] ]) >> 12;
3395         zero_distortion= dct_coeff*dct_coeff;
3396
3397         for(level_index=0; level_index < coeff_count[i]; level_index++){
3398             int distortion;
3399             int level= coeff[level_index][i];
3400             const int alevel= FFABS(level);
3401             int unquant_coeff;
3402
3403             assert(level);
3404
3405             if(s->out_format == FMT_H263){
3406                 unquant_coeff= alevel*qmul + qadd;
3407             }else{ //MPEG1
3408                 j= s->dsp.idct_permutation[ scantable[i] ]; //FIXME optimize
3409                 if(s->mb_intra){
3410                         unquant_coeff = (int)(  alevel  * qscale * s->intra_matrix[j]) >> 3;
3411                         unquant_coeff =   (unquant_coeff - 1) | 1;
3412                 }else{
3413                         unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[j])) >> 4;
3414                         unquant_coeff =   (unquant_coeff - 1) | 1;
3415                 }
3416                 unquant_coeff<<= 3;
3417             }
3418
                 /* Distortion is measured relative to coding this position as zero. */
3419             distortion= (unquant_coeff - dct_coeff) * (unquant_coeff - dct_coeff) - zero_distortion;
3420             level+=64;
                 /* Levels with level+64 inside 0..127 are costed from the length
                  * tables; anything else is charged esc_length. */
3421             if((level&(~127)) == 0){
3422                 for(j=survivor_count-1; j>=0; j--){
3423                     int run= i - survivor[j];
3424                     int score= distortion + length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3425                     score += score_tab[i-run];
3426
3427                     if(score < best_score){
3428                         best_score= score;
3429                         run_tab[i+1]= run;
3430                         level_tab[i+1]= level-64;
3431                     }
3432                 }
3433
                     /* For FMT_H263 the best coding that ends at this
                      * coefficient is tracked here, costed with last_length. */
3434                 if(s->out_format == FMT_H263){
3435                     for(j=survivor_count-1; j>=0; j--){
3436                         int run= i - survivor[j];
3437                         int score= distortion + last_length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3438                         score += score_tab[i-run];
3439                         if(score < last_score){
3440                             last_score= score;
3441                             last_run= run;
3442                             last_level= level-64;
3443                             last_i= i+1;
3444                         }
3445                     }
3446                 }
3447             }else{
3448                 distortion += esc_length*lambda;
3449                 for(j=survivor_count-1; j>=0; j--){
3450                     int run= i - survivor[j];
3451                     int score= distortion + score_tab[i-run];
3452
3453                     if(score < best_score){
3454                         best_score= score;
3455                         run_tab[i+1]= run;
3456                         level_tab[i+1]= level-64;
3457                     }
3458                 }
3459
3460                 if(s->out_format == FMT_H263){
3461                   for(j=survivor_count-1; j>=0; j--){
3462                         int run= i - survivor[j];
3463                         int score= distortion + score_tab[i-run];
3464                         if(score < last_score){
3465                             last_score= score;
3466                             last_run= run;
3467                             last_level= level-64;
3468                             last_i= i+1;
3469                         }
3470                     }
3471                 }
3472             }
3473         }
3474
3475         score_tab[i+1]= best_score;
3476
             /* Prune survivors whose score is above the new best score (plus a
              * slack of lambda when last_non_zero > 27, see the note below). */
3477         //Note: there is a vlc code in mpeg4 which is 1 bit shorter than another one with a shorter run and the same level
3478         if(last_non_zero <= 27){
3479             for(; survivor_count; survivor_count--){
3480                 if(score_tab[ survivor[survivor_count-1] ] <= best_score)
3481                     break;
3482             }
3483         }else{
3484             for(; survivor_count; survivor_count--){
3485                 if(score_tab[ survivor[survivor_count-1] ] <= best_score + lambda)
3486                     break;
3487             }
3488         }
3489
3490         survivor[ survivor_count++ ]= i+1;
3491     }
3492
         /* For formats other than FMT_H263 the end position was not tracked
          * inside the loop; pick the cheapest end point now. */
3493     if(s->out_format != FMT_H263){
3494         last_score= 256*256*256*120;
3495         for(i= survivor[0]; i<=last_non_zero + 1; i++){
3496             int score= score_tab[i];
3497             if(i) score += lambda*2; //FIXME exacter?
3498
3499             if(score < last_score){
3500                 last_score= score;
3501                 last_i= i;
3502                 last_level= level_tab[i];
3503                 last_run= run_tab[i];
3504             }
3505         }
3506     }
3507
3508     s->coded_score[n] = last_score;
3509
         /* Save |block[0]| for the special case below, then clear everything
          * from start_i on before the chosen levels are written back. */
3510     dc= FFABS(block[0]);
3511     last_non_zero= last_i - 1;
3512     memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
3513
3514     if(last_non_zero < start_i)
3515         return last_non_zero;
3516
         /* Inter block (start_i == 0) where only position 0 remains: re-decide
          * that single level against dropping the block, whose cost is dc*dc. */
3517     if(last_non_zero == 0 && start_i == 0){
3518         int best_level= 0;
3519         int best_score= dc * dc;
3520
3521         for(i=0; i<coeff_count[0]; i++){
3522             int level= coeff[i][0];
3523             int alevel= FFABS(level);
3524             int unquant_coeff, score, distortion;
3525
3526             if(s->out_format == FMT_H263){
3527                     unquant_coeff= (alevel*qmul + qadd)>>3;
3528             }else{ //MPEG1
3529                     unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[0])) >> 4;
3530                     unquant_coeff =   (unquant_coeff - 1) | 1;
3531             }
3532             unquant_coeff = (unquant_coeff + 4) >> 3;
3533             unquant_coeff<<= 3 + 3;
3534
3535             distortion= (unquant_coeff - dc) * (unquant_coeff - dc);
3536             level+=64;
3537             if((level&(~127)) == 0) score= distortion + last_length[UNI_AC_ENC_INDEX(0, level)]*lambda;
3538             else                    score= distortion + esc_length*lambda;
3539
3540             if(score < best_score){
3541                 best_score= score;
3542                 best_level= level - 64;
3543             }
3544         }
3545         block[0]= best_level;
3546         s->coded_score[n] = best_score - dc*dc;
3547         if(best_level == 0) return -1;
3548         else                return last_non_zero;
3549     }
3550
         /* Write the final coefficient, then walk the run/level chain backwards
          * and store every chosen level at its permuted scan position. */
3551     i= last_i;
3552     assert(last_level);
3553
3554     block[ perm_scantable[last_non_zero] ]= last_level;
3555     i -= last_run + 1;
3556
3557     for(; i>start_i; i -= run_tab[i] + 1){
3558         block[ perm_scantable[i-1] ]= level_tab[i];
3559     }
3560
3561     return last_non_zero;
3562 }
3563
3564 //#define REFINE_STATS 1
3565 static int16_t basis[64][64];
3566
3567 static void build_basis(uint8_t *perm){
3568     int i, j, x, y;
3569     emms_c();
3570     for(i=0; i<8; i++){
3571         for(j=0; j<8; j++){
3572             for(y=0; y<8; y++){
3573                 for(x=0; x<8; x++){
3574                     double s= 0.25*(1<<BASIS_SHIFT);
3575                     int index= 8*i + j;
3576                     int perm_index= perm[index];
3577                     if(i==0) s*= sqrt(0.5);
3578                     if(j==0) s*= sqrt(0.5);
3579                     basis[perm_index][8*x + y]= lrintf(s * cos((M_PI/8.0)*i*(x+0.5)) * cos((M_PI/8.0)*j*(y+0.5)));
3580                 }
3581             }
3582         }
3583     }
3584 }
3585
3586 static int dct_quantize_refine(MpegEncContext *s, //FIXME breaks denoise?
3587                         int16_t *block, int16_t *weight, int16_t *orig,
3588                         int n, int qscale){
3589     int16_t rem[64];
3590     LOCAL_ALIGNED_16(int16_t, d1, [64]);
3591     const uint8_t *scantable= s->intra_scantable.scantable;
3592     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3593 //    unsigned int threshold1, threshold2;
3594 //    int bias=0;
3595     int run_tab[65];
3596     int prev_run=0;
3597     int prev_level=0;
3598     int qmul, qadd, start_i, last_non_zero, i, dc;
3599     uint8_t * length;
3600     uint8_t * last_length;
3601     int lambda;
3602     int rle_index, run, q = 1, sum; //q is only used when s->mb_intra is true
3603 #ifdef REFINE_STATS
3604 static int count=0;
3605 static int after_last=0;
3606 static int to_zero=0;
3607 static int from_zero=0;
3608 static int raise=0;
3609 static int lower=0;
3610 static int messed_sign=0;
3611 #endif
3612
3613     if(basis[0][0] == 0)
3614         build_basis(s->dsp.idct_permutation);
3615
3616     qmul= qscale*2;
3617     qadd= (qscale-1)|1;
3618     if (s->mb_intra) {
3619         if (!s->h263_aic) {
3620             if (n < 4)
3621                 q = s->y_dc_scale;
3622             else
3623                 q = s->c_dc_scale;
3624         } else{
3625             /* For AIC we skip quant/dequant of INTRADC */
3626             q = 1;
3627             qadd=0;
3628         }
3629         q <<= RECON_SHIFT-3;
3630         /* note: block[0] is assumed to be positive */
3631         dc= block[0]*q;
3632 //        block[0] = (block[0] + (q >> 1)) / q;
3633         start_i = 1;
3634 //        if(s->mpeg_quant || s->out_format == FMT_MPEG1)
3635 //            bias= 1<<(QMAT_SHIFT-1);
3636         length     = s->intra_ac_vlc_length;
3637         last_length= s->intra_ac_vlc_last_length;
3638     } else {
3639         dc= 0;
3640         start_i = 0;
3641         length     = s->inter_ac_vlc_length;
3642         last_length= s->inter_ac_vlc_last_length;
3643     }
3644     last_non_zero = s->block_last_index[n];
3645
3646 #ifdef REFINE_STATS
3647 {START_TIMER
3648 #endif
3649     dc += (1<<(RECON_SHIFT-1));
3650     for(i=0; i<64; i++){
3651         rem[i]= dc - (orig[i]<<RECON_SHIFT); //FIXME  use orig dirrectly instead of copying to rem[]
3652     }
3653 #ifdef REFINE_STATS
3654 STOP_TIMER("memset rem[]")}
3655 #endif
3656     sum=0;
3657     for(i=0; i<64; i++){
3658         int one= 36;
3659         int qns=4;
3660         int w;
3661
3662         w= FFABS(weight[i]) + qns*one;
3663         w= 15 + (48*qns*one + w/2)/w; // 16 .. 63
3664
3665         weight[i] = w;
3666 //        w=weight[i] = (63*qns + (w/2)) / w;
3667
3668         assert(w>0);
3669         assert(w<(1<<6));
3670         sum += w*w;
3671     }
3672     lambda= sum*(uint64_t)s->lambda2 >> (FF_LAMBDA_SHIFT - 6 + 6 + 6 + 6);
3673 #ifdef REFINE_STATS
3674 {START_TIMER
3675 #endif
3676     run=0;
3677     rle_index=0;
3678     for(i=start_i; i<=last_non_zero; i++){
3679         int j= perm_scantable[i];
3680         const int level= block[j];
3681         int coeff;
3682
3683         if(level){
3684             if(level<0) coeff= qmul*level - qadd;
3685             else        coeff= qmul*level + qadd;
3686             run_tab[rle_index++]=run;
3687             run=0;
3688
3689             s->dsp.add_8x8basis(rem, basis[j], coeff);
3690         }else{
3691             run++;
3692         }
3693     }
3694 #ifdef REFINE_STATS
3695 if(last_non_zero>0){
3696 STOP_TIMER("init rem[]")
3697 }
3698 }
3699
3700 {START_TIMER
3701 #endif
3702     for(;;){
3703         int best_score=s->dsp.try_8x8basis(rem, weight, basis[0], 0);
3704         int best_coeff=0;
3705         int best_change=0;
3706         int run2, best_unquant_change=0, analyze_gradient;
3707 #ifdef REFINE_STATS
3708 {START_TIMER
3709 #endif
3710         analyze_gradient = last_non_zero > 2 || s->quantizer_noise_shaping >= 3;
3711
3712         if(analyze_gradient){
3713 #ifdef REFINE_STATS
3714 {START_TIMER
3715 #endif
3716             for(i=0; i<64; i++){
3717                 int w= weight[i];
3718
3719                 d1[i] = (rem[i]*w*w + (1<<(RECON_SHIFT+12-1)))>>(RECON_SHIFT+12);
3720             }
3721 #ifdef REFINE_STATS
3722 STOP_TIMER("rem*w*w")}
3723 {START_TIMER
3724 #endif
3725             s->dsp.fdct(d1);
3726 #ifdef REFINE_STATS
3727 STOP_TIMER("dct")}
3728 #endif
3729         }
3730
3731         if(start_i){
3732             const int level= block[0];
3733             int change, old_coeff;
3734
3735             assert(s->mb_intra);
3736
3737             old_coeff= q*level;
3738
3739             for(change=-1; change<=1; change+=2){
3740                 int new_level= level + change;
3741                 int score, new_coeff;
3742
3743                 new_coeff= q*new_level;
3744                 if(new_coeff >= 2048 || new_coeff < 0)
3745                     continue;
3746
3747                 score= s->dsp.try_8x8basis(rem, weight, basis[0], new_coeff - old_coeff);
3748                 if(score<best_score){
3749                     best_score= score;
3750                     best_coeff= 0;
3751                     best_change= change;
3752                     best_unquant_change= new_coeff - old_coeff;
3753                 }
3754             }
3755         }
3756
3757         run=0;
3758         rle_index=0;
3759         run2= run_tab[rle_index++];
3760         prev_level=0;
3761         prev_run=0;
3762
3763         for(i=start_i; i<64; i++){
3764             int j= perm_scantable[i];
3765             const int level= block[j];
3766             int change, old_coeff;
3767
3768             if(s->quantizer_noise_shaping < 3 && i > last_non_zero + 1)
3769                 break;
3770
3771             if(level){
3772                 if(level<0) old_coeff= qmul*level - qadd;
3773                 else        old_coeff= qmul*level + qadd;
3774                 run2= run_tab[rle_index++]; //FIXME ! maybe after last
3775             }else{
3776                 old_coeff=0;
3777                 run2--;
3778                 assert(run2>=0 || i >= last_non_zero );
3779             }
3780
3781             for(change=-1; change<=1; change+=2){
3782                 int new_level= level + change;
3783                 int score, new_coeff, unquant_change;
3784
3785                 score=0;
3786                 if(s->quantizer_noise_shaping < 2 && FFABS(new_level) > FFABS(level))
3787                    continue;
3788
3789                 if(new_level){
3790                     if(new_level<0) new_coeff= qmul*new_level - qadd;
3791                     else            new_coeff= qmul*new_level + qadd;
3792                     if(new_coeff >= 2048 || new_coeff <= -2048)
3793                         continue;
3794                     //FIXME check for overflow
3795
3796                     if(level){
3797                         if(level < 63 && level > -63){
3798                             if(i < last_non_zero)
3799                                 score +=   length[UNI_AC_ENC_INDEX(run, new_level+64)]
3800                                          - length[UNI_AC_ENC_INDEX(run, level+64)];
3801                             else
3802                                 score +=   last_length[UNI_AC_ENC_INDEX(run, new_level+64)]
3803                                          - last_length[UNI_AC_ENC_INDEX(run, level+64)];
3804                         }
3805                     }else{
3806                         assert(FFABS(new_level)==1);
3807
3808                         if(analyze_gradient){
3809                             int g= d1[ scantable[i] ];
3810                             if(g && (g^new_level) >= 0)
3811                                 continue;
3812                         }
3813
3814                         if(i < last_non_zero){
3815                             int next_i= i + run2 + 1;
3816                             int next_level= block[ perm_scantable[next_i] ] + 64;
3817
3818                             if(next_level&(~127))
3819                                 next_level= 0;
3820
3821                             if(next_i < last_non_zero)
3822                                 score +=   length[UNI_AC_ENC_INDEX(run, 65)]
3823                                          + length[UNI_AC_ENC_INDEX(run2, next_level)]
3824                                          - length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
3825                             else
3826                                 score +=  length[UNI_AC_ENC_INDEX(run, 65)]
3827                                         + last_length[UNI_AC_ENC_INDEX(run2, next_level)]
3828                                         - last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
3829                         }else{
3830                             score += last_length[UNI_AC_ENC_INDEX(run, 65)];
3831                             if(prev_level){
3832                                 score +=  length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
3833                                         - last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
3834                             }
3835                         }
3836                     }
3837                 }else{
3838                     new_coeff=0;
3839                     assert(FFABS(level)==1);
3840
3841                     if(i < last_non_zero){
3842                         int next_i= i + run2 + 1;
3843                         int next_level= block[ perm_scantable[next_i] ] + 64;
3844
3845                         if(next_level&(~127))
3846                             next_level= 0;
3847
3848                         if(next_i < last_non_zero)
3849                             score +=   length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
3850                                      - length[UNI_AC_ENC_INDEX(run2, next_level)]
3851                                      - length[UNI_AC_ENC_INDEX(run, 65)];
3852                         else
3853                             score +=   last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
3854                                      - last_length[UNI_AC_ENC_INDEX(run2, next_level)]
3855                                      - length[UNI_AC_ENC_INDEX(run, 65)];
3856                     }else{
3857                         score += -last_length[UNI_AC_ENC_INDEX(run, 65)];
3858                         if(prev_level){
3859                             score +=  last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
3860                                     - length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
3861                         }
3862                     }
3863                 }
3864
3865                 score *= lambda;
3866
3867                 unquant_change= new_coeff - old_coeff;
3868                 assert((score < 100*lambda && score > -100*lambda) || lambda==0);
3869
3870                 score+= s->dsp.try_8x8basis(rem, weight, basis[j], unquant_change);
3871                 if(score<best_score){
3872                     best_score= score;
3873                     best_coeff= i;
3874                     best_change= change;
3875                     best_unquant_change= unquant_change;
3876                 }
3877             }
3878             if(level){
3879                 prev_level= level + 64;
3880                 if(prev_level&(~127))
3881                     prev_level= 0;
3882                 prev_run= run;
3883                 run=0;
3884             }else{
3885                 run++;
3886             }
3887         }
3888 #ifdef REFINE_STATS
3889 STOP_TIMER("iterative step")}
3890 #endif
3891
3892         if(best_change){
3893             int j= perm_scantable[ best_coeff ];
3894
3895             block[j] += best_change;
3896
3897             if(best_coeff > last_non_zero){
3898                 last_non_zero= best_coeff;
3899                 assert(block[j]);
3900 #ifdef REFINE_STATS
3901 after_last++;
3902 #endif
3903             }else{
3904 #ifdef REFINE_STATS
3905 if(block[j]){
3906     if(block[j] - best_change){
3907         if(FFABS(block[j]) > FFABS(block[j] - best_change)){
3908             raise++;
3909         }else{
3910             lower++;
3911         }
3912     }else{
3913         from_zero++;
3914     }
3915 }else{
3916     to_zero++;
3917 }
3918 #endif
3919                 for(; last_non_zero>=start_i; last_non_zero--){
3920                     if(block[perm_scantable[last_non_zero]])
3921                         break;
3922                 }
3923             }
3924 #ifdef REFINE_STATS
3925 count++;
3926 if(256*256*256*64 % count == 0){
3927     printf("after_last:%d to_zero:%d from_zero:%d raise:%d lower:%d sign:%d xyp:%d/%d/%d\n", after_last, to_zero, from_zero, raise, lower, messed_sign, s->mb_x, s->mb_y, s->picture_number);
3928 }
3929 #endif
3930             run=0;
3931             rle_index=0;
3932             for(i=start_i; i<=last_non_zero; i++){
3933                 int j= perm_scantable[i];
3934                 const int level= block[j];
3935
3936                  if(level){
3937                      run_tab[rle_index++]=run;
3938                      run=0;
3939                  }else{
3940                      run++;
3941                  }
3942             }
3943
3944             s->dsp.add_8x8basis(rem, basis[j], best_unquant_change);
3945         }else{
3946             break;
3947         }
3948     }
3949 #ifdef REFINE_STATS
3950 if(last_non_zero>0){
3951 STOP_TIMER("iterative search")
3952 }
3953 }
3954 #endif
3955
3956     return last_non_zero;
3957 }
3958
3959 int ff_dct_quantize_c(MpegEncContext *s,
3960                         int16_t *block, int n,
3961                         int qscale, int *overflow)
3962 {
3963     int i, j, level, last_non_zero, q, start_i;
3964     const int *qmat;
3965     const uint8_t *scantable= s->intra_scantable.scantable;
3966     int bias;
3967     int max=0;
3968     unsigned int threshold1, threshold2;
3969
3970     s->dsp.fdct (block);
3971
3972     if(s->dct_error_sum)
3973         s->denoise_dct(s, block);
3974
3975     if (s->mb_intra) {
3976         if (!s->h263_aic) {
3977             if (n < 4)
3978                 q = s->y_dc_scale;
3979             else
3980                 q = s->c_dc_scale;
3981             q = q << 3;
3982         } else
3983             /* For AIC we skip quant/dequant of INTRADC */
3984             q = 1 << 3;
3985
3986         /* note: block[0] is assumed to be positive */
3987         block[0] = (block[0] + (q >> 1)) / q;
3988         start_i = 1;
3989         last_non_zero = 0;
3990         qmat = s->q_intra_matrix[qscale];
3991         bias= s->intra_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
3992     } else {
3993         start_i = 0;
3994         last_non_zero = -1;
3995         qmat = s->q_inter_matrix[qscale];
3996         bias= s->inter_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
3997     }
3998     threshold1= (1<<QMAT_SHIFT) - bias - 1;
3999     threshold2= (threshold1<<1);
4000     for(i=63;i>=start_i;i--) {
4001         j = scantable[i];
4002         level = block[j] * qmat[j];
4003
4004         if(((unsigned)(level+threshold1))>threshold2){
4005             last_non_zero = i;
4006             break;
4007         }else{
4008             block[j]=0;
4009         }
4010     }
4011     for(i=start_i; i<=last_non_zero; i++) {
4012         j = scantable[i];
4013         level = block[j] * qmat[j];
4014
4015 //        if(   bias+level >= (1<<QMAT_SHIFT)
4016 //           || bias-level >= (1<<QMAT_SHIFT)){
4017         if(((unsigned)(level+threshold1))>threshold2){
4018             if(level>0){
4019                 level= (bias + level)>>QMAT_SHIFT;
4020                 block[j]= level;
4021             }else{
4022                 level= (bias - level)>>QMAT_SHIFT;
4023                 block[j]= -level;
4024             }
4025             max |=level;
4026         }else{
4027             block[j]=0;
4028         }
4029     }
4030     *overflow= s->max_qcoeff < max; //overflow might have happened
4031
4032     /* we need this permutation so that we correct the IDCT, we only permute the !=0 elements */
4033     if (s->dsp.idct_permutation_type != FF_NO_IDCT_PERM)
4034         ff_block_permute(block, s->dsp.idct_permutation, scantable, last_non_zero);
4035
4036     return last_non_zero;
4037 }
4038
/* Byte offset of member x inside MpegEncContext, used by the option tables. */
#define OFFSET(x) offsetof(MpegEncContext, x)
/* Flags applied to the encoder options below. The replacement list is
 * parenthesized so that it behaves as a single operand wherever it is used. */
#define VE (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM)
4041 static const AVOption h263_options[] = {
4042     { "obmc",         "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4043     { "structured_slices","Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4044     { "mb_info",      "emit macroblock info for RFC 2190 packetization, the parameter value is the maximum payload size", OFFSET(mb_info), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VE },
4045     FF_MPV_COMMON_OPTS
4046     { NULL },
4047 };
4048
4049 static const AVClass h263_class = {
4050     .class_name = "H.263 encoder",
4051     .item_name  = av_default_item_name,
4052     .option     = h263_options,
4053     .version    = LIBAVUTIL_VERSION_INT,
4054 };
4055
4056 AVCodec ff_h263_encoder = {
4057     .name           = "h263",
4058     .type           = AVMEDIA_TYPE_VIDEO,
4059     .id             = AV_CODEC_ID_H263,
4060     .priv_data_size = sizeof(MpegEncContext),
4061     .init           = ff_MPV_encode_init,
4062     .encode2        = ff_MPV_encode_picture,
4063     .close          = ff_MPV_encode_end,
4064     .pix_fmts= (const enum AVPixelFormat[]){AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE},
4065     .long_name= NULL_IF_CONFIG_SMALL("H.263 / H.263-1996"),
4066     .priv_class     = &h263_class,
4067 };
4068
4069 static const AVOption h263p_options[] = {
4070     { "umv",        "Use unlimited motion vectors.",    OFFSET(umvplus), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4071     { "aiv",        "Use alternative inter VLC.",       OFFSET(alt_inter_vlc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4072     { "obmc",       "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4073     { "structured_slices", "Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4074     FF_MPV_COMMON_OPTS
4075     { NULL },
4076 };
4077 static const AVClass h263p_class = {
4078     .class_name = "H.263p encoder",
4079     .item_name  = av_default_item_name,
4080     .option     = h263p_options,
4081     .version    = LIBAVUTIL_VERSION_INT,
4082 };
4083
4084 AVCodec ff_h263p_encoder = {
4085     .name           = "h263p",
4086     .type           = AVMEDIA_TYPE_VIDEO,
4087     .id             = AV_CODEC_ID_H263P,
4088     .priv_data_size = sizeof(MpegEncContext),
4089     .init           = ff_MPV_encode_init,
4090     .encode2        = ff_MPV_encode_picture,
4091     .close          = ff_MPV_encode_end,
4092     .capabilities   = CODEC_CAP_SLICE_THREADS,
4093     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4094     .long_name      = NULL_IF_CONFIG_SMALL("H.263+ / H.263-1998 / H.263 version 2"),
4095     .priv_class     = &h263p_class,
4096 };
4097
4098 FF_MPV_GENERIC_CLASS(msmpeg4v2)
4099
4100 AVCodec ff_msmpeg4v2_encoder = {
4101     .name           = "msmpeg4v2",
4102     .type           = AVMEDIA_TYPE_VIDEO,
4103     .id             = AV_CODEC_ID_MSMPEG4V2,
4104     .priv_data_size = sizeof(MpegEncContext),
4105     .init           = ff_MPV_encode_init,
4106     .encode2        = ff_MPV_encode_picture,
4107     .close          = ff_MPV_encode_end,
4108     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4109     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 2"),
4110     .priv_class     = &msmpeg4v2_class,
4111 };
4112
4113 FF_MPV_GENERIC_CLASS(msmpeg4v3)
4114
4115 AVCodec ff_msmpeg4v3_encoder = {
4116     .name           = "msmpeg4",
4117     .type           = AVMEDIA_TYPE_VIDEO,
4118     .id             = AV_CODEC_ID_MSMPEG4V3,
4119     .priv_data_size = sizeof(MpegEncContext),
4120     .init           = ff_MPV_encode_init,
4121     .encode2        = ff_MPV_encode_picture,
4122     .close          = ff_MPV_encode_end,
4123     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4124     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 3"),
4125     .priv_class     = &msmpeg4v3_class,
4126 };
4127
4128 FF_MPV_GENERIC_CLASS(wmv1)
4129
4130 AVCodec ff_wmv1_encoder = {
4131     .name           = "wmv1",
4132     .type           = AVMEDIA_TYPE_VIDEO,
4133     .id             = AV_CODEC_ID_WMV1,
4134     .priv_data_size = sizeof(MpegEncContext),
4135     .init           = ff_MPV_encode_init,
4136     .encode2        = ff_MPV_encode_picture,
4137     .close          = ff_MPV_encode_end,
4138     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4139     .long_name      = NULL_IF_CONFIG_SMALL("Windows Media Video 7"),
4140     .priv_class     = &wmv1_class,
4141 };