/*
 * The simplest mpeg encoder (well, it was the simplest!)
 * Copyright (c) 2000,2001 Fabrice Bellard
 * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
 *
 * 4MV & hq & B-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
 *
 * This file is part of Libav.
 *
 * Libav is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * Libav is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with Libav; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/**
 * @file
 * The simplest mpeg encoder (well, it was the simplest!).
 */

#include "libavutil/internal.h"
#include "libavutil/intmath.h"
#include "libavutil/mathematics.h"
#include "libavutil/pixdesc.h"
#include "libavutil/opt.h"
#include "avcodec.h"
#include "dct.h"
#include "dsputil.h"
#include "mpegvideo.h"
#include "h263.h"
#include "mathops.h"
#include "mjpegenc.h"
#include "msmpeg4.h"
#include "faandct.h"
#include "thread.h"
#include "aandcttab.h"
#include "flv.h"
#include "mpeg4video.h"
#include "internal.h"
#include "bytestream.h"
#include <limits.h>

//#undef NDEBUG
//#include <assert.h>

static int encode_picture(MpegEncContext *s, int picture_number);
static int dct_quantize_refine(MpegEncContext *s, int16_t *block, int16_t *weight, int16_t *orig, int n, int qscale);
static int sse_mb(MpegEncContext *s);
static void denoise_dct_c(MpegEncContext *s, int16_t *block);
static int dct_quantize_trellis_c(MpegEncContext *s, int16_t *block, int n, int qscale, int *overflow);

/* enable all paranoid tests for rounding, overflows, etc... */
//#define PARANOID

//#define DEBUG

static uint8_t default_mv_penalty[MAX_FCODE + 1][MAX_MV * 2 + 1];
static uint8_t default_fcode_tab[MAX_MV * 2 + 1];

const AVOption ff_mpv_generic_options[] = {
    FF_MPV_COMMON_OPTS
    { NULL },
};

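/**
 * Build the per-qscale quantizer multiplier tables from a quant matrix.
 *
 * Editor's note, inferred from the code below: for every qscale in
 * [qmin, qmax] this precomputes qmat[qscale][i] ~= (1 << QMAT_SHIFT) /
 * (qscale * quant_matrix[i]) (additionally scaled by ff_aanscales[] when the
 * AAN/ifast fDCT is selected), plus the 16-bit qmat16 multiplier/bias pairs
 * used by the MMX quantizer, so dct_quantize can replace the per-coefficient
 * division by a multiply and shift. A warning is printed if the chosen
 * QMAT_SHIFT could overflow for the given matrix.
 */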
void ff_convert_matrix(DSPContext *dsp, int (*qmat)[64],
                       uint16_t (*qmat16)[2][64],
                       const uint16_t *quant_matrix,
                       int bias, int qmin, int qmax, int intra)
{
    int qscale;
    int shift = 0;

    for (qscale = qmin; qscale <= qmax; qscale++) {
        int i;
        if (dsp->fdct == ff_jpeg_fdct_islow_8 ||
            dsp->fdct == ff_jpeg_fdct_islow_10 ||
            dsp->fdct == ff_faandct) {
            for (i = 0; i < 64; i++) {
                const int j = dsp->idct_permutation[i];
                /* 16 <= qscale * quant_matrix[i] <= 7905
                 * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
                 *             19952 <=              x  <= 249205026
                 * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
                 *           3444240 >= (1 << 36) / (x) >= 275 */

                qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
                                        (qscale * quant_matrix[j]));
            }
        } else if (dsp->fdct == ff_fdct_ifast) {
            for (i = 0; i < 64; i++) {
                const int j = dsp->idct_permutation[i];
                /* 16 <= qscale * quant_matrix[i] <= 7905
                 * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
                 *             19952 <=              x  <= 249205026
                 * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
                 *           3444240 >= (1 << 36) / (x) >= 275 */

                qmat[qscale][i] = (int)((UINT64_C(1) << (QMAT_SHIFT + 14)) /
                                        (ff_aanscales[i] * qscale *
                                         quant_matrix[j]));
            }
        } else {
            for (i = 0; i < 64; i++) {
                const int j = dsp->idct_permutation[i];
                /* We can safely suppose that 16 <= quant_matrix[i] <= 255
                 * Assume x = qscale * quant_matrix[i]
                 * So             16 <=              x  <= 7905
                 * so (1 << 19) / 16 >= (1 << 19) / (x) >= (1 << 19) / 7905
                 * so          32768 >= (1 << 19) / (x) >= 67 */
                qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
                                        (qscale * quant_matrix[j]));
                //qmat  [qscale][i] = (1 << QMAT_SHIFT_MMX) /
                //                    (qscale * quant_matrix[i]);
                qmat16[qscale][0][i] = (1 << QMAT_SHIFT_MMX) /
                                       (qscale * quant_matrix[j]);

                if (qmat16[qscale][0][i] == 0 ||
                    qmat16[qscale][0][i] == 128 * 256)
                    qmat16[qscale][0][i] = 128 * 256 - 1;
                qmat16[qscale][1][i] =
                    ROUNDED_DIV(bias << (16 - QUANT_BIAS_SHIFT),
                                qmat16[qscale][0][i]);
            }
        }

        for (i = intra; i < 64; i++) {
            int64_t max = 8191;
            if (dsp->fdct == ff_fdct_ifast) {
                max = (8191LL * ff_aanscales[i]) >> 14;
            }
            while (((max * qmat[qscale][i]) >> shift) > INT_MAX) {
                shift++;
            }
        }
    }
    if (shift) {
        av_log(NULL, AV_LOG_INFO,
               "Warning, QMAT_SHIFT is larger than %d, overflows possible\n",
               QMAT_SHIFT - shift);
    }
}

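/* Editor's note (sketch of the mapping, not authoritative): with
 * FF_LAMBDA_SHIFT == 7 and FF_QP2LAMBDA == 118, the expression below is a
 * rounded fixed-point approximation of qscale ~= lambda / FF_QP2LAMBDA,
 * since 139 / (1 << (FF_LAMBDA_SHIFT + 7)) == 139 / 16384 ~= 1 / 117.9.
 * The result is clipped to the user qmin/qmax range, and lambda2 is kept in
 * sync as roughly lambda^2 >> FF_LAMBDA_SHIFT for the RD cost functions. */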
static inline void update_qscale(MpegEncContext *s)
{
    s->qscale = (s->lambda * 139 + FF_LAMBDA_SCALE * 64) >>
                (FF_LAMBDA_SHIFT + 7);
    s->qscale = av_clip(s->qscale, s->avctx->qmin, s->avctx->qmax);

    s->lambda2 = (s->lambda * s->lambda + FF_LAMBDA_SCALE / 2) >>
                 FF_LAMBDA_SHIFT;
}

void ff_write_quant_matrix(PutBitContext *pb, uint16_t *matrix)
{
    int i;

    if (matrix) {
        put_bits(pb, 1, 1);
        for (i = 0; i < 64; i++) {
            put_bits(pb, 8, matrix[ff_zigzag_direct[i]]);
        }
    } else
        put_bits(pb, 1, 0);
}

/**
 * init s->current_picture.qscale_table from s->lambda_table
 */
void ff_init_qscale_tab(MpegEncContext *s)
{
    int8_t * const qscale_table = s->current_picture.f.qscale_table;
    int i;

    for (i = 0; i < s->mb_num; i++) {
        unsigned int lam = s->lambda_table[s->mb_index2xy[i]];
        int qp = (lam * 139 + FF_LAMBDA_SCALE * 64) >> (FF_LAMBDA_SHIFT + 7);
        qscale_table[s->mb_index2xy[i]] = av_clip(qp, s->avctx->qmin,
                                                  s->avctx->qmax);
    }
}

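/* Editor's note: copies the picture metadata (pict_type, quality, picture
 * numbers, pts, interlacing flags) from the user-supplied frame to the
 * internal copy and, when me_threshold is set, also carries over the
 * precomputed mb_type/motion_val/ref_index arrays supplied by the caller. */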
static void copy_picture_attributes(MpegEncContext *s, AVFrame *dst,
                                    const AVFrame *src)
{
    int i;

    dst->pict_type              = src->pict_type;
    dst->quality                = src->quality;
    dst->coded_picture_number   = src->coded_picture_number;
    dst->display_picture_number = src->display_picture_number;
    //dst->reference              = src->reference;
    dst->pts                    = src->pts;
    dst->interlaced_frame       = src->interlaced_frame;
    dst->top_field_first        = src->top_field_first;

    if (s->avctx->me_threshold) {
        if (!src->motion_val[0])
            av_log(s->avctx, AV_LOG_ERROR, "AVFrame.motion_val not set!\n");
        if (!src->mb_type)
            av_log(s->avctx, AV_LOG_ERROR, "AVFrame.mb_type not set!\n");
        if (!src->ref_index[0])
            av_log(s->avctx, AV_LOG_ERROR, "AVFrame.ref_index not set!\n");
        if (src->motion_subsample_log2 != dst->motion_subsample_log2)
            av_log(s->avctx, AV_LOG_ERROR,
                   "AVFrame.motion_subsample_log2 doesn't match! (%d!=%d)\n",
                   src->motion_subsample_log2, dst->motion_subsample_log2);

        memcpy(dst->mb_type, src->mb_type,
               s->mb_stride * s->mb_height * sizeof(dst->mb_type[0]));

        for (i = 0; i < 2; i++) {
            int stride = ((16 * s->mb_width ) >>
                          src->motion_subsample_log2) + 1;
            int height = ((16 * s->mb_height) >> src->motion_subsample_log2);

            if (src->motion_val[i] &&
                src->motion_val[i] != dst->motion_val[i]) {
                memcpy(dst->motion_val[i], src->motion_val[i],
                       2 * stride * height * sizeof(int16_t));
            }
            if (src->ref_index[i] && src->ref_index[i] != dst->ref_index[i]) {
                memcpy(dst->ref_index[i], src->ref_index[i],
                       s->mb_stride * 4 * s->mb_height * sizeof(int8_t));
            }
        }
    }
}

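/* Editor's note: refreshes a duplicate (slice/thread) context with the
 * picture-level decisions that are made on the main context once motion
 * estimation is done: picture type, f/b codes, qscale and lambda, plus a few
 * header flags that are currently (re)set in encode_header (see FIXMEs). */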
static void update_duplicate_context_after_me(MpegEncContext *dst,
                                              MpegEncContext *src)
{
#define COPY(a) dst->a= src->a
    COPY(pict_type);
    COPY(current_picture);
    COPY(f_code);
    COPY(b_code);
    COPY(qscale);
    COPY(lambda);
    COPY(lambda2);
    COPY(picture_in_gop_number);
    COPY(gop_picture_number);
    COPY(frame_pred_frame_dct); // FIXME don't set in encode_header
    COPY(progressive_frame);    // FIXME don't set in encode_header
    COPY(partitioned_frame);    // FIXME don't set in encode_header
#undef COPY
}

/**
 * Set the given MpegEncContext to defaults for encoding.
 * the changed fields will not depend upon the prior state of the MpegEncContext.
 */
static void MPV_encode_defaults(MpegEncContext *s)
{
    int i;
    ff_MPV_common_defaults(s);

    for (i = -16; i < 16; i++) {
        default_fcode_tab[i + MAX_MV] = 1;
    }
    s->me.mv_penalty = default_mv_penalty;
    s->fcode_tab     = default_fcode_tab;
}

/* init video encoder */
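/* Editor's note: ff_MPV_encode_init() is not called directly by users; it
 * runs as the codec init callback when one of the mpegvideo-family encoders
 * is opened through the public API. A minimal, hypothetical setup that would
 * reach this function looks roughly like:
 *
 *     AVCodec *codec = avcodec_find_encoder(AV_CODEC_ID_MPEG4);
 *     AVCodecContext *c = avcodec_alloc_context3(codec);
 *     c->width     = 352; c->height = 288;        // example values
 *     c->time_base = (AVRational){ 1, 25 };
 *     c->pix_fmt   = AV_PIX_FMT_YUV420P;
 *     c->bit_rate  = 400000;
 *     if (avcodec_open2(c, codec, NULL) < 0)      // calls ff_MPV_encode_init
 *         return -1;
 *
 * The long validation block below rejects option combinations the selected
 * codec cannot represent before any buffers are allocated. */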
av_cold int ff_MPV_encode_init(AVCodecContext *avctx)
{
    MpegEncContext *s = avctx->priv_data;
    int i;
    int chroma_h_shift, chroma_v_shift;

    MPV_encode_defaults(s);

    switch (avctx->codec_id) {
    case AV_CODEC_ID_MPEG2VIDEO:
        if (avctx->pix_fmt != AV_PIX_FMT_YUV420P &&
            avctx->pix_fmt != AV_PIX_FMT_YUV422P) {
            av_log(avctx, AV_LOG_ERROR,
                   "only YUV420 and YUV422 are supported\n");
            return -1;
        }
        break;
    case AV_CODEC_ID_LJPEG:
        if (avctx->pix_fmt != AV_PIX_FMT_YUVJ420P &&
            avctx->pix_fmt != AV_PIX_FMT_YUVJ422P &&
            avctx->pix_fmt != AV_PIX_FMT_YUVJ444P &&
            avctx->pix_fmt != AV_PIX_FMT_BGRA     &&
            ((avctx->pix_fmt != AV_PIX_FMT_YUV420P &&
              avctx->pix_fmt != AV_PIX_FMT_YUV422P &&
              avctx->pix_fmt != AV_PIX_FMT_YUV444P) ||
             avctx->strict_std_compliance > FF_COMPLIANCE_UNOFFICIAL)) {
            av_log(avctx, AV_LOG_ERROR, "colorspace not supported in LJPEG\n");
            return -1;
        }
        break;
    case AV_CODEC_ID_MJPEG:
        if (avctx->pix_fmt != AV_PIX_FMT_YUVJ420P &&
            avctx->pix_fmt != AV_PIX_FMT_YUVJ422P &&
            ((avctx->pix_fmt != AV_PIX_FMT_YUV420P &&
              avctx->pix_fmt != AV_PIX_FMT_YUV422P) ||
             avctx->strict_std_compliance > FF_COMPLIANCE_UNOFFICIAL)) {
            av_log(avctx, AV_LOG_ERROR, "colorspace not supported in jpeg\n");
            return -1;
        }
        break;
    default:
        if (avctx->pix_fmt != AV_PIX_FMT_YUV420P) {
            av_log(avctx, AV_LOG_ERROR, "only YUV420 is supported\n");
            return -1;
        }
    }

    switch (avctx->pix_fmt) {
    case AV_PIX_FMT_YUVJ422P:
    case AV_PIX_FMT_YUV422P:
        s->chroma_format = CHROMA_422;
        break;
    case AV_PIX_FMT_YUVJ420P:
    case AV_PIX_FMT_YUV420P:
    default:
        s->chroma_format = CHROMA_420;
        break;
    }

    s->bit_rate = avctx->bit_rate;
    s->width    = avctx->width;
    s->height   = avctx->height;
    if (avctx->gop_size > 600 &&
        avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL) {
        av_log(avctx, AV_LOG_ERROR,
               "Warning keyframe interval too large! reducing it ...\n");
        avctx->gop_size = 600;
    }
    s->gop_size     = avctx->gop_size;
    s->avctx        = avctx;
    s->flags        = avctx->flags;
    s->flags2       = avctx->flags2;
    s->max_b_frames = avctx->max_b_frames;
    s->codec_id     = avctx->codec->id;
#if FF_API_MPV_GLOBAL_OPTS
    if (avctx->luma_elim_threshold)
        s->luma_elim_threshold   = avctx->luma_elim_threshold;
    if (avctx->chroma_elim_threshold)
        s->chroma_elim_threshold = avctx->chroma_elim_threshold;
#endif
    s->strict_std_compliance = avctx->strict_std_compliance;
    s->quarter_sample     = (avctx->flags & CODEC_FLAG_QPEL) != 0;
    s->mpeg_quant         = avctx->mpeg_quant;
    s->rtp_mode           = !!avctx->rtp_payload_size;
    s->intra_dc_precision = avctx->intra_dc_precision;
    s->user_specified_pts = AV_NOPTS_VALUE;

    if (s->gop_size <= 1) {
        s->intra_only = 1;
        s->gop_size   = 12;
    } else {
        s->intra_only = 0;
    }

    s->me_method = avctx->me_method;

    /* Fixed QSCALE */
    s->fixed_qscale = !!(avctx->flags & CODEC_FLAG_QSCALE);

#if FF_API_MPV_GLOBAL_OPTS
    if (s->flags & CODEC_FLAG_QP_RD)
        s->mpv_flags |= FF_MPV_FLAG_QP_RD;
#endif

    s->adaptive_quant = (s->avctx->lumi_masking ||
                         s->avctx->dark_masking ||
                         s->avctx->temporal_cplx_masking ||
                         s->avctx->spatial_cplx_masking  ||
                         s->avctx->p_masking      ||
                         s->avctx->border_masking ||
                         (s->mpv_flags & FF_MPV_FLAG_QP_RD)) &&
                        !s->fixed_qscale;

    s->loop_filter      = !!(s->flags & CODEC_FLAG_LOOP_FILTER);

    if (avctx->rc_max_rate && !avctx->rc_buffer_size) {
        av_log(avctx, AV_LOG_ERROR,
               "a vbv buffer size is needed, "
               "for encoding with a maximum bitrate\n");
        return -1;
    }

    if (avctx->rc_min_rate && avctx->rc_max_rate != avctx->rc_min_rate) {
        av_log(avctx, AV_LOG_INFO,
               "Warning min_rate > 0 but min_rate != max_rate isn't recommended!\n");
    }

    if (avctx->rc_min_rate && avctx->rc_min_rate > avctx->bit_rate) {
        av_log(avctx, AV_LOG_ERROR, "bitrate below min bitrate\n");
        return -1;
    }

    if (avctx->rc_max_rate && avctx->rc_max_rate < avctx->bit_rate) {
        av_log(avctx, AV_LOG_INFO, "bitrate above max bitrate\n");
        return -1;
    }

    if (avctx->rc_max_rate &&
        avctx->rc_max_rate == avctx->bit_rate &&
        avctx->rc_max_rate != avctx->rc_min_rate) {
        av_log(avctx, AV_LOG_INFO,
               "impossible bitrate constraints, this will fail\n");
    }

    if (avctx->rc_buffer_size &&
        avctx->bit_rate * (int64_t)avctx->time_base.num >
            avctx->rc_buffer_size * (int64_t)avctx->time_base.den) {
        av_log(avctx, AV_LOG_ERROR, "VBV buffer too small for bitrate\n");
        return -1;
    }

    if (!s->fixed_qscale &&
        avctx->bit_rate * av_q2d(avctx->time_base) >
            avctx->bit_rate_tolerance) {
        av_log(avctx, AV_LOG_ERROR,
               "bitrate tolerance too small for bitrate\n");
        return -1;
    }

    if (s->avctx->rc_max_rate &&
        s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
        (s->codec_id == AV_CODEC_ID_MPEG1VIDEO ||
         s->codec_id == AV_CODEC_ID_MPEG2VIDEO) &&
        90000LL * (avctx->rc_buffer_size - 1) >
            s->avctx->rc_max_rate * 0xFFFFLL) {
        av_log(avctx, AV_LOG_INFO,
               "Warning vbv_delay will be set to 0xFFFF (=VBR) as the "
               "specified vbv buffer is too large for the given bitrate!\n");
    }

    if ((s->flags & CODEC_FLAG_4MV)  && s->codec_id != AV_CODEC_ID_MPEG4 &&
        s->codec_id != AV_CODEC_ID_H263 && s->codec_id != AV_CODEC_ID_H263P &&
        s->codec_id != AV_CODEC_ID_FLV1) {
        av_log(avctx, AV_LOG_ERROR, "4MV not supported by codec\n");
        return -1;
    }

    if (s->obmc && s->avctx->mb_decision != FF_MB_DECISION_SIMPLE) {
        av_log(avctx, AV_LOG_ERROR,
               "OBMC is only supported with simple mb decision\n");
        return -1;
    }

    if (s->quarter_sample && s->codec_id != AV_CODEC_ID_MPEG4) {
        av_log(avctx, AV_LOG_ERROR, "qpel not supported by codec\n");
        return -1;
    }

    if (s->max_b_frames                    &&
        s->codec_id != AV_CODEC_ID_MPEG4      &&
        s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
        s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
        av_log(avctx, AV_LOG_ERROR, "b frames not supported by codec\n");
        return -1;
    }

    if ((s->codec_id == AV_CODEC_ID_MPEG4 ||
         s->codec_id == AV_CODEC_ID_H263  ||
         s->codec_id == AV_CODEC_ID_H263P) &&
        (avctx->sample_aspect_ratio.num > 255 ||
         avctx->sample_aspect_ratio.den > 255)) {
        av_log(avctx, AV_LOG_ERROR,
               "Invalid pixel aspect ratio %i/%i, limit is 255/255\n",
               avctx->sample_aspect_ratio.num, avctx->sample_aspect_ratio.den);
        return -1;
    }

    if ((s->flags & (CODEC_FLAG_INTERLACED_DCT | CODEC_FLAG_INTERLACED_ME)) &&
        s->codec_id != AV_CODEC_ID_MPEG4 && s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
        av_log(avctx, AV_LOG_ERROR, "interlacing not supported by codec\n");
        return -1;
    }

    // FIXME mpeg2 uses that too
    if (s->mpeg_quant && s->codec_id != AV_CODEC_ID_MPEG4) {
        av_log(avctx, AV_LOG_ERROR,
               "mpeg2 style quantization not supported by codec\n");
        return -1;
    }

#if FF_API_MPV_GLOBAL_OPTS
    if (s->flags & CODEC_FLAG_CBP_RD)
        s->mpv_flags |= FF_MPV_FLAG_CBP_RD;
#endif

    if ((s->mpv_flags & FF_MPV_FLAG_CBP_RD) && !avctx->trellis) {
        av_log(avctx, AV_LOG_ERROR, "CBP RD needs trellis quant\n");
        return -1;
    }

    if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) &&
        s->avctx->mb_decision != FF_MB_DECISION_RD) {
        av_log(avctx, AV_LOG_ERROR, "QP RD needs mbd=2\n");
        return -1;
    }

    if (s->avctx->scenechange_threshold < 1000000000 &&
        (s->flags & CODEC_FLAG_CLOSED_GOP)) {
        av_log(avctx, AV_LOG_ERROR,
               "closed gop with scene change detection is not supported yet, "
               "set threshold to 1000000000\n");
        return -1;
    }

    if (s->flags & CODEC_FLAG_LOW_DELAY) {
        if (s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
            av_log(avctx, AV_LOG_ERROR,
                  "low delay forcing is only available for mpeg2\n");
            return -1;
        }
        if (s->max_b_frames != 0) {
            av_log(avctx, AV_LOG_ERROR,
                   "b frames cannot be used with low delay\n");
            return -1;
        }
    }

    if (s->q_scale_type == 1) {
        if (avctx->qmax > 12) {
            av_log(avctx, AV_LOG_ERROR,
                   "non linear quant only supports qmax <= 12 currently\n");
            return -1;
        }
    }

    if (s->avctx->thread_count > 1         &&
        s->codec_id != AV_CODEC_ID_MPEG4      &&
        s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
        s->codec_id != AV_CODEC_ID_MPEG2VIDEO &&
        (s->codec_id != AV_CODEC_ID_H263P)) {
        av_log(avctx, AV_LOG_ERROR,
               "multi threaded encoding not supported by codec\n");
        return -1;
    }

    if (s->avctx->thread_count < 1) {
        av_log(avctx, AV_LOG_ERROR,
               "automatic thread number detection not supported by codec, "
               "patch welcome\n");
        return -1;
    }

    if (s->avctx->thread_count > 1)
        s->rtp_mode = 1;

    if (!avctx->time_base.den || !avctx->time_base.num) {
        av_log(avctx, AV_LOG_ERROR, "framerate not set\n");
        return -1;
    }

    i = (INT_MAX / 2 + 128) >> 8;
    if (avctx->me_threshold >= i) {
        av_log(avctx, AV_LOG_ERROR, "me_threshold too large, max is %d\n",
               i - 1);
        return -1;
    }
    if (avctx->mb_threshold >= i) {
        av_log(avctx, AV_LOG_ERROR, "mb_threshold too large, max is %d\n",
               i - 1);
        return -1;
    }

    if (avctx->b_frame_strategy && (avctx->flags & CODEC_FLAG_PASS2)) {
        av_log(avctx, AV_LOG_INFO,
               "notice: b_frame_strategy only affects the first pass\n");
        avctx->b_frame_strategy = 0;
    }

    i = av_gcd(avctx->time_base.den, avctx->time_base.num);
    if (i > 1) {
        av_log(avctx, AV_LOG_INFO, "removing common factors from framerate\n");
        avctx->time_base.den /= i;
        avctx->time_base.num /= i;
        //return -1;
    }

    if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG1VIDEO ||
        s->codec_id == AV_CODEC_ID_MPEG2VIDEO || s->codec_id == AV_CODEC_ID_MJPEG) {
        // (a + x * 3 / 8) / x
        s->intra_quant_bias = 3 << (QUANT_BIAS_SHIFT - 3);
        s->inter_quant_bias = 0;
    } else {
        s->intra_quant_bias = 0;
        // (a - x / 4) / x
        s->inter_quant_bias = -(1 << (QUANT_BIAS_SHIFT - 2));
    }

    if (avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
        s->intra_quant_bias = avctx->intra_quant_bias;
    if (avctx->inter_quant_bias != FF_DEFAULT_QUANT_BIAS)
        s->inter_quant_bias = avctx->inter_quant_bias;

    av_pix_fmt_get_chroma_sub_sample(avctx->pix_fmt, &chroma_h_shift,
                                     &chroma_v_shift);

    if (avctx->codec_id == AV_CODEC_ID_MPEG4 &&
        s->avctx->time_base.den > (1 << 16) - 1) {
        av_log(avctx, AV_LOG_ERROR,
               "timebase %d/%d not supported by MPEG 4 standard, "
               "the maximum admitted value for the timebase denominator "
               "is %d\n", s->avctx->time_base.num, s->avctx->time_base.den,
               (1 << 16) - 1);
        return -1;
    }
    s->time_increment_bits = av_log2(s->avctx->time_base.den - 1) + 1;

#if FF_API_MPV_GLOBAL_OPTS
    if (avctx->flags2 & CODEC_FLAG2_SKIP_RD)
        s->mpv_flags |= FF_MPV_FLAG_SKIP_RD;
    if (avctx->flags2 & CODEC_FLAG2_STRICT_GOP)
        s->mpv_flags |= FF_MPV_FLAG_STRICT_GOP;
    if (avctx->quantizer_noise_shaping)
        s->quantizer_noise_shaping = avctx->quantizer_noise_shaping;
#endif

    switch (avctx->codec->id) {
    case AV_CODEC_ID_MPEG1VIDEO:
        s->out_format = FMT_MPEG1;
        s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
        avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
        break;
    case AV_CODEC_ID_MPEG2VIDEO:
        s->out_format = FMT_MPEG1;
        s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
        avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
        s->rtp_mode   = 1;
        break;
    case AV_CODEC_ID_LJPEG:
    case AV_CODEC_ID_MJPEG:
        s->out_format = FMT_MJPEG;
        s->intra_only = 1; /* force intra only for jpeg */
        if (avctx->codec->id == AV_CODEC_ID_LJPEG &&
            avctx->pix_fmt   == AV_PIX_FMT_BGRA) {
            s->mjpeg_vsample[0] = s->mjpeg_hsample[0] =
            s->mjpeg_vsample[1] = s->mjpeg_hsample[1] =
            s->mjpeg_vsample[2] = s->mjpeg_hsample[2] = 1;
        } else {
            s->mjpeg_vsample[0] = 2;
            s->mjpeg_vsample[1] = 2 >> chroma_v_shift;
            s->mjpeg_vsample[2] = 2 >> chroma_v_shift;
            s->mjpeg_hsample[0] = 2;
            s->mjpeg_hsample[1] = 2 >> chroma_h_shift;
            s->mjpeg_hsample[2] = 2 >> chroma_h_shift;
        }
        if (!(CONFIG_MJPEG_ENCODER || CONFIG_LJPEG_ENCODER) ||
            ff_mjpeg_encode_init(s) < 0)
            return -1;
        avctx->delay = 0;
        s->low_delay = 1;
        break;
    case AV_CODEC_ID_H261:
        if (!CONFIG_H261_ENCODER)
            return -1;
        if (ff_h261_get_picture_format(s->width, s->height) < 0) {
            av_log(avctx, AV_LOG_ERROR,
                   "The specified picture size of %dx%d is not valid for the "
                   "H.261 codec.\nValid sizes are 176x144, 352x288\n",
                    s->width, s->height);
            return -1;
        }
        s->out_format = FMT_H261;
        avctx->delay  = 0;
        s->low_delay  = 1;
        break;
    case AV_CODEC_ID_H263:
        if (!CONFIG_H263_ENCODER)
        return -1;
        if (ff_match_2uint16(ff_h263_format, FF_ARRAY_ELEMS(ff_h263_format),
                             s->width, s->height) == 8) {
            av_log(avctx, AV_LOG_INFO,
                   "The specified picture size of %dx%d is not valid for "
                   "the H.263 codec.\nValid sizes are 128x96, 176x144, "
                   "352x288, 704x576, and 1408x1152. "
                   "Try H.263+.\n", s->width, s->height);
            return -1;
        }
        s->out_format = FMT_H263;
        avctx->delay  = 0;
        s->low_delay  = 1;
        break;
    case AV_CODEC_ID_H263P:
        s->out_format = FMT_H263;
        s->h263_plus  = 1;
        /* Fx */
        s->h263_aic        = (avctx->flags & CODEC_FLAG_AC_PRED) ? 1 : 0;
        s->modified_quant  = s->h263_aic;
        s->loop_filter     = (avctx->flags & CODEC_FLAG_LOOP_FILTER) ? 1 : 0;
        s->unrestricted_mv = s->obmc || s->loop_filter || s->umvplus;

        /* /Fx */
        /* These are just to be sure */
        avctx->delay = 0;
        s->low_delay = 1;
        break;
    case AV_CODEC_ID_FLV1:
        s->out_format      = FMT_H263;
        s->h263_flv        = 2; /* format = 1; 11-bit codes */
        s->unrestricted_mv = 1;
        s->rtp_mode  = 0; /* don't allow GOB */
        avctx->delay = 0;
        s->low_delay = 1;
        break;
    case AV_CODEC_ID_RV10:
        s->out_format = FMT_H263;
        avctx->delay  = 0;
        s->low_delay  = 1;
        break;
    case AV_CODEC_ID_RV20:
        s->out_format      = FMT_H263;
        avctx->delay       = 0;
        s->low_delay       = 1;
        s->modified_quant  = 1;
        s->h263_aic        = 1;
        s->h263_plus       = 1;
        s->loop_filter     = 1;
        s->unrestricted_mv = 0;
        break;
    case AV_CODEC_ID_MPEG4:
        s->out_format      = FMT_H263;
        s->h263_pred       = 1;
        s->unrestricted_mv = 1;
        s->low_delay       = s->max_b_frames ? 0 : 1;
        avctx->delay       = s->low_delay ? 0 : (s->max_b_frames + 1);
        break;
    case AV_CODEC_ID_MSMPEG4V2:
        s->out_format      = FMT_H263;
        s->h263_pred       = 1;
        s->unrestricted_mv = 1;
        s->msmpeg4_version = 2;
        avctx->delay       = 0;
        s->low_delay       = 1;
        break;
    case AV_CODEC_ID_MSMPEG4V3:
        s->out_format        = FMT_H263;
        s->h263_pred         = 1;
        s->unrestricted_mv   = 1;
        s->msmpeg4_version   = 3;
        s->flipflop_rounding = 1;
        avctx->delay         = 0;
        s->low_delay         = 1;
        break;
    case AV_CODEC_ID_WMV1:
        s->out_format        = FMT_H263;
        s->h263_pred         = 1;
        s->unrestricted_mv   = 1;
        s->msmpeg4_version   = 4;
        s->flipflop_rounding = 1;
        avctx->delay         = 0;
        s->low_delay         = 1;
        break;
    case AV_CODEC_ID_WMV2:
        s->out_format        = FMT_H263;
        s->h263_pred         = 1;
        s->unrestricted_mv   = 1;
        s->msmpeg4_version   = 5;
        s->flipflop_rounding = 1;
        avctx->delay         = 0;
        s->low_delay         = 1;
        break;
    default:
        return -1;
    }

    avctx->has_b_frames = !s->low_delay;

    s->encoding = 1;

    s->progressive_frame    =
    s->progressive_sequence = !(avctx->flags & (CODEC_FLAG_INTERLACED_DCT |
                                                CODEC_FLAG_INTERLACED_ME) ||
                                s->alternate_scan);

    /* init */
    if (ff_MPV_common_init(s) < 0)
        return -1;

    if (ARCH_X86)
        ff_MPV_encode_init_x86(s);

    if (!s->dct_quantize)
        s->dct_quantize = ff_dct_quantize_c;
    if (!s->denoise_dct)
        s->denoise_dct  = denoise_dct_c;
    s->fast_dct_quantize = s->dct_quantize;
    if (avctx->trellis)
        s->dct_quantize  = dct_quantize_trellis_c;

    if ((CONFIG_H263P_ENCODER || CONFIG_RV20_ENCODER) && s->modified_quant)
        s->chroma_qscale_table = ff_h263_chroma_qscale_table;

    s->quant_precision = 5;

    ff_set_cmp(&s->dsp, s->dsp.ildct_cmp, s->avctx->ildct_cmp);
    ff_set_cmp(&s->dsp, s->dsp.frame_skip_cmp, s->avctx->frame_skip_cmp);

    if (CONFIG_H261_ENCODER && s->out_format == FMT_H261)
        ff_h261_encode_init(s);
    if (CONFIG_H263_ENCODER && s->out_format == FMT_H263)
        ff_h263_encode_init(s);
    if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
        ff_msmpeg4_encode_init(s);
    if ((CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
        && s->out_format == FMT_MPEG1)
        ff_mpeg1_encode_init(s);

    /* init q matrix */
    for (i = 0; i < 64; i++) {
        int j = s->dsp.idct_permutation[i];
        if (CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4 &&
            s->mpeg_quant) {
            s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
            s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
        } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
            s->intra_matrix[j] =
            s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
        } else {
            /* mpeg1/2 */
            s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
            s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
        }
        if (s->avctx->intra_matrix)
            s->intra_matrix[j] = s->avctx->intra_matrix[i];
        if (s->avctx->inter_matrix)
            s->inter_matrix[j] = s->avctx->inter_matrix[i];
    }

    /* precompute matrix */
    /* for mjpeg, we do include qscale in the matrix */
    if (s->out_format != FMT_MJPEG) {
        ff_convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
                          s->intra_matrix, s->intra_quant_bias, avctx->qmin,
                          31, 1);
        ff_convert_matrix(&s->dsp, s->q_inter_matrix, s->q_inter_matrix16,
                          s->inter_matrix, s->inter_quant_bias, avctx->qmin,
                          31, 0);
    }

    if (ff_rate_control_init(s) < 0)
        return -1;

    return 0;
}

av_cold int ff_MPV_encode_end(AVCodecContext *avctx)
{
    MpegEncContext *s = avctx->priv_data;

    ff_rate_control_uninit(s);

    ff_MPV_common_end(s);
    if ((CONFIG_MJPEG_ENCODER || CONFIG_LJPEG_ENCODER) &&
        s->out_format == FMT_MJPEG)
        ff_mjpeg_encode_close(s);

    av_freep(&avctx->extradata);

    return 0;
}

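/* Editor's note: get_sae() returns the sum of absolute differences of a
 * 16x16 block against a constant value (its mean, when called from
 * get_intra_count()), i.e. a rough proxy for the cost of coding the block as
 * intra. get_intra_count() compares that against the SAD between src and ref
 * and counts the macroblocks that look cheaper as intra; the count is used
 * by b_frame_strategy 1 below to score candidate B-frames. */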
static int get_sae(uint8_t *src, int ref, int stride)
{
    int x,y;
    int acc = 0;

    for (y = 0; y < 16; y++) {
        for (x = 0; x < 16; x++) {
            acc += FFABS(src[x + y * stride] - ref);
        }
    }

    return acc;
}

static int get_intra_count(MpegEncContext *s, uint8_t *src,
                           uint8_t *ref, int stride)
{
    int x, y, w, h;
    int acc = 0;

    w = s->width  & ~15;
    h = s->height & ~15;

    for (y = 0; y < h; y += 16) {
        for (x = 0; x < w; x += 16) {
            int offset = x + y * stride;
            int sad  = s->dsp.sad[0](NULL, src + offset, ref + offset, stride,
                                     16);
            int mean = (s->dsp.pix_sum(src + offset, stride) + 128) >> 8;
            int sae  = get_sae(src + offset, mean, stride);

            acc += sae + 500 < sad;
        }
    }
    return acc;
}


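/* Editor's note: copies or wraps the user-supplied frame into the internal
 * input_picture FIFO. If the caller guarantees the data stays valid
 * (CODEC_FLAG_INPUT_PRESERVED, or no reordering delay) and the strides match,
 * the frame data is referenced directly; otherwise it is copied into an
 * internal picture (shifted by INPLACE_OFFSET when no VBV buffer size is
 * set). It also validates, or guesses, the pts of the incoming frame. */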
static int load_input_picture(MpegEncContext *s, const AVFrame *pic_arg)
{
    AVFrame *pic = NULL;
    int64_t pts;
    int i, display_picture_number = 0;
    const int encoding_delay = s->max_b_frames ? s->max_b_frames :
                                                 (s->low_delay ? 0 : 1);
    int direct = 1;

    if (pic_arg) {
        pts = pic_arg->pts;
        display_picture_number = s->input_picture_number++;

        if (pts != AV_NOPTS_VALUE) {
            if (s->user_specified_pts != AV_NOPTS_VALUE) {
                int64_t time = pts;
                int64_t last = s->user_specified_pts;

                if (time <= last) {
                    av_log(s->avctx, AV_LOG_ERROR,
                           "Error, Invalid timestamp=%"PRId64", "
                           "last=%"PRId64"\n", pts, s->user_specified_pts);
                    return -1;
                }

                if (!s->low_delay && display_picture_number == 1)
                    s->dts_delta = time - last;
            }
            s->user_specified_pts = pts;
        } else {
            if (s->user_specified_pts != AV_NOPTS_VALUE) {
                s->user_specified_pts =
                pts = s->user_specified_pts + 1;
                av_log(s->avctx, AV_LOG_INFO,
                       "Warning: AVFrame.pts=? trying to guess (%"PRId64")\n",
                       pts);
            } else {
                pts = display_picture_number;
            }
        }
    }

    if (pic_arg) {
        if (encoding_delay && !(s->flags & CODEC_FLAG_INPUT_PRESERVED))
            direct = 0;
        if (pic_arg->linesize[0] != s->linesize)
            direct = 0;
        if (pic_arg->linesize[1] != s->uvlinesize)
            direct = 0;
        if (pic_arg->linesize[2] != s->uvlinesize)
            direct = 0;

        av_dlog(s->avctx, "%d %d %d %d\n", pic_arg->linesize[0],
                pic_arg->linesize[1], s->linesize, s->uvlinesize);

        if (direct) {
            i = ff_find_unused_picture(s, 1);
            if (i < 0)
                return i;

            pic = &s->picture[i].f;
            pic->reference = 3;

            for (i = 0; i < 4; i++) {
                pic->data[i]     = pic_arg->data[i];
                pic->linesize[i] = pic_arg->linesize[i];
            }
            if (ff_alloc_picture(s, (Picture *) pic, 1) < 0) {
                return -1;
            }
        } else {
            i = ff_find_unused_picture(s, 0);
            if (i < 0)
                return i;

            pic = &s->picture[i].f;
            pic->reference = 3;

            if (ff_alloc_picture(s, (Picture *) pic, 0) < 0) {
                return -1;
            }

            if (pic->data[0] + INPLACE_OFFSET == pic_arg->data[0] &&
                pic->data[1] + INPLACE_OFFSET == pic_arg->data[1] &&
                pic->data[2] + INPLACE_OFFSET == pic_arg->data[2]) {
                // empty
            } else {
                int h_chroma_shift, v_chroma_shift;
                av_pix_fmt_get_chroma_sub_sample(s->avctx->pix_fmt,
                                                 &h_chroma_shift,
                                                 &v_chroma_shift);

                for (i = 0; i < 3; i++) {
                    int src_stride = pic_arg->linesize[i];
                    int dst_stride = i ? s->uvlinesize : s->linesize;
                    int h_shift = i ? h_chroma_shift : 0;
                    int v_shift = i ? v_chroma_shift : 0;
                    int w = s->width  >> h_shift;
                    int h = s->height >> v_shift;
                    uint8_t *src = pic_arg->data[i];
                    uint8_t *dst = pic->data[i];

                    if (!s->avctx->rc_buffer_size)
                        dst += INPLACE_OFFSET;

                    if (src_stride == dst_stride)
                        memcpy(dst, src, src_stride * h);
                    else {
                        while (h--) {
                            memcpy(dst, src, w);
                            dst += dst_stride;
                            src += src_stride;
                        }
                    }
                }
            }
        }
        copy_picture_attributes(s, pic, pic_arg);
        pic->display_picture_number = display_picture_number;
        pic->pts = pts; // we set this here to avoid modifying pic_arg
    }

    /* shift buffer entries */
    for (i = 1; i < MAX_PICTURE_COUNT /*s->encoding_delay + 1*/; i++)
        s->input_picture[i - 1] = s->input_picture[i];

    s->input_picture[encoding_delay] = (Picture*) pic;

    return 0;
}

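/* Editor's note: frame-skip decision. For every 8x8 block of each plane the
 * configured frame_skip_cmp metric is taken against the last reference, and
 * the per-block values v are accumulated according to frame_skip_exp:
 * 0 -> max(v), 1 -> sum |v|, 2 -> sum v^2, 3 -> sum |v|^3, 4 -> sum v^4.
 * The frame is skipped when the total stays below frame_skip_threshold or
 * below (frame_skip_factor * lambda) >> 8. */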
static int skip_check(MpegEncContext *s, Picture *p, Picture *ref)
{
    int x, y, plane;
    int score = 0;
    int64_t score64 = 0;

    for (plane = 0; plane < 3; plane++) {
        const int stride = p->f.linesize[plane];
        const int bw = plane ? 1 : 2;
        for (y = 0; y < s->mb_height * bw; y++) {
            for (x = 0; x < s->mb_width * bw; x++) {
                int off = p->f.type == FF_BUFFER_TYPE_SHARED ? 0 : 16;
                uint8_t *dptr = p->f.data[plane] + 8 * (x + y * stride) + off;
                uint8_t *rptr = ref->f.data[plane] + 8 * (x + y * stride);
                int v   = s->dsp.frame_skip_cmp[1](s, dptr, rptr, stride, 8);

                switch (s->avctx->frame_skip_exp) {
                case 0: score    =  FFMAX(score, v);          break;
                case 1: score   += FFABS(v);                  break;
                case 2: score   += v * v;                     break;
                case 3: score64 += FFABS(v * v * (int64_t)v); break;
                case 4: score64 += v * v * (int64_t)(v * v);  break;
                }
            }
        }
    }

    if (score)
        score64 = score;

    if (score64 < s->avctx->frame_skip_threshold)
        return 1;
    if (score64 < ((s->avctx->frame_skip_factor * (int64_t)s->lambda) >> 8))
        return 1;
    return 0;
}

static int encode_frame(AVCodecContext *c, AVFrame *frame)
{
    AVPacket pkt = { 0 };
    int ret, got_output;

    av_init_packet(&pkt);
    ret = avcodec_encode_video2(c, &pkt, frame, &got_output);
    if (ret < 0)
        return ret;

    ret = pkt.size;
    av_free_packet(&pkt);
    return ret;
}

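/* Editor's note: implementation of b_frame_strategy == 2. The queued input
 * frames are downscaled by brd_scale and re-encoded with a scratch instance
 * of the same encoder for every candidate number of B-frames (0..max); each
 * candidate's cost is the size of the produced packets weighted by lambda2
 * plus the reported SSE, and the cheapest B-frame count is returned. */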
static int estimate_best_b_count(MpegEncContext *s)
{
    AVCodec *codec    = avcodec_find_encoder(s->avctx->codec_id);
    AVCodecContext *c = avcodec_alloc_context3(NULL);
    AVFrame input[FF_MAX_B_FRAMES + 2];
    const int scale = s->avctx->brd_scale;
    int i, j, out_size, p_lambda, b_lambda, lambda2;
    int64_t best_rd  = INT64_MAX;
    int best_b_count = -1;

    assert(scale >= 0 && scale <= 3);

    //emms_c();
    //s->next_picture_ptr->quality;
    p_lambda = s->last_lambda_for[AV_PICTURE_TYPE_P];
    //p_lambda * FFABS(s->avctx->b_quant_factor) + s->avctx->b_quant_offset;
    b_lambda = s->last_lambda_for[AV_PICTURE_TYPE_B];
    if (!b_lambda) // FIXME we should do this somewhere else
        b_lambda = p_lambda;
    lambda2  = (b_lambda * b_lambda + (1 << FF_LAMBDA_SHIFT) / 2) >>
               FF_LAMBDA_SHIFT;

    c->width        = s->width  >> scale;
    c->height       = s->height >> scale;
    c->flags        = CODEC_FLAG_QSCALE | CODEC_FLAG_PSNR |
                      CODEC_FLAG_INPUT_PRESERVED /*| CODEC_FLAG_EMU_EDGE*/;
    c->flags       |= s->avctx->flags & CODEC_FLAG_QPEL;
    c->mb_decision  = s->avctx->mb_decision;
    c->me_cmp       = s->avctx->me_cmp;
    c->mb_cmp       = s->avctx->mb_cmp;
    c->me_sub_cmp   = s->avctx->me_sub_cmp;
    c->pix_fmt      = AV_PIX_FMT_YUV420P;
    c->time_base    = s->avctx->time_base;
    c->max_b_frames = s->max_b_frames;

    if (avcodec_open2(c, codec, NULL) < 0)
        return -1;

    for (i = 0; i < s->max_b_frames + 2; i++) {
        int ysize = c->width * c->height;
        int csize = (c->width / 2) * (c->height / 2);
        Picture pre_input, *pre_input_ptr = i ? s->input_picture[i - 1] :
                                                s->next_picture_ptr;

        avcodec_get_frame_defaults(&input[i]);
        input[i].data[0]     = av_malloc(ysize + 2 * csize);
        input[i].data[1]     = input[i].data[0] + ysize;
        input[i].data[2]     = input[i].data[1] + csize;
        input[i].linesize[0] = c->width;
        input[i].linesize[1] =
        input[i].linesize[2] = c->width / 2;

        if (pre_input_ptr && (!i || s->input_picture[i - 1])) {
            pre_input = *pre_input_ptr;

            if (pre_input.f.type != FF_BUFFER_TYPE_SHARED && i) {
                pre_input.f.data[0] += INPLACE_OFFSET;
                pre_input.f.data[1] += INPLACE_OFFSET;
                pre_input.f.data[2] += INPLACE_OFFSET;
            }

            s->dsp.shrink[scale](input[i].data[0], input[i].linesize[0],
                                 pre_input.f.data[0], pre_input.f.linesize[0],
                                 c->width,      c->height);
            s->dsp.shrink[scale](input[i].data[1], input[i].linesize[1],
                                 pre_input.f.data[1], pre_input.f.linesize[1],
                                 c->width >> 1, c->height >> 1);
            s->dsp.shrink[scale](input[i].data[2], input[i].linesize[2],
                                 pre_input.f.data[2], pre_input.f.linesize[2],
                                 c->width >> 1, c->height >> 1);
        }
    }

    for (j = 0; j < s->max_b_frames + 1; j++) {
        int64_t rd = 0;

        if (!s->input_picture[j])
            break;

        c->error[0] = c->error[1] = c->error[2] = 0;

        input[0].pict_type = AV_PICTURE_TYPE_I;
        input[0].quality   = 1 * FF_QP2LAMBDA;

        out_size = encode_frame(c, &input[0]);

        //rd += (out_size * lambda2) >> FF_LAMBDA_SHIFT;

        for (i = 0; i < s->max_b_frames + 1; i++) {
            int is_p = i % (j + 1) == j || i == s->max_b_frames;

            input[i + 1].pict_type = is_p ?
                                     AV_PICTURE_TYPE_P : AV_PICTURE_TYPE_B;
            input[i + 1].quality   = is_p ? p_lambda : b_lambda;

            out_size = encode_frame(c, &input[i + 1]);

            rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
        }

        /* get the delayed frames */
        while (out_size) {
            out_size = encode_frame(c, NULL);
            rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
        }

        rd += c->error[0] + c->error[1] + c->error[2];

        if (rd < best_rd) {
            best_rd = rd;
            best_b_count = j;
        }
    }

    avcodec_close(c);
    av_freep(&c);

    for (i = 0; i < s->max_b_frames + 2; i++) {
        av_freep(&input[i].data[0]);
    }

    return best_b_count;
}

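/* Editor's note: decides the coding type and order of the next picture(s).
 * It applies frame skipping, honours two-pass (PASS2) picture types, picks
 * the number of B-frames according to b_frame_strategy, enforces GOP and
 * closed-GOP boundaries, and finally moves the chosen picture (plus its
 * B-frames) into reordered_input_picture/new_picture in coding order. */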
static int select_input_picture(MpegEncContext *s)
{
    int i;

    for (i = 1; i < MAX_PICTURE_COUNT; i++)
        s->reordered_input_picture[i - 1] = s->reordered_input_picture[i];
    s->reordered_input_picture[MAX_PICTURE_COUNT - 1] = NULL;

    /* set next picture type & ordering */
    if (s->reordered_input_picture[0] == NULL && s->input_picture[0]) {
        if (/*s->picture_in_gop_number >= s->gop_size ||*/
            s->next_picture_ptr == NULL || s->intra_only) {
            s->reordered_input_picture[0] = s->input_picture[0];
            s->reordered_input_picture[0]->f.pict_type = AV_PICTURE_TYPE_I;
            s->reordered_input_picture[0]->f.coded_picture_number =
                s->coded_picture_number++;
        } else {
            int b_frames;

            if (s->avctx->frame_skip_threshold || s->avctx->frame_skip_factor) {
                if (s->picture_in_gop_number < s->gop_size &&
                    skip_check(s, s->input_picture[0], s->next_picture_ptr)) {
                    // FIXME check that the gop check above is +-1 correct
                    if (s->input_picture[0]->f.type == FF_BUFFER_TYPE_SHARED) {
                        for (i = 0; i < 4; i++)
                            s->input_picture[0]->f.data[i] = NULL;
                        s->input_picture[0]->f.type = 0;
                    } else {
                        assert(s->input_picture[0]->f.type == FF_BUFFER_TYPE_USER ||
                               s->input_picture[0]->f.type == FF_BUFFER_TYPE_INTERNAL);

                        s->avctx->release_buffer(s->avctx,
                                                 &s->input_picture[0]->f);
                    }

                    emms_c();
                    ff_vbv_update(s, 0);

                    goto no_output_pic;
                }
            }

            if (s->flags & CODEC_FLAG_PASS2) {
                for (i = 0; i < s->max_b_frames + 1; i++) {
                    int pict_num = s->input_picture[0]->f.display_picture_number + i;

                    if (pict_num >= s->rc_context.num_entries)
                        break;
                    if (!s->input_picture[i]) {
                        s->rc_context.entry[pict_num - 1].new_pict_type = AV_PICTURE_TYPE_P;
                        break;
                    }

                    s->input_picture[i]->f.pict_type =
                        s->rc_context.entry[pict_num].new_pict_type;
                }
            }

            if (s->avctx->b_frame_strategy == 0) {
                b_frames = s->max_b_frames;
                while (b_frames && !s->input_picture[b_frames])
                    b_frames--;
            } else if (s->avctx->b_frame_strategy == 1) {
                for (i = 1; i < s->max_b_frames + 1; i++) {
                    if (s->input_picture[i] &&
                        s->input_picture[i]->b_frame_score == 0) {
                        s->input_picture[i]->b_frame_score =
                            get_intra_count(s,
                                            s->input_picture[i    ]->f.data[0],
                                            s->input_picture[i - 1]->f.data[0],
                                            s->linesize) + 1;
                    }
                }
                for (i = 0; i < s->max_b_frames + 1; i++) {
                    if (s->input_picture[i] == NULL ||
                        s->input_picture[i]->b_frame_score - 1 >
                            s->mb_num / s->avctx->b_sensitivity)
                        break;
                }

                b_frames = FFMAX(0, i - 1);

                /* reset scores */
                for (i = 0; i < b_frames + 1; i++) {
                    s->input_picture[i]->b_frame_score = 0;
                }
            } else if (s->avctx->b_frame_strategy == 2) {
                b_frames = estimate_best_b_count(s);
            } else {
                av_log(s->avctx, AV_LOG_ERROR, "illegal b frame strategy\n");
                b_frames = 0;
            }

            emms_c();

            for (i = b_frames - 1; i >= 0; i--) {
                int type = s->input_picture[i]->f.pict_type;
                if (type && type != AV_PICTURE_TYPE_B)
                    b_frames = i;
            }
            if (s->input_picture[b_frames]->f.pict_type == AV_PICTURE_TYPE_B &&
                b_frames == s->max_b_frames) {
                av_log(s->avctx, AV_LOG_ERROR,
                       "warning, too many b frames in a row\n");
            }

            if (s->picture_in_gop_number + b_frames >= s->gop_size) {
                if ((s->mpv_flags & FF_MPV_FLAG_STRICT_GOP) &&
                    s->gop_size > s->picture_in_gop_number) {
                    b_frames = s->gop_size - s->picture_in_gop_number - 1;
                } else {
                    if (s->flags & CODEC_FLAG_CLOSED_GOP)
                        b_frames = 0;
                    s->input_picture[b_frames]->f.pict_type = AV_PICTURE_TYPE_I;
                }
            }

            if ((s->flags & CODEC_FLAG_CLOSED_GOP) && b_frames &&
                s->input_picture[b_frames]->f.pict_type == AV_PICTURE_TYPE_I)
                b_frames--;

            s->reordered_input_picture[0] = s->input_picture[b_frames];
            if (s->reordered_input_picture[0]->f.pict_type != AV_PICTURE_TYPE_I)
                s->reordered_input_picture[0]->f.pict_type = AV_PICTURE_TYPE_P;
            s->reordered_input_picture[0]->f.coded_picture_number =
                s->coded_picture_number++;
            for (i = 0; i < b_frames; i++) {
                s->reordered_input_picture[i + 1] = s->input_picture[i];
                s->reordered_input_picture[i + 1]->f.pict_type =
                    AV_PICTURE_TYPE_B;
                s->reordered_input_picture[i + 1]->f.coded_picture_number =
                    s->coded_picture_number++;
            }
        }
    }
no_output_pic:
    if (s->reordered_input_picture[0]) {
        s->reordered_input_picture[0]->f.reference =
           s->reordered_input_picture[0]->f.pict_type !=
               AV_PICTURE_TYPE_B ? 3 : 0;

        ff_copy_picture(&s->new_picture, s->reordered_input_picture[0]);

        if (s->reordered_input_picture[0]->f.type == FF_BUFFER_TYPE_SHARED ||
            s->avctx->rc_buffer_size) {
            // input is a shared pix, so we can't modify it -> alloc a new
            // one & ensure that the shared one is reusable
1365
1366             Picture *pic;
1367             int i = ff_find_unused_picture(s, 0);
1368             if (i < 0)
1369                 return i;
1370             pic = &s->picture[i];
1371
1372             pic->f.reference = s->reordered_input_picture[0]->f.reference;
1373             if (ff_alloc_picture(s, pic, 0) < 0) {
1374                 return -1;
1375             }
1376
1377             /* mark us unused / free shared pic */
1378             if (s->reordered_input_picture[0]->f.type == FF_BUFFER_TYPE_INTERNAL)
1379                 s->avctx->release_buffer(s->avctx,
1380                                          &s->reordered_input_picture[0]->f);
1381             for (i = 0; i < 4; i++)
1382                 s->reordered_input_picture[0]->f.data[i] = NULL;
1383             s->reordered_input_picture[0]->f.type = 0;
1384
1385             copy_picture_attributes(s, &pic->f,
1386                                     &s->reordered_input_picture[0]->f);
1387
1388             s->current_picture_ptr = pic;
1389         } else {
1390             // input is not a shared pix -> reuse buffer for current_pix
1391
1392             assert(s->reordered_input_picture[0]->f.type ==
1393                        FF_BUFFER_TYPE_USER ||
1394                    s->reordered_input_picture[0]->f.type ==
1395                        FF_BUFFER_TYPE_INTERNAL);
1396
1397             s->current_picture_ptr = s->reordered_input_picture[0];
1398             for (i = 0; i < 4; i++) {
1399                 s->new_picture.f.data[i] += INPLACE_OFFSET;
1400             }
1401         }
1402         ff_copy_picture(&s->current_picture, s->current_picture_ptr);
1403
1404         s->picture_number = s->new_picture.f.display_picture_number;
1405     } else {
1406         memset(&s->new_picture, 0, sizeof(Picture));
1407     }
1408     return 0;
1409 }
1410
1411 int ff_MPV_encode_picture(AVCodecContext *avctx, AVPacket *pkt,
1412                           const AVFrame *pic_arg, int *got_packet)
1413 {
1414     MpegEncContext *s = avctx->priv_data;
1415     int i, stuffing_count, ret;
1416     int context_count = s->slice_context_count;
1417
1418     s->picture_in_gop_number++;
1419
1420     if (load_input_picture(s, pic_arg) < 0)
1421         return -1;
1422
1423     if (select_input_picture(s) < 0) {
1424         return -1;
1425     }
1426
1427     /* output? */
1428     if (s->new_picture.f.data[0]) {
1429         if (!pkt->data &&
1430             (ret = ff_alloc_packet(pkt, s->mb_width*s->mb_height*MAX_MB_BYTES)) < 0)
1431             return ret;
1432         if (s->mb_info) {
1433             s->mb_info_ptr = av_packet_new_side_data(pkt,
1434                                  AV_PKT_DATA_H263_MB_INFO,
1435                                  s->mb_width*s->mb_height*12);
1436             s->prev_mb_info = s->last_mb_info = s->mb_info_size = 0;
1437         }
1438
1439         for (i = 0; i < context_count; i++) {
1440             int start_y = s->thread_context[i]->start_mb_y;
1441             int   end_y = s->thread_context[i]->  end_mb_y;
1442             int h       = s->mb_height;
1443             uint8_t *start = pkt->data + (size_t)(((int64_t) pkt->size) * start_y / h);
1444             uint8_t *end   = pkt->data + (size_t)(((int64_t) pkt->size) *   end_y / h);
1445
1446             init_put_bits(&s->thread_context[i]->pb, start, end - start);
1447         }
1448
1449         s->pict_type = s->new_picture.f.pict_type;
1450         //emms_c();
1451         ff_MPV_frame_start(s, avctx);
1452 vbv_retry:
1453         if (encode_picture(s, s->picture_number) < 0)
1454             return -1;
1455
1456         avctx->header_bits = s->header_bits;
1457         avctx->mv_bits     = s->mv_bits;
1458         avctx->misc_bits   = s->misc_bits;
1459         avctx->i_tex_bits  = s->i_tex_bits;
1460         avctx->p_tex_bits  = s->p_tex_bits;
1461         avctx->i_count     = s->i_count;
1462         // FIXME f/b_count in avctx
1463         avctx->p_count     = s->mb_num - s->i_count - s->skip_count;
1464         avctx->skip_count  = s->skip_count;
1465
1466         ff_MPV_frame_end(s);
1467
1468         if (CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG)
1469             ff_mjpeg_encode_picture_trailer(s);
1470
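        /*
         * VBV retry: if the coded frame exceeds its allowed share of the
         * rate-control buffer, raise lambda (globally, and per macroblock
         * when adaptive quantization is on), undo the per-frame state that
         * encode_picture() changed, rewind the bit writers and re-encode.
         */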
1471         if (avctx->rc_buffer_size) {
1472             RateControlContext *rcc = &s->rc_context;
1473             int max_size = rcc->buffer_index * avctx->rc_max_available_vbv_use;
1474
1475             if (put_bits_count(&s->pb) > max_size &&
1476                 s->lambda < s->avctx->lmax) {
1477                 s->next_lambda = FFMAX(s->lambda + 1, s->lambda *
1478                                        (s->qscale + 1) / s->qscale);
1479                 if (s->adaptive_quant) {
1480                     int i;
1481                     for (i = 0; i < s->mb_height * s->mb_stride; i++)
1482                         s->lambda_table[i] =
1483                             FFMAX(s->lambda_table[i] + 1,
1484                                   s->lambda_table[i] * (s->qscale + 1) /
1485                                   s->qscale);
1486                 }
1487                 s->mb_skipped = 0;        // done in MPV_frame_start()
1488                 // the following was done in encode_picture(), so we must undo it
1489                 if (s->pict_type == AV_PICTURE_TYPE_P) {
1490                     if (s->flipflop_rounding          ||
1491                         s->codec_id == AV_CODEC_ID_H263P ||
1492                         s->codec_id == AV_CODEC_ID_MPEG4)
1493                         s->no_rounding ^= 1;
1494                 }
1495                 if (s->pict_type != AV_PICTURE_TYPE_B) {
1496                     s->time_base       = s->last_time_base;
1497                     s->last_non_b_time = s->time - s->pp_time;
1498                 }
1499                 for (i = 0; i < context_count; i++) {
1500                     PutBitContext *pb = &s->thread_context[i]->pb;
1501                     init_put_bits(pb, pb->buf, pb->buf_end - pb->buf);
1502                 }
1503                 goto vbv_retry;
1504             }
1505
1506             assert(s->avctx->rc_max_rate);
1507         }
1508
1509         if (s->flags & CODEC_FLAG_PASS1)
1510             ff_write_pass1_stats(s);
1511
1512         for (i = 0; i < 4; i++) {
1513             s->current_picture_ptr->f.error[i] = s->current_picture.f.error[i];
1514             avctx->error[i] += s->current_picture_ptr->f.error[i];
1515         }
1516
1517         if (s->flags & CODEC_FLAG_PASS1)
1518             assert(avctx->header_bits + avctx->mv_bits + avctx->misc_bits +
1519                    avctx->i_tex_bits + avctx->p_tex_bits ==
1520                        put_bits_count(&s->pb));
1521         flush_put_bits(&s->pb);
1522         s->frame_bits  = put_bits_count(&s->pb);
1523
1524         stuffing_count = ff_vbv_update(s, s->frame_bits);
1525         if (stuffing_count) {
1526             if (s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb) >> 3) <
1527                     stuffing_count + 50) {
1528                 av_log(s->avctx, AV_LOG_ERROR, "stuffing too large\n");
1529                 return -1;
1530             }
1531
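            /*
             * Pad the frame so the VBV model stays consistent: MPEG-1/2
             * append zero bytes, while the MPEG-4 branch writes the 32-bit
             * code 0x000001C3 followed by 0xFF filler bytes.
             */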
1532             switch (s->codec_id) {
1533             case AV_CODEC_ID_MPEG1VIDEO:
1534             case AV_CODEC_ID_MPEG2VIDEO:
1535                 while (stuffing_count--) {
1536                     put_bits(&s->pb, 8, 0);
1537                 }
1538             break;
1539             case AV_CODEC_ID_MPEG4:
1540                 put_bits(&s->pb, 16, 0);
1541                 put_bits(&s->pb, 16, 0x1C3);
1542                 stuffing_count -= 4;
1543                 while (stuffing_count--) {
1544                     put_bits(&s->pb, 8, 0xFF);
1545                 }
1546             break;
1547             default:
1548                 av_log(s->avctx, AV_LOG_ERROR, "vbv buffer overflow\n");
1549             }
1550             flush_put_bits(&s->pb);
1551             s->frame_bits  = put_bits_count(&s->pb);
1552         }
1553
1554         /* update mpeg1/2 vbv_delay for CBR */
1555         if (s->avctx->rc_max_rate                          &&
1556             s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
1557             s->out_format == FMT_MPEG1                     &&
1558             90000LL * (avctx->rc_buffer_size - 1) <=
1559                 s->avctx->rc_max_rate * 0xFFFFLL) {
1560             int vbv_delay, min_delay;
1561             double inbits  = s->avctx->rc_max_rate *
1562                              av_q2d(s->avctx->time_base);
1563             int    minbits = s->frame_bits - 8 *
1564                              (s->vbv_delay_ptr - s->pb.buf - 1);
1565             double bits    = s->rc_context.buffer_index + minbits - inbits;
1566
1567             if (bits < 0)
1568                 av_log(s->avctx, AV_LOG_ERROR,
1569                        "Internal error, negative bits\n");
1570
1571             assert(s->repeat_first_field == 0);
1572
1573             vbv_delay = bits * 90000 / s->avctx->rc_max_rate;
1574             min_delay = (minbits * 90000LL + s->avctx->rc_max_rate - 1) /
1575                         s->avctx->rc_max_rate;
1576
1577             vbv_delay = FFMAX(vbv_delay, min_delay);
1578
1579             assert(vbv_delay < 0xFFFF);
1580
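            /*
             * The 16-bit vbv_delay value (in 90 kHz ticks) straddles three
             * bytes of the already written picture header: the top 3 bits go
             * into the low bits of byte 0, the middle 8 bits into byte 1 and
             * the low 5 bits into the high bits of byte 2. The value exported
             * in avctx->vbv_delay is converted to 27 MHz units (x 300).
             */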
1581             s->vbv_delay_ptr[0] &= 0xF8;
1582             s->vbv_delay_ptr[0] |= vbv_delay >> 13;
1583             s->vbv_delay_ptr[1]  = vbv_delay >> 5;
1584             s->vbv_delay_ptr[2] &= 0x07;
1585             s->vbv_delay_ptr[2] |= vbv_delay << 3;
1586             avctx->vbv_delay     = vbv_delay * 300;
1587         }
1588         s->total_bits     += s->frame_bits;
1589         avctx->frame_bits  = s->frame_bits;
1590
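        /*
         * With frame reordering (low_delay unset) dts lags pts: the first
         * coded picture gets pts - dts_delta, later packets reuse the pts
         * that was saved from the head of the input queue on the previous
         * call.
         */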
1591         pkt->pts = s->current_picture.f.pts;
1592         if (!s->low_delay) {
1593             if (!s->current_picture.f.coded_picture_number)
1594                 pkt->dts = pkt->pts - s->dts_delta;
1595             else
1596                 pkt->dts = s->reordered_pts;
1597             s->reordered_pts = s->input_picture[0]->f.pts;
1598         } else
1599             pkt->dts = pkt->pts;
1600         if (s->current_picture.f.key_frame)
1601             pkt->flags |= AV_PKT_FLAG_KEY;
1602         if (s->mb_info)
1603             av_packet_shrink_side_data(pkt, AV_PKT_DATA_H263_MB_INFO, s->mb_info_size);
1604     } else {
1605         s->frame_bits = 0;
1606     }
1607     assert((s->frame_bits & 7) == 0);
1608
1609     pkt->size = s->frame_bits / 8;
1610     *got_packet = !!pkt->size;
1611     return 0;
1612 }
1613
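/*
 * Drop blocks that carry almost no information: if a block contains only
 * +-1 coefficients and their position-weighted score (tab[] indexed by the
 * zero run preceding each one) stays below the threshold, coding it is not
 * considered worth the bits and the whole block is cleared, optionally
 * keeping the DC coefficient.
 */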
1614 static inline void dct_single_coeff_elimination(MpegEncContext *s,
1615                                                 int n, int threshold)
1616 {
1617     static const char tab[64] = {
1618         3, 2, 2, 1, 1, 1, 1, 1,
1619         1, 1, 1, 1, 1, 1, 1, 1,
1620         1, 1, 1, 1, 1, 1, 1, 1,
1621         0, 0, 0, 0, 0, 0, 0, 0,
1622         0, 0, 0, 0, 0, 0, 0, 0,
1623         0, 0, 0, 0, 0, 0, 0, 0,
1624         0, 0, 0, 0, 0, 0, 0, 0,
1625         0, 0, 0, 0, 0, 0, 0, 0
1626     };
1627     int score = 0;
1628     int run = 0;
1629     int i;
1630     int16_t *block = s->block[n];
1631     const int last_index = s->block_last_index[n];
1632     int skip_dc;
1633
1634     if (threshold < 0) {
1635         skip_dc = 0;
1636         threshold = -threshold;
1637     } else
1638         skip_dc = 1;
1639
1640     /* Are all the coefficients we could set to zero already zero? */
1641     if (last_index <= skip_dc - 1)
1642         return;
1643
1644     for (i = 0; i <= last_index; i++) {
1645         const int j = s->intra_scantable.permutated[i];
1646         const int level = FFABS(block[j]);
1647         if (level == 1) {
1648             if (skip_dc && i == 0)
1649                 continue;
1650             score += tab[run];
1651             run = 0;
1652         } else if (level > 1) {
1653             return;
1654         } else {
1655             run++;
1656         }
1657     }
1658     if (score >= threshold)
1659         return;
1660     for (i = skip_dc; i <= last_index; i++) {
1661         const int j = s->intra_scantable.permutated[i];
1662         block[j] = 0;
1663     }
1664     if (block[0])
1665         s->block_last_index[n] = 0;
1666     else
1667         s->block_last_index[n] = -1;
1668 }
1669
1670 static inline void clip_coeffs(MpegEncContext *s, int16_t *block,
1671                                int last_index)
1672 {
1673     int i;
1674     const int maxlevel = s->max_qcoeff;
1675     const int minlevel = s->min_qcoeff;
1676     int overflow = 0;
1677
1678     if (s->mb_intra) {
1679         i = 1; // skip clipping of intra dc
1680     } else
1681         i = 0;
1682
1683     for (; i <= last_index; i++) {
1684         const int j = s->intra_scantable.permutated[i];
1685         int level = block[j];
1686
1687         if (level > maxlevel) {
1688             level = maxlevel;
1689             overflow++;
1690         } else if (level < minlevel) {
1691             level = minlevel;
1692             overflow++;
1693         }
1694
1695         block[j] = level;
1696     }
1697
1698     if (overflow && s->avctx->mb_decision == FF_MB_DECISION_SIMPLE)
1699         av_log(s->avctx, AV_LOG_INFO,
1700                "warning, clipping %d dct coefficients to %d..%d\n",
1701                overflow, minlevel, maxlevel);
1702 }
1703
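/*
 * Per-coefficient visual weight for the noise-shaping quantizer: roughly
 * 36 times the standard deviation of each pixel's 3x3 neighbourhood
 * (clipped at the block borders), later used by dct_quantize_refine() when
 * redistributing quantization noise.
 */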
1704 static void get_visual_weight(int16_t *weight, uint8_t *ptr, int stride)
1705 {
1706     int x, y;
1707     // FIXME optimize
1708     for (y = 0; y < 8; y++) {
1709         for (x = 0; x < 8; x++) {
1710             int x2, y2;
1711             int sum = 0;
1712             int sqr = 0;
1713             int count = 0;
1714
1715             for (y2 = FFMAX(y - 1, 0); y2 < FFMIN(8, y + 2); y2++) {
1716                 for (x2= FFMAX(x - 1, 0); x2 < FFMIN(8, x + 2); x2++) {
1717                     int v = ptr[x2 + y2 * stride];
1718                     sum += v;
1719                     sqr += v * v;
1720                     count++;
1721                 }
1722             }
1723             weight[x + 8 * y]= (36 * ff_sqrt(count * sqr - sum * sum)) / count;
1724         }
1725     }
1726 }
1727
1728 static av_always_inline void encode_mb_internal(MpegEncContext *s,
1729                                                 int motion_x, int motion_y,
1730                                                 int mb_block_height,
1731                                                 int mb_block_count)
1732 {
1733     int16_t weight[8][64];
1734     int16_t orig[8][64];
1735     const int mb_x = s->mb_x;
1736     const int mb_y = s->mb_y;
1737     int i;
1738     int skip_dct[8];
1739     int dct_offset = s->linesize * 8; // default for progressive frames
1740     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
1741     int wrap_y, wrap_c;
1742
1743     for (i = 0; i < mb_block_count; i++)
1744         skip_dct[i] = s->skipdct;
1745
1746     if (s->adaptive_quant) {
1747         const int last_qp = s->qscale;
1748         const int mb_xy = mb_x + mb_y * s->mb_stride;
1749
1750         s->lambda = s->lambda_table[mb_xy];
1751         update_qscale(s);
1752
1753         if (!(s->mpv_flags & FF_MPV_FLAG_QP_RD)) {
1754             s->qscale = s->current_picture_ptr->f.qscale_table[mb_xy];
1755             s->dquant = s->qscale - last_qp;
1756
1757             if (s->out_format == FMT_H263) {
1758                 s->dquant = av_clip(s->dquant, -2, 2);
1759
1760                 if (s->codec_id == AV_CODEC_ID_MPEG4) {
1761                     if (!s->mb_intra) {
1762                         if (s->pict_type == AV_PICTURE_TYPE_B) {
1763                             if (s->dquant & 1 || s->mv_dir & MV_DIRECT)
1764                                 s->dquant = 0;
1765                         }
1766                         if (s->mv_type == MV_TYPE_8X8)
1767                             s->dquant = 0;
1768                     }
1769                 }
1770             }
1771         }
1772         ff_set_qscale(s, last_qp + s->dquant);
1773     } else if (s->mpv_flags & FF_MPV_FLAG_QP_RD)
1774         ff_set_qscale(s, s->qscale + s->dquant);
1775
1776     wrap_y = s->linesize;
1777     wrap_c = s->uvlinesize;
1778     ptr_y  = s->new_picture.f.data[0] +
1779              (mb_y * 16 * wrap_y)              + mb_x * 16;
1780     ptr_cb = s->new_picture.f.data[1] +
1781              (mb_y * mb_block_height * wrap_c) + mb_x * 8;
1782     ptr_cr = s->new_picture.f.data[2] +
1783              (mb_y * mb_block_height * wrap_c) + mb_x * 8;
1784
1785     if (mb_x * 16 + 16 > s->width || mb_y * 16 + 16 > s->height) {
1786         uint8_t *ebuf = s->edge_emu_buffer + 32;
1787         s->vdsp.emulated_edge_mc(ebuf, ptr_y, wrap_y, 16, 16, mb_x * 16,
1788                                  mb_y * 16, s->width, s->height);
1789         ptr_y = ebuf;
1790         s->vdsp.emulated_edge_mc(ebuf + 18 * wrap_y, ptr_cb, wrap_c, 8,
1791                                  mb_block_height, mb_x * 8, mb_y * 8,
1792                                  s->width >> 1, s->height >> 1);
1793         ptr_cb = ebuf + 18 * wrap_y;
1794         s->vdsp.emulated_edge_mc(ebuf + 18 * wrap_y + 8, ptr_cr, wrap_c, 8,
1795                                  mb_block_height, mb_x * 8, mb_y * 8,
1796                                  s->width >> 1, s->height >> 1);
1797         ptr_cr = ebuf + 18 * wrap_y + 8;
1798     }
1799
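    /*
     * Intra MBs feed the source pixels straight into s->block[]; inter MBs
     * first build the motion-compensated prediction in s->dest[] and code
     * the difference. In both paths the interlaced-DCT decision compares an
     * ildct_cmp score of the two 8-line halves (frame order) against the two
     * fields, with a small bias toward progressive, and, if field coding
     * scores lower, switches dct_offset/wrap_y so each luma block covers
     * every other line.
     */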
1800     if (s->mb_intra) {
1801         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
1802             int progressive_score, interlaced_score;
1803
1804             s->interlaced_dct = 0;
1805             progressive_score = s->dsp.ildct_cmp[4](s, ptr_y,
1806                                                     NULL, wrap_y, 8) +
1807                                 s->dsp.ildct_cmp[4](s, ptr_y + wrap_y * 8,
1808                                                     NULL, wrap_y, 8) - 400;
1809
1810             if (progressive_score > 0) {
1811                 interlaced_score = s->dsp.ildct_cmp[4](s, ptr_y,
1812                                                        NULL, wrap_y * 2, 8) +
1813                                    s->dsp.ildct_cmp[4](s, ptr_y + wrap_y,
1814                                                        NULL, wrap_y * 2, 8);
1815                 if (progressive_score > interlaced_score) {
1816                     s->interlaced_dct = 1;
1817
1818                     dct_offset = wrap_y;
1819                     wrap_y <<= 1;
1820                     if (s->chroma_format == CHROMA_422)
1821                         wrap_c <<= 1;
1822                 }
1823             }
1824         }
1825
1826         s->dsp.get_pixels(s->block[0], ptr_y                  , wrap_y);
1827         s->dsp.get_pixels(s->block[1], ptr_y              + 8 , wrap_y);
1828         s->dsp.get_pixels(s->block[2], ptr_y + dct_offset     , wrap_y);
1829         s->dsp.get_pixels(s->block[3], ptr_y + dct_offset + 8 , wrap_y);
1830
1831         if (s->flags & CODEC_FLAG_GRAY) {
1832             skip_dct[4] = 1;
1833             skip_dct[5] = 1;
1834         } else {
1835             s->dsp.get_pixels(s->block[4], ptr_cb, wrap_c);
1836             s->dsp.get_pixels(s->block[5], ptr_cr, wrap_c);
1837             if (!s->chroma_y_shift) { /* 422 */
1838                 s->dsp.get_pixels(s->block[6],
1839                                   ptr_cb + (dct_offset >> 1), wrap_c);
1840                 s->dsp.get_pixels(s->block[7],
1841                                   ptr_cr + (dct_offset >> 1), wrap_c);
1842             }
1843         }
1844     } else {
1845         op_pixels_func (*op_pix)[4];
1846         qpel_mc_func (*op_qpix)[16];
1847         uint8_t *dest_y, *dest_cb, *dest_cr;
1848
1849         dest_y  = s->dest[0];
1850         dest_cb = s->dest[1];
1851         dest_cr = s->dest[2];
1852
1853         if ((!s->no_rounding) || s->pict_type == AV_PICTURE_TYPE_B) {
1854             op_pix  = s->dsp.put_pixels_tab;
1855             op_qpix = s->dsp.put_qpel_pixels_tab;
1856         } else {
1857             op_pix  = s->dsp.put_no_rnd_pixels_tab;
1858             op_qpix = s->dsp.put_no_rnd_qpel_pixels_tab;
1859         }
1860
1861         if (s->mv_dir & MV_DIR_FORWARD) {
1862             ff_MPV_motion(s, dest_y, dest_cb, dest_cr, 0,
1863                           s->last_picture.f.data,
1864                           op_pix, op_qpix);
1865             op_pix  = s->dsp.avg_pixels_tab;
1866             op_qpix = s->dsp.avg_qpel_pixels_tab;
1867         }
1868         if (s->mv_dir & MV_DIR_BACKWARD) {
1869             ff_MPV_motion(s, dest_y, dest_cb, dest_cr, 1,
1870                           s->next_picture.f.data,
1871                           op_pix, op_qpix);
1872         }
1873
1874         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
1875             int progressive_score, interlaced_score;
1876
1877             s->interlaced_dct = 0;
1878             progressive_score = s->dsp.ildct_cmp[0](s, dest_y,
1879                                                     ptr_y,              wrap_y,
1880                                                     8) +
1881                                 s->dsp.ildct_cmp[0](s, dest_y + wrap_y * 8,
1882                                                     ptr_y + wrap_y * 8, wrap_y,
1883                                                     8) - 400;
1884
1885             if (s->avctx->ildct_cmp == FF_CMP_VSSE)
1886                 progressive_score -= 400;
1887
1888             if (progressive_score > 0) {
1889                 interlaced_score = s->dsp.ildct_cmp[0](s, dest_y,
1890                                                        ptr_y,
1891                                                        wrap_y * 2, 8) +
1892                                    s->dsp.ildct_cmp[0](s, dest_y + wrap_y,
1893                                                        ptr_y + wrap_y,
1894                                                        wrap_y * 2, 8);
1895
1896                 if (progressive_score > interlaced_score) {
1897                     s->interlaced_dct = 1;
1898
1899                     dct_offset = wrap_y;
1900                     wrap_y <<= 1;
1901                     if (s->chroma_format == CHROMA_422)
1902                         wrap_c <<= 1;
1903                 }
1904             }
1905         }
1906
1907         s->dsp.diff_pixels(s->block[0], ptr_y, dest_y, wrap_y);
1908         s->dsp.diff_pixels(s->block[1], ptr_y + 8, dest_y + 8, wrap_y);
1909         s->dsp.diff_pixels(s->block[2], ptr_y + dct_offset,
1910                            dest_y + dct_offset, wrap_y);
1911         s->dsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8,
1912                            dest_y + dct_offset + 8, wrap_y);
1913
1914         if (s->flags & CODEC_FLAG_GRAY) {
1915             skip_dct[4] = 1;
1916             skip_dct[5] = 1;
1917         } else {
1918             s->dsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
1919             s->dsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
1920             if (!s->chroma_y_shift) { /* 422 */
1921                 s->dsp.diff_pixels(s->block[6], ptr_cb + (dct_offset >> 1),
1922                                    dest_cb + (dct_offset >> 1), wrap_c);
1923                 s->dsp.diff_pixels(s->block[7], ptr_cr + (dct_offset >> 1),
1924                                    dest_cr + (dct_offset >> 1), wrap_c);
1925             }
1926         }
1927         /* pre quantization */
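        /*
         * Only attempted when the MB's motion-compensated variance is small
         * (< 2*qscale^2): 8x8 blocks whose SAD against the prediction stays
         * below 20*qscale are assumed to quantize to nothing, so their DCT
         * is skipped.
         */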
1928         if (s->current_picture.mc_mb_var[s->mb_stride * mb_y + mb_x] <
1929                 2 * s->qscale * s->qscale) {
1930             // FIXME optimize
1931             if (s->dsp.sad[1](NULL, ptr_y , dest_y,
1932                               wrap_y, 8) < 20 * s->qscale)
1933                 skip_dct[0] = 1;
1934             if (s->dsp.sad[1](NULL, ptr_y + 8,
1935                               dest_y + 8, wrap_y, 8) < 20 * s->qscale)
1936                 skip_dct[1] = 1;
1937             if (s->dsp.sad[1](NULL, ptr_y + dct_offset,
1938                               dest_y + dct_offset, wrap_y, 8) < 20 * s->qscale)
1939                 skip_dct[2] = 1;
1940             if (s->dsp.sad[1](NULL, ptr_y + dct_offset + 8,
1941                               dest_y + dct_offset + 8,
1942                               wrap_y, 8) < 20 * s->qscale)
1943                 skip_dct[3] = 1;
1944             if (s->dsp.sad[1](NULL, ptr_cb, dest_cb,
1945                               wrap_c, 8) < 20 * s->qscale)
1946                 skip_dct[4] = 1;
1947             if (s->dsp.sad[1](NULL, ptr_cr, dest_cr,
1948                               wrap_c, 8) < 20 * s->qscale)
1949                 skip_dct[5] = 1;
1950             if (!s->chroma_y_shift) { /* 422 */
1951                 if (s->dsp.sad[1](NULL, ptr_cb + (dct_offset >> 1),
1952                                   dest_cb + (dct_offset >> 1),
1953                                   wrap_c, 8) < 20 * s->qscale)
1954                     skip_dct[6] = 1;
1955                 if (s->dsp.sad[1](NULL, ptr_cr + (dct_offset >> 1),
1956                                   dest_cr + (dct_offset >> 1),
1957                                   wrap_c, 8) < 20 * s->qscale)
1958                     skip_dct[7] = 1;
1959             }
1960         }
1961     }
1962
1963     if (s->quantizer_noise_shaping) {
1964         if (!skip_dct[0])
1965             get_visual_weight(weight[0], ptr_y                 , wrap_y);
1966         if (!skip_dct[1])
1967             get_visual_weight(weight[1], ptr_y              + 8, wrap_y);
1968         if (!skip_dct[2])
1969             get_visual_weight(weight[2], ptr_y + dct_offset    , wrap_y);
1970         if (!skip_dct[3])
1971             get_visual_weight(weight[3], ptr_y + dct_offset + 8, wrap_y);
1972         if (!skip_dct[4])
1973             get_visual_weight(weight[4], ptr_cb                , wrap_c);
1974         if (!skip_dct[5])
1975             get_visual_weight(weight[5], ptr_cr                , wrap_c);
1976         if (!s->chroma_y_shift) { /* 422 */
1977             if (!skip_dct[6])
1978                 get_visual_weight(weight[6], ptr_cb + (dct_offset >> 1),
1979                                   wrap_c);
1980             if (!skip_dct[7])
1981                 get_visual_weight(weight[7], ptr_cr + (dct_offset >> 1),
1982                                   wrap_c);
1983         }
1984         memcpy(orig[0], s->block[0], sizeof(int16_t) * 64 * mb_block_count);
1985     }
1986
1987     /* DCT & quantize */
1988     assert(s->out_format != FMT_MJPEG || s->qscale == 8);
1989     {
1990         for (i = 0; i < mb_block_count; i++) {
1991             if (!skip_dct[i]) {
1992                 int overflow;
1993                 s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow);
1994                 // FIXME we could decide to change the quantizer instead of
1995                 // clipping
1996                 // JS: I don't think that would be a good idea; it could lower
1997                 //     quality instead of improving it. Only INTRADC clipping
1998                 //     deserves changes in the quantizer.
1999                 if (overflow)
2000                     clip_coeffs(s, s->block[i], s->block_last_index[i]);
2001             } else
2002                 s->block_last_index[i] = -1;
2003         }
2004         if (s->quantizer_noise_shaping) {
2005             for (i = 0; i < mb_block_count; i++) {
2006                 if (!skip_dct[i]) {
2007                     s->block_last_index[i] =
2008                         dct_quantize_refine(s, s->block[i], weight[i],
2009                                             orig[i], i, s->qscale);
2010                 }
2011             }
2012         }
2013
2014         if (s->luma_elim_threshold && !s->mb_intra)
2015             for (i = 0; i < 4; i++)
2016                 dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
2017         if (s->chroma_elim_threshold && !s->mb_intra)
2018             for (i = 4; i < mb_block_count; i++)
2019                 dct_single_coeff_elimination(s, i, s->chroma_elim_threshold);
2020
2021         if (s->mpv_flags & FF_MPV_FLAG_CBP_RD) {
2022             for (i = 0; i < mb_block_count; i++) {
2023                 if (s->block_last_index[i] == -1)
2024                     s->coded_score[i] = INT_MAX / 256;
2025             }
2026         }
2027     }
2028
2029     if ((s->flags & CODEC_FLAG_GRAY) && s->mb_intra) {
2030         s->block_last_index[4] =
2031         s->block_last_index[5] = 0;
2032         s->block[4][0] =
2033         s->block[5][0] = (1024 + s->c_dc_scale / 2) / s->c_dc_scale;
2034     }
2035
2036     // FIXME: the non-C quantize code returns an incorrect block_last_index
2037     if (s->alternate_scan && s->dct_quantize != ff_dct_quantize_c) {
2038         for (i = 0; i < mb_block_count; i++) {
2039             int j;
2040             if (s->block_last_index[i] > 0) {
2041                 for (j = 63; j > 0; j--) {
2042                     if (s->block[i][s->intra_scantable.permutated[j]])
2043                         break;
2044                 }
2045                 s->block_last_index[i] = j;
2046             }
2047         }
2048     }
2049
2050     /* huffman encode */
2051     switch(s->codec_id){ //FIXME a function pointer could be slightly faster
2052     case AV_CODEC_ID_MPEG1VIDEO:
2053     case AV_CODEC_ID_MPEG2VIDEO:
2054         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
2055             ff_mpeg1_encode_mb(s, s->block, motion_x, motion_y);
2056         break;
2057     case AV_CODEC_ID_MPEG4:
2058         if (CONFIG_MPEG4_ENCODER)
2059             ff_mpeg4_encode_mb(s, s->block, motion_x, motion_y);
2060         break;
2061     case AV_CODEC_ID_MSMPEG4V2:
2062     case AV_CODEC_ID_MSMPEG4V3:
2063     case AV_CODEC_ID_WMV1:
2064         if (CONFIG_MSMPEG4_ENCODER)
2065             ff_msmpeg4_encode_mb(s, s->block, motion_x, motion_y);
2066         break;
2067     case AV_CODEC_ID_WMV2:
2068         if (CONFIG_WMV2_ENCODER)
2069             ff_wmv2_encode_mb(s, s->block, motion_x, motion_y);
2070         break;
2071     case AV_CODEC_ID_H261:
2072         if (CONFIG_H261_ENCODER)
2073             ff_h261_encode_mb(s, s->block, motion_x, motion_y);
2074         break;
2075     case AV_CODEC_ID_H263:
2076     case AV_CODEC_ID_H263P:
2077     case AV_CODEC_ID_FLV1:
2078     case AV_CODEC_ID_RV10:
2079     case AV_CODEC_ID_RV20:
2080         if (CONFIG_H263_ENCODER)
2081             ff_h263_encode_mb(s, s->block, motion_x, motion_y);
2082         break;
2083     case AV_CODEC_ID_MJPEG:
2084         if (CONFIG_MJPEG_ENCODER)
2085             ff_mjpeg_encode_mb(s, s->block);
2086         break;
2087     default:
2088         assert(0);
2089     }
2090 }
2091
2092 static av_always_inline void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
2093 {
2094     if (s->chroma_format == CHROMA_420) encode_mb_internal(s, motion_x, motion_y,  8, 6);
2095     else                                encode_mb_internal(s, motion_x, motion_y, 16, 8);
2096 }
2097
2098 static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int type){
2099     int i;
2100
2101     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2102
2103     /* mpeg1 */
2104     d->mb_skip_run= s->mb_skip_run;
2105     for(i=0; i<3; i++)
2106         d->last_dc[i] = s->last_dc[i];
2107
2108     /* statistics */
2109     d->mv_bits= s->mv_bits;
2110     d->i_tex_bits= s->i_tex_bits;
2111     d->p_tex_bits= s->p_tex_bits;
2112     d->i_count= s->i_count;
2113     d->f_count= s->f_count;
2114     d->b_count= s->b_count;
2115     d->skip_count= s->skip_count;
2116     d->misc_bits= s->misc_bits;
2117     d->last_bits= 0;
2118
2119     d->mb_skipped= 0;
2120     d->qscale= s->qscale;
2121     d->dquant= s->dquant;
2122
2123     d->esc3_level_length= s->esc3_level_length;
2124 }
2125
2126 static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){
2127     int i;
2128
2129     memcpy(d->mv, s->mv, 2*4*2*sizeof(int));
2130     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2131
2132     /* mpeg1 */
2133     d->mb_skip_run= s->mb_skip_run;
2134     for(i=0; i<3; i++)
2135         d->last_dc[i] = s->last_dc[i];
2136
2137     /* statistics */
2138     d->mv_bits= s->mv_bits;
2139     d->i_tex_bits= s->i_tex_bits;
2140     d->p_tex_bits= s->p_tex_bits;
2141     d->i_count= s->i_count;
2142     d->f_count= s->f_count;
2143     d->b_count= s->b_count;
2144     d->skip_count= s->skip_count;
2145     d->misc_bits= s->misc_bits;
2146
2147     d->mb_intra= s->mb_intra;
2148     d->mb_skipped= s->mb_skipped;
2149     d->mv_type= s->mv_type;
2150     d->mv_dir= s->mv_dir;
2151     d->pb= s->pb;
2152     if(s->data_partitioning){
2153         d->pb2= s->pb2;
2154         d->tex_pb= s->tex_pb;
2155     }
2156     d->block= s->block;
2157     for(i=0; i<8; i++)
2158         d->block_last_index[i]= s->block_last_index[i];
2159     d->interlaced_dct= s->interlaced_dct;
2160     d->qscale= s->qscale;
2161
2162     d->esc3_level_length= s->esc3_level_length;
2163 }
2164
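/*
 * Trial-encode one candidate macroblock mode for the mode decision: the MB
 * is written into one of two alternating scratch bitstreams and, for
 * FF_MB_DECISION_RD, reconstructed into rd_scratchpad so SSE distortion can
 * be added to the lambda2-weighted bit count. If the score beats *dmin the
 * context is copied into *best and next_block flips, keeping the winner's
 * buffer and reusing the other one for the next candidate.
 */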
2165 static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type,
2166                            PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2],
2167                            int *dmin, int *next_block, int motion_x, int motion_y)
2168 {
2169     int score;
2170     uint8_t *dest_backup[3];
2171
2172     copy_context_before_encode(s, backup, type);
2173
2174     s->block= s->blocks[*next_block];
2175     s->pb= pb[*next_block];
2176     if(s->data_partitioning){
2177         s->pb2   = pb2   [*next_block];
2178         s->tex_pb= tex_pb[*next_block];
2179     }
2180
2181     if(*next_block){
2182         memcpy(dest_backup, s->dest, sizeof(s->dest));
2183         s->dest[0] = s->rd_scratchpad;
2184         s->dest[1] = s->rd_scratchpad + 16*s->linesize;
2185         s->dest[2] = s->rd_scratchpad + 16*s->linesize + 8;
2186         assert(s->linesize >= 32); //FIXME
2187     }
2188
2189     encode_mb(s, motion_x, motion_y);
2190
2191     score= put_bits_count(&s->pb);
2192     if(s->data_partitioning){
2193         score+= put_bits_count(&s->pb2);
2194         score+= put_bits_count(&s->tex_pb);
2195     }
2196
2197     if(s->avctx->mb_decision == FF_MB_DECISION_RD){
2198         ff_MPV_decode_mb(s, s->block);
2199
2200         score *= s->lambda2;
2201         score += sse_mb(s) << FF_LAMBDA_SHIFT;
2202     }
2203
2204     if(*next_block){
2205         memcpy(s->dest, dest_backup, sizeof(s->dest));
2206     }
2207
2208     if(score<*dmin){
2209         *dmin= score;
2210         *next_block^=1;
2211
2212         copy_context_after_encode(best, s, type);
2213     }
2214 }
2215
2216 static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride){
2217     uint32_t *sq = ff_squareTbl + 256;
2218     int acc=0;
2219     int x,y;
2220
2221     if(w==16 && h==16)
2222         return s->dsp.sse[0](NULL, src1, src2, stride, 16);
2223     else if(w==8 && h==8)
2224         return s->dsp.sse[1](NULL, src1, src2, stride, 8);
2225
2226     for(y=0; y<h; y++){
2227         for(x=0; x<w; x++){
2228             acc+= sq[src1[x + y*stride] - src2[x + y*stride]];
2229         }
2230     }
2231
2232     assert(acc>=0);
2233
2234     return acc;
2235 }
2236
2237 static int sse_mb(MpegEncContext *s){
2238     int w= 16;
2239     int h= 16;
2240
2241     if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
2242     if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
2243
2244     if(w==16 && h==16)
2245       if(s->avctx->mb_cmp == FF_CMP_NSSE){
2246         return  s->dsp.nsse[0](s, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
2247                +s->dsp.nsse[1](s, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
2248                +s->dsp.nsse[1](s, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
2249       }else{
2250         return  s->dsp.sse[0](NULL, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
2251                +s->dsp.sse[1](NULL, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
2252                +s->dsp.sse[1](NULL, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
2253       }
2254     else
2255         return  sse(s, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], w, h, s->linesize)
2256                +sse(s, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize)
2257                +sse(s, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize);
2258 }
2259
2260 static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){
2261     MpegEncContext *s= *(void**)arg;
2262
2263
2264     s->me.pre_pass=1;
2265     s->me.dia_size= s->avctx->pre_dia_size;
2266     s->first_slice_line=1;
2267     for(s->mb_y= s->end_mb_y-1; s->mb_y >= s->start_mb_y; s->mb_y--) {
2268         for(s->mb_x=s->mb_width-1; s->mb_x >=0 ;s->mb_x--) {
2269             ff_pre_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2270         }
2271         s->first_slice_line=0;
2272     }
2273
2274     s->me.pre_pass=0;
2275
2276     return 0;
2277 }
2278
2279 static int estimate_motion_thread(AVCodecContext *c, void *arg){
2280     MpegEncContext *s= *(void**)arg;
2281
2282     ff_check_alignment();
2283
2284     s->me.dia_size= s->avctx->dia_size;
2285     s->first_slice_line=1;
2286     for(s->mb_y= s->start_mb_y; s->mb_y < s->end_mb_y; s->mb_y++) {
2287         s->mb_x=0; //for block init below
2288         ff_init_block_index(s);
2289         for(s->mb_x=0; s->mb_x < s->mb_width; s->mb_x++) {
2290             s->block_index[0]+=2;
2291             s->block_index[1]+=2;
2292             s->block_index[2]+=2;
2293             s->block_index[3]+=2;
2294
2295             /* compute motion vector & mb_type and store in context */
2296             if(s->pict_type==AV_PICTURE_TYPE_B)
2297                 ff_estimate_b_frame_motion(s, s->mb_x, s->mb_y);
2298             else
2299                 ff_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2300         }
2301         s->first_slice_line=0;
2302     }
2303     return 0;
2304 }
2305
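/*
 * Spatial-statistics pass: for every 16x16 luma block compute its mean and,
 * roughly, its variance (pix_norm1() returns the sum of squares, so
 * (norm1 - sum*sum/256) >> 8 is the variance up to a small rounding bias),
 * storing them in mb_var/mb_mean and accumulating mb_var_sum_temp for rate
 * control and adaptive quantization.
 */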
2306 static int mb_var_thread(AVCodecContext *c, void *arg){
2307     MpegEncContext *s= *(void**)arg;
2308     int mb_x, mb_y;
2309
2310     ff_check_alignment();
2311
2312     for(mb_y=s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2313         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2314             int xx = mb_x * 16;
2315             int yy = mb_y * 16;
2316             uint8_t *pix = s->new_picture.f.data[0] + (yy * s->linesize) + xx;
2317             int varc;
2318             int sum = s->dsp.pix_sum(pix, s->linesize);
2319
2320             varc = (s->dsp.pix_norm1(pix, s->linesize) - (((unsigned)sum*sum)>>8) + 500 + 128)>>8;
2321
2322             s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc;
2323             s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
2324             s->me.mb_var_sum_temp    += varc;
2325         }
2326     }
2327     return 0;
2328 }
2329
2330 static void write_slice_end(MpegEncContext *s){
2331     if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4){
2332         if(s->partitioned_frame){
2333             ff_mpeg4_merge_partitions(s);
2334         }
2335
2336         ff_mpeg4_stuffing(&s->pb);
2337     }else if(CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG){
2338         ff_mjpeg_encode_stuffing(&s->pb);
2339     }
2340
2341     avpriv_align_put_bits(&s->pb);
2342     flush_put_bits(&s->pb);
2343
2344     if((s->flags&CODEC_FLAG_PASS1) && !s->partitioned_frame)
2345         s->misc_bits+= get_bits_diff(s);
2346 }
2347
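/*
 * Append one 12-byte AV_PKT_DATA_H263_MB_INFO record for the current MB:
 * bit offset into the packet (le32), qscale, GOB number, macroblock address
 * within the GOB (le16) and the MV1/MV2 motion vector predictors (MV2 is
 * always zero since 4MV is not implemented).
 */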
2348 static void write_mb_info(MpegEncContext *s)
2349 {
2350     uint8_t *ptr = s->mb_info_ptr + s->mb_info_size - 12;
2351     int offset = put_bits_count(&s->pb);
2352     int mba  = s->mb_x + s->mb_width * (s->mb_y % s->gob_index);
2353     int gobn = s->mb_y / s->gob_index;
2354     int pred_x, pred_y;
2355     if (CONFIG_H263_ENCODER)
2356         ff_h263_pred_motion(s, 0, 0, &pred_x, &pred_y);
2357     bytestream_put_le32(&ptr, offset);
2358     bytestream_put_byte(&ptr, s->qscale);
2359     bytestream_put_byte(&ptr, gobn);
2360     bytestream_put_le16(&ptr, mba);
2361     bytestream_put_byte(&ptr, pred_x); /* hmv1 */
2362     bytestream_put_byte(&ptr, pred_y); /* vmv1 */
2363     /* 4MV not implemented */
2364     bytestream_put_byte(&ptr, 0); /* hmv2 */
2365     bytestream_put_byte(&ptr, 0); /* vmv2 */
2366 }
2367
2368 static void update_mb_info(MpegEncContext *s, int startcode)
2369 {
2370     if (!s->mb_info)
2371         return;
2372     if (put_bits_count(&s->pb) - s->prev_mb_info*8 >= s->mb_info*8) {
2373         s->mb_info_size += 12;
2374         s->prev_mb_info = s->last_mb_info;
2375     }
2376     if (startcode) {
2377         s->prev_mb_info = put_bits_count(&s->pb)/8;
2378         /* This might have incremented mb_info_size above, and we return without
2379          * actually writing any info into that slot yet. But in that case,
2380          * this will be called again at the start of the first MB after the
2381          * start code has been written, and the mb info is written then. */
2382         return;
2383     }
2384
2385     s->last_mb_info = put_bits_count(&s->pb)/8;
2386     if (!s->mb_info_size)
2387         s->mb_info_size += 12;
2388     write_mb_info(s);
2389 }
2390
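/*
 * Per-slice-context encoding pass: walk the assigned macroblock rows, emit
 * GOB / video packet / slice headers when rtp_mode requires them, and for
 * each MB either trial-encode the candidate modes via encode_mb_hq() (when
 * more than one type is possible or FF_MPV_FLAG_QP_RD is set) or encode the
 * single pre-selected type directly.
 */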
2391 static int encode_thread(AVCodecContext *c, void *arg){
2392     MpegEncContext *s= *(void**)arg;
2393     int mb_x, mb_y, pdif = 0;
2394     int chr_h= 16>>s->chroma_y_shift;
2395     int i, j;
2396     MpegEncContext best_s, backup_s;
2397     uint8_t bit_buf[2][MAX_MB_BYTES];
2398     uint8_t bit_buf2[2][MAX_MB_BYTES];
2399     uint8_t bit_buf_tex[2][MAX_MB_BYTES];
2400     PutBitContext pb[2], pb2[2], tex_pb[2];
2401
2402     ff_check_alignment();
2403
2404     for(i=0; i<2; i++){
2405         init_put_bits(&pb    [i], bit_buf    [i], MAX_MB_BYTES);
2406         init_put_bits(&pb2   [i], bit_buf2   [i], MAX_MB_BYTES);
2407         init_put_bits(&tex_pb[i], bit_buf_tex[i], MAX_MB_BYTES);
2408     }
2409
2410     s->last_bits= put_bits_count(&s->pb);
2411     s->mv_bits=0;
2412     s->misc_bits=0;
2413     s->i_tex_bits=0;
2414     s->p_tex_bits=0;
2415     s->i_count=0;
2416     s->f_count=0;
2417     s->b_count=0;
2418     s->skip_count=0;
2419
2420     for(i=0; i<3; i++){
2421         /* init last dc values */
2422         /* note: quant matrix value (8) is implied here */
2423         s->last_dc[i] = 128 << s->intra_dc_precision;
2424
2425         s->current_picture.f.error[i] = 0;
2426     }
2427     s->mb_skip_run = 0;
2428     memset(s->last_mv, 0, sizeof(s->last_mv));
2429
2430     s->last_mv_dir = 0;
2431
2432     switch(s->codec_id){
2433     case AV_CODEC_ID_H263:
2434     case AV_CODEC_ID_H263P:
2435     case AV_CODEC_ID_FLV1:
2436         if (CONFIG_H263_ENCODER)
2437             s->gob_index = ff_h263_get_gob_height(s);
2438         break;
2439     case AV_CODEC_ID_MPEG4:
2440         if(CONFIG_MPEG4_ENCODER && s->partitioned_frame)
2441             ff_mpeg4_init_partitions(s);
2442         break;
2443     }
2444
2445     s->resync_mb_x=0;
2446     s->resync_mb_y=0;
2447     s->first_slice_line = 1;
2448     s->ptr_lastgob = s->pb.buf;
2449     for(mb_y= s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2450         s->mb_x=0;
2451         s->mb_y= mb_y;
2452
2453         ff_set_qscale(s, s->qscale);
2454         ff_init_block_index(s);
2455
2456         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2457             int xy= mb_y*s->mb_stride + mb_x; // removed const, H261 needs to adjust this
2458             int mb_type= s->mb_type[xy];
2459 //            int d;
2460             int dmin= INT_MAX;
2461             int dir;
2462
2463             if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < MAX_MB_BYTES){
2464                 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2465                 return -1;
2466             }
2467             if(s->data_partitioning){
2468                 if(   s->pb2   .buf_end - s->pb2   .buf - (put_bits_count(&s->    pb2)>>3) < MAX_MB_BYTES
2469                    || s->tex_pb.buf_end - s->tex_pb.buf - (put_bits_count(&s->tex_pb )>>3) < MAX_MB_BYTES){
2470                     av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2471                     return -1;
2472                 }
2473             }
2474
2475             s->mb_x = mb_x;
2476             s->mb_y = mb_y;  // moved into loop, can get changed by H.261
2477             ff_update_block_index(s);
2478
2479             if(CONFIG_H261_ENCODER && s->codec_id == AV_CODEC_ID_H261){
2480                 ff_h261_reorder_mb_index(s);
2481                 xy= s->mb_y*s->mb_stride + s->mb_x;
2482                 mb_type= s->mb_type[xy];
2483             }
2484
2485             /* write gob / video packet header  */
2486             if(s->rtp_mode){
2487                 int current_packet_size, is_gob_start;
2488
2489                 current_packet_size= ((put_bits_count(&s->pb)+7)>>3) - (s->ptr_lastgob - s->pb.buf);
2490
2491                 is_gob_start= s->avctx->rtp_payload_size && current_packet_size >= s->avctx->rtp_payload_size && mb_y + mb_x>0;
2492
2493                 if(s->start_mb_y == mb_y && mb_y > 0 && mb_x==0) is_gob_start=1;
2494
2495                 switch(s->codec_id){
2496                 case AV_CODEC_ID_H263:
2497                 case AV_CODEC_ID_H263P:
2498                     if(!s->h263_slice_structured)
2499                         if(s->mb_x || s->mb_y%s->gob_index) is_gob_start=0;
2500                     break;
2501                 case AV_CODEC_ID_MPEG2VIDEO:
2502                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
2503                 case AV_CODEC_ID_MPEG1VIDEO:
2504                     if(s->mb_skip_run) is_gob_start=0;
2505                     break;
2506                 }
2507
2508                 if(is_gob_start){
2509                     if(s->start_mb_y != mb_y || mb_x!=0){
2510                         write_slice_end(s);
2511
2512                         if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4 && s->partitioned_frame){
2513                             ff_mpeg4_init_partitions(s);
2514                         }
2515                     }
2516
2517                     assert((put_bits_count(&s->pb)&7) == 0);
2518                     current_packet_size= put_bits_ptr(&s->pb) - s->ptr_lastgob;
2519
2520                     if(s->avctx->error_rate && s->resync_mb_x + s->resync_mb_y > 0){
2521                         int r= put_bits_count(&s->pb)/8 + s->picture_number + 16 + s->mb_x + s->mb_y;
2522                         int d= 100 / s->avctx->error_rate;
2523                         if(r % d == 0){
2524                             current_packet_size=0;
2525                             s->pb.buf_ptr= s->ptr_lastgob;
2526                             assert(put_bits_ptr(&s->pb) == s->ptr_lastgob);
2527                         }
2528                     }
2529
2530                     if (s->avctx->rtp_callback){
2531                         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width + mb_x - s->resync_mb_x;
2532                         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, current_packet_size, number_mb);
2533                     }
2534                     update_mb_info(s, 1);
2535
2536                     switch(s->codec_id){
2537                     case AV_CODEC_ID_MPEG4:
2538                         if (CONFIG_MPEG4_ENCODER) {
2539                             ff_mpeg4_encode_video_packet_header(s);
2540                             ff_mpeg4_clean_buffers(s);
2541                         }
2542                     break;
2543                     case AV_CODEC_ID_MPEG1VIDEO:
2544                     case AV_CODEC_ID_MPEG2VIDEO:
2545                         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER) {
2546                             ff_mpeg1_encode_slice_header(s);
2547                             ff_mpeg1_clean_buffers(s);
2548                         }
2549                     break;
2550                     case AV_CODEC_ID_H263:
2551                     case AV_CODEC_ID_H263P:
2552                         if (CONFIG_H263_ENCODER)
2553                             ff_h263_encode_gob_header(s, mb_y);
2554                     break;
2555                     }
2556
2557                     if(s->flags&CODEC_FLAG_PASS1){
2558                         int bits= put_bits_count(&s->pb);
2559                         s->misc_bits+= bits - s->last_bits;
2560                         s->last_bits= bits;
2561                     }
2562
2563                     s->ptr_lastgob += current_packet_size;
2564                     s->first_slice_line=1;
2565                     s->resync_mb_x=mb_x;
2566                     s->resync_mb_y=mb_y;
2567                 }
2568             }
2569
2570             if(  (s->resync_mb_x   == s->mb_x)
2571                && s->resync_mb_y+1 == s->mb_y){
2572                 s->first_slice_line=0;
2573             }
2574
2575             s->mb_skipped=0;
2576             s->dquant=0; //only for QP_RD
2577
2578             update_mb_info(s, 0);
2579
2580             if (mb_type & (mb_type-1) || (s->mpv_flags & FF_MPV_FLAG_QP_RD)) { // more than 1 MB type possible or FF_MPV_FLAG_QP_RD
2581                 int next_block=0;
2582                 int pb_bits_count, pb2_bits_count, tex_pb_bits_count;
2583
2584                 copy_context_before_encode(&backup_s, s, -1);
2585                 backup_s.pb= s->pb;
2586                 best_s.data_partitioning= s->data_partitioning;
2587                 best_s.partitioned_frame= s->partitioned_frame;
2588                 if(s->data_partitioning){
2589                     backup_s.pb2= s->pb2;
2590                     backup_s.tex_pb= s->tex_pb;
2591                 }
2592
2593                 if(mb_type&CANDIDATE_MB_TYPE_INTER){
2594                     s->mv_dir = MV_DIR_FORWARD;
2595                     s->mv_type = MV_TYPE_16X16;
2596                     s->mb_intra= 0;
2597                     s->mv[0][0][0] = s->p_mv_table[xy][0];
2598                     s->mv[0][0][1] = s->p_mv_table[xy][1];
2599                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER, pb, pb2, tex_pb,
2600                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2601                 }
2602                 if(mb_type&CANDIDATE_MB_TYPE_INTER_I){
2603                     s->mv_dir = MV_DIR_FORWARD;
2604                     s->mv_type = MV_TYPE_FIELD;
2605                     s->mb_intra= 0;
2606                     for(i=0; i<2; i++){
2607                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
2608                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
2609                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
2610                     }
2611                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER_I, pb, pb2, tex_pb,
2612                                  &dmin, &next_block, 0, 0);
2613                 }
2614                 if(mb_type&CANDIDATE_MB_TYPE_SKIPPED){
2615                     s->mv_dir = MV_DIR_FORWARD;
2616                     s->mv_type = MV_TYPE_16X16;
2617                     s->mb_intra= 0;
2618                     s->mv[0][0][0] = 0;
2619                     s->mv[0][0][1] = 0;
2620                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_SKIPPED, pb, pb2, tex_pb,
2621                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2622                 }
2623                 if(mb_type&CANDIDATE_MB_TYPE_INTER4V){
2624                     s->mv_dir = MV_DIR_FORWARD;
2625                     s->mv_type = MV_TYPE_8X8;
2626                     s->mb_intra= 0;
2627                     for(i=0; i<4; i++){
2628                         s->mv[0][i][0] = s->current_picture.f.motion_val[0][s->block_index[i]][0];
2629                         s->mv[0][i][1] = s->current_picture.f.motion_val[0][s->block_index[i]][1];
2630                     }
2631                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER4V, pb, pb2, tex_pb,
2632                                  &dmin, &next_block, 0, 0);
2633                 }
2634                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD){
2635                     s->mv_dir = MV_DIR_FORWARD;
2636                     s->mv_type = MV_TYPE_16X16;
2637                     s->mb_intra= 0;
2638                     s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
2639                     s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
2640                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD, pb, pb2, tex_pb,
2641                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2642                 }
2643                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD){
2644                     s->mv_dir = MV_DIR_BACKWARD;
2645                     s->mv_type = MV_TYPE_16X16;
2646                     s->mb_intra= 0;
2647                     s->mv[1][0][0] = s->b_back_mv_table[xy][0];
2648                     s->mv[1][0][1] = s->b_back_mv_table[xy][1];
2649                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD, pb, pb2, tex_pb,
2650                                  &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]);
2651                 }
2652                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR){
2653                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2654                     s->mv_type = MV_TYPE_16X16;
2655                     s->mb_intra= 0;
2656                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
2657                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
2658                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
2659                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
2660                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR, pb, pb2, tex_pb,
2661                                  &dmin, &next_block, 0, 0);
2662                 }
2663                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD_I){
2664                     s->mv_dir = MV_DIR_FORWARD;
2665                     s->mv_type = MV_TYPE_FIELD;
2666                     s->mb_intra= 0;
2667                     for(i=0; i<2; i++){
2668                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
2669                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
2670                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
2671                     }
2672                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD_I, pb, pb2, tex_pb,
2673                                  &dmin, &next_block, 0, 0);
2674                 }
2675                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD_I){
2676                     s->mv_dir = MV_DIR_BACKWARD;
2677                     s->mv_type = MV_TYPE_FIELD;
2678                     s->mb_intra= 0;
2679                     for(i=0; i<2; i++){
2680                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
2681                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
2682                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
2683                     }
2684                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD_I, pb, pb2, tex_pb,
2685                                  &dmin, &next_block, 0, 0);
2686                 }
2687                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR_I){
2688                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2689                     s->mv_type = MV_TYPE_FIELD;
2690                     s->mb_intra= 0;
2691                     for(dir=0; dir<2; dir++){
2692                         for(i=0; i<2; i++){
2693                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
2694                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
2695                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
2696                         }
2697                     }
2698                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR_I, pb, pb2, tex_pb,
2699                                  &dmin, &next_block, 0, 0);
2700                 }
2701                 if(mb_type&CANDIDATE_MB_TYPE_INTRA){
2702                     s->mv_dir = 0;
2703                     s->mv_type = MV_TYPE_16X16;
2704                     s->mb_intra= 1;
2705                     s->mv[0][0][0] = 0;
2706                     s->mv[0][0][1] = 0;
2707                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTRA, pb, pb2, tex_pb,
2708                                  &dmin, &next_block, 0, 0);
2709                     if(s->h263_pred || s->h263_aic){
2710                         if(best_s.mb_intra)
2711                             s->mbintra_table[mb_x + mb_y*s->mb_stride]=1;
2712                         else
2713                             ff_clean_intra_table_entries(s); //old mode?
2714                     }
2715                 }
2716
2717                 if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) && dmin < INT_MAX) {
2718                     if(best_s.mv_type==MV_TYPE_16X16){ //FIXME move 4mv after QPRD
2719                         const int last_qp= backup_s.qscale;
2720                         int qpi, qp, dc[6];
2721                         int16_t ac[6][16];
2722                         const int mvdir= (best_s.mv_dir&MV_DIR_BACKWARD) ? 1 : 0;
2723                         static const int dquant_tab[4]={-1,1,-2,2};
2724
2725                         assert(backup_s.dquant == 0);
2726
2727                         //FIXME intra
2728                         s->mv_dir= best_s.mv_dir;
2729                         s->mv_type = MV_TYPE_16X16;
2730                         s->mb_intra= best_s.mb_intra;
2731                         s->mv[0][0][0] = best_s.mv[0][0][0];
2732                         s->mv[0][0][1] = best_s.mv[0][0][1];
2733                         s->mv[1][0][0] = best_s.mv[1][0][0];
2734                         s->mv[1][0][1] = best_s.mv[1][0][1];
2735
2736                         qpi = s->pict_type == AV_PICTURE_TYPE_B ? 2 : 0;
2737                         for(; qpi<4; qpi++){
2738                             int dquant= dquant_tab[qpi];
2739                             qp= last_qp + dquant;
2740                             if(qp < s->avctx->qmin || qp > s->avctx->qmax)
2741                                 continue;
2742                             backup_s.dquant= dquant;
2743                             if(s->mb_intra && s->dc_val[0]){
2744                                 for(i=0; i<6; i++){
2745                                     dc[i]= s->dc_val[0][ s->block_index[i] ];
2746                                     memcpy(ac[i], s->ac_val[0][s->block_index[i]], sizeof(int16_t)*16);
2747                                 }
2748                             }
2749
2750                             encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
2751                                          &dmin, &next_block, s->mv[mvdir][0][0], s->mv[mvdir][0][1]);
2752                             if(best_s.qscale != qp){
2753                                 if(s->mb_intra && s->dc_val[0]){
2754                                     for(i=0; i<6; i++){
2755                                         s->dc_val[0][ s->block_index[i] ]= dc[i];
2756                                         memcpy(s->ac_val[0][s->block_index[i]], ac[i], sizeof(int16_t)*16);
2757                                     }
2758                                 }
2759                             }
2760                         }
2761                     }
2762                 }
2763                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT){
2764                     int mx= s->b_direct_mv_table[xy][0];
2765                     int my= s->b_direct_mv_table[xy][1];
2766
2767                     backup_s.dquant = 0;
2768                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
2769                     s->mb_intra= 0;
2770                     ff_mpeg4_set_direct_mv(s, mx, my);
2771                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
2772                                  &dmin, &next_block, mx, my);
2773                 }
2774                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT0){
2775                     backup_s.dquant = 0;
2776                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
2777                     s->mb_intra= 0;
2778                     ff_mpeg4_set_direct_mv(s, 0, 0);
2779                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
2780                                  &dmin, &next_block, 0, 0);
2781                 }
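                     /* SKIP_RD: if the macroblock is inter coded and has coded coefficients,
                      * also try it with the residual skipped (skipdct=1) using the best
                      * mode's motion, in case dropping the texture wins the rate-distortion
                      * comparison. */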
2782                 if (!best_s.mb_intra && s->mpv_flags & FF_MPV_FLAG_SKIP_RD) {
2783                     int coded=0;
2784                     for(i=0; i<6; i++)
2785                         coded |= s->block_last_index[i];
2786                     if(coded){
2787                         int mx,my;
2788                         memcpy(s->mv, best_s.mv, sizeof(s->mv));
2789                         if(CONFIG_MPEG4_ENCODER && best_s.mv_dir & MV_DIRECT){
2790                             mx=my=0; //FIXME find the one we actually used
2791                             ff_mpeg4_set_direct_mv(s, mx, my);
2792                         }else if(best_s.mv_dir&MV_DIR_BACKWARD){
2793                             mx= s->mv[1][0][0];
2794                             my= s->mv[1][0][1];
2795                         }else{
2796                             mx= s->mv[0][0][0];
2797                             my= s->mv[0][0][1];
2798                         }
2799
2800                         s->mv_dir= best_s.mv_dir;
2801                         s->mv_type = best_s.mv_type;
2802                         s->mb_intra= 0;
2803 /*                        s->mv[0][0][0] = best_s.mv[0][0][0];
2804                         s->mv[0][0][1] = best_s.mv[0][0][1];
2805                         s->mv[1][0][0] = best_s.mv[1][0][0];
2806                         s->mv[1][0][1] = best_s.mv[1][0][1];*/
2807                         backup_s.dquant= 0;
2808                         s->skipdct=1;
2809                         encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
2810                                         &dmin, &next_block, mx, my);
2811                         s->skipdct=0;
2812                     }
2813                 }
2814
2815                 s->current_picture.f.qscale_table[xy] = best_s.qscale;
2816
2817                 copy_context_after_encode(s, &best_s, -1);
2818
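                     /* Append the winning candidate's bits (buffered in bit_buf[next_block^1])
                      * to the real bitstream held in backup_s and point s->pb (and pb2/tex_pb
                      * when data partitioning is used) back at it. */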
2819                 pb_bits_count= put_bits_count(&s->pb);
2820                 flush_put_bits(&s->pb);
2821                 avpriv_copy_bits(&backup_s.pb, bit_buf[next_block^1], pb_bits_count);
2822                 s->pb= backup_s.pb;
2823
2824                 if(s->data_partitioning){
2825                     pb2_bits_count= put_bits_count(&s->pb2);
2826                     flush_put_bits(&s->pb2);
2827                     avpriv_copy_bits(&backup_s.pb2, bit_buf2[next_block^1], pb2_bits_count);
2828                     s->pb2= backup_s.pb2;
2829
2830                     tex_pb_bits_count= put_bits_count(&s->tex_pb);
2831                     flush_put_bits(&s->tex_pb);
2832                     avpriv_copy_bits(&backup_s.tex_pb, bit_buf_tex[next_block^1], tex_pb_bits_count);
2833                     s->tex_pb= backup_s.tex_pb;
2834                 }
2835                 s->last_bits= put_bits_count(&s->pb);
2836
2837                 if (CONFIG_H263_ENCODER &&
2838                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
2839                     ff_h263_update_motion_val(s);
2840
2841                 if(next_block==0){ //FIXME 16 vs linesize16
2842                     s->dsp.put_pixels_tab[0][0](s->dest[0], s->rd_scratchpad                     , s->linesize  ,16);
2843                     s->dsp.put_pixels_tab[1][0](s->dest[1], s->rd_scratchpad + 16*s->linesize    , s->uvlinesize, 8);
2844                     s->dsp.put_pixels_tab[1][0](s->dest[2], s->rd_scratchpad + 16*s->linesize + 8, s->uvlinesize, 8);
2845                 }
2846
2847                 if(s->avctx->mb_decision == FF_MB_DECISION_BITS)
2848                     ff_MPV_decode_mb(s, s->block);
2849             } else {
2850                 int motion_x = 0, motion_y = 0;
2851                 s->mv_type=MV_TYPE_16X16;
2852                 // only one MB-Type possible
2853
2854                 switch(mb_type){
2855                 case CANDIDATE_MB_TYPE_INTRA:
2856                     s->mv_dir = 0;
2857                     s->mb_intra= 1;
2858                     motion_x= s->mv[0][0][0] = 0;
2859                     motion_y= s->mv[0][0][1] = 0;
2860                     break;
2861                 case CANDIDATE_MB_TYPE_INTER:
2862                     s->mv_dir = MV_DIR_FORWARD;
2863                     s->mb_intra= 0;
2864                     motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
2865                     motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
2866                     break;
2867                 case CANDIDATE_MB_TYPE_INTER_I:
2868                     s->mv_dir = MV_DIR_FORWARD;
2869                     s->mv_type = MV_TYPE_FIELD;
2870                     s->mb_intra= 0;
2871                     for(i=0; i<2; i++){
2872                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
2873                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
2874                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
2875                     }
2876                     break;
2877                 case CANDIDATE_MB_TYPE_INTER4V:
2878                     s->mv_dir = MV_DIR_FORWARD;
2879                     s->mv_type = MV_TYPE_8X8;
2880                     s->mb_intra= 0;
2881                     for(i=0; i<4; i++){
2882                         s->mv[0][i][0] = s->current_picture.f.motion_val[0][s->block_index[i]][0];
2883                         s->mv[0][i][1] = s->current_picture.f.motion_val[0][s->block_index[i]][1];
2884                     }
2885                     break;
2886                 case CANDIDATE_MB_TYPE_DIRECT:
2887                     if (CONFIG_MPEG4_ENCODER) {
2888                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
2889                         s->mb_intra= 0;
2890                         motion_x=s->b_direct_mv_table[xy][0];
2891                         motion_y=s->b_direct_mv_table[xy][1];
2892                         ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
2893                     }
2894                     break;
2895                 case CANDIDATE_MB_TYPE_DIRECT0:
2896                     if (CONFIG_MPEG4_ENCODER) {
2897                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
2898                         s->mb_intra= 0;
2899                         ff_mpeg4_set_direct_mv(s, 0, 0);
2900                     }
2901                     break;
2902                 case CANDIDATE_MB_TYPE_BIDIR:
2903                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2904                     s->mb_intra= 0;
2905                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
2906                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
2907                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
2908                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
2909                     break;
2910                 case CANDIDATE_MB_TYPE_BACKWARD:
2911                     s->mv_dir = MV_DIR_BACKWARD;
2912                     s->mb_intra= 0;
2913                     motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
2914                     motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
2915                     break;
2916                 case CANDIDATE_MB_TYPE_FORWARD:
2917                     s->mv_dir = MV_DIR_FORWARD;
2918                     s->mb_intra= 0;
2919                     motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
2920                     motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
2921                     break;
2922                 case CANDIDATE_MB_TYPE_FORWARD_I:
2923                     s->mv_dir = MV_DIR_FORWARD;
2924                     s->mv_type = MV_TYPE_FIELD;
2925                     s->mb_intra= 0;
2926                     for(i=0; i<2; i++){
2927                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
2928                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
2929                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
2930                     }
2931                     break;
2932                 case CANDIDATE_MB_TYPE_BACKWARD_I:
2933                     s->mv_dir = MV_DIR_BACKWARD;
2934                     s->mv_type = MV_TYPE_FIELD;
2935                     s->mb_intra= 0;
2936                     for(i=0; i<2; i++){
2937                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
2938                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
2939                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
2940                     }
2941                     break;
2942                 case CANDIDATE_MB_TYPE_BIDIR_I:
2943                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2944                     s->mv_type = MV_TYPE_FIELD;
2945                     s->mb_intra= 0;
2946                     for(dir=0; dir<2; dir++){
2947                         for(i=0; i<2; i++){
2948                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
2949                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
2950                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
2951                         }
2952                     }
2953                     break;
2954                 default:
2955                     av_log(s->avctx, AV_LOG_ERROR, "illegal MB type\n");
2956                 }
2957
2958                 encode_mb(s, motion_x, motion_y);
2959
2960                 // RAL: Update last macroblock type
2961                 s->last_mv_dir = s->mv_dir;
2962
2963                 if (CONFIG_H263_ENCODER &&
2964                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
2965                     ff_h263_update_motion_val(s);
2966
2967                 ff_MPV_decode_mb(s, s->block);
2968             }
2969
2970             /* clean the MV table in I/P/S frames for direct mode in B-frames */
2971             if(s->mb_intra /* && I,P,S_TYPE */){
2972                 s->p_mv_table[xy][0]=0;
2973                 s->p_mv_table[xy][1]=0;
2974             }
2975
2976             if(s->flags&CODEC_FLAG_PSNR){
2977                 int w= 16;
2978                 int h= 16;
2979
2980                 if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
2981                 if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
2982
2983                 s->current_picture.f.error[0] += sse(
2984                     s, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
2985                     s->dest[0], w, h, s->linesize);
2986                 s->current_picture.f.error[1] += sse(
2987                     s, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
2988                     s->dest[1], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
2989                 s->current_picture.f.error[2] += sse(
2990                     s, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
2991                     s->dest[2], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
2992             }
2993             if(s->loop_filter){
2994                 if(CONFIG_H263_ENCODER && s->out_format == FMT_H263)
2995                     ff_h263_loop_filter(s);
2996             }
2997             av_dlog(s->avctx, "MB %d %d bits\n",
2998                     s->mb_x + s->mb_y * s->mb_stride, put_bits_count(&s->pb));
2999         }
3000     }
3001
3002     // not beautiful, but the extension header must be written before flushing, so it has to be here
3003     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type == AV_PICTURE_TYPE_I)
3004         ff_msmpeg4_encode_ext_header(s);
3005
3006     write_slice_end(s);
3007
3008     /* Send the last GOB if RTP */
3009     if (s->avctx->rtp_callback) {
3010         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width - s->resync_mb_x;
3011         pdif = put_bits_ptr(&s->pb) - s->ptr_lastgob;
3012         /* Call the RTP callback to send the last GOB */
3013         emms_c();
3014         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, pdif, number_mb);
3015     }
3016
3017     return 0;
3018 }
3019
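     /* Merge per-slice statistics gathered by a slice thread context back into
      * the main context: each field is added to dst and cleared in src. */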
3020 #define MERGE(field) dst->field += src->field; src->field=0
3021 static void merge_context_after_me(MpegEncContext *dst, MpegEncContext *src){
3022     MERGE(me.scene_change_score);
3023     MERGE(me.mc_mb_var_sum_temp);
3024     MERGE(me.mb_var_sum_temp);
3025 }
3026
3027 static void merge_context_after_encode(MpegEncContext *dst, MpegEncContext *src){
3028     int i;
3029
3030     MERGE(dct_count[0]); //note: the other DCT vars are not part of the context
3031     MERGE(dct_count[1]);
3032     MERGE(mv_bits);
3033     MERGE(i_tex_bits);
3034     MERGE(p_tex_bits);
3035     MERGE(i_count);
3036     MERGE(f_count);
3037     MERGE(b_count);
3038     MERGE(skip_count);
3039     MERGE(misc_bits);
3040     MERGE(er.error_count);
3041     MERGE(padding_bug_score);
3042     MERGE(current_picture.f.error[0]);
3043     MERGE(current_picture.f.error[1]);
3044     MERGE(current_picture.f.error[2]);
3045
3046     if(dst->avctx->noise_reduction){
3047         for(i=0; i<64; i++){
3048             MERGE(dct_error_sum[0][i]);
3049             MERGE(dct_error_sum[1][i]);
3050         }
3051     }
3052
3053     assert(put_bits_count(&src->pb) % 8 ==0);
3054     assert(put_bits_count(&dst->pb) % 8 ==0);
3055     avpriv_copy_bits(&dst->pb, src->pb.buf, put_bits_count(&src->pb));
3056     flush_put_bits(&dst->pb);
3057 }
3058
3059 static int estimate_qp(MpegEncContext *s, int dry_run){
3060     if (s->next_lambda){
3061         s->current_picture_ptr->f.quality =
3062         s->current_picture.f.quality = s->next_lambda;
3063         if(!dry_run) s->next_lambda= 0;
3064     } else if (!s->fixed_qscale) {
3065         s->current_picture_ptr->f.quality =
3066         s->current_picture.f.quality = ff_rate_estimate_qscale(s, dry_run);
3067         if (s->current_picture.f.quality < 0)
3068             return -1;
3069     }
3070
3071     if(s->adaptive_quant){
3072         switch(s->codec_id){
3073         case AV_CODEC_ID_MPEG4:
3074             if (CONFIG_MPEG4_ENCODER)
3075                 ff_clean_mpeg4_qscales(s);
3076             break;
3077         case AV_CODEC_ID_H263:
3078         case AV_CODEC_ID_H263P:
3079         case AV_CODEC_ID_FLV1:
3080             if (CONFIG_H263_ENCODER)
3081                 ff_clean_h263_qscales(s);
3082             break;
3083         default:
3084             ff_init_qscale_tab(s);
3085         }
3086
3087         s->lambda= s->lambda_table[0];
3088         //FIXME broken
3089     }else
3090         s->lambda = s->current_picture.f.quality;
3091     update_qscale(s);
3092     return 0;
3093 }
3094
3095 /* must be called before writing the header */
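     /* pp_time is the distance between the two reference frames surrounding a
      * B-frame; pb_time is the distance from the past reference to the B-frame.
      * Both are used to scale motion vectors for B-frame direct mode. */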
3096 static void set_frame_distances(MpegEncContext * s){
3097     assert(s->current_picture_ptr->f.pts != AV_NOPTS_VALUE);
3098     s->time = s->current_picture_ptr->f.pts * s->avctx->time_base.num;
3099
3100     if(s->pict_type==AV_PICTURE_TYPE_B){
3101         s->pb_time= s->pp_time - (s->last_non_b_time - s->time);
3102         assert(s->pb_time > 0 && s->pb_time < s->pp_time);
3103     }else{
3104         s->pp_time= s->time - s->last_non_b_time;
3105         s->last_non_b_time= s->time;
3106         assert(s->picture_number==0 || s->pp_time > 0);
3107     }
3108 }
3109
3110 static int encode_picture(MpegEncContext *s, int picture_number)
3111 {
3112     int i, ret;
3113     int bits;
3114     int context_count = s->slice_context_count;
3115
3116     s->picture_number = picture_number;
3117
3118     /* Reset the average MB variance */
3119     s->me.mb_var_sum_temp    =
3120     s->me.mc_mb_var_sum_temp = 0;
3121
3122     /* we need to initialize some time variables before we can encode B-frames */
3123     // RAL: Condition added for MPEG1VIDEO
3124     if (s->codec_id == AV_CODEC_ID_MPEG1VIDEO || s->codec_id == AV_CODEC_ID_MPEG2VIDEO || (s->h263_pred && !s->msmpeg4_version))
3125         set_frame_distances(s);
3126     if(CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4)
3127         ff_set_mpeg4_time(s);
3128
3129     s->me.scene_change_score=0;
3130
3131 //    s->lambda= s->current_picture_ptr->quality; //FIXME qscale / ... stuff for ME rate distortion
3132
3133     if(s->pict_type==AV_PICTURE_TYPE_I){
3134         if(s->msmpeg4_version >= 3) s->no_rounding=1;
3135         else                        s->no_rounding=0;
3136     }else if(s->pict_type!=AV_PICTURE_TYPE_B){
3137         if(s->flipflop_rounding || s->codec_id == AV_CODEC_ID_H263P || s->codec_id == AV_CODEC_ID_MPEG4)
3138             s->no_rounding ^= 1;
3139     }
3140
3141     if(s->flags & CODEC_FLAG_PASS2){
3142         if (estimate_qp(s,1) < 0)
3143             return -1;
3144         ff_get_2pass_fcode(s);
3145     }else if(!(s->flags & CODEC_FLAG_QSCALE)){
3146         if(s->pict_type==AV_PICTURE_TYPE_B)
3147             s->lambda= s->last_lambda_for[s->pict_type];
3148         else
3149             s->lambda= s->last_lambda_for[s->last_non_b_pict_type];
3150         update_qscale(s);
3151     }
3152
3153     s->mb_intra=0; //for the rate distortion & bit compare functions
3154     for(i=1; i<context_count; i++){
3155         ret = ff_update_duplicate_context(s->thread_context[i], s);
3156         if (ret < 0)
3157             return ret;
3158     }
3159
3160     if(ff_init_me(s)<0)
3161         return -1;
3162
3163     /* Estimate motion for every MB */
3164     if(s->pict_type != AV_PICTURE_TYPE_I){
3165         s->lambda = (s->lambda * s->avctx->me_penalty_compensation + 128)>>8;
3166         s->lambda2= (s->lambda2* (int64_t)s->avctx->me_penalty_compensation + 128)>>8;
3167         if(s->pict_type != AV_PICTURE_TYPE_B && s->avctx->me_threshold==0){
3168             if((s->avctx->pre_me && s->last_non_b_pict_type==AV_PICTURE_TYPE_I) || s->avctx->pre_me==2){
3169                 s->avctx->execute(s->avctx, pre_estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3170             }
3171         }
3172
3173         s->avctx->execute(s->avctx, estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3174     }else /* if(s->pict_type == AV_PICTURE_TYPE_I) */{
3175         /* I-Frame */
3176         for(i=0; i<s->mb_stride*s->mb_height; i++)
3177             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3178
3179         if(!s->fixed_qscale){
3180             /* finding spatial complexity for I-frame rate control */
3181             s->avctx->execute(s->avctx, mb_var_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3182         }
3183     }
3184     for(i=1; i<context_count; i++){
3185         merge_context_after_me(s, s->thread_context[i]);
3186     }
3187     s->current_picture.mc_mb_var_sum= s->current_picture_ptr->mc_mb_var_sum= s->me.mc_mb_var_sum_temp;
3188     s->current_picture.   mb_var_sum= s->current_picture_ptr->   mb_var_sum= s->me.   mb_var_sum_temp;
3189     emms_c();
3190
3191     if(s->me.scene_change_score > s->avctx->scenechange_threshold && s->pict_type == AV_PICTURE_TYPE_P){
3192         s->pict_type= AV_PICTURE_TYPE_I;
3193         for(i=0; i<s->mb_stride*s->mb_height; i++)
3194             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3195         av_dlog(s, "Scene change detected, encoding as I Frame %d %d\n",
3196                 s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
3197     }
3198
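         /* Choose the smallest f_code/b_code able to represent the estimated motion
          * vectors, then fix up any vectors that still exceed the representable
          * range (ff_fix_long_p_mvs / ff_fix_long_mvs). */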
3199     if(!s->umvplus){
3200         if(s->pict_type==AV_PICTURE_TYPE_P || s->pict_type==AV_PICTURE_TYPE_S) {
3201             s->f_code= ff_get_best_fcode(s, s->p_mv_table, CANDIDATE_MB_TYPE_INTER);
3202
3203             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3204                 int a,b;
3205                 a= ff_get_best_fcode(s, s->p_field_mv_table[0][0], CANDIDATE_MB_TYPE_INTER_I); //FIXME field_select
3206                 b= ff_get_best_fcode(s, s->p_field_mv_table[1][1], CANDIDATE_MB_TYPE_INTER_I);
3207                 s->f_code= FFMAX3(s->f_code, a, b);
3208             }
3209
3210             ff_fix_long_p_mvs(s);
3211             ff_fix_long_mvs(s, NULL, 0, s->p_mv_table, s->f_code, CANDIDATE_MB_TYPE_INTER, 0);
3212             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3213                 int j;
3214                 for(i=0; i<2; i++){
3215                     for(j=0; j<2; j++)
3216                         ff_fix_long_mvs(s, s->p_field_select_table[i], j,
3217                                         s->p_field_mv_table[i][j], s->f_code, CANDIDATE_MB_TYPE_INTER_I, 0);
3218                 }
3219             }
3220         }
3221
3222         if(s->pict_type==AV_PICTURE_TYPE_B){
3223             int a, b;
3224
3225             a = ff_get_best_fcode(s, s->b_forw_mv_table, CANDIDATE_MB_TYPE_FORWARD);
3226             b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3227             s->f_code = FFMAX(a, b);
3228
3229             a = ff_get_best_fcode(s, s->b_back_mv_table, CANDIDATE_MB_TYPE_BACKWARD);
3230             b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3231             s->b_code = FFMAX(a, b);
3232
3233             ff_fix_long_mvs(s, NULL, 0, s->b_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_FORWARD, 1);
3234             ff_fix_long_mvs(s, NULL, 0, s->b_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BACKWARD, 1);
3235             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3236             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3237             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3238                 int dir, j;
3239                 for(dir=0; dir<2; dir++){
3240                     for(i=0; i<2; i++){
3241                         for(j=0; j<2; j++){
3242                             int type= dir ? (CANDIDATE_MB_TYPE_BACKWARD_I|CANDIDATE_MB_TYPE_BIDIR_I)
3243                                           : (CANDIDATE_MB_TYPE_FORWARD_I |CANDIDATE_MB_TYPE_BIDIR_I);
3244                             ff_fix_long_mvs(s, s->b_field_select_table[dir][i], j,
3245                                             s->b_field_mv_table[dir][i][j], dir ? s->b_code : s->f_code, type, 1);
3246                         }
3247                     }
3248                 }
3249             }
3250         }
3251     }
3252
3253     if (estimate_qp(s, 0) < 0)
3254         return -1;
3255
3256     if(s->qscale < 3 && s->max_qcoeff<=128 && s->pict_type==AV_PICTURE_TYPE_I && !(s->flags & CODEC_FLAG_QSCALE))
3257         s->qscale= 3; //reduce clipping problems
3258
3259     if (s->out_format == FMT_MJPEG) {
3260         /* for MJPEG, qscale is folded into the quantization matrix itself */
3261         for(i=1;i<64;i++){
3262             int j= s->dsp.idct_permutation[i];
3263
3264             s->intra_matrix[j] = av_clip_uint8((ff_mpeg1_default_intra_matrix[i] * s->qscale) >> 3);
3265         }
3266         s->y_dc_scale_table=
3267         s->c_dc_scale_table= ff_mpeg2_dc_scale_table[s->intra_dc_precision];
3268         s->intra_matrix[0] = ff_mpeg2_dc_scale_table[s->intra_dc_precision][8];
3269         ff_convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
3270                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
3271         s->qscale= 8;
3272     }
3273
3274     //FIXME var duplication
3275     s->current_picture_ptr->f.key_frame =
3276     s->current_picture.f.key_frame = s->pict_type == AV_PICTURE_TYPE_I; //FIXME pic_ptr
3277     s->current_picture_ptr->f.pict_type =
3278     s->current_picture.f.pict_type = s->pict_type;
3279
3280     if (s->current_picture.f.key_frame)
3281         s->picture_in_gop_number=0;
3282
3283     s->last_bits= put_bits_count(&s->pb);
3284     switch(s->out_format) {
3285     case FMT_MJPEG:
3286         if (CONFIG_MJPEG_ENCODER)
3287             ff_mjpeg_encode_picture_header(s);
3288         break;
3289     case FMT_H261:
3290         if (CONFIG_H261_ENCODER)
3291             ff_h261_encode_picture_header(s, picture_number);
3292         break;
3293     case FMT_H263:
3294         if (CONFIG_WMV2_ENCODER && s->codec_id == AV_CODEC_ID_WMV2)
3295             ff_wmv2_encode_picture_header(s, picture_number);
3296         else if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
3297             ff_msmpeg4_encode_picture_header(s, picture_number);
3298         else if (CONFIG_MPEG4_ENCODER && s->h263_pred)
3299             ff_mpeg4_encode_picture_header(s, picture_number);
3300         else if (CONFIG_RV10_ENCODER && s->codec_id == AV_CODEC_ID_RV10)
3301             ff_rv10_encode_picture_header(s, picture_number);
3302         else if (CONFIG_RV20_ENCODER && s->codec_id == AV_CODEC_ID_RV20)
3303             ff_rv20_encode_picture_header(s, picture_number);
3304         else if (CONFIG_FLV_ENCODER && s->codec_id == AV_CODEC_ID_FLV1)
3305             ff_flv_encode_picture_header(s, picture_number);
3306         else if (CONFIG_H263_ENCODER)
3307             ff_h263_encode_picture_header(s, picture_number);
3308         break;
3309     case FMT_MPEG1:
3310         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
3311             ff_mpeg1_encode_picture_header(s, picture_number);
3312         break;
3313     case FMT_H264:
3314         break;
3315     default:
3316         assert(0);
3317     }
3318     bits= put_bits_count(&s->pb);
3319     s->header_bits= bits - s->last_bits;
3320
3321     for(i=1; i<context_count; i++){
3322         update_duplicate_context_after_me(s->thread_context[i], s);
3323     }
3324     s->avctx->execute(s->avctx, encode_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3325     for(i=1; i<context_count; i++){
3326         merge_context_after_encode(s, s->thread_context[i]);
3327     }
3328     emms_c();
3329     return 0;
3330 }
3331
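     /* DCT-domain noise reduction: accumulate each coefficient's magnitude in
      * dct_error_sum and shrink the coefficient toward zero by the per-position
      * offset in dct_offset (never changing its sign). */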
3332 static void denoise_dct_c(MpegEncContext *s, int16_t *block){
3333     const int intra= s->mb_intra;
3334     int i;
3335
3336     s->dct_count[intra]++;
3337
3338     for(i=0; i<64; i++){
3339         int level= block[i];
3340
3341         if(level){
3342             if(level>0){
3343                 s->dct_error_sum[intra][i] += level;
3344                 level -= s->dct_offset[intra][i];
3345                 if(level<0) level=0;
3346             }else{
3347                 s->dct_error_sum[intra][i] -= level;
3348                 level += s->dct_offset[intra][i];
3349                 if(level>0) level=0;
3350             }
3351             block[i]= level;
3352         }
3353     }
3354 }
3355
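     /* Trellis (rate-distortion optimal) quantization: for each scan position,
      * one or two candidate levels are scored as distortion + lambda * bits(run,
      * level) in a Viterbi-style dynamic program over run lengths.  survivor[]
      * holds the start positions that can still begin an optimal path, and the
      * best path is traced back through run_tab[]/level_tab[] at the end. */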
3356 static int dct_quantize_trellis_c(MpegEncContext *s,
3357                                   int16_t *block, int n,
3358                                   int qscale, int *overflow){
3359     const int *qmat;
3360     const uint8_t *scantable= s->intra_scantable.scantable;
3361     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3362     int max=0;
3363     unsigned int threshold1, threshold2;
3364     int bias=0;
3365     int run_tab[65];
3366     int level_tab[65];
3367     int score_tab[65];
3368     int survivor[65];
3369     int survivor_count;
3370     int last_run=0;
3371     int last_level=0;
3372     int last_score= 0;
3373     int last_i;
3374     int coeff[2][64];
3375     int coeff_count[64];
3376     int qmul, qadd, start_i, last_non_zero, i, dc;
3377     const int esc_length= s->ac_esc_length;
3378     uint8_t * length;
3379     uint8_t * last_length;
3380     const int lambda= s->lambda2 >> (FF_LAMBDA_SHIFT - 6);
3381
3382     s->dsp.fdct (block);
3383
3384     if(s->dct_error_sum)
3385         s->denoise_dct(s, block);
3386     qmul= qscale*16;
3387     qadd= ((qscale-1)|1)*8;
3388
3389     if (s->mb_intra) {
3390         int q;
3391         if (!s->h263_aic) {
3392             if (n < 4)
3393                 q = s->y_dc_scale;
3394             else
3395                 q = s->c_dc_scale;
3396             q = q << 3;
3397         } else{
3398             /* For AIC we skip quant/dequant of INTRADC */
3399             q = 1 << 3;
3400             qadd=0;
3401         }
3402
3403         /* note: block[0] is assumed to be positive */
3404         block[0] = (block[0] + (q >> 1)) / q;
3405         start_i = 1;
3406         last_non_zero = 0;
3407         qmat = s->q_intra_matrix[qscale];
3408         if(s->mpeg_quant || s->out_format == FMT_MPEG1)
3409             bias= 1<<(QMAT_SHIFT-1);
3410         length     = s->intra_ac_vlc_length;
3411         last_length= s->intra_ac_vlc_last_length;
3412     } else {
3413         start_i = 0;
3414         last_non_zero = -1;
3415         qmat = s->q_inter_matrix[qscale];
3416         length     = s->inter_ac_vlc_length;
3417         last_length= s->inter_ac_vlc_last_length;
3418     }
3419     last_i= start_i;
3420
3421     threshold1= (1<<QMAT_SHIFT) - bias - 1;
3422     threshold2= (threshold1<<1);
3423
3424     for(i=63; i>=start_i; i--) {
3425         const int j = scantable[i];
3426         int level = block[j] * qmat[j];
3427
3428         if(((unsigned)(level+threshold1))>threshold2){
3429             last_non_zero = i;
3430             break;
3431         }
3432     }
3433
3434     for(i=start_i; i<=last_non_zero; i++) {
3435         const int j = scantable[i];
3436         int level = block[j] * qmat[j];
3437
3438 //        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
3439 //           || bias-level >= (1<<(QMAT_SHIFT - 3))){
3440         if(((unsigned)(level+threshold1))>threshold2){
3441             if(level>0){
3442                 level= (bias + level)>>QMAT_SHIFT;
3443                 coeff[0][i]= level;
3444                 coeff[1][i]= level-1;
3445 //                coeff[2][k]= level-2;
3446             }else{
3447                 level= (bias - level)>>QMAT_SHIFT;
3448                 coeff[0][i]= -level;
3449                 coeff[1][i]= -level+1;
3450 //                coeff[2][k]= -level+2;
3451             }
3452             coeff_count[i]= FFMIN(level, 2);
3453             assert(coeff_count[i]);
3454             max |=level;
3455         }else{
3456             coeff[0][i]= (level>>31)|1;
3457             coeff_count[i]= 1;
3458         }
3459     }
3460
3461     *overflow= s->max_qcoeff < max; //overflow might have happened
3462
3463     if(last_non_zero < start_i){
3464         memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
3465         return last_non_zero;
3466     }
3467
3468     score_tab[start_i]= 0;
3469     survivor[0]= start_i;
3470     survivor_count= 1;
3471
3472     for(i=start_i; i<=last_non_zero; i++){
3473         int level_index, j, zero_distortion;
3474         int dct_coeff= FFABS(block[ scantable[i] ]);
3475         int best_score=256*256*256*120;
3476
3477         if (s->dsp.fdct == ff_fdct_ifast)
3478             dct_coeff= (dct_coeff*ff_inv_aanscales[ scantable[i] ]) >> 12;
3479         zero_distortion= dct_coeff*dct_coeff;
3480
3481         for(level_index=0; level_index < coeff_count[i]; level_index++){
3482             int distortion;
3483             int level= coeff[level_index][i];
3484             const int alevel= FFABS(level);
3485             int unquant_coeff;
3486
3487             assert(level);
3488
3489             if(s->out_format == FMT_H263){
3490                 unquant_coeff= alevel*qmul + qadd;
3491             }else{ //MPEG1
3492                 j= s->dsp.idct_permutation[ scantable[i] ]; //FIXME optimize
3493                 if(s->mb_intra){
3494                         unquant_coeff = (int)(  alevel  * qscale * s->intra_matrix[j]) >> 3;
3495                         unquant_coeff =   (unquant_coeff - 1) | 1;
3496                 }else{
3497                         unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[j])) >> 4;
3498                         unquant_coeff =   (unquant_coeff - 1) | 1;
3499                 }
3500                 unquant_coeff<<= 3;
3501             }
3502
3503             distortion= (unquant_coeff - dct_coeff) * (unquant_coeff - dct_coeff) - zero_distortion;
3504             level+=64;
3505             if((level&(~127)) == 0){
3506                 for(j=survivor_count-1; j>=0; j--){
3507                     int run= i - survivor[j];
3508                     int score= distortion + length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3509                     score += score_tab[i-run];
3510
3511                     if(score < best_score){
3512                         best_score= score;
3513                         run_tab[i+1]= run;
3514                         level_tab[i+1]= level-64;
3515                     }
3516                 }
3517
3518                 if(s->out_format == FMT_H263){
3519                     for(j=survivor_count-1; j>=0; j--){
3520                         int run= i - survivor[j];
3521                         int score= distortion + last_length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3522                         score += score_tab[i-run];
3523                         if(score < last_score){
3524                             last_score= score;
3525                             last_run= run;
3526                             last_level= level-64;
3527                             last_i= i+1;
3528                         }
3529                     }
3530                 }
3531             }else{
3532                 distortion += esc_length*lambda;
3533                 for(j=survivor_count-1; j>=0; j--){
3534                     int run= i - survivor[j];
3535                     int score= distortion + score_tab[i-run];
3536
3537                     if(score < best_score){
3538                         best_score= score;
3539                         run_tab[i+1]= run;
3540                         level_tab[i+1]= level-64;
3541                     }
3542                 }
3543
3544                 if(s->out_format == FMT_H263){
3545                   for(j=survivor_count-1; j>=0; j--){
3546                         int run= i - survivor[j];
3547                         int score= distortion + score_tab[i-run];
3548                         if(score < last_score){
3549                             last_score= score;
3550                             last_run= run;
3551                             last_level= level-64;
3552                             last_i= i+1;
3553                         }
3554                     }
3555                 }
3556             }
3557         }
3558
3559         score_tab[i+1]= best_score;
3560
3561                 //Note: there is a VLC code in MPEG-4 which is 1 bit shorter than another one with a shorter run and the same level
3562         if(last_non_zero <= 27){
3563             for(; survivor_count; survivor_count--){
3564                 if(score_tab[ survivor[survivor_count-1] ] <= best_score)
3565                     break;
3566             }
3567         }else{
3568             for(; survivor_count; survivor_count--){
3569                 if(score_tab[ survivor[survivor_count-1] ] <= best_score + lambda)
3570                     break;
3571             }
3572         }
3573
3574         survivor[ survivor_count++ ]= i+1;
3575     }
3576
3577     if(s->out_format != FMT_H263){
3578         last_score= 256*256*256*120;
3579         for(i= survivor[0]; i<=last_non_zero + 1; i++){
3580             int score= score_tab[i];
3581             if(i) score += lambda*2; //FIXME be more exact?
3582
3583             if(score < last_score){
3584                 last_score= score;
3585                 last_i= i;
3586                 last_level= level_tab[i];
3587                 last_run= run_tab[i];
3588             }
3589         }
3590     }
3591
3592     s->coded_score[n] = last_score;
3593
3594     dc= FFABS(block[0]);
3595     last_non_zero= last_i - 1;
3596     memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
3597
3598     if(last_non_zero < start_i)
3599         return last_non_zero;
3600
3601     if(last_non_zero == 0 && start_i == 0){
3602         int best_level= 0;
3603         int best_score= dc * dc;
3604
3605         for(i=0; i<coeff_count[0]; i++){
3606             int level= coeff[i][0];
3607             int alevel= FFABS(level);
3608             int unquant_coeff, score, distortion;
3609
3610             if(s->out_format == FMT_H263){
3611                     unquant_coeff= (alevel*qmul + qadd)>>3;
3612             }else{ //MPEG1
3613                     unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[0])) >> 4;
3614                     unquant_coeff =   (unquant_coeff - 1) | 1;
3615             }
3616             unquant_coeff = (unquant_coeff + 4) >> 3;
3617             unquant_coeff<<= 3 + 3;
3618
3619             distortion= (unquant_coeff - dc) * (unquant_coeff - dc);
3620             level+=64;
3621             if((level&(~127)) == 0) score= distortion + last_length[UNI_AC_ENC_INDEX(0, level)]*lambda;
3622             else                    score= distortion + esc_length*lambda;
3623
3624             if(score < best_score){
3625                 best_score= score;
3626                 best_level= level - 64;
3627             }
3628         }
3629         block[0]= best_level;
3630         s->coded_score[n] = best_score - dc*dc;
3631         if(best_level == 0) return -1;
3632         else                return last_non_zero;
3633     }
3634
3635     i= last_i;
3636     assert(last_level);
3637
3638     block[ perm_scantable[last_non_zero] ]= last_level;
3639     i -= last_run + 1;
3640
3641     for(; i>start_i; i -= run_tab[i] + 1){
3642         block[ perm_scantable[i-1] ]= level_tab[i];
3643     }
3644
3645     return last_non_zero;
3646 }
3647
3648 //#define REFINE_STATS 1
3649 static int16_t basis[64][64];
3650
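     /* basis[] caches the 64 8x8 DCT basis functions (IDCT-permuted and scaled
      * by BASIS_SHIFT); build_basis() fills it on first use so single-coefficient
      * changes can be applied and scored directly on the spatial-domain residual
      * via add_8x8basis()/try_8x8basis(). */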
3651 static void build_basis(uint8_t *perm){
3652     int i, j, x, y;
3653     emms_c();
3654     for(i=0; i<8; i++){
3655         for(j=0; j<8; j++){
3656             for(y=0; y<8; y++){
3657                 for(x=0; x<8; x++){
3658                     double s= 0.25*(1<<BASIS_SHIFT);
3659                     int index= 8*i + j;
3660                     int perm_index= perm[index];
3661                     if(i==0) s*= sqrt(0.5);
3662                     if(j==0) s*= sqrt(0.5);
3663                     basis[perm_index][8*x + y]= lrintf(s * cos((M_PI/8.0)*i*(x+0.5)) * cos((M_PI/8.0)*j*(y+0.5)));
3664                 }
3665             }
3666         }
3667     }
3668 }
3669
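     /* Iterative quantization refinement (quantizer noise shaping): starting
      * from the output of the regular quantizer, repeatedly try changing a single
      * coefficient by +-1, scoring each change as the VLC bit-count difference
      * times lambda plus the distortion change reported by try_8x8basis(); apply
      * the best change and loop until nothing improves the score. */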
3670 static int dct_quantize_refine(MpegEncContext *s, //FIXME breaks denoise?
3671                         int16_t *block, int16_t *weight, int16_t *orig,
3672                         int n, int qscale){
3673     int16_t rem[64];
3674     LOCAL_ALIGNED_16(int16_t, d1, [64]);
3675     const uint8_t *scantable= s->intra_scantable.scantable;
3676     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3677 //    unsigned int threshold1, threshold2;
3678 //    int bias=0;
3679     int run_tab[65];
3680     int prev_run=0;
3681     int prev_level=0;
3682     int qmul, qadd, start_i, last_non_zero, i, dc;
3683     uint8_t * length;
3684     uint8_t * last_length;
3685     int lambda;
3686     int rle_index, run, q = 1, sum; //q is only used when s->mb_intra is true
3687 #ifdef REFINE_STATS
3688 static int count=0;
3689 static int after_last=0;
3690 static int to_zero=0;
3691 static int from_zero=0;
3692 static int raise=0;
3693 static int lower=0;
3694 static int messed_sign=0;
3695 #endif
3696
3697     if(basis[0][0] == 0)
3698         build_basis(s->dsp.idct_permutation);
3699
3700     qmul= qscale*2;
3701     qadd= (qscale-1)|1;
3702     if (s->mb_intra) {
3703         if (!s->h263_aic) {
3704             if (n < 4)
3705                 q = s->y_dc_scale;
3706             else
3707                 q = s->c_dc_scale;
3708         } else{
3709             /* For AIC we skip quant/dequant of INTRADC */
3710             q = 1;
3711             qadd=0;
3712         }
3713         q <<= RECON_SHIFT-3;
3714         /* note: block[0] is assumed to be positive */
3715         dc= block[0]*q;
3716 //        block[0] = (block[0] + (q >> 1)) / q;
3717         start_i = 1;
3718 //        if(s->mpeg_quant || s->out_format == FMT_MPEG1)
3719 //            bias= 1<<(QMAT_SHIFT-1);
3720         length     = s->intra_ac_vlc_length;
3721         last_length= s->intra_ac_vlc_last_length;
3722     } else {
3723         dc= 0;
3724         start_i = 0;
3725         length     = s->inter_ac_vlc_length;
3726         last_length= s->inter_ac_vlc_last_length;
3727     }
3728     last_non_zero = s->block_last_index[n];
3729
3730 #ifdef REFINE_STATS
3731 {START_TIMER
3732 #endif
3733     dc += (1<<(RECON_SHIFT-1));
3734     for(i=0; i<64; i++){
3735         rem[i]= dc - (orig[i]<<RECON_SHIFT); //FIXME use orig directly instead of copying to rem[]
3736     }
3737 #ifdef REFINE_STATS
3738 STOP_TIMER("memset rem[]")}
3739 #endif
3740     sum=0;
3741     for(i=0; i<64; i++){
3742         int one= 36;
3743         int qns=4;
3744         int w;
3745
3746         w= FFABS(weight[i]) + qns*one;
3747         w= 15 + (48*qns*one + w/2)/w; // 16 .. 63
3748
3749         weight[i] = w;
3750 //        w=weight[i] = (63*qns + (w/2)) / w;
3751
3752         assert(w>0);
3753         assert(w<(1<<6));
3754         sum += w*w;
3755     }
3756     lambda= sum*(uint64_t)s->lambda2 >> (FF_LAMBDA_SHIFT - 6 + 6 + 6 + 6);
3757 #ifdef REFINE_STATS
3758 {START_TIMER
3759 #endif
3760     run=0;
3761     rle_index=0;
3762     for(i=start_i; i<=last_non_zero; i++){
3763         int j= perm_scantable[i];
3764         const int level= block[j];
3765         int coeff;
3766
3767         if(level){
3768             if(level<0) coeff= qmul*level - qadd;
3769             else        coeff= qmul*level + qadd;
3770             run_tab[rle_index++]=run;
3771             run=0;
3772
3773             s->dsp.add_8x8basis(rem, basis[j], coeff);
3774         }else{
3775             run++;
3776         }
3777     }
3778 #ifdef REFINE_STATS
3779 if(last_non_zero>0){
3780 STOP_TIMER("init rem[]")
3781 }
3782 }
3783
3784 {START_TIMER
3785 #endif
3786     for(;;){
3787         int best_score=s->dsp.try_8x8basis(rem, weight, basis[0], 0);
3788         int best_coeff=0;
3789         int best_change=0;
3790         int run2, best_unquant_change=0, analyze_gradient;
3791 #ifdef REFINE_STATS
3792 {START_TIMER
3793 #endif
3794         analyze_gradient = last_non_zero > 2 || s->quantizer_noise_shaping >= 3;
3795
3796         if(analyze_gradient){
3797 #ifdef REFINE_STATS
3798 {START_TIMER
3799 #endif
3800             for(i=0; i<64; i++){
3801                 int w= weight[i];
3802
3803                 d1[i] = (rem[i]*w*w + (1<<(RECON_SHIFT+12-1)))>>(RECON_SHIFT+12);
3804             }
3805 #ifdef REFINE_STATS
3806 STOP_TIMER("rem*w*w")}
3807 {START_TIMER
3808 #endif
3809             s->dsp.fdct(d1);
3810 #ifdef REFINE_STATS
3811 STOP_TIMER("dct")}
3812 #endif
3813         }
3814
3815         if(start_i){
3816             const int level= block[0];
3817             int change, old_coeff;
3818
3819             assert(s->mb_intra);
3820
3821             old_coeff= q*level;
3822
3823             for(change=-1; change<=1; change+=2){
3824                 int new_level= level + change;
3825                 int score, new_coeff;
3826
3827                 new_coeff= q*new_level;
3828                 if(new_coeff >= 2048 || new_coeff < 0)
3829                     continue;
3830
3831                 score= s->dsp.try_8x8basis(rem, weight, basis[0], new_coeff - old_coeff);
3832                 if(score<best_score){
3833                     best_score= score;
3834                     best_coeff= 0;
3835                     best_change= change;
3836                     best_unquant_change= new_coeff - old_coeff;
3837                 }
3838             }
3839         }
3840
3841         run=0;
3842         rle_index=0;
3843         run2= run_tab[rle_index++];
3844         prev_level=0;
3845         prev_run=0;
3846
3847         for(i=start_i; i<64; i++){
3848             int j= perm_scantable[i];
3849             const int level= block[j];
3850             int change, old_coeff;
3851
3852             if(s->quantizer_noise_shaping < 3 && i > last_non_zero + 1)
3853                 break;
3854
3855             if(level){
3856                 if(level<0) old_coeff= qmul*level - qadd;
3857                 else        old_coeff= qmul*level + qadd;
3858                 run2= run_tab[rle_index++]; //FIXME ! maybe after last
3859             }else{
3860                 old_coeff=0;
3861                 run2--;
3862                 assert(run2>=0 || i >= last_non_zero );
3863             }
3864
3865             for(change=-1; change<=1; change+=2){
3866                 int new_level= level + change;
3867                 int score, new_coeff, unquant_change;
3868
3869                 score=0;
3870                 if(s->quantizer_noise_shaping < 2 && FFABS(new_level) > FFABS(level))
3871                    continue;
3872
3873                 if(new_level){
3874                     if(new_level<0) new_coeff= qmul*new_level - qadd;
3875                     else            new_coeff= qmul*new_level + qadd;
3876                     if(new_coeff >= 2048 || new_coeff <= -2048)
3877                         continue;
3878                     //FIXME check for overflow
3879
3880                     if(level){
3881                         if(level < 63 && level > -63){
3882                             if(i < last_non_zero)
3883                                 score +=   length[UNI_AC_ENC_INDEX(run, new_level+64)]
3884                                          - length[UNI_AC_ENC_INDEX(run, level+64)];
3885                             else
3886                                 score +=   last_length[UNI_AC_ENC_INDEX(run, new_level+64)]
3887                                          - last_length[UNI_AC_ENC_INDEX(run, level+64)];
3888                         }
3889                     }else{
3890                         assert(FFABS(new_level)==1);
3891
3892                         if(analyze_gradient){
3893                             int g= d1[ scantable[i] ];
3894                             if(g && (g^new_level) >= 0)
3895                                 continue;
3896                         }
3897
3898                         if(i < last_non_zero){
3899                             int next_i= i + run2 + 1;
3900                             int next_level= block[ perm_scantable[next_i] ] + 64;
3901
3902                             if(next_level&(~127))
3903                                 next_level= 0;
3904
3905                             if(next_i < last_non_zero)
3906                                 score +=   length[UNI_AC_ENC_INDEX(run, 65)]
3907                                          + length[UNI_AC_ENC_INDEX(run2, next_level)]
3908                                          - length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
3909                             else
3910                                 score +=  length[UNI_AC_ENC_INDEX(run, 65)]
3911                                         + last_length[UNI_AC_ENC_INDEX(run2, next_level)]
3912                                         - last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
3913                         }else{
3914                             score += last_length[UNI_AC_ENC_INDEX(run, 65)];
3915                             if(prev_level){
3916                                 score +=  length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
3917                                         - last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
3918                             }
3919                         }
3920                     }
3921                 }else{
3922                     new_coeff=0;
3923                     assert(FFABS(level)==1);
3924
3925                     if(i < last_non_zero){
3926                         int next_i= i + run2 + 1;
3927                         int next_level= block[ perm_scantable[next_i] ] + 64;
3928
3929                         if(next_level&(~127))
3930                             next_level= 0;
3931
3932                         if(next_i < last_non_zero)
3933                             score +=   length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
3934                                      - length[UNI_AC_ENC_INDEX(run2, next_level)]
3935                                      - length[UNI_AC_ENC_INDEX(run, 65)];
3936                         else
3937                             score +=   last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
3938                                      - last_length[UNI_AC_ENC_INDEX(run2, next_level)]
3939                                      - length[UNI_AC_ENC_INDEX(run, 65)];
3940                     }else{
3941                         score += -last_length[UNI_AC_ENC_INDEX(run, 65)];
3942                         if(prev_level){
3943                             score +=  last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
3944                                     - length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
3945                         }
3946                     }
3947                 }
3948
3949                 score *= lambda;
3950
3951                 unquant_change= new_coeff - old_coeff;
3952                 assert((score < 100*lambda && score > -100*lambda) || lambda==0);
3953
3954                 score+= s->dsp.try_8x8basis(rem, weight, basis[j], unquant_change);
3955                 if(score<best_score){
3956                     best_score= score;
3957                     best_coeff= i;
3958                     best_change= change;
3959                     best_unquant_change= unquant_change;
3960                 }
3961             }
3962             if(level){
3963                 prev_level= level + 64;
3964                 if(prev_level&(~127))
3965                     prev_level= 0;
3966                 prev_run= run;
3967                 run=0;
3968             }else{
3969                 run++;
3970             }
3971         }
3972 #ifdef REFINE_STATS
3973 STOP_TIMER("iterative step")}
3974 #endif
3975
3976         if(best_change){
3977             int j= perm_scantable[ best_coeff ];
3978
3979             block[j] += best_change;
3980
3981             if(best_coeff > last_non_zero){
3982                 last_non_zero= best_coeff;
3983                 assert(block[j]);
3984 #ifdef REFINE_STATS
3985 after_last++;
3986 #endif
3987             }else{
3988 #ifdef REFINE_STATS
3989 if(block[j]){
3990     if(block[j] - best_change){
3991         if(FFABS(block[j]) > FFABS(block[j] - best_change)){
3992             raise++;
3993         }else{
3994             lower++;
3995         }
3996     }else{
3997         from_zero++;
3998     }
3999 }else{
4000     to_zero++;
4001 }
4002 #endif
4003                 for(; last_non_zero>=start_i; last_non_zero--){
4004                     if(block[perm_scantable[last_non_zero]])
4005                         break;
4006                 }
4007             }
4008 #ifdef REFINE_STATS
4009 count++;
4010 if(256*256*256*64 % count == 0){
4011     printf("after_last:%d to_zero:%d from_zero:%d raise:%d lower:%d sign:%d xyp:%d/%d/%d\n", after_last, to_zero, from_zero, raise, lower, messed_sign, s->mb_x, s->mb_y, s->picture_number);
4012 }
4013 #endif
4014             run=0;
4015             rle_index=0;
4016             for(i=start_i; i<=last_non_zero; i++){
4017                 int j= perm_scantable[i];
4018                 const int level= block[j];
4019
4020                  if(level){
4021                      run_tab[rle_index++]=run;
4022                      run=0;
4023                  }else{
4024                      run++;
4025                  }
4026             }
4027
4028             s->dsp.add_8x8basis(rem, basis[j], best_unquant_change);
4029         }else{
4030             break;
4031         }
4032     }
4033 #ifdef REFINE_STATS
4034 if(last_non_zero>0){
4035 STOP_TIMER("iterative search")
4036 }
4037 }
4038 #endif
4039
4040     return last_non_zero;
4041 }
4042
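     /* Plain (non-trellis) quantizer.  The comparison
      *     (unsigned)(level + threshold1) > threshold2
      * with threshold2 == 2*threshold1 checks |level| > threshold1 in a single
      * test, i.e. whether the coefficient survives the dead zone implied by the
      * quantization bias. */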
4043 int ff_dct_quantize_c(MpegEncContext *s,
4044                         int16_t *block, int n,
4045                         int qscale, int *overflow)
4046 {
4047     int i, j, level, last_non_zero, q, start_i;
4048     const int *qmat;
4049     const uint8_t *scantable= s->intra_scantable.scantable;
4050     int bias;
4051     int max=0;
4052     unsigned int threshold1, threshold2;
4053
4054     s->dsp.fdct (block);
4055
4056     if(s->dct_error_sum)
4057         s->denoise_dct(s, block);
4058
4059     if (s->mb_intra) {
4060         if (!s->h263_aic) {
4061             if (n < 4)
4062                 q = s->y_dc_scale;
4063             else
4064                 q = s->c_dc_scale;
4065             q = q << 3;
4066         } else
4067             /* For AIC we skip quant/dequant of INTRADC */
4068             q = 1 << 3;
4069
4070         /* note: block[0] is assumed to be positive */
4071         block[0] = (block[0] + (q >> 1)) / q;
4072         start_i = 1;
4073         last_non_zero = 0;
4074         qmat = s->q_intra_matrix[qscale];
4075         bias= s->intra_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4076     } else {
4077         start_i = 0;
4078         last_non_zero = -1;
4079         qmat = s->q_inter_matrix[qscale];
4080         bias= s->inter_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4081     }
4082     threshold1= (1<<QMAT_SHIFT) - bias - 1;
4083     threshold2= (threshold1<<1);
4084     for(i=63;i>=start_i;i--) {
4085         j = scantable[i];
4086         level = block[j] * qmat[j];
4087
4088         if(((unsigned)(level+threshold1))>threshold2){
4089             last_non_zero = i;
4090             break;
4091         }else{
4092             block[j]=0;
4093         }
4094     }
4095     for(i=start_i; i<=last_non_zero; i++) {
4096         j = scantable[i];
4097         level = block[j] * qmat[j];
4098
4099 //        if(   bias+level >= (1<<QMAT_SHIFT)
4100 //           || bias-level >= (1<<QMAT_SHIFT)){
4101         if(((unsigned)(level+threshold1))>threshold2){
4102             if(level>0){
4103                 level= (bias + level)>>QMAT_SHIFT;
4104                 block[j]= level;
4105             }else{
4106                 level= (bias - level)>>QMAT_SHIFT;
4107                 block[j]= -level;
4108             }
4109             max |=level;
4110         }else{
4111             block[j]=0;
4112         }
4113     }
4114     *overflow= s->max_qcoeff < max; //overflow might have happened
4115
4116     /* we need this permutation so that the IDCT input order is correct; only the non-zero elements are permuted */
4117     if (s->dsp.idct_permutation_type != FF_NO_IDCT_PERM)
4118         ff_block_permute(block, s->dsp.idct_permutation, scantable, last_non_zero);
4119
4120     return last_non_zero;
4121 }
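/*
 * Illustrative sketch (not compiled) of the per-coefficient quantization done
 * in ff_dct_quantize_c() above. It assumes 'coeff' is one transformed
 * coefficient, 'qmat_j' the matching entry of the qscale-selected matrix, and
 * 'bias' already scaled to QMAT_SHIFT as set up above; the helper name
 * quantize_one_coeff() is hypothetical. The single unsigned comparison is the
 * dead-zone test |coeff * qmat_j| > threshold1.
 */
#if 0
static int quantize_one_coeff(int coeff, int qmat_j, int bias)
{
    int level           = coeff * qmat_j;
    unsigned threshold1 = (1 << QMAT_SHIFT) - bias - 1;   /* QMAT_SHIFT from mpegvideo.h */
    unsigned threshold2 = threshold1 << 1;

    if ((unsigned)(level + threshold1) <= threshold2)
        return 0;                                         /* inside the dead zone */
    if (level > 0)
        return   (bias + level) >> QMAT_SHIFT;            /* positive branch above */
    else
        return -((bias - level) >> QMAT_SHIFT);           /* negative branch above */
}
#endif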
4122
4123 #define OFFSET(x) offsetof(MpegEncContext, x)
4124 #define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
4125 static const AVOption h263_options[] = {
4126     { "obmc",         "Use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4127     { "structured_slices","Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4128     { "mb_info",      "Emit macroblock info for RFC 2190 packetization; the parameter value is the maximum payload size", OFFSET(mb_info), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VE },
4129     FF_MPV_COMMON_OPTS
4130     { NULL },
4131 };
4132
4133 static const AVClass h263_class = {
4134     .class_name = "H.263 encoder",
4135     .item_name  = av_default_item_name,
4136     .option     = h263_options,
4137     .version    = LIBAVUTIL_VERSION_INT,
4138 };
4139
4140 AVCodec ff_h263_encoder = {
4141     .name           = "h263",
4142     .type           = AVMEDIA_TYPE_VIDEO,
4143     .id             = AV_CODEC_ID_H263,
4144     .priv_data_size = sizeof(MpegEncContext),
4145     .init           = ff_MPV_encode_init,
4146     .encode2        = ff_MPV_encode_picture,
4147     .close          = ff_MPV_encode_end,
4148     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4149     .long_name      = NULL_IF_CONFIG_SMALL("H.263 / H.263-1996"),
4150     .priv_class     = &h263_class,
4151 };
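/*
 * Illustrative sketch (not compiled) of how the "h263" encoder declared above
 * and its private options (h263_options) are used through the public API: the
 * OFFSET()/VE entries make options such as "obmc" settable by name, e.g. via
 * an AVDictionary passed to avcodec_open2(). Values and error handling are
 * kept minimal.
 */
#if 0
#include "libavcodec/avcodec.h"
#include "libavutil/dict.h"

static AVCodecContext *open_h263_cif_encoder(void)
{
    AVCodec        *codec = avcodec_find_encoder_by_name("h263");
    AVCodecContext *avctx = avcodec_alloc_context3(codec);
    AVDictionary   *opts  = NULL;

    avctx->width     = 352;                    /* CIF, a size plain H.263 supports */
    avctx->height    = 288;
    avctx->pix_fmt   = AV_PIX_FMT_YUV420P;     /* only entry in .pix_fmts above */
    avctx->time_base = (AVRational){ 1, 25 };
    avctx->bit_rate  = 400000;

    av_dict_set(&opts, "obmc", "1", 0);        /* private option from h263_options */

    if (avcodec_open2(avctx, codec, &opts) < 0) {
        av_dict_free(&opts);
        return NULL;
    }
    av_dict_free(&opts);
    return avctx;
}
#endif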
4152
4153 static const AVOption h263p_options[] = {
4154     { "umv",        "Use unlimited motion vectors.",    OFFSET(umvplus), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4155     { "aiv",        "Use alternative inter VLC.",       OFFSET(alt_inter_vlc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4156     { "obmc",       "Use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4157     { "structured_slices", "Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4158     FF_MPV_COMMON_OPTS
4159     { NULL },
4160 };
4161 static const AVClass h263p_class = {
4162     .class_name = "H.263p encoder",
4163     .item_name  = av_default_item_name,
4164     .option     = h263p_options,
4165     .version    = LIBAVUTIL_VERSION_INT,
4166 };
4167
4168 AVCodec ff_h263p_encoder = {
4169     .name           = "h263p",
4170     .type           = AVMEDIA_TYPE_VIDEO,
4171     .id             = AV_CODEC_ID_H263P,
4172     .priv_data_size = sizeof(MpegEncContext),
4173     .init           = ff_MPV_encode_init,
4174     .encode2        = ff_MPV_encode_picture,
4175     .close          = ff_MPV_encode_end,
4176     .capabilities   = CODEC_CAP_SLICE_THREADS,
4177     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4178     .long_name      = NULL_IF_CONFIG_SMALL("H.263+ / H.263-1998 / H.263 version 2"),
4179     .priv_class     = &h263p_class,
4180 };
4181
4182 FF_MPV_GENERIC_CLASS(msmpeg4v2)
4183
4184 AVCodec ff_msmpeg4v2_encoder = {
4185     .name           = "msmpeg4v2",
4186     .type           = AVMEDIA_TYPE_VIDEO,
4187     .id             = AV_CODEC_ID_MSMPEG4V2,
4188     .priv_data_size = sizeof(MpegEncContext),
4189     .init           = ff_MPV_encode_init,
4190     .encode2        = ff_MPV_encode_picture,
4191     .close          = ff_MPV_encode_end,
4192     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4193     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 2"),
4194     .priv_class     = &msmpeg4v2_class,
4195 };
4196
4197 FF_MPV_GENERIC_CLASS(msmpeg4v3)
4198
4199 AVCodec ff_msmpeg4v3_encoder = {
4200     .name           = "msmpeg4",
4201     .type           = AVMEDIA_TYPE_VIDEO,
4202     .id             = AV_CODEC_ID_MSMPEG4V3,
4203     .priv_data_size = sizeof(MpegEncContext),
4204     .init           = ff_MPV_encode_init,
4205     .encode2        = ff_MPV_encode_picture,
4206     .close          = ff_MPV_encode_end,
4207     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4208     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 3"),
4209     .priv_class     = &msmpeg4v3_class,
4210 };
4211
4212 FF_MPV_GENERIC_CLASS(wmv1)
4213
4214 AVCodec ff_wmv1_encoder = {
4215     .name           = "wmv1",
4216     .type           = AVMEDIA_TYPE_VIDEO,
4217     .id             = AV_CODEC_ID_WMV1,
4218     .priv_data_size = sizeof(MpegEncContext),
4219     .init           = ff_MPV_encode_init,
4220     .encode2        = ff_MPV_encode_picture,
4221     .close          = ff_MPV_encode_end,
4222     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4223     .long_name      = NULL_IF_CONFIG_SMALL("Windows Media Video 7"),
4224     .priv_class     = &wmv1_class,
4225 };