git.sesse.net Git - ffmpeg/blob - libavcodec/mpegvideo_enc.c
avcodec: Drop long-deprecated imgconvert.h header
[ffmpeg] / libavcodec / mpegvideo_enc.c
1 /*
2  * The simplest mpeg encoder (well, it was the simplest!)
3  * Copyright (c) 2000,2001 Fabrice Bellard
4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5  *
6  * 4MV & hq & B-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
7  *
8  * This file is part of Libav.
9  *
10  * Libav is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * Libav is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with Libav; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23  */
24
25 /**
26  * @file
27  * The simplest mpeg encoder (well, it was the simplest!).
28  */
29
30 #include "libavutil/intmath.h"
31 #include "libavutil/mathematics.h"
32 #include "libavutil/opt.h"
33 #include "avcodec.h"
34 #include "dsputil.h"
35 #include "mpegvideo.h"
36 #include "h263.h"
37 #include "mjpegenc.h"
38 #include "msmpeg4.h"
39 #include "faandct.h"
40 #include "thread.h"
41 #include "aandcttab.h"
42 #include "flv.h"
43 #include "mpeg4video.h"
44 #include "internal.h"
45 #include "bytestream.h"
46 #include <limits.h>
47
48 //#undef NDEBUG
49 //#include <assert.h>
50
51 static int encode_picture(MpegEncContext *s, int picture_number);
52 static int dct_quantize_refine(MpegEncContext *s, DCTELEM *block, int16_t *weight, DCTELEM *orig, int n, int qscale);
53 static int sse_mb(MpegEncContext *s);
54 static void denoise_dct_c(MpegEncContext *s, DCTELEM *block);
55 static int dct_quantize_trellis_c(MpegEncContext *s, DCTELEM *block, int n, int qscale, int *overflow);
56
57 /* enable all paranoid tests for rounding, overflows, etc... */
58 //#define PARANOID
59
60 //#define DEBUG
61
62 static uint8_t default_mv_penalty[MAX_FCODE + 1][MAX_MV * 2 + 1];
63 static uint8_t default_fcode_tab[MAX_MV * 2 + 1];
64
65 const AVOption ff_mpv_generic_options[] = {
66     FF_MPV_COMMON_OPTS
67     { NULL },
68 };
69
70 void ff_convert_matrix(DSPContext *dsp, int (*qmat)[64],
71                        uint16_t (*qmat16)[2][64],
72                        const uint16_t *quant_matrix,
73                        int bias, int qmin, int qmax, int intra)
74 {
75     int qscale;
76     int shift = 0;
77
78     for (qscale = qmin; qscale <= qmax; qscale++) {
79         int i;
80         if (dsp->fdct == ff_jpeg_fdct_islow_8 ||
81             dsp->fdct == ff_jpeg_fdct_islow_10 ||
82             dsp->fdct == ff_faandct) {
83             for (i = 0; i < 64; i++) {
84                 const int j = dsp->idct_permutation[i];
85                 /* 16 <= qscale * quant_matrix[i] <= 7905
86                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
87                  *             19952 <=              x  <= 249205026
88                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
89                  *           3444240 >= (1 << 36) / (x) >= 275 */
90
91                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
92                                         (qscale * quant_matrix[j]));
93             }
94         } else if (dsp->fdct == ff_fdct_ifast) {
95             for (i = 0; i < 64; i++) {
96                 const int j = dsp->idct_permutation[i];
97                 /* 16 <= qscale * quant_matrix[i] <= 7905
98                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
99                  *             19952 <=              x  <= 249205026
100                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
101                  *           3444240 >= (1 << 36) / (x) >= 275 */
102
103                 qmat[qscale][i] = (int)((UINT64_C(1) << (QMAT_SHIFT + 14)) /
104                                         (ff_aanscales[i] * qscale *
105                                          quant_matrix[j]));
106             }
107         } else {
108             for (i = 0; i < 64; i++) {
109                 const int j = dsp->idct_permutation[i];
110                 /* We can safely suppose that 16 <= quant_matrix[i] <= 255
111                  * Assume x = qscale * quant_matrix[i]
112                  * So             16 <=              x  <= 7905
113                  * so (1 << 19) / 16 >= (1 << 19) / (x) >= (1 << 19) / 7905
114                  * so          32768 >= (1 << 19) / (x) >= 67 */
115                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
116                                         (qscale * quant_matrix[j]));
117                 //qmat  [qscale][i] = (1 << QMAT_SHIFT_MMX) /
118                 //                    (qscale * quant_matrix[i]);
119                 qmat16[qscale][0][i] = (1 << QMAT_SHIFT_MMX) /
120                                        (qscale * quant_matrix[j]);
121
122                 if (qmat16[qscale][0][i] == 0 ||
123                     qmat16[qscale][0][i] == 128 * 256)
124                     qmat16[qscale][0][i] = 128 * 256 - 1;
125                 qmat16[qscale][1][i] =
126                     ROUNDED_DIV(bias << (16 - QUANT_BIAS_SHIFT),
127                                 qmat16[qscale][0][i]);
128             }
129         }
130
131         for (i = intra; i < 64; i++) {
132             int64_t max = 8191;
133             if (dsp->fdct == ff_fdct_ifast) {
134                 max = (8191LL * ff_aanscales[i]) >> 14;
135             }
136             while (((max * qmat[qscale][i]) >> shift) > INT_MAX) {
137                 shift++;
138             }
139         }
140     }
141     if (shift) {
142         av_log(NULL, AV_LOG_INFO,
143                "Warning, QMAT_SHIFT is larger than %d, overflows possible\n",
144                QMAT_SHIFT - shift);
145     }
146 }
147
148 static inline void update_qscale(MpegEncContext *s)
149 {
150     s->qscale = (s->lambda * 139 + FF_LAMBDA_SCALE * 64) >>
151                 (FF_LAMBDA_SHIFT + 7);
152     s->qscale = av_clip(s->qscale, s->avctx->qmin, s->avctx->qmax);
153
154     s->lambda2 = (s->lambda * s->lambda + FF_LAMBDA_SCALE / 2) >>
155                  FF_LAMBDA_SHIFT;
156 }
157
158 void ff_write_quant_matrix(PutBitContext *pb, uint16_t *matrix)
159 {
160     int i;
161
162     if (matrix) {
163         put_bits(pb, 1, 1);
164         for (i = 0; i < 64; i++) {
165             put_bits(pb, 8, matrix[ff_zigzag_direct[i]]);
166         }
167     } else
168         put_bits(pb, 1, 0);
169 }
170
171 /**
172  * init s->current_picture.qscale_table from s->lambda_table
173  */
174 void ff_init_qscale_tab(MpegEncContext *s)
175 {
176     int8_t * const qscale_table = s->current_picture.f.qscale_table;
177     int i;
178
179     for (i = 0; i < s->mb_num; i++) {
180         unsigned int lam = s->lambda_table[s->mb_index2xy[i]];
181         int qp = (lam * 139 + FF_LAMBDA_SCALE * 64) >> (FF_LAMBDA_SHIFT + 7);
182         qscale_table[s->mb_index2xy[i]] = av_clip(qp, s->avctx->qmin,
183                                                   s->avctx->qmax);
184     }
185 }
186
187 static void copy_picture_attributes(MpegEncContext *s,
188                                     AVFrame *dst,
189                                     AVFrame *src)
190 {
191     int i;
192
193     dst->pict_type              = src->pict_type;
194     dst->quality                = src->quality;
195     dst->coded_picture_number   = src->coded_picture_number;
196     dst->display_picture_number = src->display_picture_number;
197     //dst->reference              = src->reference;
198     dst->pts                    = src->pts;
199     dst->interlaced_frame       = src->interlaced_frame;
200     dst->top_field_first        = src->top_field_first;
201
202     if (s->avctx->me_threshold) {
203         if (!src->motion_val[0])
204             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.motion_val not set!\n");
205         if (!src->mb_type)
206             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.mb_type not set!\n");
207         if (!src->ref_index[0])
208             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.ref_index not set!\n");
209         if (src->motion_subsample_log2 != dst->motion_subsample_log2)
210             av_log(s->avctx, AV_LOG_ERROR,
211                    "AVFrame.motion_subsample_log2 doesn't match! (%d!=%d)\n",
212                    src->motion_subsample_log2, dst->motion_subsample_log2);
213
214         memcpy(dst->mb_type, src->mb_type,
215                s->mb_stride * s->mb_height * sizeof(dst->mb_type[0]));
216
217         for (i = 0; i < 2; i++) {
218             int stride = ((16 * s->mb_width ) >>
219                           src->motion_subsample_log2) + 1;
220             int height = ((16 * s->mb_height) >> src->motion_subsample_log2);
221
222             if (src->motion_val[i] &&
223                 src->motion_val[i] != dst->motion_val[i]) {
224                 memcpy(dst->motion_val[i], src->motion_val[i],
225                        2 * stride * height * sizeof(int16_t));
226             }
227             if (src->ref_index[i] && src->ref_index[i] != dst->ref_index[i]) {
228                 memcpy(dst->ref_index[i], src->ref_index[i],
229                        s->mb_stride * 4 * s->mb_height * sizeof(int8_t));
230             }
231         }
232     }
233 }
234
235 static void update_duplicate_context_after_me(MpegEncContext *dst,
236                                               MpegEncContext *src)
237 {
238 #define COPY(a) dst->a= src->a
239     COPY(pict_type);
240     COPY(current_picture);
241     COPY(f_code);
242     COPY(b_code);
243     COPY(qscale);
244     COPY(lambda);
245     COPY(lambda2);
246     COPY(picture_in_gop_number);
247     COPY(gop_picture_number);
248     COPY(frame_pred_frame_dct); // FIXME don't set in encode_header
249     COPY(progressive_frame);    // FIXME don't set in encode_header
250     COPY(partitioned_frame);    // FIXME don't set in encode_header
251 #undef COPY
252 }
253
254 /**
255  * Set the given MpegEncContext to defaults for encoding.
256  * the changed fields will not depend upon the prior state of the MpegEncContext.
257  */
258 static void MPV_encode_defaults(MpegEncContext *s)
259 {
260     int i;
261     ff_MPV_common_defaults(s);
262
263     for (i = -16; i < 16; i++) {
264         default_fcode_tab[i + MAX_MV] = 1;
265     }
266     s->me.mv_penalty = default_mv_penalty;
267     s->fcode_tab     = default_fcode_tab;
268 }
269
270 /* init video encoder */
271 av_cold int ff_MPV_encode_init(AVCodecContext *avctx)
272 {
273     MpegEncContext *s = avctx->priv_data;
274     int i;
275     int chroma_h_shift, chroma_v_shift;
276
277     MPV_encode_defaults(s);
278
279     switch (avctx->codec_id) {
280     case AV_CODEC_ID_MPEG2VIDEO:
281         if (avctx->pix_fmt != PIX_FMT_YUV420P &&
282             avctx->pix_fmt != PIX_FMT_YUV422P) {
283             av_log(avctx, AV_LOG_ERROR,
284                    "only YUV420 and YUV422 are supported\n");
285             return -1;
286         }
287         break;
288     case AV_CODEC_ID_LJPEG:
289         if (avctx->pix_fmt != PIX_FMT_YUVJ420P &&
290             avctx->pix_fmt != PIX_FMT_YUVJ422P &&
291             avctx->pix_fmt != PIX_FMT_YUVJ444P &&
292             avctx->pix_fmt != PIX_FMT_BGRA     &&
293             ((avctx->pix_fmt != PIX_FMT_YUV420P &&
294               avctx->pix_fmt != PIX_FMT_YUV422P &&
295               avctx->pix_fmt != PIX_FMT_YUV444P) ||
296              avctx->strict_std_compliance > FF_COMPLIANCE_UNOFFICIAL)) {
297             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in LJPEG\n");
298             return -1;
299         }
300         break;
301     case AV_CODEC_ID_MJPEG:
302         if (avctx->pix_fmt != PIX_FMT_YUVJ420P &&
303             avctx->pix_fmt != PIX_FMT_YUVJ422P &&
304             ((avctx->pix_fmt != PIX_FMT_YUV420P &&
305               avctx->pix_fmt != PIX_FMT_YUV422P) ||
306              avctx->strict_std_compliance > FF_COMPLIANCE_UNOFFICIAL)) {
307             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in jpeg\n");
308             return -1;
309         }
310         break;
311     default:
312         if (avctx->pix_fmt != PIX_FMT_YUV420P) {
313             av_log(avctx, AV_LOG_ERROR, "only YUV420 is supported\n");
314             return -1;
315         }
316     }
317
318     switch (avctx->pix_fmt) {
319     case PIX_FMT_YUVJ422P:
320     case PIX_FMT_YUV422P:
321         s->chroma_format = CHROMA_422;
322         break;
323     case PIX_FMT_YUVJ420P:
324     case PIX_FMT_YUV420P:
325     default:
326         s->chroma_format = CHROMA_420;
327         break;
328     }
329
330     s->bit_rate = avctx->bit_rate;
331     s->width    = avctx->width;
332     s->height   = avctx->height;
333     if (avctx->gop_size > 600 &&
334         avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL) {
335         av_log(avctx, AV_LOG_ERROR,
336                "Warning keyframe interval too large! reducing it ...\n");
337         avctx->gop_size = 600;
338     }
339     s->gop_size     = avctx->gop_size;
340     s->avctx        = avctx;
341     s->flags        = avctx->flags;
342     s->flags2       = avctx->flags2;
343     s->max_b_frames = avctx->max_b_frames;
344     s->codec_id     = avctx->codec->id;
345 #if FF_API_MPV_GLOBAL_OPTS
346     if (avctx->luma_elim_threshold)
347         s->luma_elim_threshold   = avctx->luma_elim_threshold;
348     if (avctx->chroma_elim_threshold)
349         s->chroma_elim_threshold = avctx->chroma_elim_threshold;
350 #endif
351     s->strict_std_compliance = avctx->strict_std_compliance;
352     s->quarter_sample     = (avctx->flags & CODEC_FLAG_QPEL) != 0;
353     s->mpeg_quant         = avctx->mpeg_quant;
354     s->rtp_mode           = !!avctx->rtp_payload_size;
355     s->intra_dc_precision = avctx->intra_dc_precision;
356     s->user_specified_pts = AV_NOPTS_VALUE;
357
358     if (s->gop_size <= 1) {
359         s->intra_only = 1;
360         s->gop_size   = 12;
361     } else {
362         s->intra_only = 0;
363     }
364
365     s->me_method = avctx->me_method;
366
367     /* Fixed QSCALE */
368     s->fixed_qscale = !!(avctx->flags & CODEC_FLAG_QSCALE);
369
370 #if FF_API_MPV_GLOBAL_OPTS
371     if (s->flags & CODEC_FLAG_QP_RD)
372         s->mpv_flags |= FF_MPV_FLAG_QP_RD;
373 #endif
374
375     s->adaptive_quant = (s->avctx->lumi_masking ||
376                          s->avctx->dark_masking ||
377                          s->avctx->temporal_cplx_masking ||
378                          s->avctx->spatial_cplx_masking  ||
379                          s->avctx->p_masking      ||
380                          s->avctx->border_masking ||
381                          (s->mpv_flags & FF_MPV_FLAG_QP_RD)) &&
382                         !s->fixed_qscale;
383
384     s->loop_filter      = !!(s->flags & CODEC_FLAG_LOOP_FILTER);
385
386     if (avctx->rc_max_rate && !avctx->rc_buffer_size) {
387         av_log(avctx, AV_LOG_ERROR,
388                "a vbv buffer size is needed, "
389                "for encoding with a maximum bitrate\n");
390         return -1;
391     }
392
393     if (avctx->rc_min_rate && avctx->rc_max_rate != avctx->rc_min_rate) {
394         av_log(avctx, AV_LOG_INFO,
395                "Warning min_rate > 0 but min_rate != max_rate isn't recommended!\n");
396     }
397
398     if (avctx->rc_min_rate && avctx->rc_min_rate > avctx->bit_rate) {
399         av_log(avctx, AV_LOG_ERROR, "bitrate below min bitrate\n");
400         return -1;
401     }
402
403     if (avctx->rc_max_rate && avctx->rc_max_rate < avctx->bit_rate) {
404         av_log(avctx, AV_LOG_INFO, "bitrate above max bitrate\n");
405         return -1;
406     }
407
408     if (avctx->rc_max_rate &&
409         avctx->rc_max_rate == avctx->bit_rate &&
410         avctx->rc_max_rate != avctx->rc_min_rate) {
411         av_log(avctx, AV_LOG_INFO,
412                "impossible bitrate constraints, this will fail\n");
413     }
414
415     if (avctx->rc_buffer_size &&
416         avctx->bit_rate * (int64_t)avctx->time_base.num >
417             avctx->rc_buffer_size * (int64_t)avctx->time_base.den) {
418         av_log(avctx, AV_LOG_ERROR, "VBV buffer too small for bitrate\n");
419         return -1;
420     }
421
422     if (!s->fixed_qscale &&
423         avctx->bit_rate * av_q2d(avctx->time_base) >
424             avctx->bit_rate_tolerance) {
425         av_log(avctx, AV_LOG_ERROR,
426                "bitrate tolerance too small for bitrate\n");
427         return -1;
428     }
429
430     if (s->avctx->rc_max_rate &&
431         s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
432         (s->codec_id == AV_CODEC_ID_MPEG1VIDEO ||
433          s->codec_id == AV_CODEC_ID_MPEG2VIDEO) &&
434         90000LL * (avctx->rc_buffer_size - 1) >
435             s->avctx->rc_max_rate * 0xFFFFLL) {
436         av_log(avctx, AV_LOG_INFO,
437                "Warning vbv_delay will be set to 0xFFFF (=VBR) as the "
438                "specified vbv buffer is too large for the given bitrate!\n");
439     }
440
441     if ((s->flags & CODEC_FLAG_4MV)  && s->codec_id != AV_CODEC_ID_MPEG4 &&
442         s->codec_id != AV_CODEC_ID_H263 && s->codec_id != AV_CODEC_ID_H263P &&
443         s->codec_id != AV_CODEC_ID_FLV1) {
444         av_log(avctx, AV_LOG_ERROR, "4MV not supported by codec\n");
445         return -1;
446     }
447
448     if (s->obmc && s->avctx->mb_decision != FF_MB_DECISION_SIMPLE) {
449         av_log(avctx, AV_LOG_ERROR,
450                "OBMC is only supported with simple mb decision\n");
451         return -1;
452     }
453
454     if (s->quarter_sample && s->codec_id != AV_CODEC_ID_MPEG4) {
455         av_log(avctx, AV_LOG_ERROR, "qpel not supported by codec\n");
456         return -1;
457     }
458
459     if (s->max_b_frames                    &&
460         s->codec_id != AV_CODEC_ID_MPEG4      &&
461         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
462         s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
463         av_log(avctx, AV_LOG_ERROR, "b frames not supported by codec\n");
464         return -1;
465     }
466
467     if ((s->codec_id == AV_CODEC_ID_MPEG4 ||
468          s->codec_id == AV_CODEC_ID_H263  ||
469          s->codec_id == AV_CODEC_ID_H263P) &&
470         (avctx->sample_aspect_ratio.num > 255 ||
471          avctx->sample_aspect_ratio.den > 255)) {
472         av_log(avctx, AV_LOG_ERROR,
473                "Invalid pixel aspect ratio %i/%i, limit is 255/255\n",
474                avctx->sample_aspect_ratio.num, avctx->sample_aspect_ratio.den);
475         return -1;
476     }
477
478     if ((s->flags & (CODEC_FLAG_INTERLACED_DCT | CODEC_FLAG_INTERLACED_ME)) &&
479         s->codec_id != AV_CODEC_ID_MPEG4 && s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
480         av_log(avctx, AV_LOG_ERROR, "interlacing not supported by codec\n");
481         return -1;
482     }
483
484     // FIXME mpeg2 uses that too
485     if (s->mpeg_quant && s->codec_id != AV_CODEC_ID_MPEG4) {
486         av_log(avctx, AV_LOG_ERROR,
487                "mpeg2 style quantization not supported by codec\n");
488         return -1;
489     }
490
491 #if FF_API_MPV_GLOBAL_OPTS
492     if (s->flags & CODEC_FLAG_CBP_RD)
493         s->mpv_flags |= FF_MPV_FLAG_CBP_RD;
494 #endif
495
496     if ((s->mpv_flags & FF_MPV_FLAG_CBP_RD) && !avctx->trellis) {
497         av_log(avctx, AV_LOG_ERROR, "CBP RD needs trellis quant\n");
498         return -1;
499     }
500
501     if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) &&
502         s->avctx->mb_decision != FF_MB_DECISION_RD) {
503         av_log(avctx, AV_LOG_ERROR, "QP RD needs mbd=2\n");
504         return -1;
505     }
506
507     if (s->avctx->scenechange_threshold < 1000000000 &&
508         (s->flags & CODEC_FLAG_CLOSED_GOP)) {
509         av_log(avctx, AV_LOG_ERROR,
510                "closed gop with scene change detection are not supported yet, "
511                "set threshold to 1000000000\n");
512         return -1;
513     }
514
515     if (s->flags & CODEC_FLAG_LOW_DELAY) {
516         if (s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
517             av_log(avctx, AV_LOG_ERROR,
518                   "low delay forcing is only available for mpeg2\n");
519             return -1;
520         }
521         if (s->max_b_frames != 0) {
522             av_log(avctx, AV_LOG_ERROR,
523                    "b frames cannot be used with low delay\n");
524             return -1;
525         }
526     }
527
528     if (s->q_scale_type == 1) {
529         if (avctx->qmax > 12) {
530             av_log(avctx, AV_LOG_ERROR,
531                    "non linear quant only supports qmax <= 12 currently\n");
532             return -1;
533         }
534     }
535
536     if (s->avctx->thread_count > 1         &&
537         s->codec_id != AV_CODEC_ID_MPEG4      &&
538         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
539         s->codec_id != AV_CODEC_ID_MPEG2VIDEO &&
540         (s->codec_id != AV_CODEC_ID_H263P)) {
541         av_log(avctx, AV_LOG_ERROR,
542                "multi threaded encoding not supported by codec\n");
543         return -1;
544     }
545
546     if (s->avctx->thread_count < 1) {
547         av_log(avctx, AV_LOG_ERROR,
548                "automatic thread number detection not supported by codec,"
549                "patch welcome\n");
550         return -1;
551     }
552
553     if (s->avctx->thread_count > 1)
554         s->rtp_mode = 1;
555
556     if (!avctx->time_base.den || !avctx->time_base.num) {
557         av_log(avctx, AV_LOG_ERROR, "framerate not set\n");
558         return -1;
559     }
560
561     i = (INT_MAX / 2 + 128) >> 8;
562     if (avctx->me_threshold >= i) {
563         av_log(avctx, AV_LOG_ERROR, "me_threshold too large, max is %d\n",
564                i - 1);
565         return -1;
566     }
567     if (avctx->mb_threshold >= i) {
568         av_log(avctx, AV_LOG_ERROR, "mb_threshold too large, max is %d\n",
569                i - 1);
570         return -1;
571     }
572
573     if (avctx->b_frame_strategy && (avctx->flags & CODEC_FLAG_PASS2)) {
574         av_log(avctx, AV_LOG_INFO,
575                "notice: b_frame_strategy only affects the first pass\n");
576         avctx->b_frame_strategy = 0;
577     }
578
579     i = av_gcd(avctx->time_base.den, avctx->time_base.num);
580     if (i > 1) {
581         av_log(avctx, AV_LOG_INFO, "removing common factors from framerate\n");
582         avctx->time_base.den /= i;
583         avctx->time_base.num /= i;
584         //return -1;
585     }
586
587     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG1VIDEO ||
588         s->codec_id == AV_CODEC_ID_MPEG2VIDEO || s->codec_id == AV_CODEC_ID_MJPEG) {
589         // (a + x * 3 / 8) / x
590         s->intra_quant_bias = 3 << (QUANT_BIAS_SHIFT - 3);
591         s->inter_quant_bias = 0;
592     } else {
593         s->intra_quant_bias = 0;
594         // (a - x / 4) / x
595         s->inter_quant_bias = -(1 << (QUANT_BIAS_SHIFT - 2));
596     }
597
598     if (avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
599         s->intra_quant_bias = avctx->intra_quant_bias;
600     if (avctx->inter_quant_bias != FF_DEFAULT_QUANT_BIAS)
601         s->inter_quant_bias = avctx->inter_quant_bias;
602
603     avcodec_get_chroma_sub_sample(avctx->pix_fmt, &chroma_h_shift,
604                                   &chroma_v_shift);
605
606     if (avctx->codec_id == AV_CODEC_ID_MPEG4 &&
607         s->avctx->time_base.den > (1 << 16) - 1) {
608         av_log(avctx, AV_LOG_ERROR,
609                "timebase %d/%d not supported by MPEG 4 standard, "
610                "the maximum admitted value for the timebase denominator "
611                "is %d\n", s->avctx->time_base.num, s->avctx->time_base.den,
612                (1 << 16) - 1);
613         return -1;
614     }
615     s->time_increment_bits = av_log2(s->avctx->time_base.den - 1) + 1;
616
617 #if FF_API_MPV_GLOBAL_OPTS
618     if (avctx->flags2 & CODEC_FLAG2_SKIP_RD)
619         s->mpv_flags |= FF_MPV_FLAG_SKIP_RD;
620     if (avctx->flags2 & CODEC_FLAG2_STRICT_GOP)
621         s->mpv_flags |= FF_MPV_FLAG_STRICT_GOP;
622     if (avctx->quantizer_noise_shaping)
623         s->quantizer_noise_shaping = avctx->quantizer_noise_shaping;
624 #endif
625
626     switch (avctx->codec->id) {
627     case AV_CODEC_ID_MPEG1VIDEO:
628         s->out_format = FMT_MPEG1;
629         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
630         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
631         break;
632     case AV_CODEC_ID_MPEG2VIDEO:
633         s->out_format = FMT_MPEG1;
634         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
635         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
636         s->rtp_mode   = 1;
637         break;
638     case AV_CODEC_ID_LJPEG:
639     case AV_CODEC_ID_MJPEG:
640         s->out_format = FMT_MJPEG;
641         s->intra_only = 1; /* force intra only for jpeg */
642         if (avctx->codec->id == AV_CODEC_ID_LJPEG &&
643             avctx->pix_fmt   == PIX_FMT_BGRA) {
644             s->mjpeg_vsample[0] = s->mjpeg_hsample[0] =
645             s->mjpeg_vsample[1] = s->mjpeg_hsample[1] =
646             s->mjpeg_vsample[2] = s->mjpeg_hsample[2] = 1;
647         } else {
648             s->mjpeg_vsample[0] = 2;
649             s->mjpeg_vsample[1] = 2 >> chroma_v_shift;
650             s->mjpeg_vsample[2] = 2 >> chroma_v_shift;
651             s->mjpeg_hsample[0] = 2;
652             s->mjpeg_hsample[1] = 2 >> chroma_h_shift;
653             s->mjpeg_hsample[2] = 2 >> chroma_h_shift;
654         }
655         if (!(CONFIG_MJPEG_ENCODER || CONFIG_LJPEG_ENCODER) ||
656             ff_mjpeg_encode_init(s) < 0)
657             return -1;
658         avctx->delay = 0;
659         s->low_delay = 1;
660         break;
661     case AV_CODEC_ID_H261:
662         if (!CONFIG_H261_ENCODER)
663             return -1;
664         if (ff_h261_get_picture_format(s->width, s->height) < 0) {
665             av_log(avctx, AV_LOG_ERROR,
666                    "The specified picture size of %dx%d is not valid for the "
667                    "H.261 codec.\nValid sizes are 176x144, 352x288\n",
668                     s->width, s->height);
669             return -1;
670         }
671         s->out_format = FMT_H261;
672         avctx->delay  = 0;
673         s->low_delay  = 1;
674         break;
675     case AV_CODEC_ID_H263:
676         if (!CONFIG_H263_ENCODER)
677         return -1;
678         if (ff_match_2uint16(ff_h263_format, FF_ARRAY_ELEMS(ff_h263_format),
679                              s->width, s->height) == 8) {
680             av_log(avctx, AV_LOG_INFO,
681                    "The specified picture size of %dx%d is not valid for "
682                    "the H.263 codec.\nValid sizes are 128x96, 176x144, "
683                    "352x288, 704x576, and 1408x1152."
684                    "Try H.263+.\n", s->width, s->height);
685             return -1;
686         }
687         s->out_format = FMT_H263;
688         avctx->delay  = 0;
689         s->low_delay  = 1;
690         break;
691     case AV_CODEC_ID_H263P:
692         s->out_format = FMT_H263;
693         s->h263_plus  = 1;
694         /* Fx */
695         s->h263_aic        = (avctx->flags & CODEC_FLAG_AC_PRED) ? 1 : 0;
696         s->modified_quant  = s->h263_aic;
697         s->loop_filter     = (avctx->flags & CODEC_FLAG_LOOP_FILTER) ? 1 : 0;
698         s->unrestricted_mv = s->obmc || s->loop_filter || s->umvplus;
699
700         /* /Fx */
701         /* These are just to be sure */
702         avctx->delay = 0;
703         s->low_delay = 1;
704         break;
705     case AV_CODEC_ID_FLV1:
706         s->out_format      = FMT_H263;
707         s->h263_flv        = 2; /* format = 1; 11-bit codes */
708         s->unrestricted_mv = 1;
709         s->rtp_mode  = 0; /* don't allow GOB */
710         avctx->delay = 0;
711         s->low_delay = 1;
712         break;
713     case AV_CODEC_ID_RV10:
714         s->out_format = FMT_H263;
715         avctx->delay  = 0;
716         s->low_delay  = 1;
717         break;
718     case AV_CODEC_ID_RV20:
719         s->out_format      = FMT_H263;
720         avctx->delay       = 0;
721         s->low_delay       = 1;
722         s->modified_quant  = 1;
723         s->h263_aic        = 1;
724         s->h263_plus       = 1;
725         s->loop_filter     = 1;
726         s->unrestricted_mv = 0;
727         break;
728     case AV_CODEC_ID_MPEG4:
729         s->out_format      = FMT_H263;
730         s->h263_pred       = 1;
731         s->unrestricted_mv = 1;
732         s->low_delay       = s->max_b_frames ? 0 : 1;
733         avctx->delay       = s->low_delay ? 0 : (s->max_b_frames + 1);
734         break;
735     case AV_CODEC_ID_MSMPEG4V2:
736         s->out_format      = FMT_H263;
737         s->h263_pred       = 1;
738         s->unrestricted_mv = 1;
739         s->msmpeg4_version = 2;
740         avctx->delay       = 0;
741         s->low_delay       = 1;
742         break;
743     case AV_CODEC_ID_MSMPEG4V3:
744         s->out_format        = FMT_H263;
745         s->h263_pred         = 1;
746         s->unrestricted_mv   = 1;
747         s->msmpeg4_version   = 3;
748         s->flipflop_rounding = 1;
749         avctx->delay         = 0;
750         s->low_delay         = 1;
751         break;
752     case AV_CODEC_ID_WMV1:
753         s->out_format        = FMT_H263;
754         s->h263_pred         = 1;
755         s->unrestricted_mv   = 1;
756         s->msmpeg4_version   = 4;
757         s->flipflop_rounding = 1;
758         avctx->delay         = 0;
759         s->low_delay         = 1;
760         break;
761     case AV_CODEC_ID_WMV2:
762         s->out_format        = FMT_H263;
763         s->h263_pred         = 1;
764         s->unrestricted_mv   = 1;
765         s->msmpeg4_version   = 5;
766         s->flipflop_rounding = 1;
767         avctx->delay         = 0;
768         s->low_delay         = 1;
769         break;
770     default:
771         return -1;
772     }
773
774     avctx->has_b_frames = !s->low_delay;
775
776     s->encoding = 1;
777
778     s->progressive_frame    =
779     s->progressive_sequence = !(avctx->flags & (CODEC_FLAG_INTERLACED_DCT |
780                                                 CODEC_FLAG_INTERLACED_ME) ||
781                                 s->alternate_scan);
782
783     /* init */
784     if (ff_MPV_common_init(s) < 0)
785         return -1;
786
787     if (ARCH_X86)
788         ff_MPV_encode_init_x86(s);
789
790     if (!s->dct_quantize)
791         s->dct_quantize = ff_dct_quantize_c;
792     if (!s->denoise_dct)
793         s->denoise_dct  = denoise_dct_c;
794     s->fast_dct_quantize = s->dct_quantize;
795     if (avctx->trellis)
796         s->dct_quantize  = dct_quantize_trellis_c;
797
798     if ((CONFIG_H263P_ENCODER || CONFIG_RV20_ENCODER) && s->modified_quant)
799         s->chroma_qscale_table = ff_h263_chroma_qscale_table;
800
801     s->quant_precision = 5;
802
803     ff_set_cmp(&s->dsp, s->dsp.ildct_cmp, s->avctx->ildct_cmp);
804     ff_set_cmp(&s->dsp, s->dsp.frame_skip_cmp, s->avctx->frame_skip_cmp);
805
806     if (CONFIG_H261_ENCODER && s->out_format == FMT_H261)
807         ff_h261_encode_init(s);
808     if (CONFIG_H263_ENCODER && s->out_format == FMT_H263)
809         ff_h263_encode_init(s);
810     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
811         ff_msmpeg4_encode_init(s);
812     if ((CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
813         && s->out_format == FMT_MPEG1)
814         ff_mpeg1_encode_init(s);
815
816     /* init q matrix */
817     for (i = 0; i < 64; i++) {
818         int j = s->dsp.idct_permutation[i];
819         if (CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4 &&
820             s->mpeg_quant) {
821             s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
822             s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
823         } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
824             s->intra_matrix[j] =
825             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
826         } else {
827             /* mpeg1/2 */
828             s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
829             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
830         }
831         if (s->avctx->intra_matrix)
832             s->intra_matrix[j] = s->avctx->intra_matrix[i];
833         if (s->avctx->inter_matrix)
834             s->inter_matrix[j] = s->avctx->inter_matrix[i];
835     }
836
837     /* precompute matrix */
838     /* for mjpeg, we do include qscale in the matrix */
839     if (s->out_format != FMT_MJPEG) {
840         ff_convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
841                           s->intra_matrix, s->intra_quant_bias, avctx->qmin,
842                           31, 1);
843         ff_convert_matrix(&s->dsp, s->q_inter_matrix, s->q_inter_matrix16,
844                           s->inter_matrix, s->inter_quant_bias, avctx->qmin,
845                           31, 0);
846     }
847
848     if (ff_rate_control_init(s) < 0)
849         return -1;
850
851     return 0;
852 }
853
854 av_cold int ff_MPV_encode_end(AVCodecContext *avctx)
855 {
856     MpegEncContext *s = avctx->priv_data;
857
858     ff_rate_control_uninit(s);
859
860     ff_MPV_common_end(s);
861     if ((CONFIG_MJPEG_ENCODER || CONFIG_LJPEG_ENCODER) &&
862         s->out_format == FMT_MJPEG)
863         ff_mjpeg_encode_close(s);
864
865     av_freep(&avctx->extradata);
866
867     return 0;
868 }
869
/**
 * Sum of absolute differences between every pixel of a 16x16 block and a
 * single reference value.
 *
 * @param src    top-left pixel of the block
 * @param ref    value each pixel is compared against
 * @param stride distance in bytes between two consecutive rows of src
 * @return the accumulated absolute error over the 256 pixels
 */
static int get_sae(uint8_t *src, int ref, int stride)
{
    int total = 0;
    int row, col;

    for (row = 0; row < 16; row++) {
        const uint8_t *line = src + row * stride;

        for (col = 0; col < 16; col++) {
            const int diff = line[col] - ref;

            total += diff >= 0 ? diff : -diff;
        }
    }

    return total;
}
883
884 static int get_intra_count(MpegEncContext *s, uint8_t *src,
885                            uint8_t *ref, int stride)
886 {
887     int x, y, w, h;
888     int acc = 0;
889
890     w = s->width  & ~15;
891     h = s->height & ~15;
892
893     for (y = 0; y < h; y += 16) {
894         for (x = 0; x < w; x += 16) {
895             int offset = x + y * stride;
896             int sad  = s->dsp.sad[0](NULL, src + offset, ref + offset, stride,
897                                      16);
898             int mean = (s->dsp.pix_sum(src + offset, stride) + 128) >> 8;
899             int sae  = get_sae(src + offset, mean, stride);
900
901             acc += sae + 500 < sad;
902         }
903     }
904     return acc;
905 }
906
907
908 static int load_input_picture(MpegEncContext *s, AVFrame *pic_arg)
909 {
910     AVFrame *pic = NULL;
911     int64_t pts;
912     int i;
913     const int encoding_delay = s->max_b_frames ? s->max_b_frames :
914                                                  (s->low_delay ? 0 : 1);
915     int direct = 1;
916
917     if (pic_arg) {
918         pts = pic_arg->pts;
919         pic_arg->display_picture_number = s->input_picture_number++;
920
921         if (pts != AV_NOPTS_VALUE) {
922             if (s->user_specified_pts != AV_NOPTS_VALUE) {
923                 int64_t time = pts;
924                 int64_t last = s->user_specified_pts;
925
926                 if (time <= last) {
927                     av_log(s->avctx, AV_LOG_ERROR,
928                            "Error, Invalid timestamp=%"PRId64", "
929                            "last=%"PRId64"\n", pts, s->user_specified_pts);
930                     return -1;
931                 }
932
933                 if (!s->low_delay && pic_arg->display_picture_number == 1)
934                     s->dts_delta = time - last;
935             }
936             s->user_specified_pts = pts;
937         } else {
938             if (s->user_specified_pts != AV_NOPTS_VALUE) {
939                 s->user_specified_pts =
940                 pts = s->user_specified_pts + 1;
941                 av_log(s->avctx, AV_LOG_INFO,
942                        "Warning: AVFrame.pts=? trying to guess (%"PRId64")\n",
943                        pts);
944             } else {
945                 pts = pic_arg->display_picture_number;
946             }
947         }
948     }
949
950   if (pic_arg) {
951     if (encoding_delay && !(s->flags & CODEC_FLAG_INPUT_PRESERVED))
952         direct = 0;
953     if (pic_arg->linesize[0] != s->linesize)
954         direct = 0;
955     if (pic_arg->linesize[1] != s->uvlinesize)
956         direct = 0;
957     if (pic_arg->linesize[2] != s->uvlinesize)
958         direct = 0;
959
960     //av_log(AV_LOG_DEBUG, "%d %d %d %d\n",pic_arg->linesize[0],
961     //       pic_arg->linesize[1], s->linesize, s->uvlinesize);
962
963     if (direct) {
964         i = ff_find_unused_picture(s, 1);
965         if (i < 0)
966             return i;
967
968         pic = &s->picture[i].f;
969         pic->reference = 3;
970
971         for (i = 0; i < 4; i++) {
972             pic->data[i]     = pic_arg->data[i];
973             pic->linesize[i] = pic_arg->linesize[i];
974         }
975         if (ff_alloc_picture(s, (Picture *) pic, 1) < 0) {
976             return -1;
977         }
978     } else {
979         i = ff_find_unused_picture(s, 0);
980         if (i < 0)
981             return i;
982
983         pic = &s->picture[i].f;
984         pic->reference = 3;
985
986         if (ff_alloc_picture(s, (Picture *) pic, 0) < 0) {
987             return -1;
988         }
989
990         if (pic->data[0] + INPLACE_OFFSET == pic_arg->data[0] &&
991             pic->data[1] + INPLACE_OFFSET == pic_arg->data[1] &&
992             pic->data[2] + INPLACE_OFFSET == pic_arg->data[2]) {
993             // empty
994         } else {
995             int h_chroma_shift, v_chroma_shift;
996             avcodec_get_chroma_sub_sample(s->avctx->pix_fmt, &h_chroma_shift,
997                                           &v_chroma_shift);
998
999             for (i = 0; i < 3; i++) {
1000                 int src_stride = pic_arg->linesize[i];
1001                 int dst_stride = i ? s->uvlinesize : s->linesize;
1002                 int h_shift = i ? h_chroma_shift : 0;
1003                 int v_shift = i ? v_chroma_shift : 0;
1004                 int w = s->width  >> h_shift;
1005                 int h = s->height >> v_shift;
1006                 uint8_t *src = pic_arg->data[i];
1007                 uint8_t *dst = pic->data[i];
1008
1009                 if (!s->avctx->rc_buffer_size)
1010                     dst += INPLACE_OFFSET;
1011
1012                 if (src_stride == dst_stride)
1013                     memcpy(dst, src, src_stride * h);
1014                 else {
1015                     while (h--) {
1016                         memcpy(dst, src, w);
1017                         dst += dst_stride;
1018                         src += src_stride;
1019                     }
1020                 }
1021             }
1022         }
1023     }
1024     copy_picture_attributes(s, pic, pic_arg);
1025     pic->pts = pts; // we set this here to avoid modifiying pic_arg
1026   }
1027
1028     /* shift buffer entries */
1029     for (i = 1; i < MAX_PICTURE_COUNT /*s->encoding_delay + 1*/; i++)
1030         s->input_picture[i - 1] = s->input_picture[i];
1031
1032     s->input_picture[encoding_delay] = (Picture*) pic;
1033
1034     return 0;
1035 }
1036
1037 static int skip_check(MpegEncContext *s, Picture *p, Picture *ref)
1038 {
1039     int x, y, plane;
1040     int score = 0;
1041     int64_t score64 = 0;
1042
1043     for (plane = 0; plane < 3; plane++) {
1044         const int stride = p->f.linesize[plane];
1045         const int bw = plane ? 1 : 2;
1046         for (y = 0; y < s->mb_height * bw; y++) {
1047             for (x = 0; x < s->mb_width * bw; x++) {
1048                 int off = p->f.type == FF_BUFFER_TYPE_SHARED ? 0 : 16;
1049                 uint8_t *dptr = p->f.data[plane] + 8 * (x + y * stride) + off;
1050                 uint8_t *rptr = ref->f.data[plane] + 8 * (x + y * stride);
1051                 int v   = s->dsp.frame_skip_cmp[1](s, dptr, rptr, stride, 8);
1052
1053                 switch (s->avctx->frame_skip_exp) {
1054                 case 0: score    =  FFMAX(score, v);          break;
1055                 case 1: score   += FFABS(v);                  break;
1056                 case 2: score   += v * v;                     break;
1057                 case 3: score64 += FFABS(v * v * (int64_t)v); break;
1058                 case 4: score64 += v * v * (int64_t)(v * v);  break;
1059                 }
1060             }
1061         }
1062     }
1063
1064     if (score)
1065         score64 = score;
1066
1067     if (score64 < s->avctx->frame_skip_threshold)
1068         return 1;
1069     if (score64 < ((s->avctx->frame_skip_factor * (int64_t)s->lambda) >> 8))
1070         return 1;
1071     return 0;
1072 }
1073
1074 static int encode_frame(AVCodecContext *c, AVFrame *frame)
1075 {
1076     AVPacket pkt = { 0 };
1077     int ret, got_output;
1078
1079     av_init_packet(&pkt);
1080     ret = avcodec_encode_video2(c, &pkt, frame, &got_output);
1081     if (ret < 0)
1082         return ret;
1083
1084     ret = pkt.size;
1085     av_free_packet(&pkt);
1086     return ret;
1087 }
1088
1089 static int estimate_best_b_count(MpegEncContext *s)
1090 {
1091     AVCodec *codec    = avcodec_find_encoder(s->avctx->codec_id);
1092     AVCodecContext *c = avcodec_alloc_context3(NULL);
1093     AVFrame input[FF_MAX_B_FRAMES + 2];
1094     const int scale = s->avctx->brd_scale;
1095     int i, j, out_size, p_lambda, b_lambda, lambda2;
1096     int64_t best_rd  = INT64_MAX;
1097     int best_b_count = -1;
1098
1099     assert(scale >= 0 && scale <= 3);
1100
1101     //emms_c();
1102     //s->next_picture_ptr->quality;
1103     p_lambda = s->last_lambda_for[AV_PICTURE_TYPE_P];
1104     //p_lambda * FFABS(s->avctx->b_quant_factor) + s->avctx->b_quant_offset;
1105     b_lambda = s->last_lambda_for[AV_PICTURE_TYPE_B];
1106     if (!b_lambda) // FIXME we should do this somewhere else
1107         b_lambda = p_lambda;
1108     lambda2  = (b_lambda * b_lambda + (1 << FF_LAMBDA_SHIFT) / 2) >>
1109                FF_LAMBDA_SHIFT;
1110
1111     c->width        = s->width  >> scale;
1112     c->height       = s->height >> scale;
1113     c->flags        = CODEC_FLAG_QSCALE | CODEC_FLAG_PSNR |
1114                       CODEC_FLAG_INPUT_PRESERVED /*| CODEC_FLAG_EMU_EDGE*/;
1115     c->flags       |= s->avctx->flags & CODEC_FLAG_QPEL;
1116     c->mb_decision  = s->avctx->mb_decision;
1117     c->me_cmp       = s->avctx->me_cmp;
1118     c->mb_cmp       = s->avctx->mb_cmp;
1119     c->me_sub_cmp   = s->avctx->me_sub_cmp;
1120     c->pix_fmt      = PIX_FMT_YUV420P;
1121     c->time_base    = s->avctx->time_base;
1122     c->max_b_frames = s->max_b_frames;
1123
1124     if (avcodec_open2(c, codec, NULL) < 0)
1125         return -1;
1126
1127     for (i = 0; i < s->max_b_frames + 2; i++) {
1128         int ysize = c->width * c->height;
1129         int csize = (c->width / 2) * (c->height / 2);
1130         Picture pre_input, *pre_input_ptr = i ? s->input_picture[i - 1] :
1131                                                 s->next_picture_ptr;
1132
1133         avcodec_get_frame_defaults(&input[i]);
1134         input[i].data[0]     = av_malloc(ysize + 2 * csize);
1135         input[i].data[1]     = input[i].data[0] + ysize;
1136         input[i].data[2]     = input[i].data[1] + csize;
1137         input[i].linesize[0] = c->width;
1138         input[i].linesize[1] =
1139         input[i].linesize[2] = c->width / 2;
1140
1141         if (pre_input_ptr && (!i || s->input_picture[i - 1])) {
1142             pre_input = *pre_input_ptr;
1143
1144             if (pre_input.f.type != FF_BUFFER_TYPE_SHARED && i) {
1145                 pre_input.f.data[0] += INPLACE_OFFSET;
1146                 pre_input.f.data[1] += INPLACE_OFFSET;
1147                 pre_input.f.data[2] += INPLACE_OFFSET;
1148             }
1149
1150             s->dsp.shrink[scale](input[i].data[0], input[i].linesize[0],
1151                                  pre_input.f.data[0], pre_input.f.linesize[0],
1152                                  c->width,      c->height);
1153             s->dsp.shrink[scale](input[i].data[1], input[i].linesize[1],
1154                                  pre_input.f.data[1], pre_input.f.linesize[1],
1155                                  c->width >> 1, c->height >> 1);
1156             s->dsp.shrink[scale](input[i].data[2], input[i].linesize[2],
1157                                  pre_input.f.data[2], pre_input.f.linesize[2],
1158                                  c->width >> 1, c->height >> 1);
1159         }
1160     }
1161
1162     for (j = 0; j < s->max_b_frames + 1; j++) {
1163         int64_t rd = 0;
1164
1165         if (!s->input_picture[j])
1166             break;
1167
1168         c->error[0] = c->error[1] = c->error[2] = 0;
1169
1170         input[0].pict_type = AV_PICTURE_TYPE_I;
1171         input[0].quality   = 1 * FF_QP2LAMBDA;
1172
1173         out_size = encode_frame(c, &input[0]);
1174
1175         //rd += (out_size * lambda2) >> FF_LAMBDA_SHIFT;
1176
1177         for (i = 0; i < s->max_b_frames + 1; i++) {
1178             int is_p = i % (j + 1) == j || i == s->max_b_frames;
1179
1180             input[i + 1].pict_type = is_p ?
1181                                      AV_PICTURE_TYPE_P : AV_PICTURE_TYPE_B;
1182             input[i + 1].quality   = is_p ? p_lambda : b_lambda;
1183
1184             out_size = encode_frame(c, &input[i + 1]);
1185
1186             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1187         }
1188
1189         /* get the delayed frames */
1190         while (out_size) {
1191             out_size = encode_frame(c, NULL);
1192             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1193         }
1194
1195         rd += c->error[0] + c->error[1] + c->error[2];
1196
1197         if (rd < best_rd) {
1198             best_rd = rd;
1199             best_b_count = j;
1200         }
1201     }
1202
1203     avcodec_close(c);
1204     av_freep(&c);
1205
1206     for (i = 0; i < s->max_b_frames + 2; i++) {
1207         av_freep(&input[i].data[0]);
1208     }
1209
1210     return best_b_count;
1211 }
1212
1213 static int select_input_picture(MpegEncContext *s)
1214 {
1215     int i;
1216
1217     for (i = 1; i < MAX_PICTURE_COUNT; i++)
1218         s->reordered_input_picture[i - 1] = s->reordered_input_picture[i];
1219     s->reordered_input_picture[MAX_PICTURE_COUNT - 1] = NULL;
1220
1221     /* set next picture type & ordering */
1222     if (s->reordered_input_picture[0] == NULL && s->input_picture[0]) {
1223         if (/*s->picture_in_gop_number >= s->gop_size ||*/
1224             s->next_picture_ptr == NULL || s->intra_only) {
1225             s->reordered_input_picture[0] = s->input_picture[0];
1226             s->reordered_input_picture[0]->f.pict_type = AV_PICTURE_TYPE_I;
1227             s->reordered_input_picture[0]->f.coded_picture_number =
1228                 s->coded_picture_number++;
1229         } else {
1230             int b_frames;
1231
1232             if (s->avctx->frame_skip_threshold || s->avctx->frame_skip_factor) {
1233                 if (s->picture_in_gop_number < s->gop_size &&
1234                     skip_check(s, s->input_picture[0], s->next_picture_ptr)) {
1235                     // FIXME check that te gop check above is +-1 correct
1236                     //av_log(NULL, AV_LOG_DEBUG, "skip %p %"PRId64"\n",
1237                     //       s->input_picture[0]->f.data[0],
1238                     //       s->input_picture[0]->pts);
1239
1240                     if (s->input_picture[0]->f.type == FF_BUFFER_TYPE_SHARED) {
1241                         for (i = 0; i < 4; i++)
1242                             s->input_picture[0]->f.data[i] = NULL;
1243                         s->input_picture[0]->f.type = 0;
1244                     } else {
1245                         assert(s->input_picture[0]->f.type == FF_BUFFER_TYPE_USER ||
1246                                s->input_picture[0]->f.type == FF_BUFFER_TYPE_INTERNAL);
1247
1248                         s->avctx->release_buffer(s->avctx,
1249                                                  &s->input_picture[0]->f);
1250                     }
1251
1252                     emms_c();
1253                     ff_vbv_update(s, 0);
1254
1255                     goto no_output_pic;
1256                 }
1257             }
1258
1259             if (s->flags & CODEC_FLAG_PASS2) {
1260                 for (i = 0; i < s->max_b_frames + 1; i++) {
1261                     int pict_num = s->input_picture[0]->f.display_picture_number + i;
1262
1263                     if (pict_num >= s->rc_context.num_entries)
1264                         break;
1265                     if (!s->input_picture[i]) {
1266                         s->rc_context.entry[pict_num - 1].new_pict_type = AV_PICTURE_TYPE_P;
1267                         break;
1268                     }
1269
1270                     s->input_picture[i]->f.pict_type =
1271                         s->rc_context.entry[pict_num].new_pict_type;
1272                 }
1273             }
1274
1275             if (s->avctx->b_frame_strategy == 0) {
1276                 b_frames = s->max_b_frames;
1277                 while (b_frames && !s->input_picture[b_frames])
1278                     b_frames--;
1279             } else if (s->avctx->b_frame_strategy == 1) {
1280                 for (i = 1; i < s->max_b_frames + 1; i++) {
1281                     if (s->input_picture[i] &&
1282                         s->input_picture[i]->b_frame_score == 0) {
1283                         s->input_picture[i]->b_frame_score =
1284                             get_intra_count(s,
1285                                             s->input_picture[i    ]->f.data[0],
1286                                             s->input_picture[i - 1]->f.data[0],
1287                                             s->linesize) + 1;
1288                     }
1289                 }
1290                 for (i = 0; i < s->max_b_frames + 1; i++) {
1291                     if (s->input_picture[i] == NULL ||
1292                         s->input_picture[i]->b_frame_score - 1 >
1293                             s->mb_num / s->avctx->b_sensitivity)
1294                         break;
1295                 }
1296
1297                 b_frames = FFMAX(0, i - 1);
1298
1299                 /* reset scores */
1300                 for (i = 0; i < b_frames + 1; i++) {
1301                     s->input_picture[i]->b_frame_score = 0;
1302                 }
1303             } else if (s->avctx->b_frame_strategy == 2) {
1304                 b_frames = estimate_best_b_count(s);
1305             } else {
1306                 av_log(s->avctx, AV_LOG_ERROR, "illegal b frame strategy\n");
1307                 b_frames = 0;
1308             }
1309
1310             emms_c();
1311             //static int b_count = 0;
1312             //b_count += b_frames;
1313             //av_log(s->avctx, AV_LOG_DEBUG, "b_frames: %d\n", b_count);
1314
1315             for (i = b_frames - 1; i >= 0; i--) {
1316                 int type = s->input_picture[i]->f.pict_type;
1317                 if (type && type != AV_PICTURE_TYPE_B)
1318                     b_frames = i;
1319             }
1320             if (s->input_picture[b_frames]->f.pict_type == AV_PICTURE_TYPE_B &&
1321                 b_frames == s->max_b_frames) {
1322                 av_log(s->avctx, AV_LOG_ERROR,
1323                        "warning, too many b frames in a row\n");
1324             }
1325
1326             if (s->picture_in_gop_number + b_frames >= s->gop_size) {
1327                 if ((s->mpv_flags & FF_MPV_FLAG_STRICT_GOP) &&
1328                     s->gop_size > s->picture_in_gop_number) {
1329                     b_frames = s->gop_size - s->picture_in_gop_number - 1;
1330                 } else {
1331                     if (s->flags & CODEC_FLAG_CLOSED_GOP)
1332                         b_frames = 0;
1333                     s->input_picture[b_frames]->f.pict_type = AV_PICTURE_TYPE_I;
1334                 }
1335             }
1336
1337             if ((s->flags & CODEC_FLAG_CLOSED_GOP) && b_frames &&
1338                 s->input_picture[b_frames]->f.pict_type == AV_PICTURE_TYPE_I)
1339                 b_frames--;
1340
1341             s->reordered_input_picture[0] = s->input_picture[b_frames];
1342             if (s->reordered_input_picture[0]->f.pict_type != AV_PICTURE_TYPE_I)
1343                 s->reordered_input_picture[0]->f.pict_type = AV_PICTURE_TYPE_P;
1344             s->reordered_input_picture[0]->f.coded_picture_number =
1345                 s->coded_picture_number++;
1346             for (i = 0; i < b_frames; i++) {
1347                 s->reordered_input_picture[i + 1] = s->input_picture[i];
1348                 s->reordered_input_picture[i + 1]->f.pict_type =
1349                     AV_PICTURE_TYPE_B;
1350                 s->reordered_input_picture[i + 1]->f.coded_picture_number =
1351                     s->coded_picture_number++;
1352             }
1353         }
1354     }
1355 no_output_pic:
1356     if (s->reordered_input_picture[0]) {
1357         s->reordered_input_picture[0]->f.reference =
1358            s->reordered_input_picture[0]->f.pict_type !=
1359                AV_PICTURE_TYPE_B ? 3 : 0;
1360
1361         ff_copy_picture(&s->new_picture, s->reordered_input_picture[0]);
1362
1363         if (s->reordered_input_picture[0]->f.type == FF_BUFFER_TYPE_SHARED ||
1364             s->avctx->rc_buffer_size) {
1365             // input is a shared pix, so we can't modifiy it -> alloc a new
1366             // one & ensure that the shared one is reuseable
1367
1368             Picture *pic;
1369             int i = ff_find_unused_picture(s, 0);
1370             if (i < 0)
1371                 return i;
1372             pic = &s->picture[i];
1373
1374             pic->f.reference = s->reordered_input_picture[0]->f.reference;
1375             if (ff_alloc_picture(s, pic, 0) < 0) {
1376                 return -1;
1377             }
1378
1379             /* mark us unused / free shared pic */
1380             if (s->reordered_input_picture[0]->f.type == FF_BUFFER_TYPE_INTERNAL)
1381                 s->avctx->release_buffer(s->avctx,
1382                                          &s->reordered_input_picture[0]->f);
1383             for (i = 0; i < 4; i++)
1384                 s->reordered_input_picture[0]->f.data[i] = NULL;
1385             s->reordered_input_picture[0]->f.type = 0;
1386
1387             copy_picture_attributes(s, &pic->f,
1388                                     &s->reordered_input_picture[0]->f);
1389
1390             s->current_picture_ptr = pic;
1391         } else {
1392             // input is not a shared pix -> reuse buffer for current_pix
1393
1394             assert(s->reordered_input_picture[0]->f.type ==
1395                        FF_BUFFER_TYPE_USER ||
1396                    s->reordered_input_picture[0]->f.type ==
1397                        FF_BUFFER_TYPE_INTERNAL);
1398
1399             s->current_picture_ptr = s->reordered_input_picture[0];
1400             for (i = 0; i < 4; i++) {
1401                 s->new_picture.f.data[i] += INPLACE_OFFSET;
1402             }
1403         }
1404         ff_copy_picture(&s->current_picture, s->current_picture_ptr);
1405
1406         s->picture_number = s->new_picture.f.display_picture_number;
1407         //printf("dpn:%d\n", s->picture_number);
1408     } else {
1409         memset(&s->new_picture, 0, sizeof(Picture));
1410     }
1411     return 0;
1412 }
1413
1414 int ff_MPV_encode_picture(AVCodecContext *avctx, AVPacket *pkt,
1415                           const AVFrame *pic_arg, int *got_packet)
1416 {
1417     MpegEncContext *s = avctx->priv_data;
1418     int i, stuffing_count, ret;
1419     int context_count = s->slice_context_count;
1420
1421     s->picture_in_gop_number++;
1422
1423     if (load_input_picture(s, pic_arg) < 0)
1424         return -1;
1425
1426     if (select_input_picture(s) < 0) {
1427         return -1;
1428     }
1429
1430     /* output? */
1431     if (s->new_picture.f.data[0]) {
1432         if (!pkt->data &&
1433             (ret = ff_alloc_packet(pkt, s->mb_width*s->mb_height*MAX_MB_BYTES)) < 0)
1434             return ret;
1435         if (s->mb_info) {
1436             s->mb_info_ptr = av_packet_new_side_data(pkt,
1437                                  AV_PKT_DATA_H263_MB_INFO,
1438                                  s->mb_width*s->mb_height*12);
1439             s->prev_mb_info = s->last_mb_info = s->mb_info_size = 0;
1440         }
1441
1442         for (i = 0; i < context_count; i++) {
1443             int start_y = s->thread_context[i]->start_mb_y;
1444             int   end_y = s->thread_context[i]->  end_mb_y;
1445             int h       = s->mb_height;
1446             uint8_t *start = pkt->data + (size_t)(((int64_t) pkt->size) * start_y / h);
1447             uint8_t *end   = pkt->data + (size_t)(((int64_t) pkt->size) *   end_y / h);
1448
1449             init_put_bits(&s->thread_context[i]->pb, start, end - start);
1450         }
1451
1452         s->pict_type = s->new_picture.f.pict_type;
1453         //emms_c();
1454         //printf("qs:%f %f %d\n", s->new_picture.quality,
1455         //       s->current_picture.quality, s->qscale);
1456         ff_MPV_frame_start(s, avctx);
1457 vbv_retry:
1458         if (encode_picture(s, s->picture_number) < 0)
1459             return -1;
1460
1461         avctx->header_bits = s->header_bits;
1462         avctx->mv_bits     = s->mv_bits;
1463         avctx->misc_bits   = s->misc_bits;
1464         avctx->i_tex_bits  = s->i_tex_bits;
1465         avctx->p_tex_bits  = s->p_tex_bits;
1466         avctx->i_count     = s->i_count;
1467         // FIXME f/b_count in avctx
1468         avctx->p_count     = s->mb_num - s->i_count - s->skip_count;
1469         avctx->skip_count  = s->skip_count;
1470
1471         ff_MPV_frame_end(s);
1472
1473         if (CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG)
1474             ff_mjpeg_encode_picture_trailer(s);
1475
1476         if (avctx->rc_buffer_size) {
1477             RateControlContext *rcc = &s->rc_context;
1478             int max_size = rcc->buffer_index * avctx->rc_max_available_vbv_use;
1479
1480             if (put_bits_count(&s->pb) > max_size &&
1481                 s->lambda < s->avctx->lmax) {
1482                 s->next_lambda = FFMAX(s->lambda + 1, s->lambda *
1483                                        (s->qscale + 1) / s->qscale);
1484                 if (s->adaptive_quant) {
1485                     int i;
1486                     for (i = 0; i < s->mb_height * s->mb_stride; i++)
1487                         s->lambda_table[i] =
1488                             FFMAX(s->lambda_table[i] + 1,
1489                                   s->lambda_table[i] * (s->qscale + 1) /
1490                                   s->qscale);
1491                 }
1492                 s->mb_skipped = 0;        // done in MPV_frame_start()
1493                 // done in encode_picture() so we must undo it
1494                 if (s->pict_type == AV_PICTURE_TYPE_P) {
1495                     if (s->flipflop_rounding          ||
1496                         s->codec_id == AV_CODEC_ID_H263P ||
1497                         s->codec_id == AV_CODEC_ID_MPEG4)
1498                         s->no_rounding ^= 1;
1499                 }
1500                 if (s->pict_type != AV_PICTURE_TYPE_B) {
1501                     s->time_base       = s->last_time_base;
1502                     s->last_non_b_time = s->time - s->pp_time;
1503                 }
1504                 //av_log(NULL, AV_LOG_ERROR, "R:%d ", s->next_lambda);
1505                 for (i = 0; i < context_count; i++) {
1506                     PutBitContext *pb = &s->thread_context[i]->pb;
1507                     init_put_bits(pb, pb->buf, pb->buf_end - pb->buf);
1508                 }
1509                 goto vbv_retry;
1510             }
1511
1512             assert(s->avctx->rc_max_rate);
1513         }
1514
1515         if (s->flags & CODEC_FLAG_PASS1)
1516             ff_write_pass1_stats(s);
1517
1518         for (i = 0; i < 4; i++) {
1519             s->current_picture_ptr->f.error[i] = s->current_picture.f.error[i];
1520             avctx->error[i] += s->current_picture_ptr->f.error[i];
1521         }
1522
1523         if (s->flags & CODEC_FLAG_PASS1)
1524             assert(avctx->header_bits + avctx->mv_bits + avctx->misc_bits +
1525                    avctx->i_tex_bits + avctx->p_tex_bits ==
1526                        put_bits_count(&s->pb));
1527         flush_put_bits(&s->pb);
1528         s->frame_bits  = put_bits_count(&s->pb);
1529
1530         stuffing_count = ff_vbv_update(s, s->frame_bits);
1531         if (stuffing_count) {
1532             if (s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb) >> 3) <
1533                     stuffing_count + 50) {
1534                 av_log(s->avctx, AV_LOG_ERROR, "stuffing too large\n");
1535                 return -1;
1536             }
1537
1538             switch (s->codec_id) {
1539             case AV_CODEC_ID_MPEG1VIDEO:
1540             case AV_CODEC_ID_MPEG2VIDEO:
1541                 while (stuffing_count--) {
1542                     put_bits(&s->pb, 8, 0);
1543                 }
1544             break;
1545             case AV_CODEC_ID_MPEG4:
1546                 put_bits(&s->pb, 16, 0);
1547                 put_bits(&s->pb, 16, 0x1C3);
1548                 stuffing_count -= 4;
1549                 while (stuffing_count--) {
1550                     put_bits(&s->pb, 8, 0xFF);
1551                 }
1552             break;
1553             default:
1554                 av_log(s->avctx, AV_LOG_ERROR, "vbv buffer overflow\n");
1555             }
1556             flush_put_bits(&s->pb);
1557             s->frame_bits  = put_bits_count(&s->pb);
1558         }
1559
1560         /* update mpeg1/2 vbv_delay for CBR */
1561         if (s->avctx->rc_max_rate                          &&
1562             s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
1563             s->out_format == FMT_MPEG1                     &&
1564             90000LL * (avctx->rc_buffer_size - 1) <=
1565                 s->avctx->rc_max_rate * 0xFFFFLL) {
1566             int vbv_delay, min_delay;
1567             double inbits  = s->avctx->rc_max_rate *
1568                              av_q2d(s->avctx->time_base);
1569             int    minbits = s->frame_bits - 8 *
1570                              (s->vbv_delay_ptr - s->pb.buf - 1);
1571             double bits    = s->rc_context.buffer_index + minbits - inbits;
1572
1573             if (bits < 0)
1574                 av_log(s->avctx, AV_LOG_ERROR,
1575                        "Internal error, negative bits\n");
1576
1577             assert(s->repeat_first_field == 0);
1578
1579             vbv_delay = bits * 90000 / s->avctx->rc_max_rate;
1580             min_delay = (minbits * 90000LL + s->avctx->rc_max_rate - 1) /
1581                         s->avctx->rc_max_rate;
1582
1583             vbv_delay = FFMAX(vbv_delay, min_delay);
1584
1585             assert(vbv_delay < 0xFFFF);
1586
1587             s->vbv_delay_ptr[0] &= 0xF8;
1588             s->vbv_delay_ptr[0] |= vbv_delay >> 13;
1589             s->vbv_delay_ptr[1]  = vbv_delay >> 5;
1590             s->vbv_delay_ptr[2] &= 0x07;
1591             s->vbv_delay_ptr[2] |= vbv_delay << 3;
1592             avctx->vbv_delay     = vbv_delay * 300;
1593         }
1594         s->total_bits     += s->frame_bits;
1595         avctx->frame_bits  = s->frame_bits;
1596
1597         pkt->pts = s->current_picture.f.pts;
1598         if (!s->low_delay) {
1599             if (!s->current_picture.f.coded_picture_number)
1600                 pkt->dts = pkt->pts - s->dts_delta;
1601             else
1602                 pkt->dts = s->reordered_pts;
1603             s->reordered_pts = s->input_picture[0]->f.pts;
1604         } else
1605             pkt->dts = pkt->pts;
1606         if (s->current_picture.f.key_frame)
1607             pkt->flags |= AV_PKT_FLAG_KEY;
1608         if (s->mb_info)
1609             av_packet_shrink_side_data(pkt, AV_PKT_DATA_H263_MB_INFO, s->mb_info_size);
1610     } else {
1611         s->frame_bits = 0;
1612     }
1613     assert((s->frame_bits & 7) == 0);
1614
1615     pkt->size = s->frame_bits / 8;
1616     *got_packet = !!pkt->size;
1617     return 0;
1618 }
1619
1620 static inline void dct_single_coeff_elimination(MpegEncContext *s,
1621                                                 int n, int threshold)
1622 {
1623     static const char tab[64] = {
1624         3, 2, 2, 1, 1, 1, 1, 1,
1625         1, 1, 1, 1, 1, 1, 1, 1,
1626         1, 1, 1, 1, 1, 1, 1, 1,
1627         0, 0, 0, 0, 0, 0, 0, 0,
1628         0, 0, 0, 0, 0, 0, 0, 0,
1629         0, 0, 0, 0, 0, 0, 0, 0,
1630         0, 0, 0, 0, 0, 0, 0, 0,
1631         0, 0, 0, 0, 0, 0, 0, 0
1632     };
1633     int score = 0;
1634     int run = 0;
1635     int i;
1636     DCTELEM *block = s->block[n];
1637     const int last_index = s->block_last_index[n];
1638     int skip_dc;
1639
1640     if (threshold < 0) {
1641         skip_dc = 0;
1642         threshold = -threshold;
1643     } else
1644         skip_dc = 1;
1645
1646     /* Are all we could set to zero already zero? */
1647     if (last_index <= skip_dc - 1)
1648         return;
1649
1650     for (i = 0; i <= last_index; i++) {
1651         const int j = s->intra_scantable.permutated[i];
1652         const int level = FFABS(block[j]);
1653         if (level == 1) {
1654             if (skip_dc && i == 0)
1655                 continue;
1656             score += tab[run];
1657             run = 0;
1658         } else if (level > 1) {
1659             return;
1660         } else {
1661             run++;
1662         }
1663     }
1664     if (score >= threshold)
1665         return;
1666     for (i = skip_dc; i <= last_index; i++) {
1667         const int j = s->intra_scantable.permutated[i];
1668         block[j] = 0;
1669     }
1670     if (block[0])
1671         s->block_last_index[n] = 0;
1672     else
1673         s->block_last_index[n] = -1;
1674 }
1675
1676 static inline void clip_coeffs(MpegEncContext *s, DCTELEM *block,
1677                                int last_index)
1678 {
1679     int i;
1680     const int maxlevel = s->max_qcoeff;
1681     const int minlevel = s->min_qcoeff;
1682     int overflow = 0;
1683
1684     if (s->mb_intra) {
1685         i = 1; // skip clipping of intra dc
1686     } else
1687         i = 0;
1688
1689     for (; i <= last_index; i++) {
1690         const int j = s->intra_scantable.permutated[i];
1691         int level = block[j];
1692
1693         if (level > maxlevel) {
1694             level = maxlevel;
1695             overflow++;
1696         } else if (level < minlevel) {
1697             level = minlevel;
1698             overflow++;
1699         }
1700
1701         block[j] = level;
1702     }
1703
1704     if (overflow && s->avctx->mb_decision == FF_MB_DECISION_SIMPLE)
1705         av_log(s->avctx, AV_LOG_INFO,
1706                "warning, clipping %d dct coefficients to %d..%d\n",
1707                overflow, minlevel, maxlevel);
1708 }
1709
/**
 * Compute a per-pixel weight for an 8x8 block from its local activity.
 *
 * For every pixel the (up to 3x3) neighbourhood, cropped to the block, is
 * summed up; the weight stored is
 *     36 * ff_sqrt(count * sum_of_squares - sum * sum) / count
 * evaluated in integer arithmetic.
 *
 * @param weight output, 64 entries in row-major order
 * @param ptr    top-left pixel of the 8x8 block
 * @param stride distance between two rows of ptr
 */
static void get_visual_weight(int16_t *weight, uint8_t *ptr, int stride)
{
    int row, col;

    // FIXME optimize
    for (row = 0; row < 8; row++) {
        const int top    = FFMAX(row - 1, 0);
        const int bottom = FFMIN(8, row + 2);

        for (col = 0; col < 8; col++) {
            const int left  = FFMAX(col - 1, 0);
            const int right = FFMIN(8, col + 2);
            int total   = 0;
            int squares = 0;
            int samples = 0;
            int nx, ny;

            for (ny = top; ny < bottom; ny++) {
                for (nx = left; nx < right; nx++) {
                    const int v = ptr[nx + ny * stride];

                    total   += v;
                    squares += v * v;
                    samples++;
                }
            }

            weight[col + 8 * row] =
                (36 * ff_sqrt(samples * squares - total * total)) / samples;
        }
    }
}
1733
/**
 * Encode one macroblock at (s->mb_x, s->mb_y).
 *
 * Steps, in order:
 *  - optionally adjust the quantizer for this macroblock,
 *  - locate the source pixels (using an edge-emulated copy when the
 *    macroblock reaches past the picture),
 *  - intra: load the source pixels into s->block[];
 *    inter: run motion compensation into s->dest[] and load the difference,
 *  - optionally choose interlaced DCT,
 *  - DCT + quantize every block that is not skipped, with optional clipping,
 *    noise-shaping refinement and single-coefficient elimination,
 *  - hand the blocks to the codec-specific macroblock writer.
 *
 * @param s               encoder context
 * @param motion_x        forwarded unchanged to the codec-specific writer
 * @param motion_y        forwarded unchanged to the codec-specific writer
 * @param mb_block_height chroma rows per macroblock, used for chroma plane
 *                        addressing (encode_mb() passes 8 for CHROMA_420,
 *                        16 otherwise)
 * @param mb_block_count  number of 8x8 blocks to process (encode_mb() passes
 *                        6 for CHROMA_420, 8 otherwise)
 */
1734 static av_always_inline void encode_mb_internal(MpegEncContext *s,
1735                                                 int motion_x, int motion_y,
1736                                                 int mb_block_height,
1737                                                 int mb_block_count)
1738 {
1739     int16_t weight[8][64];
1740     DCTELEM orig[8][64];
1741     const int mb_x = s->mb_x;
1742     const int mb_y = s->mb_y;
1743     int i;
1744     int skip_dct[8];
1745     int dct_offset = s->linesize * 8; // default for progressive frames
1746     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
1747     int wrap_y, wrap_c;
1748
         /* every block starts out with the context-wide skipdct setting */
1749     for (i = 0; i < mb_block_count; i++)
1750         skip_dct[i] = s->skipdct;
1751
         /* per-macroblock quantizer: lambda comes from lambda_table and,
          * unless QP_RD is active, qscale comes from the picture's
          * qscale_table; dquant is the step relative to the previous qscale */
1752     if (s->adaptive_quant) {
1753         const int last_qp = s->qscale;
1754         const int mb_xy = mb_x + mb_y * s->mb_stride;
1755
1756         s->lambda = s->lambda_table[mb_xy];
1757         update_qscale(s);
1758
1759         if (!(s->mpv_flags & FF_MPV_FLAG_QP_RD)) {
1760             s->qscale = s->current_picture_ptr->f.qscale_table[mb_xy];
1761             s->dquant = s->qscale - last_qp;
1762
                 /* H.263-style output: dquant is limited to [-2, 2]; for
                  * MPEG-4 inter macroblocks it is dropped entirely for odd
                  * or direct-mode B macroblocks and for 8x8 motion vectors */
1763             if (s->out_format == FMT_H263) {
1764                 s->dquant = av_clip(s->dquant, -2, 2);
1765
1766                 if (s->codec_id == AV_CODEC_ID_MPEG4) {
1767                     if (!s->mb_intra) {
1768                         if (s->pict_type == AV_PICTURE_TYPE_B) {
1769                             if (s->dquant & 1 || s->mv_dir & MV_DIRECT)
1770                                 s->dquant = 0;
1771                         }
1772                         if (s->mv_type == MV_TYPE_8X8)
1773                             s->dquant = 0;
1774                     }
1775                 }
1776             }
1777         }
1778         ff_set_qscale(s, last_qp + s->dquant);
1779     } else if (s->mpv_flags & FF_MPV_FLAG_QP_RD)
1780         ff_set_qscale(s, s->qscale + s->dquant);
1781
         /* locate this macroblock in the three planes of the source picture */
1782     wrap_y = s->linesize;
1783     wrap_c = s->uvlinesize;
1784     ptr_y  = s->new_picture.f.data[0] +
1785              (mb_y * 16 * wrap_y)              + mb_x * 16;
1786     ptr_cb = s->new_picture.f.data[1] +
1787              (mb_y * mb_block_height * wrap_c) + mb_x * 8;
1788     ptr_cr = s->new_picture.f.data[2] +
1789              (mb_y * mb_block_height * wrap_c) + mb_x * 8;
1790
         /* the macroblock reaches past the right or bottom picture border:
          * continue with edge-emulated copies placed in edge_emu_buffer.
          * NOTE(review): the chroma calls pass mb_y * 8 and s->height >> 1
          * although the chroma pointers above use mb_block_height; this
          * looks specific to 4:2:0 - confirm for 4:2:2 input. */
1791     if (mb_x * 16 + 16 > s->width || mb_y * 16 + 16 > s->height) {
1792         uint8_t *ebuf = s->edge_emu_buffer + 32;
1793         s->dsp.emulated_edge_mc(ebuf, ptr_y, wrap_y, 16, 16, mb_x * 16,
1794                                 mb_y * 16, s->width, s->height);
1795         ptr_y = ebuf;
1796         s->dsp.emulated_edge_mc(ebuf + 18 * wrap_y, ptr_cb, wrap_c, 8,
1797                                 mb_block_height, mb_x * 8, mb_y * 8,
1798                                 s->width >> 1, s->height >> 1);
1799         ptr_cb = ebuf + 18 * wrap_y;
1800         s->dsp.emulated_edge_mc(ebuf + 18 * wrap_y + 8, ptr_cr, wrap_c, 8,
1801                                 mb_block_height, mb_x * 8, mb_y * 8,
1802                                 s->width >> 1, s->height >> 1);
1803         ptr_cr = ebuf + 18 * wrap_y + 8;
1804     }
1805
         /* intra: the source pixels themselves are transformed */
1806     if (s->mb_intra) {
1807         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
1808             int progressive_score, interlaced_score;
1809
                 /* progressive cost (top and bottom half, minus a bias of
                  * 400) against the cost of the two fields, which are
                  * addressed with a doubled stride */
1810             s->interlaced_dct = 0;
1811             progressive_score = s->dsp.ildct_cmp[4](s, ptr_y,
1812                                                     NULL, wrap_y, 8) +
1813                                 s->dsp.ildct_cmp[4](s, ptr_y + wrap_y * 8,
1814                                                     NULL, wrap_y, 8) - 400;
1815
1816             if (progressive_score > 0) {
1817                 interlaced_score = s->dsp.ildct_cmp[4](s, ptr_y,
1818                                                        NULL, wrap_y * 2, 8) +
1819                                    s->dsp.ildct_cmp[4](s, ptr_y + wrap_y,
1820                                                        NULL, wrap_y * 2, 8);
1821                 if (progressive_score > interlaced_score) {
1822                     s->interlaced_dct = 1;
1823
                         /* field DCT: the lower blocks start one line down
                          * and all luma rows are read with twice the stride */
1824                     dct_offset = wrap_y;
1825                     wrap_y <<= 1;
1826                     if (s->chroma_format == CHROMA_422)
1827                         wrap_c <<= 1;
1828                 }
1829             }
1830         }
1831
1832         s->dsp.get_pixels(s->block[0], ptr_y                  , wrap_y);
1833         s->dsp.get_pixels(s->block[1], ptr_y              + 8 , wrap_y);
1834         s->dsp.get_pixels(s->block[2], ptr_y + dct_offset     , wrap_y);
1835         s->dsp.get_pixels(s->block[3], ptr_y + dct_offset + 8 , wrap_y);
1836
             /* gray-only encoding: chroma blocks 4 and 5 are not transformed */
1837         if (s->flags & CODEC_FLAG_GRAY) {
1838             skip_dct[4] = 1;
1839             skip_dct[5] = 1;
1840         } else {
1841             s->dsp.get_pixels(s->block[4], ptr_cb, wrap_c);
1842             s->dsp.get_pixels(s->block[5], ptr_cr, wrap_c);
1843             if (!s->chroma_y_shift) { /* 422 */
1844                 s->dsp.get_pixels(s->block[6],
1845                                   ptr_cb + (dct_offset >> 1), wrap_c);
1846                 s->dsp.get_pixels(s->block[7],
1847                                   ptr_cr + (dct_offset >> 1), wrap_c);
1848             }
1849         }
         /* inter: build the prediction in s->dest[] and transform the
          * difference between source and prediction */
1850     } else {
1851         op_pixels_func (*op_pix)[4];
1852         qpel_mc_func (*op_qpix)[16];
1853         uint8_t *dest_y, *dest_cb, *dest_cr;
1854
1855         dest_y  = s->dest[0];
1856         dest_cb = s->dest[1];
1857         dest_cr = s->dest[2];
1858
1859         if ((!s->no_rounding) || s->pict_type == AV_PICTURE_TYPE_B) {
1860             op_pix  = s->dsp.put_pixels_tab;
1861             op_qpix = s->dsp.put_qpel_pixels_tab;
1862         } else {
1863             op_pix  = s->dsp.put_no_rnd_pixels_tab;
1864             op_qpix = s->dsp.put_no_rnd_qpel_pixels_tab;
1865         }
1866
1867         if (s->mv_dir & MV_DIR_FORWARD) {
1868             ff_MPV_motion(s, dest_y, dest_cb, dest_cr, 0,
1869                           s->last_picture.f.data,
1870                           op_pix, op_qpix);
                 /* a backward prediction that follows uses the avg_*
                  * function tables instead of the put_* ones */
1871             op_pix  = s->dsp.avg_pixels_tab;
1872             op_qpix = s->dsp.avg_qpel_pixels_tab;
1873         }
1874         if (s->mv_dir & MV_DIR_BACKWARD) {
1875             ff_MPV_motion(s, dest_y, dest_cb, dest_cr, 1,
1876                           s->next_picture.f.data,
1877                           op_pix, op_qpix);
1878         }
1879
             /* same progressive/field decision as in the intra case, but
              * measured between prediction and source */
1880         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
1881             int progressive_score, interlaced_score;
1882
1883             s->interlaced_dct = 0;
1884             progressive_score = s->dsp.ildct_cmp[0](s, dest_y,
1885                                                     ptr_y,              wrap_y,
1886                                                     8) +
1887                                 s->dsp.ildct_cmp[0](s, dest_y + wrap_y * 8,
1888                                                     ptr_y + wrap_y * 8, wrap_y,
1889                                                     8) - 400;
1890
1891             if (s->avctx->ildct_cmp == FF_CMP_VSSE)
1892                 progressive_score -= 400;
1893
1894             if (progressive_score > 0) {
1895                 interlaced_score = s->dsp.ildct_cmp[0](s, dest_y,
1896                                                        ptr_y,
1897                                                        wrap_y * 2, 8) +
1898                                    s->dsp.ildct_cmp[0](s, dest_y + wrap_y,
1899                                                        ptr_y + wrap_y,
1900                                                        wrap_y * 2, 8);
1901
1902                 if (progressive_score > interlaced_score) {
1903                     s->interlaced_dct = 1;
1904
1905                     dct_offset = wrap_y;
1906                     wrap_y <<= 1;
1907                     if (s->chroma_format == CHROMA_422)
1908                         wrap_c <<= 1;
1909                 }
1910             }
1911         }
1912
1913         s->dsp.diff_pixels(s->block[0], ptr_y, dest_y, wrap_y);
1914         s->dsp.diff_pixels(s->block[1], ptr_y + 8, dest_y + 8, wrap_y);
1915         s->dsp.diff_pixels(s->block[2], ptr_y + dct_offset,
1916                            dest_y + dct_offset, wrap_y);
1917         s->dsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8,
1918                            dest_y + dct_offset + 8, wrap_y);
1919
1920         if (s->flags & CODEC_FLAG_GRAY) {
1921             skip_dct[4] = 1;
1922             skip_dct[5] = 1;
1923         } else {
1924             s->dsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
1925             s->dsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
1926             if (!s->chroma_y_shift) { /* 422 */
1927                 s->dsp.diff_pixels(s->block[6], ptr_cb + (dct_offset >> 1),
1928                                    dest_cb + (dct_offset >> 1), wrap_c);
1929                 s->dsp.diff_pixels(s->block[7], ptr_cr + (dct_offset >> 1),
1930                                    dest_cr + (dct_offset >> 1), wrap_c);
1931             }
1932         }
1933         /* pre quantization */
             /* when mc_mb_var of this macroblock is below 2 * qscale^2,
              * every block whose SAD against the prediction is below
              * 20 * qscale is marked as skipped and never transformed */
1934         if (s->current_picture.mc_mb_var[s->mb_stride * mb_y + mb_x] <
1935                 2 * s->qscale * s->qscale) {
1936             // FIXME optimize
1937             if (s->dsp.sad[1](NULL, ptr_y , dest_y,
1938                               wrap_y, 8) < 20 * s->qscale)
1939                 skip_dct[0] = 1;
1940             if (s->dsp.sad[1](NULL, ptr_y + 8,
1941                               dest_y + 8, wrap_y, 8) < 20 * s->qscale)
1942                 skip_dct[1] = 1;
1943             if (s->dsp.sad[1](NULL, ptr_y + dct_offset,
1944                               dest_y + dct_offset, wrap_y, 8) < 20 * s->qscale)
1945                 skip_dct[2] = 1;
1946             if (s->dsp.sad[1](NULL, ptr_y + dct_offset + 8,
1947                               dest_y + dct_offset + 8,
1948                               wrap_y, 8) < 20 * s->qscale)
1949                 skip_dct[3] = 1;
1950             if (s->dsp.sad[1](NULL, ptr_cb, dest_cb,
1951                               wrap_c, 8) < 20 * s->qscale)
1952                 skip_dct[4] = 1;
1953             if (s->dsp.sad[1](NULL, ptr_cr, dest_cr,
1954                               wrap_c, 8) < 20 * s->qscale)
1955                 skip_dct[5] = 1;
1956             if (!s->chroma_y_shift) { /* 422 */
1957                 if (s->dsp.sad[1](NULL, ptr_cb + (dct_offset >> 1),
1958                                   dest_cb + (dct_offset >> 1),
1959                                   wrap_c, 8) < 20 * s->qscale)
1960                     skip_dct[6] = 1;
1961                 if (s->dsp.sad[1](NULL, ptr_cr + (dct_offset >> 1),
1962                                   dest_cr + (dct_offset >> 1),
1963                                   wrap_c, 8) < 20 * s->qscale)
1964                     skip_dct[7] = 1;
1965             }
1966         }
1967     }
1968
         /* noise shaping: compute the weights of every block that will be
          * transformed and keep a copy of the untransformed blocks; both are
          * consumed by dct_quantize_refine() below */
1969     if (s->quantizer_noise_shaping) {
1970         if (!skip_dct[0])
1971             get_visual_weight(weight[0], ptr_y                 , wrap_y);
1972         if (!skip_dct[1])
1973             get_visual_weight(weight[1], ptr_y              + 8, wrap_y);
1974         if (!skip_dct[2])
1975             get_visual_weight(weight[2], ptr_y + dct_offset    , wrap_y);
1976         if (!skip_dct[3])
1977             get_visual_weight(weight[3], ptr_y + dct_offset + 8, wrap_y);
1978         if (!skip_dct[4])
1979             get_visual_weight(weight[4], ptr_cb                , wrap_c);
1980         if (!skip_dct[5])
1981             get_visual_weight(weight[5], ptr_cr                , wrap_c);
1982         if (!s->chroma_y_shift) { /* 422 */
1983             if (!skip_dct[6])
1984                 get_visual_weight(weight[6], ptr_cb + (dct_offset >> 1),
1985                                   wrap_c);
1986             if (!skip_dct[7])
1987                 get_visual_weight(weight[7], ptr_cr + (dct_offset >> 1),
1988                                   wrap_c);
1989         }
1990         memcpy(orig[0], s->block[0], sizeof(DCTELEM) * 64 * mb_block_count);
1991     }
1992
1993     /* DCT & quantize */
1994     assert(s->out_format != FMT_MJPEG || s->qscale == 8);
1995     {
1996         for (i = 0; i < mb_block_count; i++) {
1997             if (!skip_dct[i]) {
1998                 int overflow;
1999                 s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow);
2000                 // FIXME we could decide to change to quantizer instead of
2001                 // clipping
2002                 // JS: I don't think that would be a good idea it could lower
2003                 //     quality instead of improve it. Just INTRADC clipping
2004                 //     deserves changes in quantizer
2005                 if (overflow)
2006                     clip_coeffs(s, s->block[i], s->block_last_index[i]);
2007             } else
                     /* skipped blocks are signalled as empty */
2008                 s->block_last_index[i] = -1;
2009         }
2010         if (s->quantizer_noise_shaping) {
2011             for (i = 0; i < mb_block_count; i++) {
2012                 if (!skip_dct[i]) {
2013                     s->block_last_index[i] =
2014                         dct_quantize_refine(s, s->block[i], weight[i],
2015                                             orig[i], i, s->qscale);
2016                 }
2017             }
2018         }
2019
             /* single coefficient elimination is applied to inter
              * macroblocks only: blocks 0-3 use the luma threshold, the
              * remaining blocks the chroma threshold */
2020         if (s->luma_elim_threshold && !s->mb_intra)
2021             for (i = 0; i < 4; i++)
2022                 dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
2023         if (s->chroma_elim_threshold && !s->mb_intra)
2024             for (i = 4; i < mb_block_count; i++)
2025                 dct_single_coeff_elimination(s, i, s->chroma_elim_threshold);
2026
             /* CBP_RD: give empty blocks a fixed, very large coded_score */
2027         if (s->mpv_flags & FF_MPV_FLAG_CBP_RD) {
2028             for (i = 0; i < mb_block_count; i++) {
2029                 if (s->block_last_index[i] == -1)
2030                     s->coded_score[i] = INT_MAX / 256;
2031             }
2032         }
2033     }
2034
         /* gray-only intra macroblock: both chroma blocks become DC-only
          * with the level (1024 + c_dc_scale / 2) / c_dc_scale
          * (presumably the neutral chroma value - TODO confirm) */
2035     if ((s->flags & CODEC_FLAG_GRAY) && s->mb_intra) {
2036         s->block_last_index[4] =
2037         s->block_last_index[5] = 0;
2038         s->block[4][0] =
2039         s->block[5][0] = (1024 + s->c_dc_scale / 2) / s->c_dc_scale;
2040     }
2041
2042     // non c quantize code returns incorrect block_last_index FIXME
         // so with alternate scan the last nonzero position is searched
         // again, from the end of the scan table backwards
2043     if (s->alternate_scan && s->dct_quantize != ff_dct_quantize_c) {
2044         for (i = 0; i < mb_block_count; i++) {
2045             int j;
2046             if (s->block_last_index[i] > 0) {
2047                 for (j = 63; j > 0; j--) {
2048                     if (s->block[i][s->intra_scantable.permutated[j]])
2049                         break;
2050                 }
2051                 s->block_last_index[i] = j;
2052             }
2053         }
2054     }
2055
2056     /* huffman encode */
         /* dispatch to the macroblock writer of the active codec; each call
          * is guarded by its CONFIG_* constant */
2057     switch(s->codec_id){ //FIXME funct ptr could be slightly faster
2058     case AV_CODEC_ID_MPEG1VIDEO:
2059     case AV_CODEC_ID_MPEG2VIDEO:
2060         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
2061             ff_mpeg1_encode_mb(s, s->block, motion_x, motion_y);
2062         break;
2063     case AV_CODEC_ID_MPEG4:
2064         if (CONFIG_MPEG4_ENCODER)
2065             ff_mpeg4_encode_mb(s, s->block, motion_x, motion_y);
2066         break;
2067     case AV_CODEC_ID_MSMPEG4V2:
2068     case AV_CODEC_ID_MSMPEG4V3:
2069     case AV_CODEC_ID_WMV1:
2070         if (CONFIG_MSMPEG4_ENCODER)
2071             ff_msmpeg4_encode_mb(s, s->block, motion_x, motion_y);
2072         break;
2073     case AV_CODEC_ID_WMV2:
2074         if (CONFIG_WMV2_ENCODER)
2075             ff_wmv2_encode_mb(s, s->block, motion_x, motion_y);
2076         break;
2077     case AV_CODEC_ID_H261:
2078         if (CONFIG_H261_ENCODER)
2079             ff_h261_encode_mb(s, s->block, motion_x, motion_y);
2080         break;
2081     case AV_CODEC_ID_H263:
2082     case AV_CODEC_ID_H263P:
2083     case AV_CODEC_ID_FLV1:
2084     case AV_CODEC_ID_RV10:
2085     case AV_CODEC_ID_RV20:
2086         if (CONFIG_H263_ENCODER)
2087             ff_h263_encode_mb(s, s->block, motion_x, motion_y);
2088         break;
2089     case AV_CODEC_ID_MJPEG:
2090         if (CONFIG_MJPEG_ENCODER)
2091             ff_mjpeg_encode_mb(s, s->block);
2092         break;
2093     default:
2094         assert(0);
2095     }
2096 }
2097
2098 static av_always_inline void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
2099 {
2100     if (s->chroma_format == CHROMA_420) encode_mb_internal(s, motion_x, motion_y,  8, 6);
2101     else                                encode_mb_internal(s, motion_x, motion_y, 16, 8);
2102 }
2103
2104 static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int type){
2105     int i;
2106
2107     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2108
2109     /* mpeg1 */
2110     d->mb_skip_run= s->mb_skip_run;
2111     for(i=0; i<3; i++)
2112         d->last_dc[i] = s->last_dc[i];
2113
2114     /* statistics */
2115     d->mv_bits= s->mv_bits;
2116     d->i_tex_bits= s->i_tex_bits;
2117     d->p_tex_bits= s->p_tex_bits;
2118     d->i_count= s->i_count;
2119     d->f_count= s->f_count;
2120     d->b_count= s->b_count;
2121     d->skip_count= s->skip_count;
2122     d->misc_bits= s->misc_bits;
2123     d->last_bits= 0;
2124
2125     d->mb_skipped= 0;
2126     d->qscale= s->qscale;
2127     d->dquant= s->dquant;
2128
2129     d->esc3_level_length= s->esc3_level_length;
2130 }
2131
2132 static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){
2133     int i;
2134
2135     memcpy(d->mv, s->mv, 2*4*2*sizeof(int));
2136     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2137
2138     /* mpeg1 */
2139     d->mb_skip_run= s->mb_skip_run;
2140     for(i=0; i<3; i++)
2141         d->last_dc[i] = s->last_dc[i];
2142
2143     /* statistics */
2144     d->mv_bits= s->mv_bits;
2145     d->i_tex_bits= s->i_tex_bits;
2146     d->p_tex_bits= s->p_tex_bits;
2147     d->i_count= s->i_count;
2148     d->f_count= s->f_count;
2149     d->b_count= s->b_count;
2150     d->skip_count= s->skip_count;
2151     d->misc_bits= s->misc_bits;
2152
2153     d->mb_intra= s->mb_intra;
2154     d->mb_skipped= s->mb_skipped;
2155     d->mv_type= s->mv_type;
2156     d->mv_dir= s->mv_dir;
2157     d->pb= s->pb;
2158     if(s->data_partitioning){
2159         d->pb2= s->pb2;
2160         d->tex_pb= s->tex_pb;
2161     }
2162     d->block= s->block;
2163     for(i=0; i<8; i++)
2164         d->block_last_index[i]= s->block_last_index[i];
2165     d->interlaced_dct= s->interlaced_dct;
2166     d->qscale= s->qscale;
2167
2168     d->esc3_level_length= s->esc3_level_length;
2169 }
2170
2171 static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type,
2172                            PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2],
2173                            int *dmin, int *next_block, int motion_x, int motion_y)
2174 {
2175     int score;
2176     uint8_t *dest_backup[3];
2177
2178     copy_context_before_encode(s, backup, type);
2179
2180     s->block= s->blocks[*next_block];
2181     s->pb= pb[*next_block];
2182     if(s->data_partitioning){
2183         s->pb2   = pb2   [*next_block];
2184         s->tex_pb= tex_pb[*next_block];
2185     }
2186
2187     if(*next_block){
2188         memcpy(dest_backup, s->dest, sizeof(s->dest));
2189         s->dest[0] = s->rd_scratchpad;
2190         s->dest[1] = s->rd_scratchpad + 16*s->linesize;
2191         s->dest[2] = s->rd_scratchpad + 16*s->linesize + 8;
2192         assert(s->linesize >= 32); //FIXME
2193     }
2194
2195     encode_mb(s, motion_x, motion_y);
2196
2197     score= put_bits_count(&s->pb);
2198     if(s->data_partitioning){
2199         score+= put_bits_count(&s->pb2);
2200         score+= put_bits_count(&s->tex_pb);
2201     }
2202
2203     if(s->avctx->mb_decision == FF_MB_DECISION_RD){
2204         ff_MPV_decode_mb(s, s->block);
2205
2206         score *= s->lambda2;
2207         score += sse_mb(s) << FF_LAMBDA_SHIFT;
2208     }
2209
2210     if(*next_block){
2211         memcpy(s->dest, dest_backup, sizeof(s->dest));
2212     }
2213
2214     if(score<*dmin){
2215         *dmin= score;
2216         *next_block^=1;
2217
2218         copy_context_after_encode(best, s, type);
2219     }
2220 }
2221
2222 static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride){
2223     uint32_t *sq = ff_squareTbl + 256;
2224     int acc=0;
2225     int x,y;
2226
2227     if(w==16 && h==16)
2228         return s->dsp.sse[0](NULL, src1, src2, stride, 16);
2229     else if(w==8 && h==8)
2230         return s->dsp.sse[1](NULL, src1, src2, stride, 8);
2231
2232     for(y=0; y<h; y++){
2233         for(x=0; x<w; x++){
2234             acc+= sq[src1[x + y*stride] - src2[x + y*stride]];
2235         }
2236     }
2237
2238     assert(acc>=0);
2239
2240     return acc;
2241 }
2242
2243 static int sse_mb(MpegEncContext *s){
2244     int w= 16;
2245     int h= 16;
2246
2247     if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
2248     if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
2249
2250     if(w==16 && h==16)
2251       if(s->avctx->mb_cmp == FF_CMP_NSSE){
2252         return  s->dsp.nsse[0](s, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
2253                +s->dsp.nsse[1](s, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
2254                +s->dsp.nsse[1](s, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
2255       }else{
2256         return  s->dsp.sse[0](NULL, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
2257                +s->dsp.sse[1](NULL, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
2258                +s->dsp.sse[1](NULL, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
2259       }
2260     else
2261         return  sse(s, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], w, h, s->linesize)
2262                +sse(s, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize)
2263                +sse(s, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize);
2264 }
2265
2266 static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){
2267     MpegEncContext *s= *(void**)arg;
2268
2269
2270     s->me.pre_pass=1;
2271     s->me.dia_size= s->avctx->pre_dia_size;
2272     s->first_slice_line=1;
2273     for(s->mb_y= s->end_mb_y-1; s->mb_y >= s->start_mb_y; s->mb_y--) {
2274         for(s->mb_x=s->mb_width-1; s->mb_x >=0 ;s->mb_x--) {
2275             ff_pre_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2276         }
2277         s->first_slice_line=0;
2278     }
2279
2280     s->me.pre_pass=0;
2281
2282     return 0;
2283 }
2284
2285 static int estimate_motion_thread(AVCodecContext *c, void *arg){
2286     MpegEncContext *s= *(void**)arg;
2287
2288     ff_check_alignment();
2289
2290     s->me.dia_size= s->avctx->dia_size;
2291     s->first_slice_line=1;
2292     for(s->mb_y= s->start_mb_y; s->mb_y < s->end_mb_y; s->mb_y++) {
2293         s->mb_x=0; //for block init below
2294         ff_init_block_index(s);
2295         for(s->mb_x=0; s->mb_x < s->mb_width; s->mb_x++) {
2296             s->block_index[0]+=2;
2297             s->block_index[1]+=2;
2298             s->block_index[2]+=2;
2299             s->block_index[3]+=2;
2300
2301             /* compute motion vector & mb_type and store in context */
2302             if(s->pict_type==AV_PICTURE_TYPE_B)
2303                 ff_estimate_b_frame_motion(s, s->mb_x, s->mb_y);
2304             else
2305                 ff_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2306         }
2307         s->first_slice_line=0;
2308     }
2309     return 0;
2310 }
2311
2312 static int mb_var_thread(AVCodecContext *c, void *arg){
2313     MpegEncContext *s= *(void**)arg;
2314     int mb_x, mb_y;
2315
2316     ff_check_alignment();
2317
2318     for(mb_y=s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2319         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2320             int xx = mb_x * 16;
2321             int yy = mb_y * 16;
2322             uint8_t *pix = s->new_picture.f.data[0] + (yy * s->linesize) + xx;
2323             int varc;
2324             int sum = s->dsp.pix_sum(pix, s->linesize);
2325
2326             varc = (s->dsp.pix_norm1(pix, s->linesize) - (((unsigned)sum*sum)>>8) + 500 + 128)>>8;
2327
2328             s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc;
2329             s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
2330             s->me.mb_var_sum_temp    += varc;
2331         }
2332     }
2333     return 0;
2334 }
2335
2336 static void write_slice_end(MpegEncContext *s){
2337     if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4){
2338         if(s->partitioned_frame){
2339             ff_mpeg4_merge_partitions(s);
2340         }
2341
2342         ff_mpeg4_stuffing(&s->pb);
2343     }else if(CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG){
2344         ff_mjpeg_encode_stuffing(&s->pb);
2345     }
2346
2347     avpriv_align_put_bits(&s->pb);
2348     flush_put_bits(&s->pb);
2349
2350     if((s->flags&CODEC_FLAG_PASS1) && !s->partitioned_frame)
2351         s->misc_bits+= get_bits_diff(s);
2352 }
2353
2354 static void write_mb_info(MpegEncContext *s)
2355 {
2356     uint8_t *ptr = s->mb_info_ptr + s->mb_info_size - 12;
2357     int offset = put_bits_count(&s->pb);
2358     int mba  = s->mb_x + s->mb_width * (s->mb_y % s->gob_index);
2359     int gobn = s->mb_y / s->gob_index;
2360     int pred_x, pred_y;
2361     if (CONFIG_H263_ENCODER)
2362         ff_h263_pred_motion(s, 0, 0, &pred_x, &pred_y);
2363     bytestream_put_le32(&ptr, offset);
2364     bytestream_put_byte(&ptr, s->qscale);
2365     bytestream_put_byte(&ptr, gobn);
2366     bytestream_put_le16(&ptr, mba);
2367     bytestream_put_byte(&ptr, pred_x); /* hmv1 */
2368     bytestream_put_byte(&ptr, pred_y); /* vmv1 */
2369     /* 4MV not implemented */
2370     bytestream_put_byte(&ptr, 0); /* hmv2 */
2371     bytestream_put_byte(&ptr, 0); /* vmv2 */
2372 }
2373
2374 static void update_mb_info(MpegEncContext *s, int startcode)
2375 {
2376     if (!s->mb_info)
2377         return;
2378     if (put_bits_count(&s->pb) - s->prev_mb_info*8 >= s->mb_info*8) {
2379         s->mb_info_size += 12;
2380         s->prev_mb_info = s->last_mb_info;
2381     }
2382     if (startcode) {
2383         s->prev_mb_info = put_bits_count(&s->pb)/8;
2384         /* This might have incremented mb_info_size above, and we return without
2385          * actually writing any info into that slot yet. But in that case,
2386          * this will be called again at the start of the after writing the
2387          * start code, actually writing the mb info. */
2388         return;
2389     }
2390
2391     s->last_mb_info = put_bits_count(&s->pb)/8;
2392     if (!s->mb_info_size)
2393         s->mb_info_size += 12;
2394     write_mb_info(s);
2395 }
2396
/**
 * Encode the macroblock rows s->start_mb_y .. s->end_mb_y-1 of the current
 * picture into s->pb.
 *
 * The per-context statistics are reset first. Each macroblock is then either
 * tried with every candidate type flagged in s->mb_type[] through
 * encode_mb_hq(), or, when only a single type is flagged and QP_RD is off,
 * encoded directly with encode_mb(). When s->rtp_mode is set, slice / GOB /
 * video packet headers are written and avctx->rtp_callback (if any) is
 * called at packet boundaries.
 *
 * @param c   not referenced in this function (s->avctx is used instead)
 * @param arg points to the MpegEncContext pointer of this slice context
 * @return 0 on success, -1 when fewer than MAX_MB_BYTES remain in s->pb
 *         (or in s->pb2 / s->tex_pb with data partitioning)
 */
2397 static int encode_thread(AVCodecContext *c, void *arg){
2398     MpegEncContext *s= *(void**)arg;
2399     int mb_x, mb_y, pdif = 0;
         /* chroma rows covered by one 16-row macroblock */
2400     int chr_h= 16>>s->chroma_y_shift;
2401     int i, j;
2402     MpegEncContext best_s, backup_s;
2403     uint8_t bit_buf[2][MAX_MB_BYTES];
2404     uint8_t bit_buf2[2][MAX_MB_BYTES];
2405     uint8_t bit_buf_tex[2][MAX_MB_BYTES];
2406     PutBitContext pb[2], pb2[2], tex_pb[2];
2407 //printf("%d->%d\n", s->resync_mb_y, s->end_mb_y);
2408
2409     ff_check_alignment();
2410
         /* two scratch bit writers per stream, handed to encode_mb_hq() */
2411     for(i=0; i<2; i++){
2412         init_put_bits(&pb    [i], bit_buf    [i], MAX_MB_BYTES);
2413         init_put_bits(&pb2   [i], bit_buf2   [i], MAX_MB_BYTES);
2414         init_put_bits(&tex_pb[i], bit_buf_tex[i], MAX_MB_BYTES);
2415     }
2416
         /* reset the statistics gathered by this context */
2417     s->last_bits= put_bits_count(&s->pb);
2418     s->mv_bits=0;
2419     s->misc_bits=0;
2420     s->i_tex_bits=0;
2421     s->p_tex_bits=0;
2422     s->i_count=0;
2423     s->f_count=0;
2424     s->b_count=0;
2425     s->skip_count=0;
2426
2427     for(i=0; i<3; i++){
2428         /* init last dc values */
2429         /* note: quant matrix value (8) is implied here */
2430         s->last_dc[i] = 128 << s->intra_dc_precision;
2431
2432         s->current_picture.f.error[i] = 0;
2433     }
2434     s->mb_skip_run = 0;
2435     memset(s->last_mv, 0, sizeof(s->last_mv));
2436
2437     s->last_mv_dir = 0;
2438
         /* codec specific per-slice setup */
2439     switch(s->codec_id){
2440     case AV_CODEC_ID_H263:
2441     case AV_CODEC_ID_H263P:
2442     case AV_CODEC_ID_FLV1:
2443         if (CONFIG_H263_ENCODER)
2444             s->gob_index = ff_h263_get_gob_height(s);
2445         break;
2446     case AV_CODEC_ID_MPEG4:
2447         if(CONFIG_MPEG4_ENCODER && s->partitioned_frame)
2448             ff_mpeg4_init_partitions(s);
2449         break;
2450     }
2451
2452     s->resync_mb_x=0;
2453     s->resync_mb_y=0;
2454     s->first_slice_line = 1;
         /* start of the packet currently being built */
2455     s->ptr_lastgob = s->pb.buf;
2456     for(mb_y= s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2457 //    printf("row %d at %X\n", s->mb_y, (int)s);
2458         s->mb_x=0;
2459         s->mb_y= mb_y;
2460
2461         ff_set_qscale(s, s->qscale);
2462         ff_init_block_index(s);
2463
2464         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2465             int xy= mb_y*s->mb_stride + mb_x; // removed const, H261 needs to adjust this
2466             int mb_type= s->mb_type[xy];
2467 //            int d;
2468             int dmin= INT_MAX;
2469             int dir;
2470
                 /* refuse to start a macroblock unless MAX_MB_BYTES are still free */
2471             if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < MAX_MB_BYTES){
2472                 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2473                 return -1;
2474             }
2475             if(s->data_partitioning){
2476                 if(   s->pb2   .buf_end - s->pb2   .buf - (put_bits_count(&s->    pb2)>>3) < MAX_MB_BYTES
2477                    || s->tex_pb.buf_end - s->tex_pb.buf - (put_bits_count(&s->tex_pb )>>3) < MAX_MB_BYTES){
2478                     av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2479                     return -1;
2480                 }
2481             }
2482
2483             s->mb_x = mb_x;
2484             s->mb_y = mb_y;  // moved into loop, can get changed by H.261
2485             ff_update_block_index(s);
2486
2487             if(CONFIG_H261_ENCODER && s->codec_id == AV_CODEC_ID_H261){
2488                 ff_h261_reorder_mb_index(s);
2489                 xy= s->mb_y*s->mb_stride + s->mb_x;
2490                 mb_type= s->mb_type[xy];
2491             }
2492
2493             /* write gob / video packet header  */
2494             if(s->rtp_mode){
2495                 int current_packet_size, is_gob_start;
2496
                     /* bytes written since the last packet start, rounded up */
2497                 current_packet_size= ((put_bits_count(&s->pb)+7)>>3) - (s->ptr_lastgob - s->pb.buf);
2498
2499                 is_gob_start= s->avctx->rtp_payload_size && current_packet_size >= s->avctx->rtp_payload_size && mb_y + mb_x>0;
2500
                     /* a context that does not begin at row 0 always opens a new packet */
2501                 if(s->start_mb_y == mb_y && mb_y > 0 && mb_x==0) is_gob_start=1;
2502
2503                 switch(s->codec_id){
2504                 case AV_CODEC_ID_H263:
2505                 case AV_CODEC_ID_H263P:
                         /* without slice structured mode a GOB may only start at
                          * column 0 of a row that is a multiple of gob_index */
2506                     if(!s->h263_slice_structured)
2507                         if(s->mb_x || s->mb_y%s->gob_index) is_gob_start=0;
2508                     break;
2509                 case AV_CODEC_ID_MPEG2VIDEO:
2510                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
                         /* NOTE(review): no break here, so control falls through into
                          * the MPEG-1 skip-run check; looks intentional, confirm */
2511                 case AV_CODEC_ID_MPEG1VIDEO:
2512                     if(s->mb_skip_run) is_gob_start=0;
2513                     break;
2514                 }
2515
2516                 if(is_gob_start){
                         /* close the previous slice unless this is the very first
                          * macroblock handled by this context */
2517                     if(s->start_mb_y != mb_y || mb_x!=0){
2518                         write_slice_end(s);
2519
2520                         if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4 && s->partitioned_frame){
2521                             ff_mpeg4_init_partitions(s);
2522                         }
2523                     }
2524
2525                     assert((put_bits_count(&s->pb)&7) == 0);
2526                     current_packet_size= put_bits_ptr(&s->pb) - s->ptr_lastgob;
2527
                         /* error_rate: drop selected packets by rewinding the write
                          * pointer to the start of the packet just finished */
2528                     if(s->avctx->error_rate && s->resync_mb_x + s->resync_mb_y > 0){
2529                         int r= put_bits_count(&s->pb)/8 + s->picture_number + 16 + s->mb_x + s->mb_y;
                             /* NOTE(review): with integer division d is 0 whenever
                              * error_rate is above 100, and r % d below would then
                              * divide by zero; confirm the option range is limited */
2530                         int d= 100 / s->avctx->error_rate;
2531                         if(r % d == 0){
2532                             current_packet_size=0;
2533                             s->pb.buf_ptr= s->ptr_lastgob;
2534                             assert(put_bits_ptr(&s->pb) == s->ptr_lastgob);
2535                         }
2536                     }
2537
                         /* hand the finished packet and its macroblock count to the user */
2538                     if (s->avctx->rtp_callback){
2539                         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width + mb_x - s->resync_mb_x;
2540                         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, current_packet_size, number_mb);
2541                     }
2542                     update_mb_info(s, 1);
2543
                         /* write the codec specific resync header */
2544                     switch(s->codec_id){
2545                     case AV_CODEC_ID_MPEG4:
2546                         if (CONFIG_MPEG4_ENCODER) {
2547                             ff_mpeg4_encode_video_packet_header(s);
2548                             ff_mpeg4_clean_buffers(s);
2549                         }
2550                     break;
2551                     case AV_CODEC_ID_MPEG1VIDEO:
2552                     case AV_CODEC_ID_MPEG2VIDEO:
2553                         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER) {
2554                             ff_mpeg1_encode_slice_header(s);
2555                             ff_mpeg1_clean_buffers(s);
2556                         }
2557                     break;
2558                     case AV_CODEC_ID_H263:
2559                     case AV_CODEC_ID_H263P:
2560                         if (CONFIG_H263_ENCODER)
2561                             ff_h263_encode_gob_header(s, mb_y);
2562                     break;
2563                     }
2564
                         /* first pass: account the header bits as misc bits */
2565                     if(s->flags&CODEC_FLAG_PASS1){
2566                         int bits= put_bits_count(&s->pb);
2567                         s->misc_bits+= bits - s->last_bits;
2568                         s->last_bits= bits;
2569                     }
2570
2571                     s->ptr_lastgob += current_packet_size;
2572                     s->first_slice_line=1;
2573                     s->resync_mb_x=mb_x;
2574                     s->resync_mb_y=mb_y;
2575                 }
2576             }
2577
                 /* one full row after the resync point the first slice line is over */
2578             if(  (s->resync_mb_x   == s->mb_x)
2579                && s->resync_mb_y+1 == s->mb_y){
2580                 s->first_slice_line=0;
2581             }
2582
2583             s->mb_skipped=0;
2584             s->dquant=0; //only for QP_RD
2585
2586             update_mb_info(s, 0);
2587
2588             if (mb_type & (mb_type-1) || (s->mpv_flags & FF_MPV_FLAG_QP_RD)) { // more than 1 MB type possible or FF_MPV_FLAG_QP_RD
2589                 int next_block=0;
2590                 int pb_bits_count, pb2_bits_count, tex_pb_bits_count;
2591
                     /* snapshot the state (including the real bit writers) so every
                      * candidate starts from the same point; presumably
                      * encode_mb_hq() leaves the cheapest candidate so far in best_s
                      * and its cost in dmin -- see encode_mb_hq() */
2592                 copy_context_before_encode(&backup_s, s, -1);
2593                 backup_s.pb= s->pb;
2594                 best_s.data_partitioning= s->data_partitioning;
2595                 best_s.partitioned_frame= s->partitioned_frame;
2596                 if(s->data_partitioning){
2597                     backup_s.pb2= s->pb2;
2598                     backup_s.tex_pb= s->tex_pb;
2599                 }
2600
2601                 if(mb_type&CANDIDATE_MB_TYPE_INTER){
2602                     s->mv_dir = MV_DIR_FORWARD;
2603                     s->mv_type = MV_TYPE_16X16;
2604                     s->mb_intra= 0;
2605                     s->mv[0][0][0] = s->p_mv_table[xy][0];
2606                     s->mv[0][0][1] = s->p_mv_table[xy][1];
2607                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER, pb, pb2, tex_pb,
2608                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2609                 }
2610                 if(mb_type&CANDIDATE_MB_TYPE_INTER_I){
2611                     s->mv_dir = MV_DIR_FORWARD;
2612                     s->mv_type = MV_TYPE_FIELD;
2613                     s->mb_intra= 0;
2614                     for(i=0; i<2; i++){
2615                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
2616                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
2617                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
2618                     }
2619                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER_I, pb, pb2, tex_pb,
2620                                  &dmin, &next_block, 0, 0);
2621                 }
2622                 if(mb_type&CANDIDATE_MB_TYPE_SKIPPED){
2623                     s->mv_dir = MV_DIR_FORWARD;
2624                     s->mv_type = MV_TYPE_16X16;
2625                     s->mb_intra= 0;
2626                     s->mv[0][0][0] = 0;
2627                     s->mv[0][0][1] = 0;
2628                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_SKIPPED, pb, pb2, tex_pb,
2629                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2630                 }
2631                 if(mb_type&CANDIDATE_MB_TYPE_INTER4V){
2632                     s->mv_dir = MV_DIR_FORWARD;
2633                     s->mv_type = MV_TYPE_8X8;
2634                     s->mb_intra= 0;
2635                     for(i=0; i<4; i++){
2636                         s->mv[0][i][0] = s->current_picture.f.motion_val[0][s->block_index[i]][0];
2637                         s->mv[0][i][1] = s->current_picture.f.motion_val[0][s->block_index[i]][1];
2638                     }
2639                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER4V, pb, pb2, tex_pb,
2640                                  &dmin, &next_block, 0, 0);
2641                 }
2642                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD){
2643                     s->mv_dir = MV_DIR_FORWARD;
2644                     s->mv_type = MV_TYPE_16X16;
2645                     s->mb_intra= 0;
2646                     s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
2647                     s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
2648                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD, pb, pb2, tex_pb,
2649                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2650                 }
2651                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD){
2652                     s->mv_dir = MV_DIR_BACKWARD;
2653                     s->mv_type = MV_TYPE_16X16;
2654                     s->mb_intra= 0;
2655                     s->mv[1][0][0] = s->b_back_mv_table[xy][0];
2656                     s->mv[1][0][1] = s->b_back_mv_table[xy][1];
2657                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD, pb, pb2, tex_pb,
2658                                  &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]);
2659                 }
2660                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR){
2661                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2662                     s->mv_type = MV_TYPE_16X16;
2663                     s->mb_intra= 0;
2664                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
2665                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
2666                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
2667                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
2668                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR, pb, pb2, tex_pb,
2669                                  &dmin, &next_block, 0, 0);
2670                 }
2671                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD_I){
2672                     s->mv_dir = MV_DIR_FORWARD;
2673                     s->mv_type = MV_TYPE_FIELD;
2674                     s->mb_intra= 0;
2675                     for(i=0; i<2; i++){
2676                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
2677                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
2678                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
2679                     }
2680                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD_I, pb, pb2, tex_pb,
2681                                  &dmin, &next_block, 0, 0);
2682                 }
2683                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD_I){
2684                     s->mv_dir = MV_DIR_BACKWARD;
2685                     s->mv_type = MV_TYPE_FIELD;
2686                     s->mb_intra= 0;
2687                     for(i=0; i<2; i++){
2688                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
2689                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
2690                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
2691                     }
2692                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD_I, pb, pb2, tex_pb,
2693                                  &dmin, &next_block, 0, 0);
2694                 }
2695                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR_I){
2696                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2697                     s->mv_type = MV_TYPE_FIELD;
2698                     s->mb_intra= 0;
2699                     for(dir=0; dir<2; dir++){
2700                         for(i=0; i<2; i++){
2701                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
2702                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
2703                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
2704                         }
2705                     }
2706                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR_I, pb, pb2, tex_pb,
2707                                  &dmin, &next_block, 0, 0);
2708                 }
2709                 if(mb_type&CANDIDATE_MB_TYPE_INTRA){
2710                     s->mv_dir = 0;
2711                     s->mv_type = MV_TYPE_16X16;
2712                     s->mb_intra= 1;
2713                     s->mv[0][0][0] = 0;
2714                     s->mv[0][0][1] = 0;
2715                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTRA, pb, pb2, tex_pb,
2716                                  &dmin, &next_block, 0, 0);
                         /* keep the intra prediction tables consistent with
                          * whether the intra candidate ended up in best_s */
2717                     if(s->h263_pred || s->h263_aic){
2718                         if(best_s.mb_intra)
2719                             s->mbintra_table[mb_x + mb_y*s->mb_stride]=1;
2720                         else
2721                             ff_clean_intra_table_entries(s); //old mode?
2722                     }
2723                 }
2724
                     /* QP_RD: retry the 16x16 mode held in best_s with quantizers
                      * last_qp-1, +1, -2, +2 (B-frames start at index 2, so only
                      * -2 and +2 are tried) */
2725                 if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) && dmin < INT_MAX) {
2726                     if(best_s.mv_type==MV_TYPE_16X16){ //FIXME move 4mv after QPRD
2727                         const int last_qp= backup_s.qscale;
2728                         int qpi, qp, dc[6];
2729                         DCTELEM ac[6][16];
2730                         const int mvdir= (best_s.mv_dir&MV_DIR_BACKWARD) ? 1 : 0;
2731                         static const int dquant_tab[4]={-1,1,-2,2};
2732
2733                         assert(backup_s.dquant == 0);
2734
2735                         //FIXME intra
2736                         s->mv_dir= best_s.mv_dir;
2737                         s->mv_type = MV_TYPE_16X16;
2738                         s->mb_intra= best_s.mb_intra;
2739                         s->mv[0][0][0] = best_s.mv[0][0][0];
2740                         s->mv[0][0][1] = best_s.mv[0][0][1];
2741                         s->mv[1][0][0] = best_s.mv[1][0][0];
2742                         s->mv[1][0][1] = best_s.mv[1][0][1];
2743
2744                         qpi = s->pict_type == AV_PICTURE_TYPE_B ? 2 : 0;
2745                         for(; qpi<4; qpi++){
2746                             int dquant= dquant_tab[qpi];
2747                             qp= last_qp + dquant;
2748                             if(qp < s->avctx->qmin || qp > s->avctx->qmax)
2749                                 continue;
2750                             backup_s.dquant= dquant;
                                 /* save the intra DC/AC prediction values so they can
                                  * be restored if this quantizer is not kept */
2751                             if(s->mb_intra && s->dc_val[0]){
2752                                 for(i=0; i<6; i++){
2753                                     dc[i]= s->dc_val[0][ s->block_index[i] ];
2754                                     memcpy(ac[i], s->ac_val[0][s->block_index[i]], sizeof(DCTELEM)*16);
2755                                 }
2756                             }
2757
2758                             encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
2759                                          &dmin, &next_block, s->mv[mvdir][0][0], s->mv[mvdir][0][1]);
                                 /* best_s did not take this quantizer: undo the
                                  * prediction value changes made by the trial */
2760                             if(best_s.qscale != qp){
2761                                 if(s->mb_intra && s->dc_val[0]){
2762                                     for(i=0; i<6; i++){
2763                                         s->dc_val[0][ s->block_index[i] ]= dc[i];
2764                                         memcpy(s->ac_val[0][s->block_index[i]], ac[i], sizeof(DCTELEM)*16);
2765                                     }
2766                                 }
2767                             }
2768                         }
2769                     }
2770                 }
2771                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT){
2772                     int mx= s->b_direct_mv_table[xy][0];
2773                     int my= s->b_direct_mv_table[xy][1];
2774
2775                     backup_s.dquant = 0;
2776                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
2777                     s->mb_intra= 0;
2778                     ff_mpeg4_set_direct_mv(s, mx, my);
2779                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
2780                                  &dmin, &next_block, mx, my);
2781                 }
2782                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT0){
2783                     backup_s.dquant = 0;
2784                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
2785                     s->mb_intra= 0;
2786                     ff_mpeg4_set_direct_mv(s, 0, 0);
                         /* NOTE(review): the DIRECT0 candidate is passed with the
                          * CANDIDATE_MB_TYPE_DIRECT id; confirm this is intended */
2787                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
2788                                  &dmin, &next_block, 0, 0);
2789                 }
                     /* SKIP_RD: if any block has block_last_index != 0, retry the
                      * mode held in best_s once more with s->skipdct set */
2790                 if (!best_s.mb_intra && s->mpv_flags & FF_MPV_FLAG_SKIP_RD) {
2791                     int coded=0;
2792                     for(i=0; i<6; i++)
2793                         coded |= s->block_last_index[i];
2794                     if(coded){
2795                         int mx,my;
2796                         memcpy(s->mv, best_s.mv, sizeof(s->mv));
2797                         if(CONFIG_MPEG4_ENCODER && best_s.mv_dir & MV_DIRECT){
2798                             mx=my=0; //FIXME find the one we actually used
2799                             ff_mpeg4_set_direct_mv(s, mx, my);
2800                         }else if(best_s.mv_dir&MV_DIR_BACKWARD){
2801                             mx= s->mv[1][0][0];
2802                             my= s->mv[1][0][1];
2803                         }else{
2804                             mx= s->mv[0][0][0];
2805                             my= s->mv[0][0][1];
2806                         }
2807
2808                         s->mv_dir= best_s.mv_dir;
2809                         s->mv_type = best_s.mv_type;
2810                         s->mb_intra= 0;
2811 /*                        s->mv[0][0][0] = best_s.mv[0][0][0];
2812                         s->mv[0][0][1] = best_s.mv[0][0][1];
2813                         s->mv[1][0][0] = best_s.mv[1][0][0];
2814                         s->mv[1][0][1] = best_s.mv[1][0][1];*/
2815                         backup_s.dquant= 0;
2816                         s->skipdct=1;
2817                         encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
2818                                         &dmin, &next_block, mx, my);
2819                         s->skipdct=0;
2820                     }
2821                 }
2822
2823                 s->current_picture.f.qscale_table[xy] = best_s.qscale;
2824
                     /* adopt the state kept in best_s */
2825                 copy_context_after_encode(s, &best_s, -1);
2826
                     /* append the bits held in bit_buf[next_block^1] to the bit
                      * writer saved before the trials and make that writer the
                      * active one again */
2827                 pb_bits_count= put_bits_count(&s->pb);
2828                 flush_put_bits(&s->pb);
2829                 avpriv_copy_bits(&backup_s.pb, bit_buf[next_block^1], pb_bits_count);
2830                 s->pb= backup_s.pb;
2831
                     /* same for the two extra streams used with data partitioning */
2832                 if(s->data_partitioning){
2833                     pb2_bits_count= put_bits_count(&s->pb2);
2834                     flush_put_bits(&s->pb2);
2835                     avpriv_copy_bits(&backup_s.pb2, bit_buf2[next_block^1], pb2_bits_count);
2836                     s->pb2= backup_s.pb2;
2837
2838                     tex_pb_bits_count= put_bits_count(&s->tex_pb);
2839                     flush_put_bits(&s->tex_pb);
2840                     avpriv_copy_bits(&backup_s.tex_pb, bit_buf_tex[next_block^1], tex_pb_bits_count);
2841                     s->tex_pb= backup_s.tex_pb;
2842                 }
2843                 s->last_bits= put_bits_count(&s->pb);
2844
2845                 if (CONFIG_H263_ENCODER &&
2846                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
2847                     ff_h263_update_motion_val(s);
2848
                     /* with next_block==0 the pixels are taken from rd_scratchpad
                      * and written to s->dest (luma, then the two chroma planes);
                      * presumably this is the reconstruction of the kept candidate */
2849                 if(next_block==0){ //FIXME 16 vs linesize16
2850                     s->dsp.put_pixels_tab[0][0](s->dest[0], s->rd_scratchpad                     , s->linesize  ,16);
2851                     s->dsp.put_pixels_tab[1][0](s->dest[1], s->rd_scratchpad + 16*s->linesize    , s->uvlinesize, 8);
2852                     s->dsp.put_pixels_tab[1][0](s->dest[2], s->rd_scratchpad + 16*s->linesize + 8, s->uvlinesize, 8);
2853                 }
2854
2855                 if(s->avctx->mb_decision == FF_MB_DECISION_BITS)
2856                     ff_MPV_decode_mb(s, s->block);
2857             } else {
2858                 int motion_x = 0, motion_y = 0;
2859                 s->mv_type=MV_TYPE_16X16;
2860                 // only one MB-Type possible
2861
2862                 switch(mb_type){
2863                 case CANDIDATE_MB_TYPE_INTRA:
2864                     s->mv_dir = 0;
2865                     s->mb_intra= 1;
2866                     motion_x= s->mv[0][0][0] = 0;
2867                     motion_y= s->mv[0][0][1] = 0;
2868                     break;
2869                 case CANDIDATE_MB_TYPE_INTER:
2870                     s->mv_dir = MV_DIR_FORWARD;
2871                     s->mb_intra= 0;
2872                     motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
2873                     motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
2874                     break;
2875                 case CANDIDATE_MB_TYPE_INTER_I:
2876                     s->mv_dir = MV_DIR_FORWARD;
2877                     s->mv_type = MV_TYPE_FIELD;
2878                     s->mb_intra= 0;
2879                     for(i=0; i<2; i++){
2880                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
2881                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
2882                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
2883                     }
2884                     break;
2885                 case CANDIDATE_MB_TYPE_INTER4V:
2886                     s->mv_dir = MV_DIR_FORWARD;
2887                     s->mv_type = MV_TYPE_8X8;
2888                     s->mb_intra= 0;
2889                     for(i=0; i<4; i++){
2890                         s->mv[0][i][0] = s->current_picture.f.motion_val[0][s->block_index[i]][0];
2891                         s->mv[0][i][1] = s->current_picture.f.motion_val[0][s->block_index[i]][1];
2892                     }
2893                     break;
2894                 case CANDIDATE_MB_TYPE_DIRECT:
2895                     if (CONFIG_MPEG4_ENCODER) {
2896                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
2897                         s->mb_intra= 0;
2898                         motion_x=s->b_direct_mv_table[xy][0];
2899                         motion_y=s->b_direct_mv_table[xy][1];
2900                         ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
2901                     }
2902                     break;
2903                 case CANDIDATE_MB_TYPE_DIRECT0:
2904                     if (CONFIG_MPEG4_ENCODER) {
2905                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
2906                         s->mb_intra= 0;
2907                         ff_mpeg4_set_direct_mv(s, 0, 0);
2908                     }
2909                     break;
2910                 case CANDIDATE_MB_TYPE_BIDIR:
2911                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2912                     s->mb_intra= 0;
2913                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
2914                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
2915                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
2916                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
2917                     break;
2918                 case CANDIDATE_MB_TYPE_BACKWARD:
2919                     s->mv_dir = MV_DIR_BACKWARD;
2920                     s->mb_intra= 0;
2921                     motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
2922                     motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
2923                     break;
2924                 case CANDIDATE_MB_TYPE_FORWARD:
2925                     s->mv_dir = MV_DIR_FORWARD;
2926                     s->mb_intra= 0;
2927                     motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
2928                     motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
2929 //                    printf(" %d %d ", motion_x, motion_y);
2930                     break;
2931                 case CANDIDATE_MB_TYPE_FORWARD_I:
2932                     s->mv_dir = MV_DIR_FORWARD;
2933                     s->mv_type = MV_TYPE_FIELD;
2934                     s->mb_intra= 0;
2935                     for(i=0; i<2; i++){
2936                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
2937                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
2938                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
2939                     }
2940                     break;
2941                 case CANDIDATE_MB_TYPE_BACKWARD_I:
2942                     s->mv_dir = MV_DIR_BACKWARD;
2943                     s->mv_type = MV_TYPE_FIELD;
2944                     s->mb_intra= 0;
2945                     for(i=0; i<2; i++){
2946                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
2947                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
2948                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
2949                     }
2950                     break;
2951                 case CANDIDATE_MB_TYPE_BIDIR_I:
2952                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2953                     s->mv_type = MV_TYPE_FIELD;
2954                     s->mb_intra= 0;
2955                     for(dir=0; dir<2; dir++){
2956                         for(i=0; i<2; i++){
2957                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
2958                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
2959                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
2960                         }
2961                     }
2962                     break;
2963                 default:
                         /* NOTE(review): the error is only logged; encode_mb() below
                          * still runs with whatever mv_dir / mb_intra were left from
                          * the previous macroblock */
2964                     av_log(s->avctx, AV_LOG_ERROR, "illegal MB type\n");
2965                 }
2966
2967                 encode_mb(s, motion_x, motion_y);
2968
2969                 // RAL: Update last macroblock type
2970                 s->last_mv_dir = s->mv_dir;
2971
2972                 if (CONFIG_H263_ENCODER &&
2973                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
2974                     ff_h263_update_motion_val(s);
2975
2976                 ff_MPV_decode_mb(s, s->block);
2977             }
2978
2979             /* clean the MV table in IPS frames for direct mode in B frames */
2980             if(s->mb_intra /* && I,P,S_TYPE */){
2981                 s->p_mv_table[xy][0]=0;
2982                 s->p_mv_table[xy][1]=0;
2983             }
2984
                 /* PSNR: accumulate sse() between s->new_picture and the data at
                  * s->dest for each plane */
2985             if(s->flags&CODEC_FLAG_PSNR){
2986                 int w= 16;
2987                 int h= 16;
2988
                     /* clip the measured area at the right and bottom picture edge */
2989                 if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
2990                 if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
2991
2992                 s->current_picture.f.error[0] += sse(
2993                     s, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
2994                     s->dest[0], w, h, s->linesize);
2995                 s->current_picture.f.error[1] += sse(
2996                     s, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
2997                     s->dest[1], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
2998                 s->current_picture.f.error[2] += sse(
2999                     s, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3000                     s->dest[2], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3001             }
3002             if(s->loop_filter){
3003                 if(CONFIG_H263_ENCODER && s->out_format == FMT_H263)
3004                     ff_h263_loop_filter(s);
3005             }
3006 //printf("MB %d %d bits\n", s->mb_x+s->mb_y*s->mb_stride, put_bits_count(&s->pb));
3007         }
3008     }
3009
3010     //not beautiful here but we must write it before flushing so it has to be here
3011     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type == AV_PICTURE_TYPE_I)
3012         ff_msmpeg4_encode_ext_header(s);
3013
3014     write_slice_end(s);
3015
3016     /* Send the last GOB if RTP */
3017     if (s->avctx->rtp_callback) {
             /* the row loop has finished, so mb_y equals s->end_mb_y here */
3018         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width - s->resync_mb_x;
3019         pdif = put_bits_ptr(&s->pb) - s->ptr_lastgob;
3020         /* Call the RTP callback to send the last GOB */
3021         emms_c();
3022         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, pdif, number_mb);
3023     }
3024
3025     return 0;
3026 }
3027
3028 #define MERGE(field) dst->field += src->field; src->field=0
3029 static void merge_context_after_me(MpegEncContext *dst, MpegEncContext *src){
3030     MERGE(me.scene_change_score);
3031     MERGE(me.mc_mb_var_sum_temp);
3032     MERGE(me.mb_var_sum_temp);
3033 }
3034
3035 static void merge_context_after_encode(MpegEncContext *dst, MpegEncContext *src){
3036     int i;
3037
3038     MERGE(dct_count[0]); //note, the other dct vars are not part of the context
3039     MERGE(dct_count[1]);
3040     MERGE(mv_bits);
3041     MERGE(i_tex_bits);
3042     MERGE(p_tex_bits);
3043     MERGE(i_count);
3044     MERGE(f_count);
3045     MERGE(b_count);
3046     MERGE(skip_count);
3047     MERGE(misc_bits);
3048     MERGE(error_count);
3049     MERGE(padding_bug_score);
3050     MERGE(current_picture.f.error[0]);
3051     MERGE(current_picture.f.error[1]);
3052     MERGE(current_picture.f.error[2]);
3053
3054     if(dst->avctx->noise_reduction){
3055         for(i=0; i<64; i++){
3056             MERGE(dct_error_sum[0][i]);
3057             MERGE(dct_error_sum[1][i]);
3058         }
3059     }
3060
3061     assert(put_bits_count(&src->pb) % 8 ==0);
3062     assert(put_bits_count(&dst->pb) % 8 ==0);
3063     avpriv_copy_bits(&dst->pb, src->pb.buf, put_bits_count(&src->pb));
3064     flush_put_bits(&dst->pb);
3065 }
3066
3067 static int estimate_qp(MpegEncContext *s, int dry_run){
3068     if (s->next_lambda){
3069         s->current_picture_ptr->f.quality =
3070         s->current_picture.f.quality = s->next_lambda;
3071         if(!dry_run) s->next_lambda= 0;
3072     } else if (!s->fixed_qscale) {
3073         s->current_picture_ptr->f.quality =
3074         s->current_picture.f.quality = ff_rate_estimate_qscale(s, dry_run);
3075         if (s->current_picture.f.quality < 0)
3076             return -1;
3077     }
3078
3079     if(s->adaptive_quant){
3080         switch(s->codec_id){
3081         case AV_CODEC_ID_MPEG4:
3082             if (CONFIG_MPEG4_ENCODER)
3083                 ff_clean_mpeg4_qscales(s);
3084             break;
3085         case AV_CODEC_ID_H263:
3086         case AV_CODEC_ID_H263P:
3087         case AV_CODEC_ID_FLV1:
3088             if (CONFIG_H263_ENCODER)
3089                 ff_clean_h263_qscales(s);
3090             break;
3091         default:
3092             ff_init_qscale_tab(s);
3093         }
3094
3095         s->lambda= s->lambda_table[0];
3096         //FIXME broken
3097     }else
3098         s->lambda = s->current_picture.f.quality;
3099 //printf("%d %d\n", s->avctx->global_quality, s->current_picture.quality);
3100     update_qscale(s);
3101     return 0;
3102 }
3103
3104 /* must be called before writing the header */
3105 static void set_frame_distances(MpegEncContext * s){
3106     assert(s->current_picture_ptr->f.pts != AV_NOPTS_VALUE);
3107     s->time = s->current_picture_ptr->f.pts * s->avctx->time_base.num;
3108
3109     if(s->pict_type==AV_PICTURE_TYPE_B){
3110         s->pb_time= s->pp_time - (s->last_non_b_time - s->time);
3111         assert(s->pb_time > 0 && s->pb_time < s->pp_time);
3112     }else{
3113         s->pp_time= s->time - s->last_non_b_time;
3114         s->last_non_b_time= s->time;
3115         assert(s->picture_number==0 || s->pp_time > 0);
3116     }
3117 }
3118
/**
 * Encode the current picture into s->pb.
 *
 * Steps, in order: set up timing, rounding mode and a starting lambda; run
 * motion estimation (or, for I-frames without fixed qscale, the mb_var pass)
 * on all slice contexts and merge their statistics; possibly turn a P-frame
 * into an I-frame on scene change; select f_code/b_code and fix too long
 * motion vectors; take the final quantizer decision; write the picture
 * header; run encode_thread on all slice contexts and merge their results.
 *
 * @param s              encoder context
 * @param picture_number stored in s->picture_number and passed to the
 *                       picture header writers
 * @return 0 on success, -1 if estimate_qp() or ff_init_me() fails
 *
 * NOTE(review): the return values of the avctx->execute() calls are not
 * checked here.
 */
3119 static int encode_picture(MpegEncContext *s, int picture_number)
3120 {
3121     int i;
3122     int bits;
3123     int context_count = s->slice_context_count;
3124
3125     s->picture_number = picture_number;
3126
3127     /* Reset the average MB variance */
3128     s->me.mb_var_sum_temp    =
3129     s->me.mc_mb_var_sum_temp = 0;
3130
3131     /* we need to initialize some time vars before we can encode b-frames */
3132     // RAL: Condition added for MPEG1VIDEO
3133     if (s->codec_id == AV_CODEC_ID_MPEG1VIDEO || s->codec_id == AV_CODEC_ID_MPEG2VIDEO || (s->h263_pred && !s->msmpeg4_version))
3134         set_frame_distances(s);
3135     if(CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4)
3136         ff_set_mpeg4_time(s);
3137
3138     s->me.scene_change_score=0;
3139
3140 //    s->lambda= s->current_picture_ptr->quality; //FIXME qscale / ... stuff for ME rate distortion
3141
         /* I-frames set the rounding mode from the msmpeg4 version; other
          * non-B frames toggle it for flipflop rounding, H.263+ and MPEG-4 */
3142     if(s->pict_type==AV_PICTURE_TYPE_I){
3143         if(s->msmpeg4_version >= 3) s->no_rounding=1;
3144         else                        s->no_rounding=0;
3145     }else if(s->pict_type!=AV_PICTURE_TYPE_B){
3146         if(s->flipflop_rounding || s->codec_id == AV_CODEC_ID_H263P || s->codec_id == AV_CODEC_ID_MPEG4)
3147             s->no_rounding ^= 1;
3148     }
3149
         /* Starting lambda for motion estimation: a dry-run quantizer estimate
          * in second-pass mode, otherwise (unless CODEC_FLAG_QSCALE is set) the
          * lambda remembered for the matching picture type */
3150     if(s->flags & CODEC_FLAG_PASS2){
3151         if (estimate_qp(s,1) < 0)
3152             return -1;
3153         ff_get_2pass_fcode(s);
3154     }else if(!(s->flags & CODEC_FLAG_QSCALE)){
3155         if(s->pict_type==AV_PICTURE_TYPE_B)
3156             s->lambda= s->last_lambda_for[s->pict_type];
3157         else
3158             s->lambda= s->last_lambda_for[s->last_non_b_pict_type];
3159         update_qscale(s);
3160     }
3161
3162     s->mb_intra=0; //for the rate distortion & bit compare functions
         /* bring the additional slice contexts (index 1 and up) in line with s */
3163     for(i=1; i<context_count; i++){
3164         ff_update_duplicate_context(s->thread_context[i], s);
3165     }
3166
3167     if(ff_init_me(s)<0)
3168         return -1;
3169
3170     /* Estimate motion for every MB */
3171     if(s->pict_type != AV_PICTURE_TYPE_I){
             /* scale lambda/lambda2 by me_penalty_compensation/256, rounded */
3172         s->lambda = (s->lambda * s->avctx->me_penalty_compensation + 128)>>8;
3173         s->lambda2= (s->lambda2* (int64_t)s->avctx->me_penalty_compensation + 128)>>8;
3174         if(s->pict_type != AV_PICTURE_TYPE_B && s->avctx->me_threshold==0){
3175             if((s->avctx->pre_me && s->last_non_b_pict_type==AV_PICTURE_TYPE_I) || s->avctx->pre_me==2){
3176                 s->avctx->execute(s->avctx, pre_estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3177             }
3178         }
3179
3180         s->avctx->execute(s->avctx, estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3181     }else /* if(s->pict_type == AV_PICTURE_TYPE_I) */{
3182         /* I-Frame */
3183         for(i=0; i<s->mb_stride*s->mb_height; i++)
3184             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3185
3186         if(!s->fixed_qscale){
3187             /* finding spatial complexity for I-frame rate control */
3188             s->avctx->execute(s->avctx, mb_var_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3189         }
3190     }
         /* sum the scene change score and variance sums of all slice contexts into s */
3191     for(i=1; i<context_count; i++){
3192         merge_context_after_me(s, s->thread_context[i]);
3193     }
3194     s->current_picture.mc_mb_var_sum= s->current_picture_ptr->mc_mb_var_sum= s->me.mc_mb_var_sum_temp;
3195     s->current_picture.   mb_var_sum= s->current_picture_ptr->   mb_var_sum= s->me.   mb_var_sum_temp;
3196     emms_c();
3197
         /* scene change on a P-frame: code it as an I-frame with every MB an intra candidate */
3198     if(s->me.scene_change_score > s->avctx->scenechange_threshold && s->pict_type == AV_PICTURE_TYPE_P){
3199         s->pict_type= AV_PICTURE_TYPE_I;
3200         for(i=0; i<s->mb_stride*s->mb_height; i++)
3201             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3202 //printf("Scene change detected, encoding as I Frame %d %d\n", s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
3203     }
3204
         /* f_code/b_code selection and fixing of too long MVs; skipped when umvplus is set */
3205     if(!s->umvplus){
3206         if(s->pict_type==AV_PICTURE_TYPE_P || s->pict_type==AV_PICTURE_TYPE_S) {
3207             s->f_code= ff_get_best_fcode(s, s->p_mv_table, CANDIDATE_MB_TYPE_INTER);
3208
3209             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3210                 int a,b;
3211                 a= ff_get_best_fcode(s, s->p_field_mv_table[0][0], CANDIDATE_MB_TYPE_INTER_I); //FIXME field_select
3212                 b= ff_get_best_fcode(s, s->p_field_mv_table[1][1], CANDIDATE_MB_TYPE_INTER_I);
3213                 s->f_code= FFMAX3(s->f_code, a, b);
3214             }
3215
3216             ff_fix_long_p_mvs(s);
3217             ff_fix_long_mvs(s, NULL, 0, s->p_mv_table, s->f_code, CANDIDATE_MB_TYPE_INTER, 0);
3218             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3219                 int j;
3220                 for(i=0; i<2; i++){
3221                     for(j=0; j<2; j++)
3222                         ff_fix_long_mvs(s, s->p_field_select_table[i], j,
3223                                         s->p_field_mv_table[i][j], s->f_code, CANDIDATE_MB_TYPE_INTER_I, 0);
3224                 }
3225             }
3226         }
3227
3228         if(s->pict_type==AV_PICTURE_TYPE_B){
3229             int a, b;
3230
                 /* f_code covers the forward tables, b_code the backward tables */
3231             a = ff_get_best_fcode(s, s->b_forw_mv_table, CANDIDATE_MB_TYPE_FORWARD);
3232             b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3233             s->f_code = FFMAX(a, b);
3234
3235             a = ff_get_best_fcode(s, s->b_back_mv_table, CANDIDATE_MB_TYPE_BACKWARD);
3236             b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3237             s->b_code = FFMAX(a, b);
3238
3239             ff_fix_long_mvs(s, NULL, 0, s->b_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_FORWARD, 1);
3240             ff_fix_long_mvs(s, NULL, 0, s->b_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BACKWARD, 1);
3241             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3242             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3243             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3244                 int dir, j;
3245                 for(dir=0; dir<2; dir++){
3246                     for(i=0; i<2; i++){
3247                         for(j=0; j<2; j++){
3248                             int type= dir ? (CANDIDATE_MB_TYPE_BACKWARD_I|CANDIDATE_MB_TYPE_BIDIR_I)
3249                                           : (CANDIDATE_MB_TYPE_FORWARD_I |CANDIDATE_MB_TYPE_BIDIR_I);
3250                             ff_fix_long_mvs(s, s->b_field_select_table[dir][i], j,
3251                                             s->b_field_mv_table[dir][i][j], dir ? s->b_code : s->f_code, type, 1);
3252                         }
3253                     }
3254                 }
3255             }
3256         }
3257     }
3258
         /* final quantizer decision (not a dry run) */
3259     if (estimate_qp(s, 0) < 0)
3260         return -1;
3261
3262     if(s->qscale < 3 && s->max_qcoeff<=128 && s->pict_type==AV_PICTURE_TYPE_I && !(s->flags & CODEC_FLAG_QSCALE))
3263         s->qscale= 3; //reduce clipping problems
3264
3265     if (s->out_format == FMT_MJPEG) {
3266         /* for mjpeg, we do include qscale in the matrix */
3267         for(i=1;i<64;i++){
3268             int j= s->dsp.idct_permutation[i];
3269
3270             s->intra_matrix[j] = av_clip_uint8((ff_mpeg1_default_intra_matrix[i] * s->qscale) >> 3);
3271         }
3272         s->y_dc_scale_table=
3273         s->c_dc_scale_table= ff_mpeg2_dc_scale_table[s->intra_dc_precision];
3274         s->intra_matrix[0] = ff_mpeg2_dc_scale_table[s->intra_dc_precision][8];
3275         ff_convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
3276                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
             /* qscale is already folded into the matrix, so a fixed value is used from here on */
3277         s->qscale= 8;
3278     }
3279
3280     //FIXME var duplication
3281     s->current_picture_ptr->f.key_frame =
3282     s->current_picture.f.key_frame = s->pict_type == AV_PICTURE_TYPE_I; //FIXME pic_ptr
3283     s->current_picture_ptr->f.pict_type =
3284     s->current_picture.f.pict_type = s->pict_type;
3285
3286     if (s->current_picture.f.key_frame)
3287         s->picture_in_gop_number=0;
3288
         /* write the picture header of the output format; its size goes to header_bits */
3289     s->last_bits= put_bits_count(&s->pb);
3290     switch(s->out_format) {
3291     case FMT_MJPEG:
3292         if (CONFIG_MJPEG_ENCODER)
3293             ff_mjpeg_encode_picture_header(s);
3294         break;
3295     case FMT_H261:
3296         if (CONFIG_H261_ENCODER)
3297             ff_h261_encode_picture_header(s, picture_number);
3298         break;
3299     case FMT_H263:
3300         if (CONFIG_WMV2_ENCODER && s->codec_id == AV_CODEC_ID_WMV2)
3301             ff_wmv2_encode_picture_header(s, picture_number);
3302         else if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
3303             ff_msmpeg4_encode_picture_header(s, picture_number);
3304         else if (CONFIG_MPEG4_ENCODER && s->h263_pred)
3305             ff_mpeg4_encode_picture_header(s, picture_number);
3306         else if (CONFIG_RV10_ENCODER && s->codec_id == AV_CODEC_ID_RV10)
3307             ff_rv10_encode_picture_header(s, picture_number);
3308         else if (CONFIG_RV20_ENCODER && s->codec_id == AV_CODEC_ID_RV20)
3309             ff_rv20_encode_picture_header(s, picture_number);
3310         else if (CONFIG_FLV_ENCODER && s->codec_id == AV_CODEC_ID_FLV1)
3311             ff_flv_encode_picture_header(s, picture_number);
3312         else if (CONFIG_H263_ENCODER)
3313             ff_h263_encode_picture_header(s, picture_number);
3314         break;
3315     case FMT_MPEG1:
3316         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
3317             ff_mpeg1_encode_picture_header(s, picture_number);
3318         break;
3319     case FMT_H264:
3320         break;
3321     default:
3322         assert(0);
3323     }
3324     bits= put_bits_count(&s->pb);
3325     s->header_bits= bits - s->last_bits;
3326
         /* hand the post-ME state to the other slice contexts, encode all of
          * them, then merge their statistics and bitstreams back into s */
3327     for(i=1; i<context_count; i++){
3328         update_duplicate_context_after_me(s->thread_context[i], s);
3329     }
3330     s->avctx->execute(s->avctx, encode_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3331     for(i=1; i<context_count; i++){
3332         merge_context_after_encode(s, s->thread_context[i]);
3333     }
3334     emms_c();
3335     return 0;
3336 }
3337
3338 static void denoise_dct_c(MpegEncContext *s, DCTELEM *block){
3339     const int intra= s->mb_intra;
3340     int i;
3341
3342     s->dct_count[intra]++;
3343
3344     for(i=0; i<64; i++){
3345         int level= block[i];
3346
3347         if(level){
3348             if(level>0){
3349                 s->dct_error_sum[intra][i] += level;
3350                 level -= s->dct_offset[intra][i];
3351                 if(level<0) level=0;
3352             }else{
3353                 s->dct_error_sum[intra][i] -= level;
3354                 level += s->dct_offset[intra][i];
3355                 if(level>0) level=0;
3356             }
3357             block[i]= level;
3358         }
3359     }
3360 }
3361
/**
 * Forward DCT followed by rate-distortion optimized ("trellis") quantization
 * of one 8x8 block.
 *
 * For every scan position up to the last coefficient that survives plain
 * quantization, up to two candidate levels are collected. A dynamic
 * programming pass over the scan positions then selects the run/level chain
 * with the lowest "distortion + lambda * bits" score, taking the bit counts
 * from the VLC length tables of the context.
 *
 * @param s        encoder context (fdct, quant matrices, VLC length tables,
 *                 lambda2, ...); s->coded_score[n] is written
 * @param block    data to transform; overwritten with the quantized levels
 * @param n        block number; for intra blocks n < 4 selects y_dc_scale,
 *                 otherwise c_dc_scale, for the DC coefficient
 * @param qscale   quantizer; selects the quant matrix row and enters the
 *                 reconstruction values used for the distortion
 * @param overflow set to nonzero if the OR of the candidate magnitudes
 *                 exceeds s->max_qcoeff, i.e. an overflow might have happened
 * @return scan index of the last nonzero coefficient; a value below the
 *         first coded position (-1 for non-intra blocks) means that no
 *         coefficient is coded
 */
3362 static int dct_quantize_trellis_c(MpegEncContext *s,
3363                                   DCTELEM *block, int n,
3364                                   int qscale, int *overflow){
3365     const int *qmat;
3366     const uint8_t *scantable= s->intra_scantable.scantable;
3367     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3368     int max=0;
3369     unsigned int threshold1, threshold2;
3370     int bias=0;
3371     int run_tab[65];
3372     int level_tab[65];
3373     int score_tab[65];
3374     int survivor[65];
3375     int survivor_count;
3376     int last_run=0;
3377     int last_level=0;
3378     int last_score= 0;
3379     int last_i;
3380     int coeff[2][64];
3381     int coeff_count[64];
3382     int qmul, qadd, start_i, last_non_zero, i, dc;
3383     const int esc_length= s->ac_esc_length;
3384     uint8_t * length;
3385     uint8_t * last_length;
3386     const int lambda= s->lambda2 >> (FF_LAMBDA_SHIFT - 6);
3387
3388     s->dsp.fdct (block);
3389
3390     if(s->dct_error_sum)
3391         s->denoise_dct(s, block);
         /* reconstruction parameters for FMT_H263: alevel*qmul + qadd (see the trellis loop) */
3392     qmul= qscale*16;
3393     qadd= ((qscale-1)|1)*8;
3394
3395     if (s->mb_intra) {
3396         int q;
3397         if (!s->h263_aic) {
3398             if (n < 4)
3399                 q = s->y_dc_scale;
3400             else
3401                 q = s->c_dc_scale;
3402             q = q << 3;
3403         } else{
3404             /* For AIC we skip quant/dequant of INTRADC */
3405             q = 1 << 3;
3406             qadd=0;
3407         }
3408
3409         /* note: block[0] is assumed to be positive */
3410         block[0] = (block[0] + (q >> 1)) / q;
             /* the DC coefficient is done; the trellis only covers positions 1..63 */
3411         start_i = 1;
3412         last_non_zero = 0;
3413         qmat = s->q_intra_matrix[qscale];
3414         if(s->mpeg_quant || s->out_format == FMT_MPEG1)
3415             bias= 1<<(QMAT_SHIFT-1);
3416         length     = s->intra_ac_vlc_length;
3417         last_length= s->intra_ac_vlc_last_length;
3418     } else {
3419         start_i = 0;
3420         last_non_zero = -1;
3421         qmat = s->q_inter_matrix[qscale];
3422         length     = s->inter_ac_vlc_length;
3423         last_length= s->inter_ac_vlc_last_length;
3424     }
3425     last_i= start_i;
3426
         /* A product block[j]*qmat[j] inside [-threshold1, threshold1] quantizes
          * to level 0; the unsigned compare used below tests both bounds at once. */
3427     threshold1= (1<<QMAT_SHIFT) - bias - 1;
3428     threshold2= (threshold1<<1);
3429
         /* locate the last scan position whose coefficient does not quantize to zero */
3430     for(i=63; i>=start_i; i--) {
3431         const int j = scantable[i];
3432         int level = block[j] * qmat[j];
3433
3434         if(((unsigned)(level+threshold1))>threshold2){
3435             last_non_zero = i;
3436             break;
3437         }
3438     }
3439
         /* gather the candidate levels for every position up to last_non_zero:
          * the plainly quantized level and, if that is not zero, the level
          * with the next smaller magnitude */
3440     for(i=start_i; i<=last_non_zero; i++) {
3441         const int j = scantable[i];
3442         int level = block[j] * qmat[j];
3443
3444 //        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
3445 //           || bias-level >= (1<<(QMAT_SHIFT - 3))){
3446         if(((unsigned)(level+threshold1))>threshold2){
3447             if(level>0){
3448                 level= (bias + level)>>QMAT_SHIFT;
3449                 coeff[0][i]= level;
3450                 coeff[1][i]= level-1;
3451 //                coeff[2][k]= level-2;
3452             }else{
3453                 level= (bias - level)>>QMAT_SHIFT;
3454                 coeff[0][i]= -level;
3455                 coeff[1][i]= -level+1;
3456 //                coeff[2][k]= -level+2;
3457             }
                 /* level is the magnitude here; magnitude 1 leaves a single candidate */
3458             coeff_count[i]= FFMIN(level, 2);
3459             assert(coeff_count[i]);
3460             max |=level;
3461         }else{
                 /* quantizes to zero: +1 or -1 (by the sign of the product) is the only candidate */
3462             coeff[0][i]= (level>>31)|1;
3463             coeff_count[i]= 1;
3464         }
3465     }
3466
3467     *overflow= s->max_qcoeff < max; //overflow might have happened
3468
3469     if(last_non_zero < start_i){
3470         memset(block + start_i, 0, (64-start_i)*sizeof(DCTELEM));
3471         return last_non_zero;
3472     }
3473
         /* score_tab[i]: best score found for coding everything before scan
          * position i. survivor[]: positions still considered as the start
          * of the run leading to the next coded coefficient. */
3474     score_tab[start_i]= 0;
3475     survivor[0]= start_i;
3476     survivor_count= 1;
3477
3478     for(i=start_i; i<=last_non_zero; i++){
3479         int level_index, j, zero_distortion;
3480         int dct_coeff= FFABS(block[ scantable[i] ]);
3481         int best_score=256*256*256*120;
3482
             /* NOTE(review): presumably undoes the output scaling of the ifast DCT -- confirm */
3483         if (s->dsp.fdct == ff_fdct_ifast)
3484             dct_coeff= (dct_coeff*ff_inv_aanscales[ scantable[i] ]) >> 12;
3485         zero_distortion= dct_coeff*dct_coeff;
3486
3487         for(level_index=0; level_index < coeff_count[i]; level_index++){
3488             int distortion;
3489             int level= coeff[level_index][i];
3490             const int alevel= FFABS(level);
3491             int unquant_coeff;
3492
3493             assert(level);
3494
                 /* reconstruct the value this candidate would decode to */
3495             if(s->out_format == FMT_H263){
3496                 unquant_coeff= alevel*qmul + qadd;
3497             }else{ //MPEG1
3498                 j= s->dsp.idct_permutation[ scantable[i] ]; //FIXME optimize
3499                 if(s->mb_intra){
3500                         unquant_coeff = (int)(  alevel  * qscale * s->intra_matrix[j]) >> 3;
3501                         unquant_coeff =   (unquant_coeff - 1) | 1;
3502                 }else{
3503                         unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[j])) >> 4;
3504                         unquant_coeff =   (unquant_coeff - 1) | 1;
3505                 }
3506                 unquant_coeff<<= 3;
3507             }
3508
                 /* distortion relative to coding this coefficient as zero */
3509             distortion= (unquant_coeff - dct_coeff) * (unquant_coeff - dct_coeff) - zero_distortion;
                 /* offset the level by 64: values 0..127 index the VLC length
                  * tables, anything else is charged the escape length */
3510             level+=64;
3511             if((level&(~127)) == 0){
3512                 for(j=survivor_count-1; j>=0; j--){
3513                     int run= i - survivor[j];
3514                     int score= distortion + length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3515                     score += score_tab[i-run];
3516
3517                     if(score < best_score){
3518                         best_score= score;
3519                         run_tab[i+1]= run;
3520                         level_tab[i+1]= level-64;
3521                     }
3522                 }
3523
                     /* for FMT_H263 the last coefficient uses its own length
                      * table, so the best block end is tracked on the fly */
3524                 if(s->out_format == FMT_H263){
3525                     for(j=survivor_count-1; j>=0; j--){
3526                         int run= i - survivor[j];
3527                         int score= distortion + last_length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3528                         score += score_tab[i-run];
3529                         if(score < last_score){
3530                             last_score= score;
3531                             last_run= run;
3532                             last_level= level-64;
3533                             last_i= i+1;
3534                         }
3535                     }
3536                 }
3537             }else{
3538                 distortion += esc_length*lambda;
3539                 for(j=survivor_count-1; j>=0; j--){
3540                     int run= i - survivor[j];
3541                     int score= distortion + score_tab[i-run];
3542
3543                     if(score < best_score){
3544                         best_score= score;
3545                         run_tab[i+1]= run;
3546                         level_tab[i+1]= level-64;
3547                     }
3548                 }
3549
3550                 if(s->out_format == FMT_H263){
3551                   for(j=survivor_count-1; j>=0; j--){
3552                         int run= i - survivor[j];
3553                         int score= distortion + score_tab[i-run];
3554                         if(score < last_score){
3555                             last_score= score;
3556                             last_run= run;
3557                             last_level= level-64;
3558                             last_i= i+1;
3559                         }
3560                     }
3561                 }
3562             }
3563         }
3564
3565         score_tab[i+1]= best_score;
3566
             /* drop the most recent survivors whose score is worse than the new best one */
3567         //Note: there is a vlc code in mpeg4 which is 1 bit shorter than another one with a shorter run and the same level
3568         if(last_non_zero <= 27){
3569             for(; survivor_count; survivor_count--){
3570                 if(score_tab[ survivor[survivor_count-1] ] <= best_score)
3571                     break;
3572             }
3573         }else{
3574             for(; survivor_count; survivor_count--){
3575                 if(score_tab[ survivor[survivor_count-1] ] <= best_score + lambda)
3576                     break;
3577             }
3578         }
3579
3580         survivor[ survivor_count++ ]= i+1;
3581     }
3582
         /* other formats: choose the best end position now; every end
          * position except 0 is charged an extra 2*lambda */
3583     if(s->out_format != FMT_H263){
3584         last_score= 256*256*256*120;
3585         for(i= survivor[0]; i<=last_non_zero + 1; i++){
3586             int score= score_tab[i];
3587             if(i) score += lambda*2; //FIXME exacter?
3588
3589             if(score < last_score){
3590                 last_score= score;
3591                 last_i= i;
3592                 last_level= level_tab[i];
3593                 last_run= run_tab[i];
3594             }
3595         }
3596     }
3597
3598     s->coded_score[n] = last_score;
3599
         /* remember |block[0]| for the special case below, then clear all
          * coefficients from start_i on; the chosen ones are written back */
3600     dc= FFABS(block[0]);
3601     last_non_zero= last_i - 1;
3602     memset(block + start_i, 0, (64-start_i)*sizeof(DCTELEM));
3603
3604     if(last_non_zero < start_i)
3605         return last_non_zero;
3606
         /* Only position 0 of a non-intra block is left: decide again between
          * its candidates and dropping the coefficient (returns -1 then). */
3607     if(last_non_zero == 0 && start_i == 0){
3608         int best_level= 0;
3609         int best_score= dc * dc;
3610
3611         for(i=0; i<coeff_count[0]; i++){
3612             int level= coeff[i][0];
3613             int alevel= FFABS(level);
3614             int unquant_coeff, score, distortion;
3615
3616             if(s->out_format == FMT_H263){
3617                     unquant_coeff= (alevel*qmul + qadd)>>3;
3618             }else{ //MPEG1
3619                     unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[0])) >> 4;
3620                     unquant_coeff =   (unquant_coeff - 1) | 1;
3621             }
3622             unquant_coeff = (unquant_coeff + 4) >> 3;
3623             unquant_coeff<<= 3 + 3;
3624
3625             distortion= (unquant_coeff - dc) * (unquant_coeff - dc);
3626             level+=64;
3627             if((level&(~127)) == 0) score= distortion + last_length[UNI_AC_ENC_INDEX(0, level)]*lambda;
3628             else                    score= distortion + esc_length*lambda;
3629
3630             if(score < best_score){
3631                 best_score= score;
3632                 best_level= level - 64;
3633             }
3634         }
3635         block[0]= best_level;
3636         s->coded_score[n] = best_score - dc*dc;
3637         if(best_level == 0) return -1;
3638         else                return last_non_zero;
3639     }
3640
         /* write the last coefficient, then follow run_tab/level_tab backwards
          * to fill in the remaining levels of the chosen chain */
3641     i= last_i;
3642     assert(last_level);
3643
3644     block[ perm_scantable[last_non_zero] ]= last_level;
3645     i -= last_run + 1;
3646
3647     for(; i>start_i; i -= run_tab[i] + 1){
3648         block[ perm_scantable[i-1] ]= level_tab[i];
3649     }
3650
3651     return last_non_zero;
3652 }
3653
3654 //#define REFINE_STATS 1
3655 static int16_t basis[64][64];
3656
3657 static void build_basis(uint8_t *perm){
3658     int i, j, x, y;
3659     emms_c();
3660     for(i=0; i<8; i++){
3661         for(j=0; j<8; j++){
3662             for(y=0; y<8; y++){
3663                 for(x=0; x<8; x++){
3664                     double s= 0.25*(1<<BASIS_SHIFT);
3665                     int index= 8*i + j;
3666                     int perm_index= perm[index];
3667                     if(i==0) s*= sqrt(0.5);
3668                     if(j==0) s*= sqrt(0.5);
3669                     basis[perm_index][8*x + y]= lrintf(s * cos((M_PI/8.0)*i*(x+0.5)) * cos((M_PI/8.0)*j*(y+0.5)));
3670                 }
3671             }
3672         }
3673     }
3674 }
3675
3676 static int dct_quantize_refine(MpegEncContext *s, //FIXME breaks denoise?
3677                         DCTELEM *block, int16_t *weight, DCTELEM *orig,
3678                         int n, int qscale){
3679     int16_t rem[64];
3680     LOCAL_ALIGNED_16(DCTELEM, d1, [64]);
3681     const uint8_t *scantable= s->intra_scantable.scantable;
3682     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3683 //    unsigned int threshold1, threshold2;
3684 //    int bias=0;
3685     int run_tab[65];
3686     int prev_run=0;
3687     int prev_level=0;
3688     int qmul, qadd, start_i, last_non_zero, i, dc;
3689     uint8_t * length;
3690     uint8_t * last_length;
3691     int lambda;
3692     int rle_index, run, q = 1, sum; //q is only used when s->mb_intra is true
3693 #ifdef REFINE_STATS
3694 static int count=0;
3695 static int after_last=0;
3696 static int to_zero=0;
3697 static int from_zero=0;
3698 static int raise=0;
3699 static int lower=0;
3700 static int messed_sign=0;
3701 #endif
3702
3703     if(basis[0][0] == 0)
3704         build_basis(s->dsp.idct_permutation);
3705
3706     qmul= qscale*2;
3707     qadd= (qscale-1)|1;
3708     if (s->mb_intra) {
3709         if (!s->h263_aic) {
3710             if (n < 4)
3711                 q = s->y_dc_scale;
3712             else
3713                 q = s->c_dc_scale;
3714         } else{
3715             /* For AIC we skip quant/dequant of INTRADC */
3716             q = 1;
3717             qadd=0;
3718         }
3719         q <<= RECON_SHIFT-3;
3720         /* note: block[0] is assumed to be positive */
3721         dc= block[0]*q;
3722 //        block[0] = (block[0] + (q >> 1)) / q;
3723         start_i = 1;
3724 //        if(s->mpeg_quant || s->out_format == FMT_MPEG1)
3725 //            bias= 1<<(QMAT_SHIFT-1);
3726         length     = s->intra_ac_vlc_length;
3727         last_length= s->intra_ac_vlc_last_length;
3728     } else {
3729         dc= 0;
3730         start_i = 0;
3731         length     = s->inter_ac_vlc_length;
3732         last_length= s->inter_ac_vlc_last_length;
3733     }
3734     last_non_zero = s->block_last_index[n];
3735
3736 #ifdef REFINE_STATS
3737 {START_TIMER
3738 #endif
3739     dc += (1<<(RECON_SHIFT-1));
3740     for(i=0; i<64; i++){
3741         rem[i]= dc - (orig[i]<<RECON_SHIFT); //FIXME  use orig dirrectly instead of copying to rem[]
3742     }
3743 #ifdef REFINE_STATS
3744 STOP_TIMER("memset rem[]")}
3745 #endif
3746     sum=0;
3747     for(i=0; i<64; i++){
3748         int one= 36;
3749         int qns=4;
3750         int w;
3751
3752         w= FFABS(weight[i]) + qns*one;
3753         w= 15 + (48*qns*one + w/2)/w; // 16 .. 63
3754
3755         weight[i] = w;
3756 //        w=weight[i] = (63*qns + (w/2)) / w;
3757
3758         assert(w>0);
3759         assert(w<(1<<6));
3760         sum += w*w;
3761     }
3762     lambda= sum*(uint64_t)s->lambda2 >> (FF_LAMBDA_SHIFT - 6 + 6 + 6 + 6);
3763 #ifdef REFINE_STATS
3764 {START_TIMER
3765 #endif
3766     run=0;
3767     rle_index=0;
3768     for(i=start_i; i<=last_non_zero; i++){
3769         int j= perm_scantable[i];
3770         const int level= block[j];
3771         int coeff;
3772
3773         if(level){
3774             if(level<0) coeff= qmul*level - qadd;
3775             else        coeff= qmul*level + qadd;
3776             run_tab[rle_index++]=run;
3777             run=0;
3778
3779             s->dsp.add_8x8basis(rem, basis[j], coeff);
3780         }else{
3781             run++;
3782         }
3783     }
3784 #ifdef REFINE_STATS
3785 if(last_non_zero>0){
3786 STOP_TIMER("init rem[]")
3787 }
3788 }
3789
3790 {START_TIMER
3791 #endif
3792     for(;;){
3793         int best_score=s->dsp.try_8x8basis(rem, weight, basis[0], 0);
3794         int best_coeff=0;
3795         int best_change=0;
3796         int run2, best_unquant_change=0, analyze_gradient;
3797 #ifdef REFINE_STATS
3798 {START_TIMER
3799 #endif
3800         analyze_gradient = last_non_zero > 2 || s->quantizer_noise_shaping >= 3;
3801
3802         if(analyze_gradient){
3803 #ifdef REFINE_STATS
3804 {START_TIMER
3805 #endif
3806             for(i=0; i<64; i++){
3807                 int w= weight[i];
3808
3809                 d1[i] = (rem[i]*w*w + (1<<(RECON_SHIFT+12-1)))>>(RECON_SHIFT+12);
3810             }
3811 #ifdef REFINE_STATS
3812 STOP_TIMER("rem*w*w")}
3813 {START_TIMER
3814 #endif
3815             s->dsp.fdct(d1);
3816 #ifdef REFINE_STATS
3817 STOP_TIMER("dct")}
3818 #endif
3819         }
3820
3821         if(start_i){
3822             const int level= block[0];
3823             int change, old_coeff;
3824
3825             assert(s->mb_intra);
3826
3827             old_coeff= q*level;
3828
3829             for(change=-1; change<=1; change+=2){
3830                 int new_level= level + change;
3831                 int score, new_coeff;
3832
3833                 new_coeff= q*new_level;
3834                 if(new_coeff >= 2048 || new_coeff < 0)
3835                     continue;
3836
3837                 score= s->dsp.try_8x8basis(rem, weight, basis[0], new_coeff - old_coeff);
3838                 if(score<best_score){
3839                     best_score= score;
3840                     best_coeff= 0;
3841                     best_change= change;
3842                     best_unquant_change= new_coeff - old_coeff;
3843                 }
3844             }
3845         }
3846
3847         run=0;
3848         rle_index=0;
3849         run2= run_tab[rle_index++];
3850         prev_level=0;
3851         prev_run=0;
3852
3853         for(i=start_i; i<64; i++){
3854             int j= perm_scantable[i];
3855             const int level= block[j];
3856             int change, old_coeff;
3857
3858             if(s->quantizer_noise_shaping < 3 && i > last_non_zero + 1)
3859                 break;
3860
3861             if(level){
3862                 if(level<0) old_coeff= qmul*level - qadd;
3863                 else        old_coeff= qmul*level + qadd;
3864                 run2= run_tab[rle_index++]; //FIXME ! maybe after last
3865             }else{
3866                 old_coeff=0;
3867                 run2--;
3868                 assert(run2>=0 || i >= last_non_zero );
3869             }
3870
3871             for(change=-1; change<=1; change+=2){
3872                 int new_level= level + change;
3873                 int score, new_coeff, unquant_change;
3874
3875                 score=0;
3876                 if(s->quantizer_noise_shaping < 2 && FFABS(new_level) > FFABS(level))
3877                    continue;
3878
3879                 if(new_level){
3880                     if(new_level<0) new_coeff= qmul*new_level - qadd;
3881                     else            new_coeff= qmul*new_level + qadd;
3882                     if(new_coeff >= 2048 || new_coeff <= -2048)
3883                         continue;
3884                     //FIXME check for overflow
3885
3886                     if(level){
3887                         if(level < 63 && level > -63){
3888                             if(i < last_non_zero)
3889                                 score +=   length[UNI_AC_ENC_INDEX(run, new_level+64)]
3890                                          - length[UNI_AC_ENC_INDEX(run, level+64)];
3891                             else
3892                                 score +=   last_length[UNI_AC_ENC_INDEX(run, new_level+64)]
3893                                          - last_length[UNI_AC_ENC_INDEX(run, level+64)];
3894                         }
3895                     }else{
3896                         assert(FFABS(new_level)==1);
3897
3898                         if(analyze_gradient){
3899                             int g= d1[ scantable[i] ];
3900                             if(g && (g^new_level) >= 0)
3901                                 continue;
3902                         }
3903
3904                         if(i < last_non_zero){
3905                             int next_i= i + run2 + 1;
3906                             int next_level= block[ perm_scantable[next_i] ] + 64;
3907
3908                             if(next_level&(~127))
3909                                 next_level= 0;
3910
3911                             if(next_i < last_non_zero)
3912                                 score +=   length[UNI_AC_ENC_INDEX(run, 65)]
3913                                          + length[UNI_AC_ENC_INDEX(run2, next_level)]
3914                                          - length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
3915                             else
3916                                 score +=  length[UNI_AC_ENC_INDEX(run, 65)]
3917                                         + last_length[UNI_AC_ENC_INDEX(run2, next_level)]
3918                                         - last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
3919                         }else{
3920                             score += last_length[UNI_AC_ENC_INDEX(run, 65)];
3921                             if(prev_level){
3922                                 score +=  length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
3923                                         - last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
3924                             }
3925                         }
3926                     }
3927                 }else{
3928                     new_coeff=0;
3929                     assert(FFABS(level)==1);
3930
3931                     if(i < last_non_zero){
3932                         int next_i= i + run2 + 1;
3933                         int next_level= block[ perm_scantable[next_i] ] + 64;
3934
3935                         if(next_level&(~127))
3936                             next_level= 0;
3937
3938                         if(next_i < last_non_zero)
3939                             score +=   length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
3940                                      - length[UNI_AC_ENC_INDEX(run2, next_level)]
3941                                      - length[UNI_AC_ENC_INDEX(run, 65)];
3942                         else
3943                             score +=   last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
3944                                      - last_length[UNI_AC_ENC_INDEX(run2, next_level)]
3945                                      - length[UNI_AC_ENC_INDEX(run, 65)];
3946                     }else{
3947                         score += -last_length[UNI_AC_ENC_INDEX(run, 65)];
3948                         if(prev_level){
3949                             score +=  last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
3950                                     - length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
3951                         }
3952                     }
3953                 }
3954
3955                 score *= lambda;
3956
3957                 unquant_change= new_coeff - old_coeff;
3958                 assert((score < 100*lambda && score > -100*lambda) || lambda==0);
3959
3960                 score+= s->dsp.try_8x8basis(rem, weight, basis[j], unquant_change);
3961                 if(score<best_score){
3962                     best_score= score;
3963                     best_coeff= i;
3964                     best_change= change;
3965                     best_unquant_change= unquant_change;
3966                 }
3967             }
3968             if(level){
3969                 prev_level= level + 64;
3970                 if(prev_level&(~127))
3971                     prev_level= 0;
3972                 prev_run= run;
3973                 run=0;
3974             }else{
3975                 run++;
3976             }
3977         }
3978 #ifdef REFINE_STATS
3979 STOP_TIMER("iterative step")}
3980 #endif
3981
3982         if(best_change){
3983             int j= perm_scantable[ best_coeff ];
3984
3985             block[j] += best_change;
3986
3987             if(best_coeff > last_non_zero){
3988                 last_non_zero= best_coeff;
3989                 assert(block[j]);
3990 #ifdef REFINE_STATS
3991 after_last++;
3992 #endif
3993             }else{
3994 #ifdef REFINE_STATS
3995 if(block[j]){
3996     if(block[j] - best_change){
3997         if(FFABS(block[j]) > FFABS(block[j] - best_change)){
3998             raise++;
3999         }else{
4000             lower++;
4001         }
4002     }else{
4003         from_zero++;
4004     }
4005 }else{
4006     to_zero++;
4007 }
4008 #endif
4009                 for(; last_non_zero>=start_i; last_non_zero--){
4010                     if(block[perm_scantable[last_non_zero]])
4011                         break;
4012                 }
4013             }
4014 #ifdef REFINE_STATS
4015 count++;
4016 if(256*256*256*64 % count == 0){
4017     printf("after_last:%d to_zero:%d from_zero:%d raise:%d lower:%d sign:%d xyp:%d/%d/%d\n", after_last, to_zero, from_zero, raise, lower, messed_sign, s->mb_x, s->mb_y, s->picture_number);
4018 }
4019 #endif
4020             run=0;
4021             rle_index=0;
4022             for(i=start_i; i<=last_non_zero; i++){
4023                 int j= perm_scantable[i];
4024                 const int level= block[j];
4025
4026                  if(level){
4027                      run_tab[rle_index++]=run;
4028                      run=0;
4029                  }else{
4030                      run++;
4031                  }
4032             }
4033
4034             s->dsp.add_8x8basis(rem, basis[j], best_unquant_change);
4035         }else{
4036             break;
4037         }
4038     }
4039 #ifdef REFINE_STATS
4040 if(last_non_zero>0){
4041 STOP_TIMER("iterative search")
4042 }
4043 }
4044 #endif
4045
4046     return last_non_zero;
4047 }
4048
4049 int ff_dct_quantize_c(MpegEncContext *s,
4050                         DCTELEM *block, int n,
4051                         int qscale, int *overflow)
4052 {
4053     int i, j, level, last_non_zero, q, start_i;
4054     const int *qmat;
4055     const uint8_t *scantable= s->intra_scantable.scantable;
4056     int bias;
4057     int max=0;
4058     unsigned int threshold1, threshold2;
4059
4060     s->dsp.fdct (block);
4061
4062     if(s->dct_error_sum)
4063         s->denoise_dct(s, block);
4064
4065     if (s->mb_intra) {
4066         if (!s->h263_aic) {
4067             if (n < 4)
4068                 q = s->y_dc_scale;
4069             else
4070                 q = s->c_dc_scale;
4071             q = q << 3;
4072         } else
4073             /* For AIC we skip quant/dequant of INTRADC */
4074             q = 1 << 3;
4075
4076         /* note: block[0] is assumed to be positive */
4077         block[0] = (block[0] + (q >> 1)) / q;
4078         start_i = 1;
4079         last_non_zero = 0;
4080         qmat = s->q_intra_matrix[qscale];
4081         bias= s->intra_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4082     } else {
4083         start_i = 0;
4084         last_non_zero = -1;
4085         qmat = s->q_inter_matrix[qscale];
4086         bias= s->inter_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4087     }
4088     threshold1= (1<<QMAT_SHIFT) - bias - 1;
4089     threshold2= (threshold1<<1);
4090     for(i=63;i>=start_i;i--) {
4091         j = scantable[i];
4092         level = block[j] * qmat[j];
4093
4094         if(((unsigned)(level+threshold1))>threshold2){
4095             last_non_zero = i;
4096             break;
4097         }else{
4098             block[j]=0;
4099         }
4100     }
4101     for(i=start_i; i<=last_non_zero; i++) {
4102         j = scantable[i];
4103         level = block[j] * qmat[j];
4104
4105 //        if(   bias+level >= (1<<QMAT_SHIFT)
4106 //           || bias-level >= (1<<QMAT_SHIFT)){
4107         if(((unsigned)(level+threshold1))>threshold2){
4108             if(level>0){
4109                 level= (bias + level)>>QMAT_SHIFT;
4110                 block[j]= level;
4111             }else{
4112                 level= (bias - level)>>QMAT_SHIFT;
4113                 block[j]= -level;
4114             }
4115             max |=level;
4116         }else{
4117             block[j]=0;
4118         }
4119     }
4120     *overflow= s->max_qcoeff < max; //overflow might have happened
4121
4122     /* we need this permutation so that we correct the IDCT, we only permute the !=0 elements */
4123     if (s->dsp.idct_permutation_type != FF_NO_IDCT_PERM)
4124         ff_block_permute(block, s->dsp.idct_permutation, scantable, last_non_zero);
4125
4126     return last_non_zero;
4127 }
4128
/* Byte offset of field x inside MpegEncContext, for use in AVOption tables. */
#define OFFSET(x) offsetof(MpegEncContext, x)
/* Flags for a video encoding option.  The expansion is parenthesized so it
 * stays a single operand next to operators that bind tighter than '|'. */
#define VE (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM)
4131 static const AVOption h263_options[] = {
4132     { "obmc",         "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4133     { "structured_slices","Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4134     { "mb_info",      "emit macroblock info for RFC 2190 packetization, the parameter value is the maximum payload size", OFFSET(mb_info), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VE },
4135     FF_MPV_COMMON_OPTS
4136     { NULL },
4137 };
4138
4139 static const AVClass h263_class = {
4140     .class_name = "H.263 encoder",
4141     .item_name  = av_default_item_name,
4142     .option     = h263_options,
4143     .version    = LIBAVUTIL_VERSION_INT,
4144 };
4145
4146 AVCodec ff_h263_encoder = {
4147     .name           = "h263",
4148     .type           = AVMEDIA_TYPE_VIDEO,
4149     .id             = AV_CODEC_ID_H263,
4150     .priv_data_size = sizeof(MpegEncContext),
4151     .init           = ff_MPV_encode_init,
4152     .encode2        = ff_MPV_encode_picture,
4153     .close          = ff_MPV_encode_end,
4154     .pix_fmts= (const enum PixelFormat[]){PIX_FMT_YUV420P, PIX_FMT_NONE},
4155     .long_name= NULL_IF_CONFIG_SMALL("H.263 / H.263-1996"),
4156     .priv_class     = &h263_class,
4157 };
4158
4159 static const AVOption h263p_options[] = {
4160     { "umv",        "Use unlimited motion vectors.",    OFFSET(umvplus), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4161     { "aiv",        "Use alternative inter VLC.",       OFFSET(alt_inter_vlc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4162     { "obmc",       "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4163     { "structured_slices", "Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4164     FF_MPV_COMMON_OPTS
4165     { NULL },
4166 };
4167 static const AVClass h263p_class = {
4168     .class_name = "H.263p encoder",
4169     .item_name  = av_default_item_name,
4170     .option     = h263p_options,
4171     .version    = LIBAVUTIL_VERSION_INT,
4172 };
4173
4174 AVCodec ff_h263p_encoder = {
4175     .name           = "h263p",
4176     .type           = AVMEDIA_TYPE_VIDEO,
4177     .id             = AV_CODEC_ID_H263P,
4178     .priv_data_size = sizeof(MpegEncContext),
4179     .init           = ff_MPV_encode_init,
4180     .encode2        = ff_MPV_encode_picture,
4181     .close          = ff_MPV_encode_end,
4182     .capabilities   = CODEC_CAP_SLICE_THREADS,
4183     .pix_fmts       = (const enum PixelFormat[]){ PIX_FMT_YUV420P, PIX_FMT_NONE },
4184     .long_name      = NULL_IF_CONFIG_SMALL("H.263+ / H.263-1998 / H.263 version 2"),
4185     .priv_class     = &h263p_class,
4186 };
4187
4188 FF_MPV_GENERIC_CLASS(msmpeg4v2)
4189
4190 AVCodec ff_msmpeg4v2_encoder = {
4191     .name           = "msmpeg4v2",
4192     .type           = AVMEDIA_TYPE_VIDEO,
4193     .id             = AV_CODEC_ID_MSMPEG4V2,
4194     .priv_data_size = sizeof(MpegEncContext),
4195     .init           = ff_MPV_encode_init,
4196     .encode2        = ff_MPV_encode_picture,
4197     .close          = ff_MPV_encode_end,
4198     .pix_fmts       = (const enum PixelFormat[]){ PIX_FMT_YUV420P, PIX_FMT_NONE },
4199     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 2"),
4200     .priv_class     = &msmpeg4v2_class,
4201 };
4202
4203 FF_MPV_GENERIC_CLASS(msmpeg4v3)
4204
4205 AVCodec ff_msmpeg4v3_encoder = {
4206     .name           = "msmpeg4",
4207     .type           = AVMEDIA_TYPE_VIDEO,
4208     .id             = AV_CODEC_ID_MSMPEG4V3,
4209     .priv_data_size = sizeof(MpegEncContext),
4210     .init           = ff_MPV_encode_init,
4211     .encode2        = ff_MPV_encode_picture,
4212     .close          = ff_MPV_encode_end,
4213     .pix_fmts       = (const enum PixelFormat[]){ PIX_FMT_YUV420P, PIX_FMT_NONE },
4214     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 3"),
4215     .priv_class     = &msmpeg4v3_class,
4216 };
4217
4218 FF_MPV_GENERIC_CLASS(wmv1)
4219
4220 AVCodec ff_wmv1_encoder = {
4221     .name           = "wmv1",
4222     .type           = AVMEDIA_TYPE_VIDEO,
4223     .id             = AV_CODEC_ID_WMV1,
4224     .priv_data_size = sizeof(MpegEncContext),
4225     .init           = ff_MPV_encode_init,
4226     .encode2        = ff_MPV_encode_picture,
4227     .close          = ff_MPV_encode_end,
4228     .pix_fmts       = (const enum PixelFormat[]){ PIX_FMT_YUV420P, PIX_FMT_NONE },
4229     .long_name      = NULL_IF_CONFIG_SMALL("Windows Media Video 7"),
4230     .priv_class     = &wmv1_class,
4231 };