]> git.sesse.net Git - ffmpeg/blob - libavcodec/mpegvideo_enc.c
dsputil: Move LOCAL_ALIGNED macros to libavutil
[ffmpeg] / libavcodec / mpegvideo_enc.c
1 /*
2  * The simplest mpeg encoder (well, it was the simplest!)
3  * Copyright (c) 2000,2001 Fabrice Bellard
4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5  *
6  * 4MV & hq & B-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
7  *
8  * This file is part of Libav.
9  *
10  * Libav is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * Libav is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with Libav; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23  */
24
25 /**
26  * @file
27  * The simplest mpeg encoder (well, it was the simplest!).
28  */
29
30 #include "libavutil/internal.h"
31 #include "libavutil/intmath.h"
32 #include "libavutil/mathematics.h"
33 #include "libavutil/pixdesc.h"
34 #include "libavutil/opt.h"
35 #include "avcodec.h"
36 #include "dsputil.h"
37 #include "mpegvideo.h"
38 #include "h263.h"
39 #include "mathops.h"
40 #include "mjpegenc.h"
41 #include "msmpeg4.h"
42 #include "faandct.h"
43 #include "thread.h"
44 #include "aandcttab.h"
45 #include "flv.h"
46 #include "mpeg4video.h"
47 #include "internal.h"
48 #include "bytestream.h"
49 #include <limits.h>
50
51 //#undef NDEBUG
52 //#include <assert.h>
53
54 static int encode_picture(MpegEncContext *s, int picture_number);
55 static int dct_quantize_refine(MpegEncContext *s, int16_t *block, int16_t *weight, int16_t *orig, int n, int qscale);
56 static int sse_mb(MpegEncContext *s);
57 static void denoise_dct_c(MpegEncContext *s, int16_t *block);
58 static int dct_quantize_trellis_c(MpegEncContext *s, int16_t *block, int n, int qscale, int *overflow);
59
60 /* enable all paranoid tests for rounding, overflows, etc... */
61 //#define PARANOID
62
63 //#define DEBUG
64
65 static uint8_t default_mv_penalty[MAX_FCODE + 1][MAX_MV * 2 + 1];
66 static uint8_t default_fcode_tab[MAX_MV * 2 + 1];
67
68 const AVOption ff_mpv_generic_options[] = {
69     FF_MPV_COMMON_OPTS
70     { NULL },
71 };
72
73 void ff_convert_matrix(DSPContext *dsp, int (*qmat)[64],
74                        uint16_t (*qmat16)[2][64],
75                        const uint16_t *quant_matrix,
76                        int bias, int qmin, int qmax, int intra)
77 {
78     int qscale;
79     int shift = 0;
80
81     for (qscale = qmin; qscale <= qmax; qscale++) {
82         int i;
83         if (dsp->fdct == ff_jpeg_fdct_islow_8 ||
84             dsp->fdct == ff_jpeg_fdct_islow_10 ||
85             dsp->fdct == ff_faandct) {
86             for (i = 0; i < 64; i++) {
87                 const int j = dsp->idct_permutation[i];
88                 /* 16 <= qscale * quant_matrix[i] <= 7905
89                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
90                  *             19952 <=              x  <= 249205026
91                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
92                  *           3444240 >= (1 << 36) / (x) >= 275 */
93
94                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
95                                         (qscale * quant_matrix[j]));
96             }
97         } else if (dsp->fdct == ff_fdct_ifast) {
98             for (i = 0; i < 64; i++) {
99                 const int j = dsp->idct_permutation[i];
100                 /* 16 <= qscale * quant_matrix[i] <= 7905
101                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
102                  *             19952 <=              x  <= 249205026
103                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
104                  *           3444240 >= (1 << 36) / (x) >= 275 */
105
106                 qmat[qscale][i] = (int)((UINT64_C(1) << (QMAT_SHIFT + 14)) /
107                                         (ff_aanscales[i] * qscale *
108                                          quant_matrix[j]));
109             }
110         } else {
111             for (i = 0; i < 64; i++) {
112                 const int j = dsp->idct_permutation[i];
113                 /* We can safely suppose that 16 <= quant_matrix[i] <= 255
114                  * Assume x = qscale * quant_matrix[i]
115                  * So             16 <=              x  <= 7905
116                  * so (1 << 19) / 16 >= (1 << 19) / (x) >= (1 << 19) / 7905
117                  * so          32768 >= (1 << 19) / (x) >= 67 */
118                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
119                                         (qscale * quant_matrix[j]));
120                 //qmat  [qscale][i] = (1 << QMAT_SHIFT_MMX) /
121                 //                    (qscale * quant_matrix[i]);
122                 qmat16[qscale][0][i] = (1 << QMAT_SHIFT_MMX) /
123                                        (qscale * quant_matrix[j]);
124
125                 if (qmat16[qscale][0][i] == 0 ||
126                     qmat16[qscale][0][i] == 128 * 256)
127                     qmat16[qscale][0][i] = 128 * 256 - 1;
128                 qmat16[qscale][1][i] =
129                     ROUNDED_DIV(bias << (16 - QUANT_BIAS_SHIFT),
130                                 qmat16[qscale][0][i]);
131             }
132         }
133
134         for (i = intra; i < 64; i++) {
135             int64_t max = 8191;
136             if (dsp->fdct == ff_fdct_ifast) {
137                 max = (8191LL * ff_aanscales[i]) >> 14;
138             }
139             while (((max * qmat[qscale][i]) >> shift) > INT_MAX) {
140                 shift++;
141             }
142         }
143     }
144     if (shift) {
145         av_log(NULL, AV_LOG_INFO,
146                "Warning, QMAT_SHIFT is larger than %d, overflows possible\n",
147                QMAT_SHIFT - shift);
148     }
149 }
150
151 static inline void update_qscale(MpegEncContext *s)
152 {
153     s->qscale = (s->lambda * 139 + FF_LAMBDA_SCALE * 64) >>
154                 (FF_LAMBDA_SHIFT + 7);
155     s->qscale = av_clip(s->qscale, s->avctx->qmin, s->avctx->qmax);
156
157     s->lambda2 = (s->lambda * s->lambda + FF_LAMBDA_SCALE / 2) >>
158                  FF_LAMBDA_SHIFT;
159 }
160
161 void ff_write_quant_matrix(PutBitContext *pb, uint16_t *matrix)
162 {
163     int i;
164
165     if (matrix) {
166         put_bits(pb, 1, 1);
167         for (i = 0; i < 64; i++) {
168             put_bits(pb, 8, matrix[ff_zigzag_direct[i]]);
169         }
170     } else
171         put_bits(pb, 1, 0);
172 }
173
174 /**
175  * init s->current_picture.qscale_table from s->lambda_table
176  */
177 void ff_init_qscale_tab(MpegEncContext *s)
178 {
179     int8_t * const qscale_table = s->current_picture.f.qscale_table;
180     int i;
181
182     for (i = 0; i < s->mb_num; i++) {
183         unsigned int lam = s->lambda_table[s->mb_index2xy[i]];
184         int qp = (lam * 139 + FF_LAMBDA_SCALE * 64) >> (FF_LAMBDA_SHIFT + 7);
185         qscale_table[s->mb_index2xy[i]] = av_clip(qp, s->avctx->qmin,
186                                                   s->avctx->qmax);
187     }
188 }
189
190 static void copy_picture_attributes(MpegEncContext *s, AVFrame *dst,
191                                     const AVFrame *src)
192 {
193     int i;
194
195     dst->pict_type              = src->pict_type;
196     dst->quality                = src->quality;
197     dst->coded_picture_number   = src->coded_picture_number;
198     dst->display_picture_number = src->display_picture_number;
199     //dst->reference              = src->reference;
200     dst->pts                    = src->pts;
201     dst->interlaced_frame       = src->interlaced_frame;
202     dst->top_field_first        = src->top_field_first;
203
204     if (s->avctx->me_threshold) {
205         if (!src->motion_val[0])
206             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.motion_val not set!\n");
207         if (!src->mb_type)
208             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.mb_type not set!\n");
209         if (!src->ref_index[0])
210             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.ref_index not set!\n");
211         if (src->motion_subsample_log2 != dst->motion_subsample_log2)
212             av_log(s->avctx, AV_LOG_ERROR,
213                    "AVFrame.motion_subsample_log2 doesn't match! (%d!=%d)\n",
214                    src->motion_subsample_log2, dst->motion_subsample_log2);
215
216         memcpy(dst->mb_type, src->mb_type,
217                s->mb_stride * s->mb_height * sizeof(dst->mb_type[0]));
218
219         for (i = 0; i < 2; i++) {
220             int stride = ((16 * s->mb_width ) >>
221                           src->motion_subsample_log2) + 1;
222             int height = ((16 * s->mb_height) >> src->motion_subsample_log2);
223
224             if (src->motion_val[i] &&
225                 src->motion_val[i] != dst->motion_val[i]) {
226                 memcpy(dst->motion_val[i], src->motion_val[i],
227                        2 * stride * height * sizeof(int16_t));
228             }
229             if (src->ref_index[i] && src->ref_index[i] != dst->ref_index[i]) {
230                 memcpy(dst->ref_index[i], src->ref_index[i],
231                        s->mb_stride * 4 * s->mb_height * sizeof(int8_t));
232             }
233         }
234     }
235 }
236
237 static void update_duplicate_context_after_me(MpegEncContext *dst,
238                                               MpegEncContext *src)
239 {
240 #define COPY(a) dst->a= src->a
241     COPY(pict_type);
242     COPY(current_picture);
243     COPY(f_code);
244     COPY(b_code);
245     COPY(qscale);
246     COPY(lambda);
247     COPY(lambda2);
248     COPY(picture_in_gop_number);
249     COPY(gop_picture_number);
250     COPY(frame_pred_frame_dct); // FIXME don't set in encode_header
251     COPY(progressive_frame);    // FIXME don't set in encode_header
252     COPY(partitioned_frame);    // FIXME don't set in encode_header
253 #undef COPY
254 }
255
256 /**
257  * Set the given MpegEncContext to defaults for encoding.
258  * the changed fields will not depend upon the prior state of the MpegEncContext.
259  */
260 static void MPV_encode_defaults(MpegEncContext *s)
261 {
262     int i;
263     ff_MPV_common_defaults(s);
264
265     for (i = -16; i < 16; i++) {
266         default_fcode_tab[i + MAX_MV] = 1;
267     }
268     s->me.mv_penalty = default_mv_penalty;
269     s->fcode_tab     = default_fcode_tab;
270 }
271
272 /* init video encoder */
273 av_cold int ff_MPV_encode_init(AVCodecContext *avctx)
274 {
275     MpegEncContext *s = avctx->priv_data;
276     int i;
277     int chroma_h_shift, chroma_v_shift;
278
279     MPV_encode_defaults(s);
280
281     switch (avctx->codec_id) {
282     case AV_CODEC_ID_MPEG2VIDEO:
283         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P &&
284             avctx->pix_fmt != AV_PIX_FMT_YUV422P) {
285             av_log(avctx, AV_LOG_ERROR,
286                    "only YUV420 and YUV422 are supported\n");
287             return -1;
288         }
289         break;
290     case AV_CODEC_ID_LJPEG:
291         if (avctx->pix_fmt != AV_PIX_FMT_YUVJ420P &&
292             avctx->pix_fmt != AV_PIX_FMT_YUVJ422P &&
293             avctx->pix_fmt != AV_PIX_FMT_YUVJ444P &&
294             avctx->pix_fmt != AV_PIX_FMT_BGRA     &&
295             ((avctx->pix_fmt != AV_PIX_FMT_YUV420P &&
296               avctx->pix_fmt != AV_PIX_FMT_YUV422P &&
297               avctx->pix_fmt != AV_PIX_FMT_YUV444P) ||
298              avctx->strict_std_compliance > FF_COMPLIANCE_UNOFFICIAL)) {
299             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in LJPEG\n");
300             return -1;
301         }
302         break;
303     case AV_CODEC_ID_MJPEG:
304         if (avctx->pix_fmt != AV_PIX_FMT_YUVJ420P &&
305             avctx->pix_fmt != AV_PIX_FMT_YUVJ422P &&
306             ((avctx->pix_fmt != AV_PIX_FMT_YUV420P &&
307               avctx->pix_fmt != AV_PIX_FMT_YUV422P) ||
308              avctx->strict_std_compliance > FF_COMPLIANCE_UNOFFICIAL)) {
309             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in jpeg\n");
310             return -1;
311         }
312         break;
313     default:
314         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P) {
315             av_log(avctx, AV_LOG_ERROR, "only YUV420 is supported\n");
316             return -1;
317         }
318     }
319
320     switch (avctx->pix_fmt) {
321     case AV_PIX_FMT_YUVJ422P:
322     case AV_PIX_FMT_YUV422P:
323         s->chroma_format = CHROMA_422;
324         break;
325     case AV_PIX_FMT_YUVJ420P:
326     case AV_PIX_FMT_YUV420P:
327     default:
328         s->chroma_format = CHROMA_420;
329         break;
330     }
331
332     s->bit_rate = avctx->bit_rate;
333     s->width    = avctx->width;
334     s->height   = avctx->height;
335     if (avctx->gop_size > 600 &&
336         avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL) {
337         av_log(avctx, AV_LOG_ERROR,
338                "Warning keyframe interval too large! reducing it ...\n");
339         avctx->gop_size = 600;
340     }
341     s->gop_size     = avctx->gop_size;
342     s->avctx        = avctx;
343     s->flags        = avctx->flags;
344     s->flags2       = avctx->flags2;
345     s->max_b_frames = avctx->max_b_frames;
346     s->codec_id     = avctx->codec->id;
347 #if FF_API_MPV_GLOBAL_OPTS
348     if (avctx->luma_elim_threshold)
349         s->luma_elim_threshold   = avctx->luma_elim_threshold;
350     if (avctx->chroma_elim_threshold)
351         s->chroma_elim_threshold = avctx->chroma_elim_threshold;
352 #endif
353     s->strict_std_compliance = avctx->strict_std_compliance;
354     s->quarter_sample     = (avctx->flags & CODEC_FLAG_QPEL) != 0;
355     s->mpeg_quant         = avctx->mpeg_quant;
356     s->rtp_mode           = !!avctx->rtp_payload_size;
357     s->intra_dc_precision = avctx->intra_dc_precision;
358     s->user_specified_pts = AV_NOPTS_VALUE;
359
360     if (s->gop_size <= 1) {
361         s->intra_only = 1;
362         s->gop_size   = 12;
363     } else {
364         s->intra_only = 0;
365     }
366
367     s->me_method = avctx->me_method;
368
369     /* Fixed QSCALE */
370     s->fixed_qscale = !!(avctx->flags & CODEC_FLAG_QSCALE);
371
372 #if FF_API_MPV_GLOBAL_OPTS
373     if (s->flags & CODEC_FLAG_QP_RD)
374         s->mpv_flags |= FF_MPV_FLAG_QP_RD;
375 #endif
376
377     s->adaptive_quant = (s->avctx->lumi_masking ||
378                          s->avctx->dark_masking ||
379                          s->avctx->temporal_cplx_masking ||
380                          s->avctx->spatial_cplx_masking  ||
381                          s->avctx->p_masking      ||
382                          s->avctx->border_masking ||
383                          (s->mpv_flags & FF_MPV_FLAG_QP_RD)) &&
384                         !s->fixed_qscale;
385
386     s->loop_filter      = !!(s->flags & CODEC_FLAG_LOOP_FILTER);
387
388     if (avctx->rc_max_rate && !avctx->rc_buffer_size) {
389         av_log(avctx, AV_LOG_ERROR,
390                "a vbv buffer size is needed, "
391                "for encoding with a maximum bitrate\n");
392         return -1;
393     }
394
395     if (avctx->rc_min_rate && avctx->rc_max_rate != avctx->rc_min_rate) {
396         av_log(avctx, AV_LOG_INFO,
397                "Warning min_rate > 0 but min_rate != max_rate isn't recommended!\n");
398     }
399
400     if (avctx->rc_min_rate && avctx->rc_min_rate > avctx->bit_rate) {
401         av_log(avctx, AV_LOG_ERROR, "bitrate below min bitrate\n");
402         return -1;
403     }
404
405     if (avctx->rc_max_rate && avctx->rc_max_rate < avctx->bit_rate) {
406         av_log(avctx, AV_LOG_INFO, "bitrate above max bitrate\n");
407         return -1;
408     }
409
410     if (avctx->rc_max_rate &&
411         avctx->rc_max_rate == avctx->bit_rate &&
412         avctx->rc_max_rate != avctx->rc_min_rate) {
413         av_log(avctx, AV_LOG_INFO,
414                "impossible bitrate constraints, this will fail\n");
415     }
416
417     if (avctx->rc_buffer_size &&
418         avctx->bit_rate * (int64_t)avctx->time_base.num >
419             avctx->rc_buffer_size * (int64_t)avctx->time_base.den) {
420         av_log(avctx, AV_LOG_ERROR, "VBV buffer too small for bitrate\n");
421         return -1;
422     }
423
424     if (!s->fixed_qscale &&
425         avctx->bit_rate * av_q2d(avctx->time_base) >
426             avctx->bit_rate_tolerance) {
427         av_log(avctx, AV_LOG_ERROR,
428                "bitrate tolerance too small for bitrate\n");
429         return -1;
430     }
431
432     if (s->avctx->rc_max_rate &&
433         s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
434         (s->codec_id == AV_CODEC_ID_MPEG1VIDEO ||
435          s->codec_id == AV_CODEC_ID_MPEG2VIDEO) &&
436         90000LL * (avctx->rc_buffer_size - 1) >
437             s->avctx->rc_max_rate * 0xFFFFLL) {
438         av_log(avctx, AV_LOG_INFO,
439                "Warning vbv_delay will be set to 0xFFFF (=VBR) as the "
440                "specified vbv buffer is too large for the given bitrate!\n");
441     }
442
443     if ((s->flags & CODEC_FLAG_4MV)  && s->codec_id != AV_CODEC_ID_MPEG4 &&
444         s->codec_id != AV_CODEC_ID_H263 && s->codec_id != AV_CODEC_ID_H263P &&
445         s->codec_id != AV_CODEC_ID_FLV1) {
446         av_log(avctx, AV_LOG_ERROR, "4MV not supported by codec\n");
447         return -1;
448     }
449
450     if (s->obmc && s->avctx->mb_decision != FF_MB_DECISION_SIMPLE) {
451         av_log(avctx, AV_LOG_ERROR,
452                "OBMC is only supported with simple mb decision\n");
453         return -1;
454     }
455
456     if (s->quarter_sample && s->codec_id != AV_CODEC_ID_MPEG4) {
457         av_log(avctx, AV_LOG_ERROR, "qpel not supported by codec\n");
458         return -1;
459     }
460
461     if (s->max_b_frames                    &&
462         s->codec_id != AV_CODEC_ID_MPEG4      &&
463         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
464         s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
465         av_log(avctx, AV_LOG_ERROR, "b frames not supported by codec\n");
466         return -1;
467     }
468
469     if ((s->codec_id == AV_CODEC_ID_MPEG4 ||
470          s->codec_id == AV_CODEC_ID_H263  ||
471          s->codec_id == AV_CODEC_ID_H263P) &&
472         (avctx->sample_aspect_ratio.num > 255 ||
473          avctx->sample_aspect_ratio.den > 255)) {
474         av_log(avctx, AV_LOG_ERROR,
475                "Invalid pixel aspect ratio %i/%i, limit is 255/255\n",
476                avctx->sample_aspect_ratio.num, avctx->sample_aspect_ratio.den);
477         return -1;
478     }
479
480     if ((s->flags & (CODEC_FLAG_INTERLACED_DCT | CODEC_FLAG_INTERLACED_ME)) &&
481         s->codec_id != AV_CODEC_ID_MPEG4 && s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
482         av_log(avctx, AV_LOG_ERROR, "interlacing not supported by codec\n");
483         return -1;
484     }
485
486     // FIXME mpeg2 uses that too
487     if (s->mpeg_quant && s->codec_id != AV_CODEC_ID_MPEG4) {
488         av_log(avctx, AV_LOG_ERROR,
489                "mpeg2 style quantization not supported by codec\n");
490         return -1;
491     }
492
493 #if FF_API_MPV_GLOBAL_OPTS
494     if (s->flags & CODEC_FLAG_CBP_RD)
495         s->mpv_flags |= FF_MPV_FLAG_CBP_RD;
496 #endif
497
498     if ((s->mpv_flags & FF_MPV_FLAG_CBP_RD) && !avctx->trellis) {
499         av_log(avctx, AV_LOG_ERROR, "CBP RD needs trellis quant\n");
500         return -1;
501     }
502
503     if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) &&
504         s->avctx->mb_decision != FF_MB_DECISION_RD) {
505         av_log(avctx, AV_LOG_ERROR, "QP RD needs mbd=2\n");
506         return -1;
507     }
508
509     if (s->avctx->scenechange_threshold < 1000000000 &&
510         (s->flags & CODEC_FLAG_CLOSED_GOP)) {
511         av_log(avctx, AV_LOG_ERROR,
512                "closed gop with scene change detection are not supported yet, "
513                "set threshold to 1000000000\n");
514         return -1;
515     }
516
517     if (s->flags & CODEC_FLAG_LOW_DELAY) {
518         if (s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
519             av_log(avctx, AV_LOG_ERROR,
520                   "low delay forcing is only available for mpeg2\n");
521             return -1;
522         }
523         if (s->max_b_frames != 0) {
524             av_log(avctx, AV_LOG_ERROR,
525                    "b frames cannot be used with low delay\n");
526             return -1;
527         }
528     }
529
530     if (s->q_scale_type == 1) {
531         if (avctx->qmax > 12) {
532             av_log(avctx, AV_LOG_ERROR,
533                    "non linear quant only supports qmax <= 12 currently\n");
534             return -1;
535         }
536     }
537
538     if (s->avctx->thread_count > 1         &&
539         s->codec_id != AV_CODEC_ID_MPEG4      &&
540         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
541         s->codec_id != AV_CODEC_ID_MPEG2VIDEO &&
542         (s->codec_id != AV_CODEC_ID_H263P)) {
543         av_log(avctx, AV_LOG_ERROR,
544                "multi threaded encoding not supported by codec\n");
545         return -1;
546     }
547
548     if (s->avctx->thread_count < 1) {
549         av_log(avctx, AV_LOG_ERROR,
550                "automatic thread number detection not supported by codec,"
551                "patch welcome\n");
552         return -1;
553     }
554
555     if (s->avctx->thread_count > 1)
556         s->rtp_mode = 1;
557
558     if (!avctx->time_base.den || !avctx->time_base.num) {
559         av_log(avctx, AV_LOG_ERROR, "framerate not set\n");
560         return -1;
561     }
562
563     i = (INT_MAX / 2 + 128) >> 8;
564     if (avctx->me_threshold >= i) {
565         av_log(avctx, AV_LOG_ERROR, "me_threshold too large, max is %d\n",
566                i - 1);
567         return -1;
568     }
569     if (avctx->mb_threshold >= i) {
570         av_log(avctx, AV_LOG_ERROR, "mb_threshold too large, max is %d\n",
571                i - 1);
572         return -1;
573     }
574
575     if (avctx->b_frame_strategy && (avctx->flags & CODEC_FLAG_PASS2)) {
576         av_log(avctx, AV_LOG_INFO,
577                "notice: b_frame_strategy only affects the first pass\n");
578         avctx->b_frame_strategy = 0;
579     }
580
581     i = av_gcd(avctx->time_base.den, avctx->time_base.num);
582     if (i > 1) {
583         av_log(avctx, AV_LOG_INFO, "removing common factors from framerate\n");
584         avctx->time_base.den /= i;
585         avctx->time_base.num /= i;
586         //return -1;
587     }
588
589     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG1VIDEO ||
590         s->codec_id == AV_CODEC_ID_MPEG2VIDEO || s->codec_id == AV_CODEC_ID_MJPEG) {
591         // (a + x * 3 / 8) / x
592         s->intra_quant_bias = 3 << (QUANT_BIAS_SHIFT - 3);
593         s->inter_quant_bias = 0;
594     } else {
595         s->intra_quant_bias = 0;
596         // (a - x / 4) / x
597         s->inter_quant_bias = -(1 << (QUANT_BIAS_SHIFT - 2));
598     }
599
600     if (avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
601         s->intra_quant_bias = avctx->intra_quant_bias;
602     if (avctx->inter_quant_bias != FF_DEFAULT_QUANT_BIAS)
603         s->inter_quant_bias = avctx->inter_quant_bias;
604
605     av_pix_fmt_get_chroma_sub_sample(avctx->pix_fmt, &chroma_h_shift,
606                                      &chroma_v_shift);
607
608     if (avctx->codec_id == AV_CODEC_ID_MPEG4 &&
609         s->avctx->time_base.den > (1 << 16) - 1) {
610         av_log(avctx, AV_LOG_ERROR,
611                "timebase %d/%d not supported by MPEG 4 standard, "
612                "the maximum admitted value for the timebase denominator "
613                "is %d\n", s->avctx->time_base.num, s->avctx->time_base.den,
614                (1 << 16) - 1);
615         return -1;
616     }
617     s->time_increment_bits = av_log2(s->avctx->time_base.den - 1) + 1;
618
619 #if FF_API_MPV_GLOBAL_OPTS
620     if (avctx->flags2 & CODEC_FLAG2_SKIP_RD)
621         s->mpv_flags |= FF_MPV_FLAG_SKIP_RD;
622     if (avctx->flags2 & CODEC_FLAG2_STRICT_GOP)
623         s->mpv_flags |= FF_MPV_FLAG_STRICT_GOP;
624     if (avctx->quantizer_noise_shaping)
625         s->quantizer_noise_shaping = avctx->quantizer_noise_shaping;
626 #endif
627
628     switch (avctx->codec->id) {
629     case AV_CODEC_ID_MPEG1VIDEO:
630         s->out_format = FMT_MPEG1;
631         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
632         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
633         break;
634     case AV_CODEC_ID_MPEG2VIDEO:
635         s->out_format = FMT_MPEG1;
636         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
637         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
638         s->rtp_mode   = 1;
639         break;
640     case AV_CODEC_ID_LJPEG:
641     case AV_CODEC_ID_MJPEG:
642         s->out_format = FMT_MJPEG;
643         s->intra_only = 1; /* force intra only for jpeg */
644         if (avctx->codec->id == AV_CODEC_ID_LJPEG &&
645             avctx->pix_fmt   == AV_PIX_FMT_BGRA) {
646             s->mjpeg_vsample[0] = s->mjpeg_hsample[0] =
647             s->mjpeg_vsample[1] = s->mjpeg_hsample[1] =
648             s->mjpeg_vsample[2] = s->mjpeg_hsample[2] = 1;
649         } else {
650             s->mjpeg_vsample[0] = 2;
651             s->mjpeg_vsample[1] = 2 >> chroma_v_shift;
652             s->mjpeg_vsample[2] = 2 >> chroma_v_shift;
653             s->mjpeg_hsample[0] = 2;
654             s->mjpeg_hsample[1] = 2 >> chroma_h_shift;
655             s->mjpeg_hsample[2] = 2 >> chroma_h_shift;
656         }
657         if (!(CONFIG_MJPEG_ENCODER || CONFIG_LJPEG_ENCODER) ||
658             ff_mjpeg_encode_init(s) < 0)
659             return -1;
660         avctx->delay = 0;
661         s->low_delay = 1;
662         break;
663     case AV_CODEC_ID_H261:
664         if (!CONFIG_H261_ENCODER)
665             return -1;
666         if (ff_h261_get_picture_format(s->width, s->height) < 0) {
667             av_log(avctx, AV_LOG_ERROR,
668                    "The specified picture size of %dx%d is not valid for the "
669                    "H.261 codec.\nValid sizes are 176x144, 352x288\n",
670                     s->width, s->height);
671             return -1;
672         }
673         s->out_format = FMT_H261;
674         avctx->delay  = 0;
675         s->low_delay  = 1;
676         break;
677     case AV_CODEC_ID_H263:
678         if (!CONFIG_H263_ENCODER)
679         return -1;
680         if (ff_match_2uint16(ff_h263_format, FF_ARRAY_ELEMS(ff_h263_format),
681                              s->width, s->height) == 8) {
682             av_log(avctx, AV_LOG_INFO,
683                    "The specified picture size of %dx%d is not valid for "
684                    "the H.263 codec.\nValid sizes are 128x96, 176x144, "
685                    "352x288, 704x576, and 1408x1152."
686                    "Try H.263+.\n", s->width, s->height);
687             return -1;
688         }
689         s->out_format = FMT_H263;
690         avctx->delay  = 0;
691         s->low_delay  = 1;
692         break;
693     case AV_CODEC_ID_H263P:
694         s->out_format = FMT_H263;
695         s->h263_plus  = 1;
696         /* Fx */
697         s->h263_aic        = (avctx->flags & CODEC_FLAG_AC_PRED) ? 1 : 0;
698         s->modified_quant  = s->h263_aic;
699         s->loop_filter     = (avctx->flags & CODEC_FLAG_LOOP_FILTER) ? 1 : 0;
700         s->unrestricted_mv = s->obmc || s->loop_filter || s->umvplus;
701
702         /* /Fx */
703         /* These are just to be sure */
704         avctx->delay = 0;
705         s->low_delay = 1;
706         break;
707     case AV_CODEC_ID_FLV1:
708         s->out_format      = FMT_H263;
709         s->h263_flv        = 2; /* format = 1; 11-bit codes */
710         s->unrestricted_mv = 1;
711         s->rtp_mode  = 0; /* don't allow GOB */
712         avctx->delay = 0;
713         s->low_delay = 1;
714         break;
715     case AV_CODEC_ID_RV10:
716         s->out_format = FMT_H263;
717         avctx->delay  = 0;
718         s->low_delay  = 1;
719         break;
720     case AV_CODEC_ID_RV20:
721         s->out_format      = FMT_H263;
722         avctx->delay       = 0;
723         s->low_delay       = 1;
724         s->modified_quant  = 1;
725         s->h263_aic        = 1;
726         s->h263_plus       = 1;
727         s->loop_filter     = 1;
728         s->unrestricted_mv = 0;
729         break;
730     case AV_CODEC_ID_MPEG4:
731         s->out_format      = FMT_H263;
732         s->h263_pred       = 1;
733         s->unrestricted_mv = 1;
734         s->low_delay       = s->max_b_frames ? 0 : 1;
735         avctx->delay       = s->low_delay ? 0 : (s->max_b_frames + 1);
736         break;
737     case AV_CODEC_ID_MSMPEG4V2:
738         s->out_format      = FMT_H263;
739         s->h263_pred       = 1;
740         s->unrestricted_mv = 1;
741         s->msmpeg4_version = 2;
742         avctx->delay       = 0;
743         s->low_delay       = 1;
744         break;
745     case AV_CODEC_ID_MSMPEG4V3:
746         s->out_format        = FMT_H263;
747         s->h263_pred         = 1;
748         s->unrestricted_mv   = 1;
749         s->msmpeg4_version   = 3;
750         s->flipflop_rounding = 1;
751         avctx->delay         = 0;
752         s->low_delay         = 1;
753         break;
754     case AV_CODEC_ID_WMV1:
755         s->out_format        = FMT_H263;
756         s->h263_pred         = 1;
757         s->unrestricted_mv   = 1;
758         s->msmpeg4_version   = 4;
759         s->flipflop_rounding = 1;
760         avctx->delay         = 0;
761         s->low_delay         = 1;
762         break;
763     case AV_CODEC_ID_WMV2:
764         s->out_format        = FMT_H263;
765         s->h263_pred         = 1;
766         s->unrestricted_mv   = 1;
767         s->msmpeg4_version   = 5;
768         s->flipflop_rounding = 1;
769         avctx->delay         = 0;
770         s->low_delay         = 1;
771         break;
772     default:
773         return -1;
774     }
775
776     avctx->has_b_frames = !s->low_delay;
777
778     s->encoding = 1;
779
780     s->progressive_frame    =
781     s->progressive_sequence = !(avctx->flags & (CODEC_FLAG_INTERLACED_DCT |
782                                                 CODEC_FLAG_INTERLACED_ME) ||
783                                 s->alternate_scan);
784
785     /* init */
786     if (ff_MPV_common_init(s) < 0)
787         return -1;
788
789     if (ARCH_X86)
790         ff_MPV_encode_init_x86(s);
791
792     if (!s->dct_quantize)
793         s->dct_quantize = ff_dct_quantize_c;
794     if (!s->denoise_dct)
795         s->denoise_dct  = denoise_dct_c;
796     s->fast_dct_quantize = s->dct_quantize;
797     if (avctx->trellis)
798         s->dct_quantize  = dct_quantize_trellis_c;
799
800     if ((CONFIG_H263P_ENCODER || CONFIG_RV20_ENCODER) && s->modified_quant)
801         s->chroma_qscale_table = ff_h263_chroma_qscale_table;
802
803     s->quant_precision = 5;
804
805     ff_set_cmp(&s->dsp, s->dsp.ildct_cmp, s->avctx->ildct_cmp);
806     ff_set_cmp(&s->dsp, s->dsp.frame_skip_cmp, s->avctx->frame_skip_cmp);
807
808     if (CONFIG_H261_ENCODER && s->out_format == FMT_H261)
809         ff_h261_encode_init(s);
810     if (CONFIG_H263_ENCODER && s->out_format == FMT_H263)
811         ff_h263_encode_init(s);
812     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
813         ff_msmpeg4_encode_init(s);
814     if ((CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
815         && s->out_format == FMT_MPEG1)
816         ff_mpeg1_encode_init(s);
817
818     /* init q matrix */
819     for (i = 0; i < 64; i++) {
820         int j = s->dsp.idct_permutation[i];
821         if (CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4 &&
822             s->mpeg_quant) {
823             s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
824             s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
825         } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
826             s->intra_matrix[j] =
827             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
828         } else {
829             /* mpeg1/2 */
830             s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
831             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
832         }
833         if (s->avctx->intra_matrix)
834             s->intra_matrix[j] = s->avctx->intra_matrix[i];
835         if (s->avctx->inter_matrix)
836             s->inter_matrix[j] = s->avctx->inter_matrix[i];
837     }
838
839     /* precompute matrix */
840     /* for mjpeg, we do include qscale in the matrix */
841     if (s->out_format != FMT_MJPEG) {
842         ff_convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
843                           s->intra_matrix, s->intra_quant_bias, avctx->qmin,
844                           31, 1);
845         ff_convert_matrix(&s->dsp, s->q_inter_matrix, s->q_inter_matrix16,
846                           s->inter_matrix, s->inter_quant_bias, avctx->qmin,
847                           31, 0);
848     }
849
850     if (ff_rate_control_init(s) < 0)
851         return -1;
852
853     return 0;
854 }
855
856 av_cold int ff_MPV_encode_end(AVCodecContext *avctx)
857 {
858     MpegEncContext *s = avctx->priv_data;
859
860     ff_rate_control_uninit(s);
861
862     ff_MPV_common_end(s);
863     if ((CONFIG_MJPEG_ENCODER || CONFIG_LJPEG_ENCODER) &&
864         s->out_format == FMT_MJPEG)
865         ff_mjpeg_encode_close(s);
866
867     av_freep(&avctx->extradata);
868
869     return 0;
870 }
871
/**
 * Sum of absolute errors of a 16x16 pixel block against a constant value.
 *
 * @param src    top-left pixel of the block
 * @param ref    value every pixel is compared against
 * @param stride distance in bytes between the starts of two rows
 * @return sum over the 256 pixels of |pixel - ref|
 */
static int get_sae(uint8_t *src, int ref, int stride)
{
    int total = 0;
    int row, col;

    for (row = 0; row < 16; row++) {
        const uint8_t *line = src + row * stride;

        for (col = 0; col < 16; col++) {
            const int diff = line[col] - ref;

            total += diff >= 0 ? diff : -diff;
        }
    }

    return total;
}
885
886 static int get_intra_count(MpegEncContext *s, uint8_t *src,
887                            uint8_t *ref, int stride)
888 {
889     int x, y, w, h;
890     int acc = 0;
891
892     w = s->width  & ~15;
893     h = s->height & ~15;
894
895     for (y = 0; y < h; y += 16) {
896         for (x = 0; x < w; x += 16) {
897             int offset = x + y * stride;
898             int sad  = s->dsp.sad[0](NULL, src + offset, ref + offset, stride,
899                                      16);
900             int mean = (s->dsp.pix_sum(src + offset, stride) + 128) >> 8;
901             int sae  = get_sae(src + offset, mean, stride);
902
903             acc += sae + 500 < sad;
904         }
905     }
906     return acc;
907 }
908
909
910 static int load_input_picture(MpegEncContext *s, const AVFrame *pic_arg)
911 {
912     AVFrame *pic = NULL;
913     int64_t pts;
914     int i, display_picture_number = 0;
915     const int encoding_delay = s->max_b_frames ? s->max_b_frames :
916                                                  (s->low_delay ? 0 : 1);
917     int direct = 1;
918
919     if (pic_arg) {
920         pts = pic_arg->pts;
921         display_picture_number = s->input_picture_number++;
922
923         if (pts != AV_NOPTS_VALUE) {
924             if (s->user_specified_pts != AV_NOPTS_VALUE) {
925                 int64_t time = pts;
926                 int64_t last = s->user_specified_pts;
927
928                 if (time <= last) {
929                     av_log(s->avctx, AV_LOG_ERROR,
930                            "Error, Invalid timestamp=%"PRId64", "
931                            "last=%"PRId64"\n", pts, s->user_specified_pts);
932                     return -1;
933                 }
934
935                 if (!s->low_delay && display_picture_number == 1)
936                     s->dts_delta = time - last;
937             }
938             s->user_specified_pts = pts;
939         } else {
940             if (s->user_specified_pts != AV_NOPTS_VALUE) {
941                 s->user_specified_pts =
942                 pts = s->user_specified_pts + 1;
943                 av_log(s->avctx, AV_LOG_INFO,
944                        "Warning: AVFrame.pts=? trying to guess (%"PRId64")\n",
945                        pts);
946             } else {
947                 pts = display_picture_number;
948             }
949         }
950     }
951
952     if (pic_arg) {
953         if (encoding_delay && !(s->flags & CODEC_FLAG_INPUT_PRESERVED))
954             direct = 0;
955         if (pic_arg->linesize[0] != s->linesize)
956             direct = 0;
957         if (pic_arg->linesize[1] != s->uvlinesize)
958             direct = 0;
959         if (pic_arg->linesize[2] != s->uvlinesize)
960             direct = 0;
961
962         av_dlog(s->avctx, "%d %d %d %d\n", pic_arg->linesize[0],
963                 pic_arg->linesize[1], s->linesize, s->uvlinesize);
964
965         if (direct) {
966             i = ff_find_unused_picture(s, 1);
967             if (i < 0)
968                 return i;
969
970             pic = &s->picture[i].f;
971             pic->reference = 3;
972
973             for (i = 0; i < 4; i++) {
974                 pic->data[i]     = pic_arg->data[i];
975                 pic->linesize[i] = pic_arg->linesize[i];
976             }
977             if (ff_alloc_picture(s, (Picture *) pic, 1) < 0) {
978                 return -1;
979             }
980         } else {
981             i = ff_find_unused_picture(s, 0);
982             if (i < 0)
983                 return i;
984
985             pic = &s->picture[i].f;
986             pic->reference = 3;
987
988             if (ff_alloc_picture(s, (Picture *) pic, 0) < 0) {
989                 return -1;
990             }
991
992             if (pic->data[0] + INPLACE_OFFSET == pic_arg->data[0] &&
993                 pic->data[1] + INPLACE_OFFSET == pic_arg->data[1] &&
994                 pic->data[2] + INPLACE_OFFSET == pic_arg->data[2]) {
995                 // empty
996             } else {
997                 int h_chroma_shift, v_chroma_shift;
998                 av_pix_fmt_get_chroma_sub_sample(s->avctx->pix_fmt,
999                                                  &h_chroma_shift,
1000                                                  &v_chroma_shift);
1001
1002                 for (i = 0; i < 3; i++) {
1003                     int src_stride = pic_arg->linesize[i];
1004                     int dst_stride = i ? s->uvlinesize : s->linesize;
1005                     int h_shift = i ? h_chroma_shift : 0;
1006                     int v_shift = i ? v_chroma_shift : 0;
1007                     int w = s->width  >> h_shift;
1008                     int h = s->height >> v_shift;
1009                     uint8_t *src = pic_arg->data[i];
1010                     uint8_t *dst = pic->data[i];
1011
1012                     if (!s->avctx->rc_buffer_size)
1013                         dst += INPLACE_OFFSET;
1014
1015                     if (src_stride == dst_stride)
1016                         memcpy(dst, src, src_stride * h);
1017                     else {
1018                         while (h--) {
1019                             memcpy(dst, src, w);
1020                             dst += dst_stride;
1021                             src += src_stride;
1022                         }
1023                     }
1024                 }
1025             }
1026         }
1027         copy_picture_attributes(s, pic, pic_arg);
1028         pic->display_picture_number = display_picture_number;
1029         pic->pts = pts; // we set this here to avoid modifiying pic_arg
1030     }
1031
1032     /* shift buffer entries */
1033     for (i = 1; i < MAX_PICTURE_COUNT /*s->encoding_delay + 1*/; i++)
1034         s->input_picture[i - 1] = s->input_picture[i];
1035
1036     s->input_picture[encoding_delay] = (Picture*) pic;
1037
1038     return 0;
1039 }
1040
1041 static int skip_check(MpegEncContext *s, Picture *p, Picture *ref)
1042 {
1043     int x, y, plane;
1044     int score = 0;
1045     int64_t score64 = 0;
1046
1047     for (plane = 0; plane < 3; plane++) {
1048         const int stride = p->f.linesize[plane];
1049         const int bw = plane ? 1 : 2;
1050         for (y = 0; y < s->mb_height * bw; y++) {
1051             for (x = 0; x < s->mb_width * bw; x++) {
1052                 int off = p->f.type == FF_BUFFER_TYPE_SHARED ? 0 : 16;
1053                 uint8_t *dptr = p->f.data[plane] + 8 * (x + y * stride) + off;
1054                 uint8_t *rptr = ref->f.data[plane] + 8 * (x + y * stride);
1055                 int v   = s->dsp.frame_skip_cmp[1](s, dptr, rptr, stride, 8);
1056
1057                 switch (s->avctx->frame_skip_exp) {
1058                 case 0: score    =  FFMAX(score, v);          break;
1059                 case 1: score   += FFABS(v);                  break;
1060                 case 2: score   += v * v;                     break;
1061                 case 3: score64 += FFABS(v * v * (int64_t)v); break;
1062                 case 4: score64 += v * v * (int64_t)(v * v);  break;
1063                 }
1064             }
1065         }
1066     }
1067
1068     if (score)
1069         score64 = score;
1070
1071     if (score64 < s->avctx->frame_skip_threshold)
1072         return 1;
1073     if (score64 < ((s->avctx->frame_skip_factor * (int64_t)s->lambda) >> 8))
1074         return 1;
1075     return 0;
1076 }
1077
1078 static int encode_frame(AVCodecContext *c, AVFrame *frame)
1079 {
1080     AVPacket pkt = { 0 };
1081     int ret, got_output;
1082
1083     av_init_packet(&pkt);
1084     ret = avcodec_encode_video2(c, &pkt, frame, &got_output);
1085     if (ret < 0)
1086         return ret;
1087
1088     ret = pkt.size;
1089     av_free_packet(&pkt);
1090     return ret;
1091 }
1092
1093 static int estimate_best_b_count(MpegEncContext *s)
1094 {
1095     AVCodec *codec    = avcodec_find_encoder(s->avctx->codec_id);
1096     AVCodecContext *c = avcodec_alloc_context3(NULL);
1097     AVFrame input[FF_MAX_B_FRAMES + 2];
1098     const int scale = s->avctx->brd_scale;
1099     int i, j, out_size, p_lambda, b_lambda, lambda2;
1100     int64_t best_rd  = INT64_MAX;
1101     int best_b_count = -1;
1102
1103     assert(scale >= 0 && scale <= 3);
1104
1105     //emms_c();
1106     //s->next_picture_ptr->quality;
1107     p_lambda = s->last_lambda_for[AV_PICTURE_TYPE_P];
1108     //p_lambda * FFABS(s->avctx->b_quant_factor) + s->avctx->b_quant_offset;
1109     b_lambda = s->last_lambda_for[AV_PICTURE_TYPE_B];
1110     if (!b_lambda) // FIXME we should do this somewhere else
1111         b_lambda = p_lambda;
1112     lambda2  = (b_lambda * b_lambda + (1 << FF_LAMBDA_SHIFT) / 2) >>
1113                FF_LAMBDA_SHIFT;
1114
1115     c->width        = s->width  >> scale;
1116     c->height       = s->height >> scale;
1117     c->flags        = CODEC_FLAG_QSCALE | CODEC_FLAG_PSNR |
1118                       CODEC_FLAG_INPUT_PRESERVED /*| CODEC_FLAG_EMU_EDGE*/;
1119     c->flags       |= s->avctx->flags & CODEC_FLAG_QPEL;
1120     c->mb_decision  = s->avctx->mb_decision;
1121     c->me_cmp       = s->avctx->me_cmp;
1122     c->mb_cmp       = s->avctx->mb_cmp;
1123     c->me_sub_cmp   = s->avctx->me_sub_cmp;
1124     c->pix_fmt      = AV_PIX_FMT_YUV420P;
1125     c->time_base    = s->avctx->time_base;
1126     c->max_b_frames = s->max_b_frames;
1127
1128     if (avcodec_open2(c, codec, NULL) < 0)
1129         return -1;
1130
1131     for (i = 0; i < s->max_b_frames + 2; i++) {
1132         int ysize = c->width * c->height;
1133         int csize = (c->width / 2) * (c->height / 2);
1134         Picture pre_input, *pre_input_ptr = i ? s->input_picture[i - 1] :
1135                                                 s->next_picture_ptr;
1136
1137         avcodec_get_frame_defaults(&input[i]);
1138         input[i].data[0]     = av_malloc(ysize + 2 * csize);
1139         input[i].data[1]     = input[i].data[0] + ysize;
1140         input[i].data[2]     = input[i].data[1] + csize;
1141         input[i].linesize[0] = c->width;
1142         input[i].linesize[1] =
1143         input[i].linesize[2] = c->width / 2;
1144
1145         if (pre_input_ptr && (!i || s->input_picture[i - 1])) {
1146             pre_input = *pre_input_ptr;
1147
1148             if (pre_input.f.type != FF_BUFFER_TYPE_SHARED && i) {
1149                 pre_input.f.data[0] += INPLACE_OFFSET;
1150                 pre_input.f.data[1] += INPLACE_OFFSET;
1151                 pre_input.f.data[2] += INPLACE_OFFSET;
1152             }
1153
1154             s->dsp.shrink[scale](input[i].data[0], input[i].linesize[0],
1155                                  pre_input.f.data[0], pre_input.f.linesize[0],
1156                                  c->width,      c->height);
1157             s->dsp.shrink[scale](input[i].data[1], input[i].linesize[1],
1158                                  pre_input.f.data[1], pre_input.f.linesize[1],
1159                                  c->width >> 1, c->height >> 1);
1160             s->dsp.shrink[scale](input[i].data[2], input[i].linesize[2],
1161                                  pre_input.f.data[2], pre_input.f.linesize[2],
1162                                  c->width >> 1, c->height >> 1);
1163         }
1164     }
1165
1166     for (j = 0; j < s->max_b_frames + 1; j++) {
1167         int64_t rd = 0;
1168
1169         if (!s->input_picture[j])
1170             break;
1171
1172         c->error[0] = c->error[1] = c->error[2] = 0;
1173
1174         input[0].pict_type = AV_PICTURE_TYPE_I;
1175         input[0].quality   = 1 * FF_QP2LAMBDA;
1176
1177         out_size = encode_frame(c, &input[0]);
1178
1179         //rd += (out_size * lambda2) >> FF_LAMBDA_SHIFT;
1180
1181         for (i = 0; i < s->max_b_frames + 1; i++) {
1182             int is_p = i % (j + 1) == j || i == s->max_b_frames;
1183
1184             input[i + 1].pict_type = is_p ?
1185                                      AV_PICTURE_TYPE_P : AV_PICTURE_TYPE_B;
1186             input[i + 1].quality   = is_p ? p_lambda : b_lambda;
1187
1188             out_size = encode_frame(c, &input[i + 1]);
1189
1190             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1191         }
1192
1193         /* get the delayed frames */
1194         while (out_size) {
1195             out_size = encode_frame(c, NULL);
1196             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1197         }
1198
1199         rd += c->error[0] + c->error[1] + c->error[2];
1200
1201         if (rd < best_rd) {
1202             best_rd = rd;
1203             best_b_count = j;
1204         }
1205     }
1206
1207     avcodec_close(c);
1208     av_freep(&c);
1209
1210     for (i = 0; i < s->max_b_frames + 2; i++) {
1211         av_freep(&input[i].data[0]);
1212     }
1213
1214     return best_b_count;
1215 }
1216
1217 static int select_input_picture(MpegEncContext *s)
1218 {
1219     int i;
1220
1221     for (i = 1; i < MAX_PICTURE_COUNT; i++)
1222         s->reordered_input_picture[i - 1] = s->reordered_input_picture[i];
1223     s->reordered_input_picture[MAX_PICTURE_COUNT - 1] = NULL;
1224
1225     /* set next picture type & ordering */
1226     if (s->reordered_input_picture[0] == NULL && s->input_picture[0]) {
1227         if (/*s->picture_in_gop_number >= s->gop_size ||*/
1228             s->next_picture_ptr == NULL || s->intra_only) {
1229             s->reordered_input_picture[0] = s->input_picture[0];
1230             s->reordered_input_picture[0]->f.pict_type = AV_PICTURE_TYPE_I;
1231             s->reordered_input_picture[0]->f.coded_picture_number =
1232                 s->coded_picture_number++;
1233         } else {
1234             int b_frames;
1235
1236             if (s->avctx->frame_skip_threshold || s->avctx->frame_skip_factor) {
1237                 if (s->picture_in_gop_number < s->gop_size &&
1238                     skip_check(s, s->input_picture[0], s->next_picture_ptr)) {
1239                     // FIXME check that te gop check above is +-1 correct
1240                     if (s->input_picture[0]->f.type == FF_BUFFER_TYPE_SHARED) {
1241                         for (i = 0; i < 4; i++)
1242                             s->input_picture[0]->f.data[i] = NULL;
1243                         s->input_picture[0]->f.type = 0;
1244                     } else {
1245                         assert(s->input_picture[0]->f.type == FF_BUFFER_TYPE_USER ||
1246                                s->input_picture[0]->f.type == FF_BUFFER_TYPE_INTERNAL);
1247
1248                         s->avctx->release_buffer(s->avctx,
1249                                                  &s->input_picture[0]->f);
1250                     }
1251
1252                     emms_c();
1253                     ff_vbv_update(s, 0);
1254
1255                     goto no_output_pic;
1256                 }
1257             }
1258
1259             if (s->flags & CODEC_FLAG_PASS2) {
1260                 for (i = 0; i < s->max_b_frames + 1; i++) {
1261                     int pict_num = s->input_picture[0]->f.display_picture_number + i;
1262
1263                     if (pict_num >= s->rc_context.num_entries)
1264                         break;
1265                     if (!s->input_picture[i]) {
1266                         s->rc_context.entry[pict_num - 1].new_pict_type = AV_PICTURE_TYPE_P;
1267                         break;
1268                     }
1269
1270                     s->input_picture[i]->f.pict_type =
1271                         s->rc_context.entry[pict_num].new_pict_type;
1272                 }
1273             }
1274
1275             if (s->avctx->b_frame_strategy == 0) {
1276                 b_frames = s->max_b_frames;
1277                 while (b_frames && !s->input_picture[b_frames])
1278                     b_frames--;
1279             } else if (s->avctx->b_frame_strategy == 1) {
1280                 for (i = 1; i < s->max_b_frames + 1; i++) {
1281                     if (s->input_picture[i] &&
1282                         s->input_picture[i]->b_frame_score == 0) {
1283                         s->input_picture[i]->b_frame_score =
1284                             get_intra_count(s,
1285                                             s->input_picture[i    ]->f.data[0],
1286                                             s->input_picture[i - 1]->f.data[0],
1287                                             s->linesize) + 1;
1288                     }
1289                 }
1290                 for (i = 0; i < s->max_b_frames + 1; i++) {
1291                     if (s->input_picture[i] == NULL ||
1292                         s->input_picture[i]->b_frame_score - 1 >
1293                             s->mb_num / s->avctx->b_sensitivity)
1294                         break;
1295                 }
1296
1297                 b_frames = FFMAX(0, i - 1);
1298
1299                 /* reset scores */
1300                 for (i = 0; i < b_frames + 1; i++) {
1301                     s->input_picture[i]->b_frame_score = 0;
1302                 }
1303             } else if (s->avctx->b_frame_strategy == 2) {
1304                 b_frames = estimate_best_b_count(s);
1305             } else {
1306                 av_log(s->avctx, AV_LOG_ERROR, "illegal b frame strategy\n");
1307                 b_frames = 0;
1308             }
1309
1310             emms_c();
1311
1312             for (i = b_frames - 1; i >= 0; i--) {
1313                 int type = s->input_picture[i]->f.pict_type;
1314                 if (type && type != AV_PICTURE_TYPE_B)
1315                     b_frames = i;
1316             }
1317             if (s->input_picture[b_frames]->f.pict_type == AV_PICTURE_TYPE_B &&
1318                 b_frames == s->max_b_frames) {
1319                 av_log(s->avctx, AV_LOG_ERROR,
1320                        "warning, too many b frames in a row\n");
1321             }
1322
1323             if (s->picture_in_gop_number + b_frames >= s->gop_size) {
1324                 if ((s->mpv_flags & FF_MPV_FLAG_STRICT_GOP) &&
1325                     s->gop_size > s->picture_in_gop_number) {
1326                     b_frames = s->gop_size - s->picture_in_gop_number - 1;
1327                 } else {
1328                     if (s->flags & CODEC_FLAG_CLOSED_GOP)
1329                         b_frames = 0;
1330                     s->input_picture[b_frames]->f.pict_type = AV_PICTURE_TYPE_I;
1331                 }
1332             }
1333
1334             if ((s->flags & CODEC_FLAG_CLOSED_GOP) && b_frames &&
1335                 s->input_picture[b_frames]->f.pict_type == AV_PICTURE_TYPE_I)
1336                 b_frames--;
1337
1338             s->reordered_input_picture[0] = s->input_picture[b_frames];
1339             if (s->reordered_input_picture[0]->f.pict_type != AV_PICTURE_TYPE_I)
1340                 s->reordered_input_picture[0]->f.pict_type = AV_PICTURE_TYPE_P;
1341             s->reordered_input_picture[0]->f.coded_picture_number =
1342                 s->coded_picture_number++;
1343             for (i = 0; i < b_frames; i++) {
1344                 s->reordered_input_picture[i + 1] = s->input_picture[i];
1345                 s->reordered_input_picture[i + 1]->f.pict_type =
1346                     AV_PICTURE_TYPE_B;
1347                 s->reordered_input_picture[i + 1]->f.coded_picture_number =
1348                     s->coded_picture_number++;
1349             }
1350         }
1351     }
1352 no_output_pic:
1353     if (s->reordered_input_picture[0]) {
1354         s->reordered_input_picture[0]->f.reference =
1355            s->reordered_input_picture[0]->f.pict_type !=
1356                AV_PICTURE_TYPE_B ? 3 : 0;
1357
1358         ff_copy_picture(&s->new_picture, s->reordered_input_picture[0]);
1359
1360         if (s->reordered_input_picture[0]->f.type == FF_BUFFER_TYPE_SHARED ||
1361             s->avctx->rc_buffer_size) {
1362             // input is a shared pix, so we can't modifiy it -> alloc a new
1363             // one & ensure that the shared one is reuseable
1364
1365             Picture *pic;
1366             int i = ff_find_unused_picture(s, 0);
1367             if (i < 0)
1368                 return i;
1369             pic = &s->picture[i];
1370
1371             pic->f.reference = s->reordered_input_picture[0]->f.reference;
1372             if (ff_alloc_picture(s, pic, 0) < 0) {
1373                 return -1;
1374             }
1375
1376             /* mark us unused / free shared pic */
1377             if (s->reordered_input_picture[0]->f.type == FF_BUFFER_TYPE_INTERNAL)
1378                 s->avctx->release_buffer(s->avctx,
1379                                          &s->reordered_input_picture[0]->f);
1380             for (i = 0; i < 4; i++)
1381                 s->reordered_input_picture[0]->f.data[i] = NULL;
1382             s->reordered_input_picture[0]->f.type = 0;
1383
1384             copy_picture_attributes(s, &pic->f,
1385                                     &s->reordered_input_picture[0]->f);
1386
1387             s->current_picture_ptr = pic;
1388         } else {
1389             // input is not a shared pix -> reuse buffer for current_pix
1390
1391             assert(s->reordered_input_picture[0]->f.type ==
1392                        FF_BUFFER_TYPE_USER ||
1393                    s->reordered_input_picture[0]->f.type ==
1394                        FF_BUFFER_TYPE_INTERNAL);
1395
1396             s->current_picture_ptr = s->reordered_input_picture[0];
1397             for (i = 0; i < 4; i++) {
1398                 s->new_picture.f.data[i] += INPLACE_OFFSET;
1399             }
1400         }
1401         ff_copy_picture(&s->current_picture, s->current_picture_ptr);
1402
1403         s->picture_number = s->new_picture.f.display_picture_number;
1404     } else {
1405         memset(&s->new_picture, 0, sizeof(Picture));
1406     }
1407     return 0;
1408 }
1409
1410 int ff_MPV_encode_picture(AVCodecContext *avctx, AVPacket *pkt,
1411                           const AVFrame *pic_arg, int *got_packet)
1412 {
1413     MpegEncContext *s = avctx->priv_data;
1414     int i, stuffing_count, ret;
1415     int context_count = s->slice_context_count;
1416
1417     s->picture_in_gop_number++;
1418
1419     if (load_input_picture(s, pic_arg) < 0)
1420         return -1;
1421
1422     if (select_input_picture(s) < 0) {
1423         return -1;
1424     }
1425
1426     /* output? */
1427     if (s->new_picture.f.data[0]) {
1428         if (!pkt->data &&
1429             (ret = ff_alloc_packet(pkt, s->mb_width*s->mb_height*MAX_MB_BYTES)) < 0)
1430             return ret;
1431         if (s->mb_info) {
1432             s->mb_info_ptr = av_packet_new_side_data(pkt,
1433                                  AV_PKT_DATA_H263_MB_INFO,
1434                                  s->mb_width*s->mb_height*12);
1435             s->prev_mb_info = s->last_mb_info = s->mb_info_size = 0;
1436         }
1437
1438         for (i = 0; i < context_count; i++) {
1439             int start_y = s->thread_context[i]->start_mb_y;
1440             int   end_y = s->thread_context[i]->  end_mb_y;
1441             int h       = s->mb_height;
1442             uint8_t *start = pkt->data + (size_t)(((int64_t) pkt->size) * start_y / h);
1443             uint8_t *end   = pkt->data + (size_t)(((int64_t) pkt->size) *   end_y / h);
1444
1445             init_put_bits(&s->thread_context[i]->pb, start, end - start);
1446         }
1447
1448         s->pict_type = s->new_picture.f.pict_type;
1449         //emms_c();
1450         ff_MPV_frame_start(s, avctx);
1451 vbv_retry:
1452         if (encode_picture(s, s->picture_number) < 0)
1453             return -1;
1454
1455         avctx->header_bits = s->header_bits;
1456         avctx->mv_bits     = s->mv_bits;
1457         avctx->misc_bits   = s->misc_bits;
1458         avctx->i_tex_bits  = s->i_tex_bits;
1459         avctx->p_tex_bits  = s->p_tex_bits;
1460         avctx->i_count     = s->i_count;
1461         // FIXME f/b_count in avctx
1462         avctx->p_count     = s->mb_num - s->i_count - s->skip_count;
1463         avctx->skip_count  = s->skip_count;
1464
1465         ff_MPV_frame_end(s);
1466
1467         if (CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG)
1468             ff_mjpeg_encode_picture_trailer(s);
1469
1470         if (avctx->rc_buffer_size) {
1471             RateControlContext *rcc = &s->rc_context;
1472             int max_size = rcc->buffer_index * avctx->rc_max_available_vbv_use;
1473
1474             if (put_bits_count(&s->pb) > max_size &&
1475                 s->lambda < s->avctx->lmax) {
1476                 s->next_lambda = FFMAX(s->lambda + 1, s->lambda *
1477                                        (s->qscale + 1) / s->qscale);
1478                 if (s->adaptive_quant) {
1479                     int i;
1480                     for (i = 0; i < s->mb_height * s->mb_stride; i++)
1481                         s->lambda_table[i] =
1482                             FFMAX(s->lambda_table[i] + 1,
1483                                   s->lambda_table[i] * (s->qscale + 1) /
1484                                   s->qscale);
1485                 }
1486                 s->mb_skipped = 0;        // done in MPV_frame_start()
1487                 // done in encode_picture() so we must undo it
1488                 if (s->pict_type == AV_PICTURE_TYPE_P) {
1489                     if (s->flipflop_rounding          ||
1490                         s->codec_id == AV_CODEC_ID_H263P ||
1491                         s->codec_id == AV_CODEC_ID_MPEG4)
1492                         s->no_rounding ^= 1;
1493                 }
1494                 if (s->pict_type != AV_PICTURE_TYPE_B) {
1495                     s->time_base       = s->last_time_base;
1496                     s->last_non_b_time = s->time - s->pp_time;
1497                 }
1498                 for (i = 0; i < context_count; i++) {
1499                     PutBitContext *pb = &s->thread_context[i]->pb;
1500                     init_put_bits(pb, pb->buf, pb->buf_end - pb->buf);
1501                 }
1502                 goto vbv_retry;
1503             }
1504
1505             assert(s->avctx->rc_max_rate);
1506         }
1507
1508         if (s->flags & CODEC_FLAG_PASS1)
1509             ff_write_pass1_stats(s);
1510
1511         for (i = 0; i < 4; i++) {
1512             s->current_picture_ptr->f.error[i] = s->current_picture.f.error[i];
1513             avctx->error[i] += s->current_picture_ptr->f.error[i];
1514         }
1515
1516         if (s->flags & CODEC_FLAG_PASS1)
1517             assert(avctx->header_bits + avctx->mv_bits + avctx->misc_bits +
1518                    avctx->i_tex_bits + avctx->p_tex_bits ==
1519                        put_bits_count(&s->pb));
1520         flush_put_bits(&s->pb);
1521         s->frame_bits  = put_bits_count(&s->pb);
1522
1523         stuffing_count = ff_vbv_update(s, s->frame_bits);
1524         if (stuffing_count) {
1525             if (s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb) >> 3) <
1526                     stuffing_count + 50) {
1527                 av_log(s->avctx, AV_LOG_ERROR, "stuffing too large\n");
1528                 return -1;
1529             }
1530
1531             switch (s->codec_id) {
1532             case AV_CODEC_ID_MPEG1VIDEO:
1533             case AV_CODEC_ID_MPEG2VIDEO:
1534                 while (stuffing_count--) {
1535                     put_bits(&s->pb, 8, 0);
1536                 }
1537             break;
1538             case AV_CODEC_ID_MPEG4:
1539                 put_bits(&s->pb, 16, 0);
1540                 put_bits(&s->pb, 16, 0x1C3);
1541                 stuffing_count -= 4;
1542                 while (stuffing_count--) {
1543                     put_bits(&s->pb, 8, 0xFF);
1544                 }
1545             break;
1546             default:
1547                 av_log(s->avctx, AV_LOG_ERROR, "vbv buffer overflow\n");
1548             }
1549             flush_put_bits(&s->pb);
1550             s->frame_bits  = put_bits_count(&s->pb);
1551         }
1552
1553         /* update mpeg1/2 vbv_delay for CBR */
1554         if (s->avctx->rc_max_rate                          &&
1555             s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
1556             s->out_format == FMT_MPEG1                     &&
1557             90000LL * (avctx->rc_buffer_size - 1) <=
1558                 s->avctx->rc_max_rate * 0xFFFFLL) {
1559             int vbv_delay, min_delay;
1560             double inbits  = s->avctx->rc_max_rate *
1561                              av_q2d(s->avctx->time_base);
1562             int    minbits = s->frame_bits - 8 *
1563                              (s->vbv_delay_ptr - s->pb.buf - 1);
1564             double bits    = s->rc_context.buffer_index + minbits - inbits;
1565
1566             if (bits < 0)
1567                 av_log(s->avctx, AV_LOG_ERROR,
1568                        "Internal error, negative bits\n");
1569
1570             assert(s->repeat_first_field == 0);
1571
1572             vbv_delay = bits * 90000 / s->avctx->rc_max_rate;
1573             min_delay = (minbits * 90000LL + s->avctx->rc_max_rate - 1) /
1574                         s->avctx->rc_max_rate;
1575
1576             vbv_delay = FFMAX(vbv_delay, min_delay);
1577
1578             assert(vbv_delay < 0xFFFF);
1579
1580             s->vbv_delay_ptr[0] &= 0xF8;
1581             s->vbv_delay_ptr[0] |= vbv_delay >> 13;
1582             s->vbv_delay_ptr[1]  = vbv_delay >> 5;
1583             s->vbv_delay_ptr[2] &= 0x07;
1584             s->vbv_delay_ptr[2] |= vbv_delay << 3;
1585             avctx->vbv_delay     = vbv_delay * 300;
1586         }
1587         s->total_bits     += s->frame_bits;
1588         avctx->frame_bits  = s->frame_bits;
1589
1590         pkt->pts = s->current_picture.f.pts;
1591         if (!s->low_delay) {
1592             if (!s->current_picture.f.coded_picture_number)
1593                 pkt->dts = pkt->pts - s->dts_delta;
1594             else
1595                 pkt->dts = s->reordered_pts;
1596             s->reordered_pts = s->input_picture[0]->f.pts;
1597         } else
1598             pkt->dts = pkt->pts;
1599         if (s->current_picture.f.key_frame)
1600             pkt->flags |= AV_PKT_FLAG_KEY;
1601         if (s->mb_info)
1602             av_packet_shrink_side_data(pkt, AV_PKT_DATA_H263_MB_INFO, s->mb_info_size);
1603     } else {
1604         s->frame_bits = 0;
1605     }
1606     assert((s->frame_bits & 7) == 0);
1607
1608     pkt->size = s->frame_bits / 8;
1609     *got_packet = !!pkt->size;
1610     return 0;
1611 }
1612
1613 static inline void dct_single_coeff_elimination(MpegEncContext *s,
1614                                                 int n, int threshold)
1615 {
1616     static const char tab[64] = {
1617         3, 2, 2, 1, 1, 1, 1, 1,
1618         1, 1, 1, 1, 1, 1, 1, 1,
1619         1, 1, 1, 1, 1, 1, 1, 1,
1620         0, 0, 0, 0, 0, 0, 0, 0,
1621         0, 0, 0, 0, 0, 0, 0, 0,
1622         0, 0, 0, 0, 0, 0, 0, 0,
1623         0, 0, 0, 0, 0, 0, 0, 0,
1624         0, 0, 0, 0, 0, 0, 0, 0
1625     };
1626     int score = 0;
1627     int run = 0;
1628     int i;
1629     int16_t *block = s->block[n];
1630     const int last_index = s->block_last_index[n];
1631     int skip_dc;
1632
1633     if (threshold < 0) {
1634         skip_dc = 0;
1635         threshold = -threshold;
1636     } else
1637         skip_dc = 1;
1638
1639     /* Are all we could set to zero already zero? */
1640     if (last_index <= skip_dc - 1)
1641         return;
1642
1643     for (i = 0; i <= last_index; i++) {
1644         const int j = s->intra_scantable.permutated[i];
1645         const int level = FFABS(block[j]);
1646         if (level == 1) {
1647             if (skip_dc && i == 0)
1648                 continue;
1649             score += tab[run];
1650             run = 0;
1651         } else if (level > 1) {
1652             return;
1653         } else {
1654             run++;
1655         }
1656     }
1657     if (score >= threshold)
1658         return;
1659     for (i = skip_dc; i <= last_index; i++) {
1660         const int j = s->intra_scantable.permutated[i];
1661         block[j] = 0;
1662     }
1663     if (block[0])
1664         s->block_last_index[n] = 0;
1665     else
1666         s->block_last_index[n] = -1;
1667 }
1668
1669 static inline void clip_coeffs(MpegEncContext *s, int16_t *block,
1670                                int last_index)
1671 {
1672     int i;
1673     const int maxlevel = s->max_qcoeff;
1674     const int minlevel = s->min_qcoeff;
1675     int overflow = 0;
1676
1677     if (s->mb_intra) {
1678         i = 1; // skip clipping of intra dc
1679     } else
1680         i = 0;
1681
1682     for (; i <= last_index; i++) {
1683         const int j = s->intra_scantable.permutated[i];
1684         int level = block[j];
1685
1686         if (level > maxlevel) {
1687             level = maxlevel;
1688             overflow++;
1689         } else if (level < minlevel) {
1690             level = minlevel;
1691             overflow++;
1692         }
1693
1694         block[j] = level;
1695     }
1696
1697     if (overflow && s->avctx->mb_decision == FF_MB_DECISION_SIMPLE)
1698         av_log(s->avctx, AV_LOG_INFO,
1699                "warning, clipping %d dct coefficients to %d..%d\n",
1700                overflow, minlevel, maxlevel);
1701 }
1702
/**
 * Compute a per-pixel weight for an 8x8 block from the local pixel spread.
 *
 * For every pixel the 3x3 neighbourhood (cropped at the block border) is
 * examined and the weight is set to
 * 36 * ff_sqrt(count * sum_of_squares - sum * sum) / count.
 *
 * @param weight output, 64 entries in row-major order
 * @param ptr    top-left pixel of the 8x8 block
 * @param stride distance in bytes between two pixel rows
 */
static void get_visual_weight(int16_t *weight, uint8_t *ptr, int stride)
{
    int row, col;

    // FIXME optimize
    for (row = 0; row < 8; row++) {
        const int top    = FFMAX(row - 1, 0);
        const int bottom = FFMIN(8, row + 2);

        for (col = 0; col < 8; col++) {
            const int left    = FFMAX(col - 1, 0);
            const int right   = FFMIN(8, col + 2);
            /* number of samples inside the cropped window */
            const int samples = (bottom - top) * (right - left);
            int total    = 0;
            int total_sq = 0;
            int r, c;

            for (r = top; r < bottom; r++) {
                const uint8_t *line = ptr + r * stride;

                for (c = left; c < right; c++) {
                    const int pix = line[c];

                    total    += pix;
                    total_sq += pix * pix;
                }
            }

            weight[col + 8 * row] =
                (36 * ff_sqrt(samples * total_sq - total * total)) / samples;
        }
    }
}
1726
1727 static av_always_inline void encode_mb_internal(MpegEncContext *s,
1728                                                 int motion_x, int motion_y,
1729                                                 int mb_block_height,
1730                                                 int mb_block_count)
1731 {
1732     int16_t weight[8][64];
1733     int16_t orig[8][64];
1734     const int mb_x = s->mb_x;
1735     const int mb_y = s->mb_y;
1736     int i;
1737     int skip_dct[8];
1738     int dct_offset = s->linesize * 8; // default for progressive frames
1739     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
1740     int wrap_y, wrap_c;
1741
1742     for (i = 0; i < mb_block_count; i++)
1743         skip_dct[i] = s->skipdct;
1744
1745     if (s->adaptive_quant) {
1746         const int last_qp = s->qscale;
1747         const int mb_xy = mb_x + mb_y * s->mb_stride;
1748
1749         s->lambda = s->lambda_table[mb_xy];
1750         update_qscale(s);
1751
1752         if (!(s->mpv_flags & FF_MPV_FLAG_QP_RD)) {
1753             s->qscale = s->current_picture_ptr->f.qscale_table[mb_xy];
1754             s->dquant = s->qscale - last_qp;
1755
1756             if (s->out_format == FMT_H263) {
1757                 s->dquant = av_clip(s->dquant, -2, 2);
1758
1759                 if (s->codec_id == AV_CODEC_ID_MPEG4) {
1760                     if (!s->mb_intra) {
1761                         if (s->pict_type == AV_PICTURE_TYPE_B) {
1762                             if (s->dquant & 1 || s->mv_dir & MV_DIRECT)
1763                                 s->dquant = 0;
1764                         }
1765                         if (s->mv_type == MV_TYPE_8X8)
1766                             s->dquant = 0;
1767                     }
1768                 }
1769             }
1770         }
1771         ff_set_qscale(s, last_qp + s->dquant);
1772     } else if (s->mpv_flags & FF_MPV_FLAG_QP_RD)
1773         ff_set_qscale(s, s->qscale + s->dquant);
1774
1775     wrap_y = s->linesize;
1776     wrap_c = s->uvlinesize;
1777     ptr_y  = s->new_picture.f.data[0] +
1778              (mb_y * 16 * wrap_y)              + mb_x * 16;
1779     ptr_cb = s->new_picture.f.data[1] +
1780              (mb_y * mb_block_height * wrap_c) + mb_x * 8;
1781     ptr_cr = s->new_picture.f.data[2] +
1782              (mb_y * mb_block_height * wrap_c) + mb_x * 8;
1783
1784     if (mb_x * 16 + 16 > s->width || mb_y * 16 + 16 > s->height) {
1785         uint8_t *ebuf = s->edge_emu_buffer + 32;
1786         s->vdsp.emulated_edge_mc(ebuf, ptr_y, wrap_y, 16, 16, mb_x * 16,
1787                                  mb_y * 16, s->width, s->height);
1788         ptr_y = ebuf;
1789         s->vdsp.emulated_edge_mc(ebuf + 18 * wrap_y, ptr_cb, wrap_c, 8,
1790                                  mb_block_height, mb_x * 8, mb_y * 8,
1791                                  s->width >> 1, s->height >> 1);
1792         ptr_cb = ebuf + 18 * wrap_y;
1793         s->vdsp.emulated_edge_mc(ebuf + 18 * wrap_y + 8, ptr_cr, wrap_c, 8,
1794                                  mb_block_height, mb_x * 8, mb_y * 8,
1795                                  s->width >> 1, s->height >> 1);
1796         ptr_cr = ebuf + 18 * wrap_y + 8;
1797     }
1798
1799     if (s->mb_intra) {
1800         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
1801             int progressive_score, interlaced_score;
1802
1803             s->interlaced_dct = 0;
1804             progressive_score = s->dsp.ildct_cmp[4](s, ptr_y,
1805                                                     NULL, wrap_y, 8) +
1806                                 s->dsp.ildct_cmp[4](s, ptr_y + wrap_y * 8,
1807                                                     NULL, wrap_y, 8) - 400;
1808
1809             if (progressive_score > 0) {
1810                 interlaced_score = s->dsp.ildct_cmp[4](s, ptr_y,
1811                                                        NULL, wrap_y * 2, 8) +
1812                                    s->dsp.ildct_cmp[4](s, ptr_y + wrap_y,
1813                                                        NULL, wrap_y * 2, 8);
1814                 if (progressive_score > interlaced_score) {
1815                     s->interlaced_dct = 1;
1816
1817                     dct_offset = wrap_y;
1818                     wrap_y <<= 1;
1819                     if (s->chroma_format == CHROMA_422)
1820                         wrap_c <<= 1;
1821                 }
1822             }
1823         }
1824
1825         s->dsp.get_pixels(s->block[0], ptr_y                  , wrap_y);
1826         s->dsp.get_pixels(s->block[1], ptr_y              + 8 , wrap_y);
1827         s->dsp.get_pixels(s->block[2], ptr_y + dct_offset     , wrap_y);
1828         s->dsp.get_pixels(s->block[3], ptr_y + dct_offset + 8 , wrap_y);
1829
1830         if (s->flags & CODEC_FLAG_GRAY) {
1831             skip_dct[4] = 1;
1832             skip_dct[5] = 1;
1833         } else {
1834             s->dsp.get_pixels(s->block[4], ptr_cb, wrap_c);
1835             s->dsp.get_pixels(s->block[5], ptr_cr, wrap_c);
1836             if (!s->chroma_y_shift) { /* 422 */
1837                 s->dsp.get_pixels(s->block[6],
1838                                   ptr_cb + (dct_offset >> 1), wrap_c);
1839                 s->dsp.get_pixels(s->block[7],
1840                                   ptr_cr + (dct_offset >> 1), wrap_c);
1841             }
1842         }
1843     } else {
1844         op_pixels_func (*op_pix)[4];
1845         qpel_mc_func (*op_qpix)[16];
1846         uint8_t *dest_y, *dest_cb, *dest_cr;
1847
1848         dest_y  = s->dest[0];
1849         dest_cb = s->dest[1];
1850         dest_cr = s->dest[2];
1851
1852         if ((!s->no_rounding) || s->pict_type == AV_PICTURE_TYPE_B) {
1853             op_pix  = s->dsp.put_pixels_tab;
1854             op_qpix = s->dsp.put_qpel_pixels_tab;
1855         } else {
1856             op_pix  = s->dsp.put_no_rnd_pixels_tab;
1857             op_qpix = s->dsp.put_no_rnd_qpel_pixels_tab;
1858         }
1859
1860         if (s->mv_dir & MV_DIR_FORWARD) {
1861             ff_MPV_motion(s, dest_y, dest_cb, dest_cr, 0,
1862                           s->last_picture.f.data,
1863                           op_pix, op_qpix);
1864             op_pix  = s->dsp.avg_pixels_tab;
1865             op_qpix = s->dsp.avg_qpel_pixels_tab;
1866         }
1867         if (s->mv_dir & MV_DIR_BACKWARD) {
1868             ff_MPV_motion(s, dest_y, dest_cb, dest_cr, 1,
1869                           s->next_picture.f.data,
1870                           op_pix, op_qpix);
1871         }
1872
1873         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
1874             int progressive_score, interlaced_score;
1875
1876             s->interlaced_dct = 0;
1877             progressive_score = s->dsp.ildct_cmp[0](s, dest_y,
1878                                                     ptr_y,              wrap_y,
1879                                                     8) +
1880                                 s->dsp.ildct_cmp[0](s, dest_y + wrap_y * 8,
1881                                                     ptr_y + wrap_y * 8, wrap_y,
1882                                                     8) - 400;
1883
1884             if (s->avctx->ildct_cmp == FF_CMP_VSSE)
1885                 progressive_score -= 400;
1886
1887             if (progressive_score > 0) {
1888                 interlaced_score = s->dsp.ildct_cmp[0](s, dest_y,
1889                                                        ptr_y,
1890                                                        wrap_y * 2, 8) +
1891                                    s->dsp.ildct_cmp[0](s, dest_y + wrap_y,
1892                                                        ptr_y + wrap_y,
1893                                                        wrap_y * 2, 8);
1894
1895                 if (progressive_score > interlaced_score) {
1896                     s->interlaced_dct = 1;
1897
1898                     dct_offset = wrap_y;
1899                     wrap_y <<= 1;
1900                     if (s->chroma_format == CHROMA_422)
1901                         wrap_c <<= 1;
1902                 }
1903             }
1904         }
1905
1906         s->dsp.diff_pixels(s->block[0], ptr_y, dest_y, wrap_y);
1907         s->dsp.diff_pixels(s->block[1], ptr_y + 8, dest_y + 8, wrap_y);
1908         s->dsp.diff_pixels(s->block[2], ptr_y + dct_offset,
1909                            dest_y + dct_offset, wrap_y);
1910         s->dsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8,
1911                            dest_y + dct_offset + 8, wrap_y);
1912
1913         if (s->flags & CODEC_FLAG_GRAY) {
1914             skip_dct[4] = 1;
1915             skip_dct[5] = 1;
1916         } else {
1917             s->dsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
1918             s->dsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
1919             if (!s->chroma_y_shift) { /* 422 */
1920                 s->dsp.diff_pixels(s->block[6], ptr_cb + (dct_offset >> 1),
1921                                    dest_cb + (dct_offset >> 1), wrap_c);
1922                 s->dsp.diff_pixels(s->block[7], ptr_cr + (dct_offset >> 1),
1923                                    dest_cr + (dct_offset >> 1), wrap_c);
1924             }
1925         }
1926         /* pre quantization */
1927         if (s->current_picture.mc_mb_var[s->mb_stride * mb_y + mb_x] <
1928                 2 * s->qscale * s->qscale) {
1929             // FIXME optimize
1930             if (s->dsp.sad[1](NULL, ptr_y , dest_y,
1931                               wrap_y, 8) < 20 * s->qscale)
1932                 skip_dct[0] = 1;
1933             if (s->dsp.sad[1](NULL, ptr_y + 8,
1934                               dest_y + 8, wrap_y, 8) < 20 * s->qscale)
1935                 skip_dct[1] = 1;
1936             if (s->dsp.sad[1](NULL, ptr_y + dct_offset,
1937                               dest_y + dct_offset, wrap_y, 8) < 20 * s->qscale)
1938                 skip_dct[2] = 1;
1939             if (s->dsp.sad[1](NULL, ptr_y + dct_offset + 8,
1940                               dest_y + dct_offset + 8,
1941                               wrap_y, 8) < 20 * s->qscale)
1942                 skip_dct[3] = 1;
1943             if (s->dsp.sad[1](NULL, ptr_cb, dest_cb,
1944                               wrap_c, 8) < 20 * s->qscale)
1945                 skip_dct[4] = 1;
1946             if (s->dsp.sad[1](NULL, ptr_cr, dest_cr,
1947                               wrap_c, 8) < 20 * s->qscale)
1948                 skip_dct[5] = 1;
1949             if (!s->chroma_y_shift) { /* 422 */
1950                 if (s->dsp.sad[1](NULL, ptr_cb + (dct_offset >> 1),
1951                                   dest_cb + (dct_offset >> 1),
1952                                   wrap_c, 8) < 20 * s->qscale)
1953                     skip_dct[6] = 1;
1954                 if (s->dsp.sad[1](NULL, ptr_cr + (dct_offset >> 1),
1955                                   dest_cr + (dct_offset >> 1),
1956                                   wrap_c, 8) < 20 * s->qscale)
1957                     skip_dct[7] = 1;
1958             }
1959         }
1960     }
1961
1962     if (s->quantizer_noise_shaping) {
1963         if (!skip_dct[0])
1964             get_visual_weight(weight[0], ptr_y                 , wrap_y);
1965         if (!skip_dct[1])
1966             get_visual_weight(weight[1], ptr_y              + 8, wrap_y);
1967         if (!skip_dct[2])
1968             get_visual_weight(weight[2], ptr_y + dct_offset    , wrap_y);
1969         if (!skip_dct[3])
1970             get_visual_weight(weight[3], ptr_y + dct_offset + 8, wrap_y);
1971         if (!skip_dct[4])
1972             get_visual_weight(weight[4], ptr_cb                , wrap_c);
1973         if (!skip_dct[5])
1974             get_visual_weight(weight[5], ptr_cr                , wrap_c);
1975         if (!s->chroma_y_shift) { /* 422 */
1976             if (!skip_dct[6])
1977                 get_visual_weight(weight[6], ptr_cb + (dct_offset >> 1),
1978                                   wrap_c);
1979             if (!skip_dct[7])
1980                 get_visual_weight(weight[7], ptr_cr + (dct_offset >> 1),
1981                                   wrap_c);
1982         }
1983         memcpy(orig[0], s->block[0], sizeof(int16_t) * 64 * mb_block_count);
1984     }
1985
1986     /* DCT & quantize */
1987     assert(s->out_format != FMT_MJPEG || s->qscale == 8);
1988     {
1989         for (i = 0; i < mb_block_count; i++) {
1990             if (!skip_dct[i]) {
1991                 int overflow;
1992                 s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow);
1993                 // FIXME we could decide to change to quantizer instead of
1994                 // clipping
1995                 // JS: I don't think that would be a good idea it could lower
1996                 //     quality instead of improve it. Just INTRADC clipping
1997                 //     deserves changes in quantizer
1998                 if (overflow)
1999                     clip_coeffs(s, s->block[i], s->block_last_index[i]);
2000             } else
2001                 s->block_last_index[i] = -1;
2002         }
2003         if (s->quantizer_noise_shaping) {
2004             for (i = 0; i < mb_block_count; i++) {
2005                 if (!skip_dct[i]) {
2006                     s->block_last_index[i] =
2007                         dct_quantize_refine(s, s->block[i], weight[i],
2008                                             orig[i], i, s->qscale);
2009                 }
2010             }
2011         }
2012
2013         if (s->luma_elim_threshold && !s->mb_intra)
2014             for (i = 0; i < 4; i++)
2015                 dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
2016         if (s->chroma_elim_threshold && !s->mb_intra)
2017             for (i = 4; i < mb_block_count; i++)
2018                 dct_single_coeff_elimination(s, i, s->chroma_elim_threshold);
2019
2020         if (s->mpv_flags & FF_MPV_FLAG_CBP_RD) {
2021             for (i = 0; i < mb_block_count; i++) {
2022                 if (s->block_last_index[i] == -1)
2023                     s->coded_score[i] = INT_MAX / 256;
2024             }
2025         }
2026     }
2027
2028     if ((s->flags & CODEC_FLAG_GRAY) && s->mb_intra) {
2029         s->block_last_index[4] =
2030         s->block_last_index[5] = 0;
2031         s->block[4][0] =
2032         s->block[5][0] = (1024 + s->c_dc_scale / 2) / s->c_dc_scale;
2033     }
2034
2035     // non c quantize code returns incorrect block_last_index FIXME
2036     if (s->alternate_scan && s->dct_quantize != ff_dct_quantize_c) {
2037         for (i = 0; i < mb_block_count; i++) {
2038             int j;
2039             if (s->block_last_index[i] > 0) {
2040                 for (j = 63; j > 0; j--) {
2041                     if (s->block[i][s->intra_scantable.permutated[j]])
2042                         break;
2043                 }
2044                 s->block_last_index[i] = j;
2045             }
2046         }
2047     }
2048
2049     /* huffman encode */
2050     switch(s->codec_id){ //FIXME funct ptr could be slightly faster
2051     case AV_CODEC_ID_MPEG1VIDEO:
2052     case AV_CODEC_ID_MPEG2VIDEO:
2053         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
2054             ff_mpeg1_encode_mb(s, s->block, motion_x, motion_y);
2055         break;
2056     case AV_CODEC_ID_MPEG4:
2057         if (CONFIG_MPEG4_ENCODER)
2058             ff_mpeg4_encode_mb(s, s->block, motion_x, motion_y);
2059         break;
2060     case AV_CODEC_ID_MSMPEG4V2:
2061     case AV_CODEC_ID_MSMPEG4V3:
2062     case AV_CODEC_ID_WMV1:
2063         if (CONFIG_MSMPEG4_ENCODER)
2064             ff_msmpeg4_encode_mb(s, s->block, motion_x, motion_y);
2065         break;
2066     case AV_CODEC_ID_WMV2:
2067         if (CONFIG_WMV2_ENCODER)
2068             ff_wmv2_encode_mb(s, s->block, motion_x, motion_y);
2069         break;
2070     case AV_CODEC_ID_H261:
2071         if (CONFIG_H261_ENCODER)
2072             ff_h261_encode_mb(s, s->block, motion_x, motion_y);
2073         break;
2074     case AV_CODEC_ID_H263:
2075     case AV_CODEC_ID_H263P:
2076     case AV_CODEC_ID_FLV1:
2077     case AV_CODEC_ID_RV10:
2078     case AV_CODEC_ID_RV20:
2079         if (CONFIG_H263_ENCODER)
2080             ff_h263_encode_mb(s, s->block, motion_x, motion_y);
2081         break;
2082     case AV_CODEC_ID_MJPEG:
2083         if (CONFIG_MJPEG_ENCODER)
2084             ff_mjpeg_encode_mb(s, s->block);
2085         break;
2086     default:
2087         assert(0);
2088     }
2089 }
2090
2091 static av_always_inline void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
2092 {
2093     if (s->chroma_format == CHROMA_420) encode_mb_internal(s, motion_x, motion_y,  8, 6);
2094     else                                encode_mb_internal(s, motion_x, motion_y, 16, 8);
2095 }
2096
2097 static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int type){
2098     int i;
2099
2100     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2101
2102     /* mpeg1 */
2103     d->mb_skip_run= s->mb_skip_run;
2104     for(i=0; i<3; i++)
2105         d->last_dc[i] = s->last_dc[i];
2106
2107     /* statistics */
2108     d->mv_bits= s->mv_bits;
2109     d->i_tex_bits= s->i_tex_bits;
2110     d->p_tex_bits= s->p_tex_bits;
2111     d->i_count= s->i_count;
2112     d->f_count= s->f_count;
2113     d->b_count= s->b_count;
2114     d->skip_count= s->skip_count;
2115     d->misc_bits= s->misc_bits;
2116     d->last_bits= 0;
2117
2118     d->mb_skipped= 0;
2119     d->qscale= s->qscale;
2120     d->dquant= s->dquant;
2121
2122     d->esc3_level_length= s->esc3_level_length;
2123 }
2124
2125 static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){
2126     int i;
2127
2128     memcpy(d->mv, s->mv, 2*4*2*sizeof(int));
2129     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2130
2131     /* mpeg1 */
2132     d->mb_skip_run= s->mb_skip_run;
2133     for(i=0; i<3; i++)
2134         d->last_dc[i] = s->last_dc[i];
2135
2136     /* statistics */
2137     d->mv_bits= s->mv_bits;
2138     d->i_tex_bits= s->i_tex_bits;
2139     d->p_tex_bits= s->p_tex_bits;
2140     d->i_count= s->i_count;
2141     d->f_count= s->f_count;
2142     d->b_count= s->b_count;
2143     d->skip_count= s->skip_count;
2144     d->misc_bits= s->misc_bits;
2145
2146     d->mb_intra= s->mb_intra;
2147     d->mb_skipped= s->mb_skipped;
2148     d->mv_type= s->mv_type;
2149     d->mv_dir= s->mv_dir;
2150     d->pb= s->pb;
2151     if(s->data_partitioning){
2152         d->pb2= s->pb2;
2153         d->tex_pb= s->tex_pb;
2154     }
2155     d->block= s->block;
2156     for(i=0; i<8; i++)
2157         d->block_last_index[i]= s->block_last_index[i];
2158     d->interlaced_dct= s->interlaced_dct;
2159     d->qscale= s->qscale;
2160
2161     d->esc3_level_length= s->esc3_level_length;
2162 }
2163
2164 static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type,
2165                            PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2],
2166                            int *dmin, int *next_block, int motion_x, int motion_y)
2167 {
2168     int score;
2169     uint8_t *dest_backup[3];
2170
2171     copy_context_before_encode(s, backup, type);
2172
2173     s->block= s->blocks[*next_block];
2174     s->pb= pb[*next_block];
2175     if(s->data_partitioning){
2176         s->pb2   = pb2   [*next_block];
2177         s->tex_pb= tex_pb[*next_block];
2178     }
2179
2180     if(*next_block){
2181         memcpy(dest_backup, s->dest, sizeof(s->dest));
2182         s->dest[0] = s->rd_scratchpad;
2183         s->dest[1] = s->rd_scratchpad + 16*s->linesize;
2184         s->dest[2] = s->rd_scratchpad + 16*s->linesize + 8;
2185         assert(s->linesize >= 32); //FIXME
2186     }
2187
2188     encode_mb(s, motion_x, motion_y);
2189
2190     score= put_bits_count(&s->pb);
2191     if(s->data_partitioning){
2192         score+= put_bits_count(&s->pb2);
2193         score+= put_bits_count(&s->tex_pb);
2194     }
2195
2196     if(s->avctx->mb_decision == FF_MB_DECISION_RD){
2197         ff_MPV_decode_mb(s, s->block);
2198
2199         score *= s->lambda2;
2200         score += sse_mb(s) << FF_LAMBDA_SHIFT;
2201     }
2202
2203     if(*next_block){
2204         memcpy(s->dest, dest_backup, sizeof(s->dest));
2205     }
2206
2207     if(score<*dmin){
2208         *dmin= score;
2209         *next_block^=1;
2210
2211         copy_context_after_encode(best, s, type);
2212     }
2213 }
2214
2215 static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride){
2216     uint32_t *sq = ff_squareTbl + 256;
2217     int acc=0;
2218     int x,y;
2219
2220     if(w==16 && h==16)
2221         return s->dsp.sse[0](NULL, src1, src2, stride, 16);
2222     else if(w==8 && h==8)
2223         return s->dsp.sse[1](NULL, src1, src2, stride, 8);
2224
2225     for(y=0; y<h; y++){
2226         for(x=0; x<w; x++){
2227             acc+= sq[src1[x + y*stride] - src2[x + y*stride]];
2228         }
2229     }
2230
2231     assert(acc>=0);
2232
2233     return acc;
2234 }
2235
2236 static int sse_mb(MpegEncContext *s){
2237     int w= 16;
2238     int h= 16;
2239
2240     if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
2241     if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
2242
2243     if(w==16 && h==16)
2244       if(s->avctx->mb_cmp == FF_CMP_NSSE){
2245         return  s->dsp.nsse[0](s, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
2246                +s->dsp.nsse[1](s, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
2247                +s->dsp.nsse[1](s, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
2248       }else{
2249         return  s->dsp.sse[0](NULL, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
2250                +s->dsp.sse[1](NULL, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
2251                +s->dsp.sse[1](NULL, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
2252       }
2253     else
2254         return  sse(s, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], w, h, s->linesize)
2255                +sse(s, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize)
2256                +sse(s, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize);
2257 }
2258
2259 static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){
2260     MpegEncContext *s= *(void**)arg;
2261
2262
2263     s->me.pre_pass=1;
2264     s->me.dia_size= s->avctx->pre_dia_size;
2265     s->first_slice_line=1;
2266     for(s->mb_y= s->end_mb_y-1; s->mb_y >= s->start_mb_y; s->mb_y--) {
2267         for(s->mb_x=s->mb_width-1; s->mb_x >=0 ;s->mb_x--) {
2268             ff_pre_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2269         }
2270         s->first_slice_line=0;
2271     }
2272
2273     s->me.pre_pass=0;
2274
2275     return 0;
2276 }
2277
2278 static int estimate_motion_thread(AVCodecContext *c, void *arg){
2279     MpegEncContext *s= *(void**)arg;
2280
2281     ff_check_alignment();
2282
2283     s->me.dia_size= s->avctx->dia_size;
2284     s->first_slice_line=1;
2285     for(s->mb_y= s->start_mb_y; s->mb_y < s->end_mb_y; s->mb_y++) {
2286         s->mb_x=0; //for block init below
2287         ff_init_block_index(s);
2288         for(s->mb_x=0; s->mb_x < s->mb_width; s->mb_x++) {
2289             s->block_index[0]+=2;
2290             s->block_index[1]+=2;
2291             s->block_index[2]+=2;
2292             s->block_index[3]+=2;
2293
2294             /* compute motion vector & mb_type and store in context */
2295             if(s->pict_type==AV_PICTURE_TYPE_B)
2296                 ff_estimate_b_frame_motion(s, s->mb_x, s->mb_y);
2297             else
2298                 ff_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2299         }
2300         s->first_slice_line=0;
2301     }
2302     return 0;
2303 }
2304
2305 static int mb_var_thread(AVCodecContext *c, void *arg){
2306     MpegEncContext *s= *(void**)arg;
2307     int mb_x, mb_y;
2308
2309     ff_check_alignment();
2310
2311     for(mb_y=s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2312         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2313             int xx = mb_x * 16;
2314             int yy = mb_y * 16;
2315             uint8_t *pix = s->new_picture.f.data[0] + (yy * s->linesize) + xx;
2316             int varc;
2317             int sum = s->dsp.pix_sum(pix, s->linesize);
2318
2319             varc = (s->dsp.pix_norm1(pix, s->linesize) - (((unsigned)sum*sum)>>8) + 500 + 128)>>8;
2320
2321             s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc;
2322             s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
2323             s->me.mb_var_sum_temp    += varc;
2324         }
2325     }
2326     return 0;
2327 }
2328
2329 static void write_slice_end(MpegEncContext *s){
2330     if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4){
2331         if(s->partitioned_frame){
2332             ff_mpeg4_merge_partitions(s);
2333         }
2334
2335         ff_mpeg4_stuffing(&s->pb);
2336     }else if(CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG){
2337         ff_mjpeg_encode_stuffing(&s->pb);
2338     }
2339
2340     avpriv_align_put_bits(&s->pb);
2341     flush_put_bits(&s->pb);
2342
2343     if((s->flags&CODEC_FLAG_PASS1) && !s->partitioned_frame)
2344         s->misc_bits+= get_bits_diff(s);
2345 }
2346
2347 static void write_mb_info(MpegEncContext *s)
2348 {
2349     uint8_t *ptr = s->mb_info_ptr + s->mb_info_size - 12;
2350     int offset = put_bits_count(&s->pb);
2351     int mba  = s->mb_x + s->mb_width * (s->mb_y % s->gob_index);
2352     int gobn = s->mb_y / s->gob_index;
2353     int pred_x, pred_y;
2354     if (CONFIG_H263_ENCODER)
2355         ff_h263_pred_motion(s, 0, 0, &pred_x, &pred_y);
2356     bytestream_put_le32(&ptr, offset);
2357     bytestream_put_byte(&ptr, s->qscale);
2358     bytestream_put_byte(&ptr, gobn);
2359     bytestream_put_le16(&ptr, mba);
2360     bytestream_put_byte(&ptr, pred_x); /* hmv1 */
2361     bytestream_put_byte(&ptr, pred_y); /* vmv1 */
2362     /* 4MV not implemented */
2363     bytestream_put_byte(&ptr, 0); /* hmv2 */
2364     bytestream_put_byte(&ptr, 0); /* vmv2 */
2365 }
2366
2367 static void update_mb_info(MpegEncContext *s, int startcode)
2368 {
2369     if (!s->mb_info)
2370         return;
2371     if (put_bits_count(&s->pb) - s->prev_mb_info*8 >= s->mb_info*8) {
2372         s->mb_info_size += 12;
2373         s->prev_mb_info = s->last_mb_info;
2374     }
2375     if (startcode) {
2376         s->prev_mb_info = put_bits_count(&s->pb)/8;
2377         /* This might have incremented mb_info_size above, and we return without
2378          * actually writing any info into that slot yet. But in that case,
2379          * this will be called again at the start of the after writing the
2380          * start code, actually writing the mb info. */
2381         return;
2382     }
2383
2384     s->last_mb_info = put_bits_count(&s->pb)/8;
2385     if (!s->mb_info_size)
2386         s->mb_info_size += 12;
2387     write_mb_info(s);
2388 }
2389
2390 static int encode_thread(AVCodecContext *c, void *arg){
2391     MpegEncContext *s= *(void**)arg;
2392     int mb_x, mb_y, pdif = 0;
2393     int chr_h= 16>>s->chroma_y_shift;
2394     int i, j;
2395     MpegEncContext best_s, backup_s;
2396     uint8_t bit_buf[2][MAX_MB_BYTES];
2397     uint8_t bit_buf2[2][MAX_MB_BYTES];
2398     uint8_t bit_buf_tex[2][MAX_MB_BYTES];
2399     PutBitContext pb[2], pb2[2], tex_pb[2];
2400
2401     ff_check_alignment();
2402
2403     for(i=0; i<2; i++){
2404         init_put_bits(&pb    [i], bit_buf    [i], MAX_MB_BYTES);
2405         init_put_bits(&pb2   [i], bit_buf2   [i], MAX_MB_BYTES);
2406         init_put_bits(&tex_pb[i], bit_buf_tex[i], MAX_MB_BYTES);
2407     }
2408
2409     s->last_bits= put_bits_count(&s->pb);
2410     s->mv_bits=0;
2411     s->misc_bits=0;
2412     s->i_tex_bits=0;
2413     s->p_tex_bits=0;
2414     s->i_count=0;
2415     s->f_count=0;
2416     s->b_count=0;
2417     s->skip_count=0;
2418
2419     for(i=0; i<3; i++){
2420         /* init last dc values */
2421         /* note: quant matrix value (8) is implied here */
2422         s->last_dc[i] = 128 << s->intra_dc_precision;
2423
2424         s->current_picture.f.error[i] = 0;
2425     }
2426     s->mb_skip_run = 0;
2427     memset(s->last_mv, 0, sizeof(s->last_mv));
2428
2429     s->last_mv_dir = 0;
2430
2431     switch(s->codec_id){
2432     case AV_CODEC_ID_H263:
2433     case AV_CODEC_ID_H263P:
2434     case AV_CODEC_ID_FLV1:
2435         if (CONFIG_H263_ENCODER)
2436             s->gob_index = ff_h263_get_gob_height(s);
2437         break;
2438     case AV_CODEC_ID_MPEG4:
2439         if(CONFIG_MPEG4_ENCODER && s->partitioned_frame)
2440             ff_mpeg4_init_partitions(s);
2441         break;
2442     }
2443
2444     s->resync_mb_x=0;
2445     s->resync_mb_y=0;
2446     s->first_slice_line = 1;
2447     s->ptr_lastgob = s->pb.buf;
2448     for(mb_y= s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2449         s->mb_x=0;
2450         s->mb_y= mb_y;
2451
2452         ff_set_qscale(s, s->qscale);
2453         ff_init_block_index(s);
2454
2455         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2456             int xy= mb_y*s->mb_stride + mb_x; // removed const, H261 needs to adjust this
2457             int mb_type= s->mb_type[xy];
2458 //            int d;
2459             int dmin= INT_MAX;
2460             int dir;
2461
2462             if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < MAX_MB_BYTES){
2463                 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2464                 return -1;
2465             }
2466             if(s->data_partitioning){
2467                 if(   s->pb2   .buf_end - s->pb2   .buf - (put_bits_count(&s->    pb2)>>3) < MAX_MB_BYTES
2468                    || s->tex_pb.buf_end - s->tex_pb.buf - (put_bits_count(&s->tex_pb )>>3) < MAX_MB_BYTES){
2469                     av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2470                     return -1;
2471                 }
2472             }
2473
2474             s->mb_x = mb_x;
2475             s->mb_y = mb_y;  // moved into loop, can get changed by H.261
2476             ff_update_block_index(s);
2477
2478             if(CONFIG_H261_ENCODER && s->codec_id == AV_CODEC_ID_H261){
2479                 ff_h261_reorder_mb_index(s);
2480                 xy= s->mb_y*s->mb_stride + s->mb_x;
2481                 mb_type= s->mb_type[xy];
2482             }
2483
2484             /* write gob / video packet header  */
2485             if(s->rtp_mode){
2486                 int current_packet_size, is_gob_start;
2487
2488                 current_packet_size= ((put_bits_count(&s->pb)+7)>>3) - (s->ptr_lastgob - s->pb.buf);
2489
2490                 is_gob_start= s->avctx->rtp_payload_size && current_packet_size >= s->avctx->rtp_payload_size && mb_y + mb_x>0;
2491
2492                 if(s->start_mb_y == mb_y && mb_y > 0 && mb_x==0) is_gob_start=1;
2493
2494                 switch(s->codec_id){
2495                 case AV_CODEC_ID_H263:
2496                 case AV_CODEC_ID_H263P:
2497                     if(!s->h263_slice_structured)
2498                         if(s->mb_x || s->mb_y%s->gob_index) is_gob_start=0;
2499                     break;
2500                 case AV_CODEC_ID_MPEG2VIDEO:
2501                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
2502                 case AV_CODEC_ID_MPEG1VIDEO:
2503                     if(s->mb_skip_run) is_gob_start=0;
2504                     break;
2505                 }
2506
2507                 if(is_gob_start){
2508                     if(s->start_mb_y != mb_y || mb_x!=0){
2509                         write_slice_end(s);
2510
2511                         if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4 && s->partitioned_frame){
2512                             ff_mpeg4_init_partitions(s);
2513                         }
2514                     }
2515
2516                     assert((put_bits_count(&s->pb)&7) == 0);
2517                     current_packet_size= put_bits_ptr(&s->pb) - s->ptr_lastgob;
2518
2519                     if(s->avctx->error_rate && s->resync_mb_x + s->resync_mb_y > 0){
2520                         int r= put_bits_count(&s->pb)/8 + s->picture_number + 16 + s->mb_x + s->mb_y;
2521                         int d= 100 / s->avctx->error_rate;
2522                         if(r % d == 0){
2523                             current_packet_size=0;
2524                             s->pb.buf_ptr= s->ptr_lastgob;
2525                             assert(put_bits_ptr(&s->pb) == s->ptr_lastgob);
2526                         }
2527                     }
2528
2529                     if (s->avctx->rtp_callback){
2530                         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width + mb_x - s->resync_mb_x;
2531                         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, current_packet_size, number_mb);
2532                     }
2533                     update_mb_info(s, 1);
2534
2535                     switch(s->codec_id){
2536                     case AV_CODEC_ID_MPEG4:
2537                         if (CONFIG_MPEG4_ENCODER) {
2538                             ff_mpeg4_encode_video_packet_header(s);
2539                             ff_mpeg4_clean_buffers(s);
2540                         }
2541                     break;
2542                     case AV_CODEC_ID_MPEG1VIDEO:
2543                     case AV_CODEC_ID_MPEG2VIDEO:
2544                         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER) {
2545                             ff_mpeg1_encode_slice_header(s);
2546                             ff_mpeg1_clean_buffers(s);
2547                         }
2548                     break;
2549                     case AV_CODEC_ID_H263:
2550                     case AV_CODEC_ID_H263P:
2551                         if (CONFIG_H263_ENCODER)
2552                             ff_h263_encode_gob_header(s, mb_y);
2553                     break;
2554                     }
2555
2556                     if(s->flags&CODEC_FLAG_PASS1){
2557                         int bits= put_bits_count(&s->pb);
2558                         s->misc_bits+= bits - s->last_bits;
2559                         s->last_bits= bits;
2560                     }
2561
2562                     s->ptr_lastgob += current_packet_size;
2563                     s->first_slice_line=1;
2564                     s->resync_mb_x=mb_x;
2565                     s->resync_mb_y=mb_y;
2566                 }
2567             }
2568
2569             if(  (s->resync_mb_x   == s->mb_x)
2570                && s->resync_mb_y+1 == s->mb_y){
2571                 s->first_slice_line=0;
2572             }
2573
2574             s->mb_skipped=0;
2575             s->dquant=0; //only for QP_RD
2576
2577             update_mb_info(s, 0);
2578
2579             if (mb_type & (mb_type-1) || (s->mpv_flags & FF_MPV_FLAG_QP_RD)) { // more than 1 MB type possible or FF_MPV_FLAG_QP_RD
2580                 int next_block=0;
2581                 int pb_bits_count, pb2_bits_count, tex_pb_bits_count;
2582
2583                 copy_context_before_encode(&backup_s, s, -1);
2584                 backup_s.pb= s->pb;
2585                 best_s.data_partitioning= s->data_partitioning;
2586                 best_s.partitioned_frame= s->partitioned_frame;
2587                 if(s->data_partitioning){
2588                     backup_s.pb2= s->pb2;
2589                     backup_s.tex_pb= s->tex_pb;
2590                 }
2591
2592                 if(mb_type&CANDIDATE_MB_TYPE_INTER){
2593                     s->mv_dir = MV_DIR_FORWARD;
2594                     s->mv_type = MV_TYPE_16X16;
2595                     s->mb_intra= 0;
2596                     s->mv[0][0][0] = s->p_mv_table[xy][0];
2597                     s->mv[0][0][1] = s->p_mv_table[xy][1];
2598                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER, pb, pb2, tex_pb,
2599                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2600                 }
2601                 if(mb_type&CANDIDATE_MB_TYPE_INTER_I){
2602                     s->mv_dir = MV_DIR_FORWARD;
2603                     s->mv_type = MV_TYPE_FIELD;
2604                     s->mb_intra= 0;
2605                     for(i=0; i<2; i++){
2606                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
2607                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
2608                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
2609                     }
2610                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER_I, pb, pb2, tex_pb,
2611                                  &dmin, &next_block, 0, 0);
2612                 }
2613                 if(mb_type&CANDIDATE_MB_TYPE_SKIPPED){
2614                     s->mv_dir = MV_DIR_FORWARD;
2615                     s->mv_type = MV_TYPE_16X16;
2616                     s->mb_intra= 0;
2617                     s->mv[0][0][0] = 0;
2618                     s->mv[0][0][1] = 0;
2619                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_SKIPPED, pb, pb2, tex_pb,
2620                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2621                 }
2622                 if(mb_type&CANDIDATE_MB_TYPE_INTER4V){
2623                     s->mv_dir = MV_DIR_FORWARD;
2624                     s->mv_type = MV_TYPE_8X8;
2625                     s->mb_intra= 0;
2626                     for(i=0; i<4; i++){
2627                         s->mv[0][i][0] = s->current_picture.f.motion_val[0][s->block_index[i]][0];
2628                         s->mv[0][i][1] = s->current_picture.f.motion_val[0][s->block_index[i]][1];
2629                     }
2630                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER4V, pb, pb2, tex_pb,
2631                                  &dmin, &next_block, 0, 0);
2632                 }
2633                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD){
2634                     s->mv_dir = MV_DIR_FORWARD;
2635                     s->mv_type = MV_TYPE_16X16;
2636                     s->mb_intra= 0;
2637                     s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
2638                     s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
2639                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD, pb, pb2, tex_pb,
2640                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2641                 }
2642                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD){
2643                     s->mv_dir = MV_DIR_BACKWARD;
2644                     s->mv_type = MV_TYPE_16X16;
2645                     s->mb_intra= 0;
2646                     s->mv[1][0][0] = s->b_back_mv_table[xy][0];
2647                     s->mv[1][0][1] = s->b_back_mv_table[xy][1];
2648                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD, pb, pb2, tex_pb,
2649                                  &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]);
2650                 }
2651                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR){
2652                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2653                     s->mv_type = MV_TYPE_16X16;
2654                     s->mb_intra= 0;
2655                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
2656                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
2657                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
2658                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
2659                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR, pb, pb2, tex_pb,
2660                                  &dmin, &next_block, 0, 0);
2661                 }
2662                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD_I){
2663                     s->mv_dir = MV_DIR_FORWARD;
2664                     s->mv_type = MV_TYPE_FIELD;
2665                     s->mb_intra= 0;
2666                     for(i=0; i<2; i++){
2667                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
2668                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
2669                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
2670                     }
2671                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD_I, pb, pb2, tex_pb,
2672                                  &dmin, &next_block, 0, 0);
2673                 }
2674                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD_I){
2675                     s->mv_dir = MV_DIR_BACKWARD;
2676                     s->mv_type = MV_TYPE_FIELD;
2677                     s->mb_intra= 0;
2678                     for(i=0; i<2; i++){
2679                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
2680                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
2681                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
2682                     }
2683                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD_I, pb, pb2, tex_pb,
2684                                  &dmin, &next_block, 0, 0);
2685                 }
2686                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR_I){
2687                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2688                     s->mv_type = MV_TYPE_FIELD;
2689                     s->mb_intra= 0;
2690                     for(dir=0; dir<2; dir++){
2691                         for(i=0; i<2; i++){
2692                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
2693                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
2694                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
2695                         }
2696                     }
2697                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR_I, pb, pb2, tex_pb,
2698                                  &dmin, &next_block, 0, 0);
2699                 }
2700                 if(mb_type&CANDIDATE_MB_TYPE_INTRA){
2701                     s->mv_dir = 0;
2702                     s->mv_type = MV_TYPE_16X16;
2703                     s->mb_intra= 1;
2704                     s->mv[0][0][0] = 0;
2705                     s->mv[0][0][1] = 0;
2706                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTRA, pb, pb2, tex_pb,
2707                                  &dmin, &next_block, 0, 0);
2708                     if(s->h263_pred || s->h263_aic){
2709                         if(best_s.mb_intra)
2710                             s->mbintra_table[mb_x + mb_y*s->mb_stride]=1;
2711                         else
2712                             ff_clean_intra_table_entries(s); //old mode?
2713                     }
2714                 }
2715
2716                 if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) && dmin < INT_MAX) {
2717                     if(best_s.mv_type==MV_TYPE_16X16){ //FIXME move 4mv after QPRD
2718                         const int last_qp= backup_s.qscale;
2719                         int qpi, qp, dc[6];
2720                         int16_t ac[6][16];
2721                         const int mvdir= (best_s.mv_dir&MV_DIR_BACKWARD) ? 1 : 0;
2722                         static const int dquant_tab[4]={-1,1,-2,2};
2723
2724                         assert(backup_s.dquant == 0);
2725
2726                         //FIXME intra
2727                         s->mv_dir= best_s.mv_dir;
2728                         s->mv_type = MV_TYPE_16X16;
2729                         s->mb_intra= best_s.mb_intra;
2730                         s->mv[0][0][0] = best_s.mv[0][0][0];
2731                         s->mv[0][0][1] = best_s.mv[0][0][1];
2732                         s->mv[1][0][0] = best_s.mv[1][0][0];
2733                         s->mv[1][0][1] = best_s.mv[1][0][1];
2734
2735                         qpi = s->pict_type == AV_PICTURE_TYPE_B ? 2 : 0;
2736                         for(; qpi<4; qpi++){
2737                             int dquant= dquant_tab[qpi];
2738                             qp= last_qp + dquant;
2739                             if(qp < s->avctx->qmin || qp > s->avctx->qmax)
2740                                 continue;
2741                             backup_s.dquant= dquant;
2742                             if(s->mb_intra && s->dc_val[0]){
2743                                 for(i=0; i<6; i++){
2744                                     dc[i]= s->dc_val[0][ s->block_index[i] ];
2745                                     memcpy(ac[i], s->ac_val[0][s->block_index[i]], sizeof(int16_t)*16);
2746                                 }
2747                             }
2748
2749                             encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
2750                                          &dmin, &next_block, s->mv[mvdir][0][0], s->mv[mvdir][0][1]);
2751                             if(best_s.qscale != qp){
2752                                 if(s->mb_intra && s->dc_val[0]){
2753                                     for(i=0; i<6; i++){
2754                                         s->dc_val[0][ s->block_index[i] ]= dc[i];
2755                                         memcpy(s->ac_val[0][s->block_index[i]], ac[i], sizeof(int16_t)*16);
2756                                     }
2757                                 }
2758                             }
2759                         }
2760                     }
2761                 }
2762                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT){
2763                     int mx= s->b_direct_mv_table[xy][0];
2764                     int my= s->b_direct_mv_table[xy][1];
2765
2766                     backup_s.dquant = 0;
2767                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
2768                     s->mb_intra= 0;
2769                     ff_mpeg4_set_direct_mv(s, mx, my);
2770                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
2771                                  &dmin, &next_block, mx, my);
2772                 }
2773                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT0){
2774                     backup_s.dquant = 0;
2775                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
2776                     s->mb_intra= 0;
2777                     ff_mpeg4_set_direct_mv(s, 0, 0);
2778                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
2779                                  &dmin, &next_block, 0, 0);
2780                 }
2781                 if (!best_s.mb_intra && s->mpv_flags & FF_MPV_FLAG_SKIP_RD) {
2782                     int coded=0;
2783                     for(i=0; i<6; i++)
2784                         coded |= s->block_last_index[i];
2785                     if(coded){
2786                         int mx,my;
2787                         memcpy(s->mv, best_s.mv, sizeof(s->mv));
2788                         if(CONFIG_MPEG4_ENCODER && best_s.mv_dir & MV_DIRECT){
2789                             mx=my=0; //FIXME find the one we actually used
2790                             ff_mpeg4_set_direct_mv(s, mx, my);
2791                         }else if(best_s.mv_dir&MV_DIR_BACKWARD){
2792                             mx= s->mv[1][0][0];
2793                             my= s->mv[1][0][1];
2794                         }else{
2795                             mx= s->mv[0][0][0];
2796                             my= s->mv[0][0][1];
2797                         }
2798
2799                         s->mv_dir= best_s.mv_dir;
2800                         s->mv_type = best_s.mv_type;
2801                         s->mb_intra= 0;
2802 /*                        s->mv[0][0][0] = best_s.mv[0][0][0];
2803                         s->mv[0][0][1] = best_s.mv[0][0][1];
2804                         s->mv[1][0][0] = best_s.mv[1][0][0];
2805                         s->mv[1][0][1] = best_s.mv[1][0][1];*/
2806                         backup_s.dquant= 0;
2807                         s->skipdct=1;
2808                         encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
2809                                         &dmin, &next_block, mx, my);
2810                         s->skipdct=0;
2811                     }
2812                 }
2813
2814                 s->current_picture.f.qscale_table[xy] = best_s.qscale;
2815
2816                 copy_context_after_encode(s, &best_s, -1);
2817
2818                 pb_bits_count= put_bits_count(&s->pb);
2819                 flush_put_bits(&s->pb);
2820                 avpriv_copy_bits(&backup_s.pb, bit_buf[next_block^1], pb_bits_count);
2821                 s->pb= backup_s.pb;
2822
2823                 if(s->data_partitioning){
2824                     pb2_bits_count= put_bits_count(&s->pb2);
2825                     flush_put_bits(&s->pb2);
2826                     avpriv_copy_bits(&backup_s.pb2, bit_buf2[next_block^1], pb2_bits_count);
2827                     s->pb2= backup_s.pb2;
2828
2829                     tex_pb_bits_count= put_bits_count(&s->tex_pb);
2830                     flush_put_bits(&s->tex_pb);
2831                     avpriv_copy_bits(&backup_s.tex_pb, bit_buf_tex[next_block^1], tex_pb_bits_count);
2832                     s->tex_pb= backup_s.tex_pb;
2833                 }
2834                 s->last_bits= put_bits_count(&s->pb);
2835
2836                 if (CONFIG_H263_ENCODER &&
2837                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
2838                     ff_h263_update_motion_val(s);
2839
2840                 if(next_block==0){ //FIXME 16 vs linesize16
2841                     s->dsp.put_pixels_tab[0][0](s->dest[0], s->rd_scratchpad                     , s->linesize  ,16);
2842                     s->dsp.put_pixels_tab[1][0](s->dest[1], s->rd_scratchpad + 16*s->linesize    , s->uvlinesize, 8);
2843                     s->dsp.put_pixels_tab[1][0](s->dest[2], s->rd_scratchpad + 16*s->linesize + 8, s->uvlinesize, 8);
2844                 }
2845
2846                 if(s->avctx->mb_decision == FF_MB_DECISION_BITS)
2847                     ff_MPV_decode_mb(s, s->block);
2848             } else {
2849                 int motion_x = 0, motion_y = 0;
2850                 s->mv_type=MV_TYPE_16X16;
2851                 // only one MB-Type possible
2852
2853                 switch(mb_type){
2854                 case CANDIDATE_MB_TYPE_INTRA:
2855                     s->mv_dir = 0;
2856                     s->mb_intra= 1;
2857                     motion_x= s->mv[0][0][0] = 0;
2858                     motion_y= s->mv[0][0][1] = 0;
2859                     break;
2860                 case CANDIDATE_MB_TYPE_INTER:
2861                     s->mv_dir = MV_DIR_FORWARD;
2862                     s->mb_intra= 0;
2863                     motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
2864                     motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
2865                     break;
2866                 case CANDIDATE_MB_TYPE_INTER_I:
2867                     s->mv_dir = MV_DIR_FORWARD;
2868                     s->mv_type = MV_TYPE_FIELD;
2869                     s->mb_intra= 0;
2870                     for(i=0; i<2; i++){
2871                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
2872                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
2873                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
2874                     }
2875                     break;
2876                 case CANDIDATE_MB_TYPE_INTER4V:
2877                     s->mv_dir = MV_DIR_FORWARD;
2878                     s->mv_type = MV_TYPE_8X8;
2879                     s->mb_intra= 0;
2880                     for(i=0; i<4; i++){
2881                         s->mv[0][i][0] = s->current_picture.f.motion_val[0][s->block_index[i]][0];
2882                         s->mv[0][i][1] = s->current_picture.f.motion_val[0][s->block_index[i]][1];
2883                     }
2884                     break;
2885                 case CANDIDATE_MB_TYPE_DIRECT:
2886                     if (CONFIG_MPEG4_ENCODER) {
2887                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
2888                         s->mb_intra= 0;
2889                         motion_x=s->b_direct_mv_table[xy][0];
2890                         motion_y=s->b_direct_mv_table[xy][1];
2891                         ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
2892                     }
2893                     break;
2894                 case CANDIDATE_MB_TYPE_DIRECT0:
2895                     if (CONFIG_MPEG4_ENCODER) {
2896                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
2897                         s->mb_intra= 0;
2898                         ff_mpeg4_set_direct_mv(s, 0, 0);
2899                     }
2900                     break;
2901                 case CANDIDATE_MB_TYPE_BIDIR:
2902                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2903                     s->mb_intra= 0;
2904                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
2905                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
2906                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
2907                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
2908                     break;
2909                 case CANDIDATE_MB_TYPE_BACKWARD:
2910                     s->mv_dir = MV_DIR_BACKWARD;
2911                     s->mb_intra= 0;
2912                     motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
2913                     motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
2914                     break;
2915                 case CANDIDATE_MB_TYPE_FORWARD:
2916                     s->mv_dir = MV_DIR_FORWARD;
2917                     s->mb_intra= 0;
2918                     motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
2919                     motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
2920                     break;
2921                 case CANDIDATE_MB_TYPE_FORWARD_I:
2922                     s->mv_dir = MV_DIR_FORWARD;
2923                     s->mv_type = MV_TYPE_FIELD;
2924                     s->mb_intra= 0;
2925                     for(i=0; i<2; i++){
2926                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
2927                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
2928                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
2929                     }
2930                     break;
2931                 case CANDIDATE_MB_TYPE_BACKWARD_I:
2932                     s->mv_dir = MV_DIR_BACKWARD;
2933                     s->mv_type = MV_TYPE_FIELD;
2934                     s->mb_intra= 0;
2935                     for(i=0; i<2; i++){
2936                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
2937                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
2938                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
2939                     }
2940                     break;
2941                 case CANDIDATE_MB_TYPE_BIDIR_I:
2942                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2943                     s->mv_type = MV_TYPE_FIELD;
2944                     s->mb_intra= 0;
2945                     for(dir=0; dir<2; dir++){
2946                         for(i=0; i<2; i++){
2947                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
2948                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
2949                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
2950                         }
2951                     }
2952                     break;
2953                 default:
2954                     av_log(s->avctx, AV_LOG_ERROR, "illegal MB type\n");
2955                 }
2956
2957                 encode_mb(s, motion_x, motion_y);
2958
2959                 // RAL: Update last macroblock type
2960                 s->last_mv_dir = s->mv_dir;
2961
2962                 if (CONFIG_H263_ENCODER &&
2963                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
2964                     ff_h263_update_motion_val(s);
2965
2966                 ff_MPV_decode_mb(s, s->block);
2967             }
2968
2969             /* clean the MV table in IPS frames for direct mode in B frames */
2970             if(s->mb_intra /* && I,P,S_TYPE */){
2971                 s->p_mv_table[xy][0]=0;
2972                 s->p_mv_table[xy][1]=0;
2973             }
2974
2975             if(s->flags&CODEC_FLAG_PSNR){
2976                 int w= 16;
2977                 int h= 16;
2978
2979                 if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
2980                 if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
2981
2982                 s->current_picture.f.error[0] += sse(
2983                     s, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
2984                     s->dest[0], w, h, s->linesize);
2985                 s->current_picture.f.error[1] += sse(
2986                     s, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
2987                     s->dest[1], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
2988                 s->current_picture.f.error[2] += sse(
2989                     s, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
2990                     s->dest[2], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
2991             }
2992             if(s->loop_filter){
2993                 if(CONFIG_H263_ENCODER && s->out_format == FMT_H263)
2994                     ff_h263_loop_filter(s);
2995             }
2996             av_dlog(s->avctx, "MB %d %d bits\n",
2997                     s->mb_x + s->mb_y * s->mb_stride, put_bits_count(&s->pb));
2998         }
2999     }
3000
3001     //not beautiful here but we must write it before flushing so it has to be here
3002     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type == AV_PICTURE_TYPE_I)
3003         ff_msmpeg4_encode_ext_header(s);
3004
3005     write_slice_end(s);
3006
3007     /* Send the last GOB if RTP */
3008     if (s->avctx->rtp_callback) {
3009         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width - s->resync_mb_x;
3010         pdif = put_bits_ptr(&s->pb) - s->ptr_lastgob;
3011         /* Call the RTP callback to send the last GOB */
3012         emms_c();
3013         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, pdif, number_mb);
3014     }
3015
3016     return 0;
3017 }
3018
3019 #define MERGE(field) dst->field += src->field; src->field=0
3020 static void merge_context_after_me(MpegEncContext *dst, MpegEncContext *src){
3021     MERGE(me.scene_change_score);
3022     MERGE(me.mc_mb_var_sum_temp);
3023     MERGE(me.mb_var_sum_temp);
3024 }
3025
3026 static void merge_context_after_encode(MpegEncContext *dst, MpegEncContext *src){
3027     int i;
3028
3029     MERGE(dct_count[0]); //note, the other dct vars are not part of the context
3030     MERGE(dct_count[1]);
3031     MERGE(mv_bits);
3032     MERGE(i_tex_bits);
3033     MERGE(p_tex_bits);
3034     MERGE(i_count);
3035     MERGE(f_count);
3036     MERGE(b_count);
3037     MERGE(skip_count);
3038     MERGE(misc_bits);
3039     MERGE(error_count);
3040     MERGE(padding_bug_score);
3041     MERGE(current_picture.f.error[0]);
3042     MERGE(current_picture.f.error[1]);
3043     MERGE(current_picture.f.error[2]);
3044
3045     if(dst->avctx->noise_reduction){
3046         for(i=0; i<64; i++){
3047             MERGE(dct_error_sum[0][i]);
3048             MERGE(dct_error_sum[1][i]);
3049         }
3050     }
3051
3052     assert(put_bits_count(&src->pb) % 8 ==0);
3053     assert(put_bits_count(&dst->pb) % 8 ==0);
3054     avpriv_copy_bits(&dst->pb, src->pb.buf, put_bits_count(&src->pb));
3055     flush_put_bits(&dst->pb);
3056 }
3057
3058 static int estimate_qp(MpegEncContext *s, int dry_run){
3059     if (s->next_lambda){
3060         s->current_picture_ptr->f.quality =
3061         s->current_picture.f.quality = s->next_lambda;
3062         if(!dry_run) s->next_lambda= 0;
3063     } else if (!s->fixed_qscale) {
3064         s->current_picture_ptr->f.quality =
3065         s->current_picture.f.quality = ff_rate_estimate_qscale(s, dry_run);
3066         if (s->current_picture.f.quality < 0)
3067             return -1;
3068     }
3069
3070     if(s->adaptive_quant){
3071         switch(s->codec_id){
3072         case AV_CODEC_ID_MPEG4:
3073             if (CONFIG_MPEG4_ENCODER)
3074                 ff_clean_mpeg4_qscales(s);
3075             break;
3076         case AV_CODEC_ID_H263:
3077         case AV_CODEC_ID_H263P:
3078         case AV_CODEC_ID_FLV1:
3079             if (CONFIG_H263_ENCODER)
3080                 ff_clean_h263_qscales(s);
3081             break;
3082         default:
3083             ff_init_qscale_tab(s);
3084         }
3085
3086         s->lambda= s->lambda_table[0];
3087         //FIXME broken
3088     }else
3089         s->lambda = s->current_picture.f.quality;
3090     update_qscale(s);
3091     return 0;
3092 }
3093
3094 /* must be called before writing the header */
3095 static void set_frame_distances(MpegEncContext * s){
3096     assert(s->current_picture_ptr->f.pts != AV_NOPTS_VALUE);
3097     s->time = s->current_picture_ptr->f.pts * s->avctx->time_base.num;
3098
3099     if(s->pict_type==AV_PICTURE_TYPE_B){
3100         s->pb_time= s->pp_time - (s->last_non_b_time - s->time);
3101         assert(s->pb_time > 0 && s->pb_time < s->pp_time);
3102     }else{
3103         s->pp_time= s->time - s->last_non_b_time;
3104         s->last_non_b_time= s->time;
3105         assert(s->picture_number==0 || s->pp_time > 0);
3106     }
3107 }
3108
/**
 * Encode one picture.
 *
 * In order: sets up timing and rounding state, picks a starting lambda,
 * runs motion estimation (or, for I-pictures without fixed qscale, the MB
 * variance pass) on all slice contexts, optionally turns a P-picture into an
 * I-picture on a scene change, selects f_code/b_code, runs the final
 * quantizer estimate, writes the format specific picture header and finally
 * runs encode_thread on all slice contexts and merges their results.
 *
 * @param s              main encoder context
 * @param picture_number stored in s->picture_number and handed to the
 *                       picture header writers
 * @return 0 on success, -1 if estimate_qp() or ff_init_me() fails, or the
 *         negative value returned by ff_update_duplicate_context()
 */
3109 static int encode_picture(MpegEncContext *s, int picture_number)
3110 {
3111     int i, ret;
3112     int bits;
3113     int context_count = s->slice_context_count;
3114
3115     s->picture_number = picture_number;
3116
3117     /* Reset the average MB variance */
3118     s->me.mb_var_sum_temp    =
3119     s->me.mc_mb_var_sum_temp = 0;
3120
3121     /* we need to initialize some time vars before we can encode b-frames */
3122     // RAL: Condition added for MPEG1VIDEO
3123     if (s->codec_id == AV_CODEC_ID_MPEG1VIDEO || s->codec_id == AV_CODEC_ID_MPEG2VIDEO || (s->h263_pred && !s->msmpeg4_version))
3124         set_frame_distances(s);
3125     if(CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4)
3126         ff_set_mpeg4_time(s);
3127
3128     s->me.scene_change_score=0;
3129
3130 //    s->lambda= s->current_picture_ptr->quality; //FIXME qscale / ... stuff for ME rate distortion
3131
         /* Rounding control: I-pictures set no_rounding (1 for msmpeg4_version >= 3,
          * otherwise 0); other non-B pictures toggle it when flipflop_rounding is
          * set or the codec is H263P/MPEG4; B-pictures leave it unchanged. */
3132     if(s->pict_type==AV_PICTURE_TYPE_I){
3133         if(s->msmpeg4_version >= 3) s->no_rounding=1;
3134         else                        s->no_rounding=0;
3135     }else if(s->pict_type!=AV_PICTURE_TYPE_B){
3136         if(s->flipflop_rounding || s->codec_id == AV_CODEC_ID_H263P || s->codec_id == AV_CODEC_ID_MPEG4)
3137             s->no_rounding ^= 1;
3138     }
3139
         /* Starting lambda for motion estimation: a dry-run quantizer estimate in
          * 2-pass mode; otherwise, unless CODEC_FLAG_QSCALE is set, the lambda
          * last stored for B-pictures or for the last non-B picture type. */
3140     if(s->flags & CODEC_FLAG_PASS2){
3141         if (estimate_qp(s,1) < 0)
3142             return -1;
3143         ff_get_2pass_fcode(s);
3144     }else if(!(s->flags & CODEC_FLAG_QSCALE)){
3145         if(s->pict_type==AV_PICTURE_TYPE_B)
3146             s->lambda= s->last_lambda_for[s->pict_type];
3147         else
3148             s->lambda= s->last_lambda_for[s->last_non_b_pict_type];
3149         update_qscale(s);
3150     }
3151
3152     s->mb_intra=0; //for the rate distortion & bit compare functions
         /* bring slice contexts 1..context_count-1 in sync with the main context */
3153     for(i=1; i<context_count; i++){
3154         ret = ff_update_duplicate_context(s->thread_context[i], s);
3155         if (ret < 0)
3156             return ret;
3157     }
3158
3159     if(ff_init_me(s)<0)
3160         return -1;
3161
3162     /* Estimate motion for every MB */
3163     if(s->pict_type != AV_PICTURE_TYPE_I){
             /* scale lambda and lambda2 by me_penalty_compensation/256, rounded */
3164         s->lambda = (s->lambda * s->avctx->me_penalty_compensation + 128)>>8;
3165         s->lambda2= (s->lambda2* (int64_t)s->avctx->me_penalty_compensation + 128)>>8;
             /* optional pre-pass: non-B pictures with me_threshold == 0, when
              * pre_me == 2, or pre_me is set and the last non-B picture was an I */
3166         if(s->pict_type != AV_PICTURE_TYPE_B && s->avctx->me_threshold==0){
3167             if((s->avctx->pre_me && s->last_non_b_pict_type==AV_PICTURE_TYPE_I) || s->avctx->pre_me==2){
3168                 s->avctx->execute(s->avctx, pre_estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3169             }
3170         }
3171
3172         s->avctx->execute(s->avctx, estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3173     }else /* if(s->pict_type == AV_PICTURE_TYPE_I) */{
3174         /* I-Frame */
3175         for(i=0; i<s->mb_stride*s->mb_height; i++)
3176             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3177
3178         if(!s->fixed_qscale){
3179             /* finding spatial complexity for I-frame rate control */
3180             s->avctx->execute(s->avctx, mb_var_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3181         }
3182     }
         /* collect the ME statistics of the other slice contexts in the main one */
3183     for(i=1; i<context_count; i++){
3184         merge_context_after_me(s, s->thread_context[i]);
3185     }
3186     s->current_picture.mc_mb_var_sum= s->current_picture_ptr->mc_mb_var_sum= s->me.mc_mb_var_sum_temp;
3187     s->current_picture.   mb_var_sum= s->current_picture_ptr->   mb_var_sum= s->me.   mb_var_sum_temp;
3188     emms_c();
3189
         /* scene change: re-type a P-picture as I and mark every MB as an intra candidate */
3190     if(s->me.scene_change_score > s->avctx->scenechange_threshold && s->pict_type == AV_PICTURE_TYPE_P){
3191         s->pict_type= AV_PICTURE_TYPE_I;
3192         for(i=0; i<s->mb_stride*s->mb_height; i++)
3193             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3194         av_dlog(s, "Scene change detected, encoding as I Frame %d %d\n",
3195                 s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
3196     }
3197
         /* Unless umvplus is set, choose f_code/b_code from the motion vector tables
          * and hand the tables to ff_fix_long_p_mvs()/ff_fix_long_mvs() (defined
          * elsewhere) together with the chosen codes. */
3198     if(!s->umvplus){
3199         if(s->pict_type==AV_PICTURE_TYPE_P || s->pict_type==AV_PICTURE_TYPE_S) {
3200             s->f_code= ff_get_best_fcode(s, s->p_mv_table, CANDIDATE_MB_TYPE_INTER);
3201
3202             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3203                 int a,b;
3204                 a= ff_get_best_fcode(s, s->p_field_mv_table[0][0], CANDIDATE_MB_TYPE_INTER_I); //FIXME field_select
3205                 b= ff_get_best_fcode(s, s->p_field_mv_table[1][1], CANDIDATE_MB_TYPE_INTER_I);
3206                 s->f_code= FFMAX3(s->f_code, a, b);
3207             }
3208
3209             ff_fix_long_p_mvs(s);
3210             ff_fix_long_mvs(s, NULL, 0, s->p_mv_table, s->f_code, CANDIDATE_MB_TYPE_INTER, 0);
3211             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3212                 int j;
3213                 for(i=0; i<2; i++){
3214                     for(j=0; j<2; j++)
3215                         ff_fix_long_mvs(s, s->p_field_select_table[i], j,
3216                                         s->p_field_mv_table[i][j], s->f_code, CANDIDATE_MB_TYPE_INTER_I, 0);
3217                 }
3218             }
3219         }
3220
3221         if(s->pict_type==AV_PICTURE_TYPE_B){
3222             int a, b;
3223
                 /* f_code covers the forward tables, b_code the backward tables */
3224             a = ff_get_best_fcode(s, s->b_forw_mv_table, CANDIDATE_MB_TYPE_FORWARD);
3225             b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3226             s->f_code = FFMAX(a, b);
3227
3228             a = ff_get_best_fcode(s, s->b_back_mv_table, CANDIDATE_MB_TYPE_BACKWARD);
3229             b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3230             s->b_code = FFMAX(a, b);
3231
3232             ff_fix_long_mvs(s, NULL, 0, s->b_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_FORWARD, 1);
3233             ff_fix_long_mvs(s, NULL, 0, s->b_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BACKWARD, 1);
3234             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3235             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3236             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3237                 int dir, j;
3238                 for(dir=0; dir<2; dir++){
3239                     for(i=0; i<2; i++){
3240                         for(j=0; j<2; j++){
3241                             int type= dir ? (CANDIDATE_MB_TYPE_BACKWARD_I|CANDIDATE_MB_TYPE_BIDIR_I)
3242                                           : (CANDIDATE_MB_TYPE_FORWARD_I |CANDIDATE_MB_TYPE_BIDIR_I);
3243                             ff_fix_long_mvs(s, s->b_field_select_table[dir][i], j,
3244                                             s->b_field_mv_table[dir][i][j], dir ? s->b_code : s->f_code, type, 1);
3245                         }
3246                     }
3247                 }
3248             }
3249         }
3250     }
3251
         /* final (non dry-run) quantizer estimate for this picture */
3252     if (estimate_qp(s, 0) < 0)
3253         return -1;
3254
3255     if(s->qscale < 3 && s->max_qcoeff<=128 && s->pict_type==AV_PICTURE_TYPE_I && !(s->flags & CODEC_FLAG_QSCALE))
3256         s->qscale= 3; //reduce clipping problems
3257
3258     if (s->out_format == FMT_MJPEG) {
3259         /* for mjpeg, we do include qscale in the matrix */
3260         for(i=1;i<64;i++){
3261             int j= s->dsp.idct_permutation[i];
3262
3263             s->intra_matrix[j] = av_clip_uint8((ff_mpeg1_default_intra_matrix[i] * s->qscale) >> 3);
3264         }
3265         s->y_dc_scale_table=
3266         s->c_dc_scale_table= ff_mpeg2_dc_scale_table[s->intra_dc_precision];
3267         s->intra_matrix[0] = ff_mpeg2_dc_scale_table[s->intra_dc_precision][8];
3268         ff_convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
3269                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
             /* the matrix already contains the qscale, so a constant 8 is used from here on */
3270         s->qscale= 8;
3271     }
3272
3273     //FIXME var duplication
3274     s->current_picture_ptr->f.key_frame =
3275     s->current_picture.f.key_frame = s->pict_type == AV_PICTURE_TYPE_I; //FIXME pic_ptr
3276     s->current_picture_ptr->f.pict_type =
3277     s->current_picture.f.pict_type = s->pict_type;
3278
3279     if (s->current_picture.f.key_frame)
3280         s->picture_in_gop_number=0;
3281
         /* remember the bit position so the header size can be measured below */
3282     s->last_bits= put_bits_count(&s->pb);
3283     switch(s->out_format) {
3284     case FMT_MJPEG:
3285         if (CONFIG_MJPEG_ENCODER)
3286             ff_mjpeg_encode_picture_header(s);
3287         break;
3288     case FMT_H261:
3289         if (CONFIG_H261_ENCODER)
3290             ff_h261_encode_picture_header(s, picture_number);
3291         break;
3292     case FMT_H263:
3293         if (CONFIG_WMV2_ENCODER && s->codec_id == AV_CODEC_ID_WMV2)
3294             ff_wmv2_encode_picture_header(s, picture_number);
3295         else if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
3296             ff_msmpeg4_encode_picture_header(s, picture_number);
3297         else if (CONFIG_MPEG4_ENCODER && s->h263_pred)
3298             ff_mpeg4_encode_picture_header(s, picture_number);
3299         else if (CONFIG_RV10_ENCODER && s->codec_id == AV_CODEC_ID_RV10)
3300             ff_rv10_encode_picture_header(s, picture_number);
3301         else if (CONFIG_RV20_ENCODER && s->codec_id == AV_CODEC_ID_RV20)
3302             ff_rv20_encode_picture_header(s, picture_number);
3303         else if (CONFIG_FLV_ENCODER && s->codec_id == AV_CODEC_ID_FLV1)
3304             ff_flv_encode_picture_header(s, picture_number);
3305         else if (CONFIG_H263_ENCODER)
3306             ff_h263_encode_picture_header(s, picture_number);
3307         break;
3308     case FMT_MPEG1:
3309         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
3310             ff_mpeg1_encode_picture_header(s, picture_number);
3311         break;
3312     case FMT_H264:
3313         break;
3314     default:
3315         assert(0);
3316     }
3317     bits= put_bits_count(&s->pb);
3318     s->header_bits= bits - s->last_bits;
3319
         /* pass the post-ME state to the other slice contexts, encode all slices,
          * then merge their statistics and bitstream data into the main context */
3320     for(i=1; i<context_count; i++){
3321         update_duplicate_context_after_me(s->thread_context[i], s);
3322     }
3323     s->avctx->execute(s->avctx, encode_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3324     for(i=1; i<context_count; i++){
3325         merge_context_after_encode(s, s->thread_context[i]);
3326     }
3327     emms_c();
3328     return 0;
3329 }
3330
3331 static void denoise_dct_c(MpegEncContext *s, int16_t *block){
3332     const int intra= s->mb_intra;
3333     int i;
3334
3335     s->dct_count[intra]++;
3336
3337     for(i=0; i<64; i++){
3338         int level= block[i];
3339
3340         if(level){
3341             if(level>0){
3342                 s->dct_error_sum[intra][i] += level;
3343                 level -= s->dct_offset[intra][i];
3344                 if(level<0) level=0;
3345             }else{
3346                 s->dct_error_sum[intra][i] -= level;
3347                 level += s->dct_offset[intra][i];
3348                 if(level>0) level=0;
3349             }
3350             block[i]= level;
3351         }
3352     }
3353 }
3354
/**
 * Forward-transform and quantize one block using a trellis
 * (rate-distortion) search.
 *
 * After the forward DCT (and optional denoising) up to two candidate levels
 * are collected per coefficient. A dynamic-programming pass over the scan
 * order then picks the run/level sequence with the lowest
 * distortion + lambda * bits score, using the intra or inter VLC length
 * tables (or the escape length for levels outside them).
 *
 * @param s        encoder context
 * @param block    input samples; replaced by the quantized levels
 * @param n        block number; for intra blocks n < 4 selects y_dc_scale,
 *                 otherwise c_dc_scale; also indexes s->coded_score
 * @param qscale   quantizer scale, selects the quantization matrix row
 * @param overflow set to non-zero if a candidate level may exceed
 *                 s->max_qcoeff, 0 otherwise
 * @return scan index of the last non-zero coefficient kept; a value below
 *         the first searched index (1 for intra, 0 for non-intra blocks)
 *         means that no coefficient from the search was kept
 */
3355 static int dct_quantize_trellis_c(MpegEncContext *s,
3356                                   int16_t *block, int n,
3357                                   int qscale, int *overflow){
3358     const int *qmat;
3359     const uint8_t *scantable= s->intra_scantable.scantable;
3360     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3361     int max=0;
3362     unsigned int threshold1, threshold2;
3363     int bias=0;
3364     int run_tab[65];
3365     int level_tab[65];
3366     int score_tab[65];
3367     int survivor[65];
3368     int survivor_count;
3369     int last_run=0;
3370     int last_level=0;
3371     int last_score= 0;
3372     int last_i;
3373     int coeff[2][64];
3374     int coeff_count[64];
3375     int qmul, qadd, start_i, last_non_zero, i, dc;
3376     const int esc_length= s->ac_esc_length;
3377     uint8_t * length;
3378     uint8_t * last_length;
3379     const int lambda= s->lambda2 >> (FF_LAMBDA_SHIFT - 6);
3380
         /* forward DCT in place, followed by denoising when s->dct_error_sum is set */
3381     s->dsp.fdct (block);
3382
3383     if(s->dct_error_sum)
3384         s->denoise_dct(s, block);
3385     qmul= qscale*16;
3386     qadd= ((qscale-1)|1)*8;
3387
         /* intra blocks: the DC coefficient is quantized right here and the search
          * starts at scan index 1; other blocks search from scan index 0 */
3388     if (s->mb_intra) {
3389         int q;
3390         if (!s->h263_aic) {
3391             if (n < 4)
3392                 q = s->y_dc_scale;
3393             else
3394                 q = s->c_dc_scale;
3395             q = q << 3;
3396         } else{
3397             /* For AIC we skip quant/dequant of INTRADC */
3398             q = 1 << 3;
3399             qadd=0;
3400         }
3401
3402         /* note: block[0] is assumed to be positive */
3403         block[0] = (block[0] + (q >> 1)) / q;
3404         start_i = 1;
3405         last_non_zero = 0;
3406         qmat = s->q_intra_matrix[qscale];
3407         if(s->mpeg_quant || s->out_format == FMT_MPEG1)
3408             bias= 1<<(QMAT_SHIFT-1);
3409         length     = s->intra_ac_vlc_length;
3410         last_length= s->intra_ac_vlc_last_length;
3411     } else {
3412         start_i = 0;
3413         last_non_zero = -1;
3414         qmat = s->q_inter_matrix[qscale];
3415         length     = s->inter_ac_vlc_length;
3416         last_length= s->inter_ac_vlc_last_length;
3417     }
3418     last_i= start_i;
3419
         /* (unsigned)(level+threshold1) > threshold2 tests level > threshold1 or
          * level < -threshold1 with a single comparison */
3420     threshold1= (1<<QMAT_SHIFT) - bias - 1;
3421     threshold2= (threshold1<<1);
3422
         /* scan backwards for the last coefficient that passes the threshold */
3423     for(i=63; i>=start_i; i--) {
3424         const int j = scantable[i];
3425         int level = block[j] * qmat[j];
3426
3427         if(((unsigned)(level+threshold1))>threshold2){
3428             last_non_zero = i;
3429             break;
3430         }
3431     }
3432
         /* Collect the candidates: coeff[0][i] is the quantized level, coeff[1][i]
          * the level one step closer to zero (only considered when the magnitude
          * is at least 2). Coefficients below the threshold get the single
          * candidate -1 or +1. */
3433     for(i=start_i; i<=last_non_zero; i++) {
3434         const int j = scantable[i];
3435         int level = block[j] * qmat[j];
3436
3437 //        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
3438 //           || bias-level >= (1<<(QMAT_SHIFT - 3))){
3439         if(((unsigned)(level+threshold1))>threshold2){
3440             if(level>0){
3441                 level= (bias + level)>>QMAT_SHIFT;
3442                 coeff[0][i]= level;
3443                 coeff[1][i]= level-1;
3444 //                coeff[2][k]= level-2;
3445             }else{
3446                 level= (bias - level)>>QMAT_SHIFT;
3447                 coeff[0][i]= -level;
3448                 coeff[1][i]= -level+1;
3449 //                coeff[2][k]= -level+2;
3450             }
3451             coeff_count[i]= FFMIN(level, 2);
3452             assert(coeff_count[i]);
3453             max |=level;
3454         }else{
3455             coeff[0][i]= (level>>31)|1;
3456             coeff_count[i]= 1;
3457         }
3458     }
3459
3460     *overflow= s->max_qcoeff < max; //overflow might have happened
3461
         /* nothing passed the threshold: clear the searched coefficients and stop */
3462     if(last_non_zero < start_i){
3463         memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
3464         return last_non_zero;
3465     }
3466
         /* Dynamic programming over the scan positions: score_tab[i+1] receives the
          * best score of a path whose most recent coded coefficient is at scan
          * position i, run_tab/level_tab[i+1] the run and level chosen for it.
          * survivor[] lists the earlier end points still tried as predecessors. */
3467     score_tab[start_i]= 0;
3468     survivor[0]= start_i;
3469     survivor_count= 1;
3470
3471     for(i=start_i; i<=last_non_zero; i++){
3472         int level_index, j, zero_distortion;
3473         int dct_coeff= FFABS(block[ scantable[i] ]);
3474         int best_score=256*256*256*120;
3475
3476         if (s->dsp.fdct == ff_fdct_ifast)
3477             dct_coeff= (dct_coeff*ff_inv_aanscales[ scantable[i] ]) >> 12;
3478         zero_distortion= dct_coeff*dct_coeff;
3479
3480         for(level_index=0; level_index < coeff_count[i]; level_index++){
3481             int distortion;
3482             int level= coeff[level_index][i];
3483             const int alevel= FFABS(level);
3484             int unquant_coeff;
3485
3486             assert(level);
3487
                 /* reconstruct the value a decoder would get for this candidate */
3488             if(s->out_format == FMT_H263){
3489                 unquant_coeff= alevel*qmul + qadd;
3490             }else{ //MPEG1
3491                 j= s->dsp.idct_permutation[ scantable[i] ]; //FIXME optimize
3492                 if(s->mb_intra){
3493                         unquant_coeff = (int)(  alevel  * qscale * s->intra_matrix[j]) >> 3;
3494                         unquant_coeff =   (unquant_coeff - 1) | 1;
3495                 }else{
3496                         unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[j])) >> 4;
3497                         unquant_coeff =   (unquant_coeff - 1) | 1;
3498                 }
3499                 unquant_coeff<<= 3;
3500             }
3501
                 /* distortion relative to coding this coefficient as zero */
3502             distortion= (unquant_coeff - dct_coeff) * (unquant_coeff - dct_coeff) - zero_distortion;
                 /* offset by 64: levels -64..63 index the VLC length tables, anything
                  * else is costed with the escape length */
3503             level+=64;
3504             if((level&(~127)) == 0){
3505                 for(j=survivor_count-1; j>=0; j--){
3506                     int run= i - survivor[j];
3507                     int score= distortion + length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3508                     score += score_tab[i-run];
3509
3510                     if(score < best_score){
3511                         best_score= score;
3512                         run_tab[i+1]= run;
3513                         level_tab[i+1]= level-64;
3514                     }
3515                 }
3516
                     /* FMT_H263 uses the separate last_length table for the final
                      * coefficient, so the best end point is tracked during the search */
3517                 if(s->out_format == FMT_H263){
3518                     for(j=survivor_count-1; j>=0; j--){
3519                         int run= i - survivor[j];
3520                         int score= distortion + last_length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3521                         score += score_tab[i-run];
3522                         if(score < last_score){
3523                             last_score= score;
3524                             last_run= run;
3525                             last_level= level-64;
3526                             last_i= i+1;
3527                         }
3528                     }
3529                 }
3530             }else{
3531                 distortion += esc_length*lambda;
3532                 for(j=survivor_count-1; j>=0; j--){
3533                     int run= i - survivor[j];
3534                     int score= distortion + score_tab[i-run];
3535
3536                     if(score < best_score){
3537                         best_score= score;
3538                         run_tab[i+1]= run;
3539                         level_tab[i+1]= level-64;
3540                     }
3541                 }
3542
3543                 if(s->out_format == FMT_H263){
3544                   for(j=survivor_count-1; j>=0; j--){
3545                         int run= i - survivor[j];
3546                         int score= distortion + score_tab[i-run];
3547                         if(score < last_score){
3548                             last_score= score;
3549                             last_run= run;
3550                             last_level= level-64;
3551                             last_i= i+1;
3552                         }
3553                     }
3554                 }
3555             }
3556         }
3557
3558         score_tab[i+1]= best_score;
3559
             /* drop survivors whose score is worse than the new best score (plus a
              * margin of lambda when last_non_zero > 27), then add this position */
3560         //Note: there is a vlc code in mpeg4 which is 1 bit shorter than another one with a shorter run and the same level
3561         if(last_non_zero <= 27){
3562             for(; survivor_count; survivor_count--){
3563                 if(score_tab[ survivor[survivor_count-1] ] <= best_score)
3564                     break;
3565             }
3566         }else{
3567             for(; survivor_count; survivor_count--){
3568                 if(score_tab[ survivor[survivor_count-1] ] <= best_score + lambda)
3569                     break;
3570             }
3571         }
3572
3573         survivor[ survivor_count++ ]= i+1;
3574     }
3575
         /* other formats: pick the end point afterwards; every end point other than
          * index 0 is charged an extra 2*lambda */
3576     if(s->out_format != FMT_H263){
3577         last_score= 256*256*256*120;
3578         for(i= survivor[0]; i<=last_non_zero + 1; i++){
3579             int score= score_tab[i];
3580             if(i) score += lambda*2; //FIXME exacter?
3581
3582             if(score < last_score){
3583                 last_score= score;
3584                 last_i= i;
3585                 last_level= level_tab[i];
3586                 last_run= run_tab[i];
3587             }
3588         }
3589     }
3590
3591     s->coded_score[n] = last_score;
3592
         /* keep |block[0]| for the special case below, then clear the searched part */
3593     dc= FFABS(block[0]);
3594     last_non_zero= last_i - 1;
3595     memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
3596
3597     if(last_non_zero < start_i)
3598         return last_non_zero;
3599
         /* Special case: only the coefficient at scan index 0 of a non-intra block
          * survived. Decide again between its candidates and coding nothing. */
3600     if(last_non_zero == 0 && start_i == 0){
3601         int best_level= 0;
3602         int best_score= dc * dc;
3603
3604         for(i=0; i<coeff_count[0]; i++){
3605             int level= coeff[i][0];
3606             int alevel= FFABS(level);
3607             int unquant_coeff, score, distortion;
3608
3609             if(s->out_format == FMT_H263){
3610                     unquant_coeff= (alevel*qmul + qadd)>>3;
3611             }else{ //MPEG1
3612                     unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[0])) >> 4;
3613                     unquant_coeff =   (unquant_coeff - 1) | 1;
3614             }
3615             unquant_coeff = (unquant_coeff + 4) >> 3;
3616             unquant_coeff<<= 3 + 3;
3617
3618             distortion= (unquant_coeff - dc) * (unquant_coeff - dc);
3619             level+=64;
3620             if((level&(~127)) == 0) score= distortion + last_length[UNI_AC_ENC_INDEX(0, level)]*lambda;
3621             else                    score= distortion + esc_length*lambda;
3622
3623             if(score < best_score){
3624                 best_score= score;
3625                 best_level= level - 64;
3626             }
3627         }
3628         block[0]= best_level;
3629         s->coded_score[n] = best_score - dc*dc;
3630         if(best_level == 0) return -1;
3631         else                return last_non_zero;
3632     }
3633
         /* backtrack along the chosen path and write the levels into the block in
          * permuted scan order */
3634     i= last_i;
3635     assert(last_level);
3636
3637     block[ perm_scantable[last_non_zero] ]= last_level;
3638     i -= last_run + 1;
3639
3640     for(; i>start_i; i -= run_tab[i] + 1){
3641         block[ perm_scantable[i-1] ]= level_tab[i];
3642     }
3643
3644     return last_non_zero;
3645 }
3646
3647 //#define REFINE_STATS 1
3648 static int16_t basis[64][64];
3649
3650 static void build_basis(uint8_t *perm){
3651     int i, j, x, y;
3652     emms_c();
3653     for(i=0; i<8; i++){
3654         for(j=0; j<8; j++){
3655             for(y=0; y<8; y++){
3656                 for(x=0; x<8; x++){
3657                     double s= 0.25*(1<<BASIS_SHIFT);
3658                     int index= 8*i + j;
3659                     int perm_index= perm[index];
3660                     if(i==0) s*= sqrt(0.5);
3661                     if(j==0) s*= sqrt(0.5);
3662                     basis[perm_index][8*x + y]= lrintf(s * cos((M_PI/8.0)*i*(x+0.5)) * cos((M_PI/8.0)*j*(y+0.5)));
3663                 }
3664             }
3665         }
3666     }
3667 }
3668
3669 static int dct_quantize_refine(MpegEncContext *s, //FIXME breaks denoise?
3670                         int16_t *block, int16_t *weight, int16_t *orig,
3671                         int n, int qscale){
3672     int16_t rem[64];
3673     LOCAL_ALIGNED_16(int16_t, d1, [64]);
3674     const uint8_t *scantable= s->intra_scantable.scantable;
3675     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3676 //    unsigned int threshold1, threshold2;
3677 //    int bias=0;
3678     int run_tab[65];
3679     int prev_run=0;
3680     int prev_level=0;
3681     int qmul, qadd, start_i, last_non_zero, i, dc;
3682     uint8_t * length;
3683     uint8_t * last_length;
3684     int lambda;
3685     int rle_index, run, q = 1, sum; //q is only used when s->mb_intra is true
3686 #ifdef REFINE_STATS
3687 static int count=0;
3688 static int after_last=0;
3689 static int to_zero=0;
3690 static int from_zero=0;
3691 static int raise=0;
3692 static int lower=0;
3693 static int messed_sign=0;
3694 #endif
3695
3696     if(basis[0][0] == 0)
3697         build_basis(s->dsp.idct_permutation);
3698
3699     qmul= qscale*2;
3700     qadd= (qscale-1)|1;
3701     if (s->mb_intra) {
3702         if (!s->h263_aic) {
3703             if (n < 4)
3704                 q = s->y_dc_scale;
3705             else
3706                 q = s->c_dc_scale;
3707         } else{
3708             /* For AIC we skip quant/dequant of INTRADC */
3709             q = 1;
3710             qadd=0;
3711         }
3712         q <<= RECON_SHIFT-3;
3713         /* note: block[0] is assumed to be positive */
3714         dc= block[0]*q;
3715 //        block[0] = (block[0] + (q >> 1)) / q;
3716         start_i = 1;
3717 //        if(s->mpeg_quant || s->out_format == FMT_MPEG1)
3718 //            bias= 1<<(QMAT_SHIFT-1);
3719         length     = s->intra_ac_vlc_length;
3720         last_length= s->intra_ac_vlc_last_length;
3721     } else {
3722         dc= 0;
3723         start_i = 0;
3724         length     = s->inter_ac_vlc_length;
3725         last_length= s->inter_ac_vlc_last_length;
3726     }
3727     last_non_zero = s->block_last_index[n];
3728
3729 #ifdef REFINE_STATS
3730 {START_TIMER
3731 #endif
3732     dc += (1<<(RECON_SHIFT-1));
3733     for(i=0; i<64; i++){
3734         rem[i]= dc - (orig[i]<<RECON_SHIFT); //FIXME  use orig dirrectly instead of copying to rem[]
3735     }
3736 #ifdef REFINE_STATS
3737 STOP_TIMER("memset rem[]")}
3738 #endif
3739     sum=0;
3740     for(i=0; i<64; i++){
3741         int one= 36;
3742         int qns=4;
3743         int w;
3744
3745         w= FFABS(weight[i]) + qns*one;
3746         w= 15 + (48*qns*one + w/2)/w; // 16 .. 63
3747
3748         weight[i] = w;
3749 //        w=weight[i] = (63*qns + (w/2)) / w;
3750
3751         assert(w>0);
3752         assert(w<(1<<6));
3753         sum += w*w;
3754     }
3755     lambda= sum*(uint64_t)s->lambda2 >> (FF_LAMBDA_SHIFT - 6 + 6 + 6 + 6);
3756 #ifdef REFINE_STATS
3757 {START_TIMER
3758 #endif
3759     run=0;
3760     rle_index=0;
3761     for(i=start_i; i<=last_non_zero; i++){
3762         int j= perm_scantable[i];
3763         const int level= block[j];
3764         int coeff;
3765
3766         if(level){
3767             if(level<0) coeff= qmul*level - qadd;
3768             else        coeff= qmul*level + qadd;
3769             run_tab[rle_index++]=run;
3770             run=0;
3771
3772             s->dsp.add_8x8basis(rem, basis[j], coeff);
3773         }else{
3774             run++;
3775         }
3776     }
3777 #ifdef REFINE_STATS
3778 if(last_non_zero>0){
3779 STOP_TIMER("init rem[]")
3780 }
3781 }
3782
3783 {START_TIMER
3784 #endif
3785     for(;;){
3786         int best_score=s->dsp.try_8x8basis(rem, weight, basis[0], 0);
3787         int best_coeff=0;
3788         int best_change=0;
3789         int run2, best_unquant_change=0, analyze_gradient;
3790 #ifdef REFINE_STATS
3791 {START_TIMER
3792 #endif
3793         analyze_gradient = last_non_zero > 2 || s->quantizer_noise_shaping >= 3;
3794
3795         if(analyze_gradient){
3796 #ifdef REFINE_STATS
3797 {START_TIMER
3798 #endif
3799             for(i=0; i<64; i++){
3800                 int w= weight[i];
3801
3802                 d1[i] = (rem[i]*w*w + (1<<(RECON_SHIFT+12-1)))>>(RECON_SHIFT+12);
3803             }
3804 #ifdef REFINE_STATS
3805 STOP_TIMER("rem*w*w")}
3806 {START_TIMER
3807 #endif
3808             s->dsp.fdct(d1);
3809 #ifdef REFINE_STATS
3810 STOP_TIMER("dct")}
3811 #endif
3812         }
3813
3814         if(start_i){
3815             const int level= block[0];
3816             int change, old_coeff;
3817
3818             assert(s->mb_intra);
3819
3820             old_coeff= q*level;
3821
3822             for(change=-1; change<=1; change+=2){
3823                 int new_level= level + change;
3824                 int score, new_coeff;
3825
3826                 new_coeff= q*new_level;
3827                 if(new_coeff >= 2048 || new_coeff < 0)
3828                     continue;
3829
3830                 score= s->dsp.try_8x8basis(rem, weight, basis[0], new_coeff - old_coeff);
3831                 if(score<best_score){
3832                     best_score= score;
3833                     best_coeff= 0;
3834                     best_change= change;
3835                     best_unquant_change= new_coeff - old_coeff;
3836                 }
3837             }
3838         }
3839
3840         run=0;
3841         rle_index=0;
3842         run2= run_tab[rle_index++];
3843         prev_level=0;
3844         prev_run=0;
3845
3846         for(i=start_i; i<64; i++){
3847             int j= perm_scantable[i];
3848             const int level= block[j];
3849             int change, old_coeff;
3850
3851             if(s->quantizer_noise_shaping < 3 && i > last_non_zero + 1)
3852                 break;
3853
3854             if(level){
3855                 if(level<0) old_coeff= qmul*level - qadd;
3856                 else        old_coeff= qmul*level + qadd;
3857                 run2= run_tab[rle_index++]; //FIXME ! maybe after last
3858             }else{
3859                 old_coeff=0;
3860                 run2--;
3861                 assert(run2>=0 || i >= last_non_zero );
3862             }
3863
3864             for(change=-1; change<=1; change+=2){
3865                 int new_level= level + change;
3866                 int score, new_coeff, unquant_change;
3867
3868                 score=0;
3869                 if(s->quantizer_noise_shaping < 2 && FFABS(new_level) > FFABS(level))
3870                    continue;
3871
3872                 if(new_level){
3873                     if(new_level<0) new_coeff= qmul*new_level - qadd;
3874                     else            new_coeff= qmul*new_level + qadd;
3875                     if(new_coeff >= 2048 || new_coeff <= -2048)
3876                         continue;
3877                     //FIXME check for overflow
3878
3879                     if(level){
3880                         if(level < 63 && level > -63){
3881                             if(i < last_non_zero)
3882                                 score +=   length[UNI_AC_ENC_INDEX(run, new_level+64)]
3883                                          - length[UNI_AC_ENC_INDEX(run, level+64)];
3884                             else
3885                                 score +=   last_length[UNI_AC_ENC_INDEX(run, new_level+64)]
3886                                          - last_length[UNI_AC_ENC_INDEX(run, level+64)];
3887                         }
3888                     }else{
3889                         assert(FFABS(new_level)==1);
3890
3891                         if(analyze_gradient){
3892                             int g= d1[ scantable[i] ];
3893                             if(g && (g^new_level) >= 0)
3894                                 continue;
3895                         }
3896
3897                         if(i < last_non_zero){
3898                             int next_i= i + run2 + 1;
3899                             int next_level= block[ perm_scantable[next_i] ] + 64;
3900
3901                             if(next_level&(~127))
3902                                 next_level= 0;
3903
3904                             if(next_i < last_non_zero)
3905                                 score +=   length[UNI_AC_ENC_INDEX(run, 65)]
3906                                          + length[UNI_AC_ENC_INDEX(run2, next_level)]
3907                                          - length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
3908                             else
3909                                 score +=  length[UNI_AC_ENC_INDEX(run, 65)]
3910                                         + last_length[UNI_AC_ENC_INDEX(run2, next_level)]
3911                                         - last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
3912                         }else{
3913                             score += last_length[UNI_AC_ENC_INDEX(run, 65)];
3914                             if(prev_level){
3915                                 score +=  length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
3916                                         - last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
3917                             }
3918                         }
3919                     }
3920                 }else{
3921                     new_coeff=0;
3922                     assert(FFABS(level)==1);
3923
3924                     if(i < last_non_zero){
3925                         int next_i= i + run2 + 1;
3926                         int next_level= block[ perm_scantable[next_i] ] + 64;
3927
3928                         if(next_level&(~127))
3929                             next_level= 0;
3930
3931                         if(next_i < last_non_zero)
3932                             score +=   length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
3933                                      - length[UNI_AC_ENC_INDEX(run2, next_level)]
3934                                      - length[UNI_AC_ENC_INDEX(run, 65)];
3935                         else
3936                             score +=   last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
3937                                      - last_length[UNI_AC_ENC_INDEX(run2, next_level)]
3938                                      - length[UNI_AC_ENC_INDEX(run, 65)];
3939                     }else{
3940                         score += -last_length[UNI_AC_ENC_INDEX(run, 65)];
3941                         if(prev_level){
3942                             score +=  last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
3943                                     - length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
3944                         }
3945                     }
3946                 }
3947
3948                 score *= lambda;
3949
3950                 unquant_change= new_coeff - old_coeff;
3951                 assert((score < 100*lambda && score > -100*lambda) || lambda==0);
3952
3953                 score+= s->dsp.try_8x8basis(rem, weight, basis[j], unquant_change);
3954                 if(score<best_score){
3955                     best_score= score;
3956                     best_coeff= i;
3957                     best_change= change;
3958                     best_unquant_change= unquant_change;
3959                 }
3960             }
3961             if(level){
3962                 prev_level= level + 64;
3963                 if(prev_level&(~127))
3964                     prev_level= 0;
3965                 prev_run= run;
3966                 run=0;
3967             }else{
3968                 run++;
3969             }
3970         }
3971 #ifdef REFINE_STATS
3972 STOP_TIMER("iterative step")}
3973 #endif
3974
3975         if(best_change){
3976             int j= perm_scantable[ best_coeff ];
3977
3978             block[j] += best_change;
3979
3980             if(best_coeff > last_non_zero){
3981                 last_non_zero= best_coeff;
3982                 assert(block[j]);
3983 #ifdef REFINE_STATS
3984 after_last++;
3985 #endif
3986             }else{
3987 #ifdef REFINE_STATS
3988 if(block[j]){
3989     if(block[j] - best_change){
3990         if(FFABS(block[j]) > FFABS(block[j] - best_change)){
3991             raise++;
3992         }else{
3993             lower++;
3994         }
3995     }else{
3996         from_zero++;
3997     }
3998 }else{
3999     to_zero++;
4000 }
4001 #endif
4002                 for(; last_non_zero>=start_i; last_non_zero--){
4003                     if(block[perm_scantable[last_non_zero]])
4004                         break;
4005                 }
4006             }
4007 #ifdef REFINE_STATS
4008 count++;
4009 if(256*256*256*64 % count == 0){
4010     printf("after_last:%d to_zero:%d from_zero:%d raise:%d lower:%d sign:%d xyp:%d/%d/%d\n", after_last, to_zero, from_zero, raise, lower, messed_sign, s->mb_x, s->mb_y, s->picture_number);
4011 }
4012 #endif
4013             run=0;
4014             rle_index=0;
4015             for(i=start_i; i<=last_non_zero; i++){
4016                 int j= perm_scantable[i];
4017                 const int level= block[j];
4018
4019                  if(level){
4020                      run_tab[rle_index++]=run;
4021                      run=0;
4022                  }else{
4023                      run++;
4024                  }
4025             }
4026
4027             s->dsp.add_8x8basis(rem, basis[j], best_unquant_change);
4028         }else{
4029             break;
4030         }
4031     }
4032 #ifdef REFINE_STATS
4033 if(last_non_zero>0){
4034 STOP_TIMER("iterative search")
4035 }
4036 }
4037 #endif
4038
4039     return last_non_zero;
4040 }
4041
4042 int ff_dct_quantize_c(MpegEncContext *s,
4043                         int16_t *block, int n,
4044                         int qscale, int *overflow)
4045 {
4046     int i, j, level, last_non_zero, q, start_i;
4047     const int *qmat;
4048     const uint8_t *scantable= s->intra_scantable.scantable;
4049     int bias;
4050     int max=0;
4051     unsigned int threshold1, threshold2;
4052
4053     s->dsp.fdct (block);
4054
4055     if(s->dct_error_sum)
4056         s->denoise_dct(s, block);
4057
4058     if (s->mb_intra) {
4059         if (!s->h263_aic) {
4060             if (n < 4)
4061                 q = s->y_dc_scale;
4062             else
4063                 q = s->c_dc_scale;
4064             q = q << 3;
4065         } else
4066             /* For AIC we skip quant/dequant of INTRADC */
4067             q = 1 << 3;
4068
4069         /* note: block[0] is assumed to be positive */
4070         block[0] = (block[0] + (q >> 1)) / q;
4071         start_i = 1;
4072         last_non_zero = 0;
4073         qmat = s->q_intra_matrix[qscale];
4074         bias= s->intra_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4075     } else {
4076         start_i = 0;
4077         last_non_zero = -1;
4078         qmat = s->q_inter_matrix[qscale];
4079         bias= s->inter_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4080     }
4081     threshold1= (1<<QMAT_SHIFT) - bias - 1;
4082     threshold2= (threshold1<<1);
4083     for(i=63;i>=start_i;i--) {
4084         j = scantable[i];
4085         level = block[j] * qmat[j];
4086
4087         if(((unsigned)(level+threshold1))>threshold2){
4088             last_non_zero = i;
4089             break;
4090         }else{
4091             block[j]=0;
4092         }
4093     }
4094     for(i=start_i; i<=last_non_zero; i++) {
4095         j = scantable[i];
4096         level = block[j] * qmat[j];
4097
4098 //        if(   bias+level >= (1<<QMAT_SHIFT)
4099 //           || bias-level >= (1<<QMAT_SHIFT)){
4100         if(((unsigned)(level+threshold1))>threshold2){
4101             if(level>0){
4102                 level= (bias + level)>>QMAT_SHIFT;
4103                 block[j]= level;
4104             }else{
4105                 level= (bias - level)>>QMAT_SHIFT;
4106                 block[j]= -level;
4107             }
4108             max |=level;
4109         }else{
4110             block[j]=0;
4111         }
4112     }
4113     *overflow= s->max_qcoeff < max; //overflow might have happened
4114
4115     /* we need this permutation so that we correct the IDCT, we only permute the !=0 elements */
4116     if (s->dsp.idct_permutation_type != FF_NO_IDCT_PERM)
4117         ff_block_permute(block, s->dsp.idct_permutation, scantable, last_non_zero);
4118
4119     return last_non_zero;
4120 }
4121
4122 #define OFFSET(x) offsetof(MpegEncContext, x)
4123 #define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
4124 static const AVOption h263_options[] = {
4125     { "obmc",         "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4126     { "structured_slices","Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4127     { "mb_info",      "emit macroblock info for RFC 2190 packetization, the parameter value is the maximum payload size", OFFSET(mb_info), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VE },
4128     FF_MPV_COMMON_OPTS
4129     { NULL },
4130 };
4131
4132 static const AVClass h263_class = {
4133     .class_name = "H.263 encoder",
4134     .item_name  = av_default_item_name,
4135     .option     = h263_options,
4136     .version    = LIBAVUTIL_VERSION_INT,
4137 };
4138
4139 AVCodec ff_h263_encoder = {
4140     .name           = "h263",
4141     .type           = AVMEDIA_TYPE_VIDEO,
4142     .id             = AV_CODEC_ID_H263,
4143     .priv_data_size = sizeof(MpegEncContext),
4144     .init           = ff_MPV_encode_init,
4145     .encode2        = ff_MPV_encode_picture,
4146     .close          = ff_MPV_encode_end,
4147     .pix_fmts= (const enum AVPixelFormat[]){AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE},
4148     .long_name= NULL_IF_CONFIG_SMALL("H.263 / H.263-1996"),
4149     .priv_class     = &h263_class,
4150 };
4151
4152 static const AVOption h263p_options[] = {
4153     { "umv",        "Use unlimited motion vectors.",    OFFSET(umvplus), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4154     { "aiv",        "Use alternative inter VLC.",       OFFSET(alt_inter_vlc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4155     { "obmc",       "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4156     { "structured_slices", "Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4157     FF_MPV_COMMON_OPTS
4158     { NULL },
4159 };
4160 static const AVClass h263p_class = {
4161     .class_name = "H.263p encoder",
4162     .item_name  = av_default_item_name,
4163     .option     = h263p_options,
4164     .version    = LIBAVUTIL_VERSION_INT,
4165 };
4166
4167 AVCodec ff_h263p_encoder = {
4168     .name           = "h263p",
4169     .type           = AVMEDIA_TYPE_VIDEO,
4170     .id             = AV_CODEC_ID_H263P,
4171     .priv_data_size = sizeof(MpegEncContext),
4172     .init           = ff_MPV_encode_init,
4173     .encode2        = ff_MPV_encode_picture,
4174     .close          = ff_MPV_encode_end,
4175     .capabilities   = CODEC_CAP_SLICE_THREADS,
4176     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4177     .long_name      = NULL_IF_CONFIG_SMALL("H.263+ / H.263-1998 / H.263 version 2"),
4178     .priv_class     = &h263p_class,
4179 };
4180
4181 FF_MPV_GENERIC_CLASS(msmpeg4v2)
4182
4183 AVCodec ff_msmpeg4v2_encoder = {
4184     .name           = "msmpeg4v2",
4185     .type           = AVMEDIA_TYPE_VIDEO,
4186     .id             = AV_CODEC_ID_MSMPEG4V2,
4187     .priv_data_size = sizeof(MpegEncContext),
4188     .init           = ff_MPV_encode_init,
4189     .encode2        = ff_MPV_encode_picture,
4190     .close          = ff_MPV_encode_end,
4191     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4192     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 2"),
4193     .priv_class     = &msmpeg4v2_class,
4194 };
4195
4196 FF_MPV_GENERIC_CLASS(msmpeg4v3)
4197
4198 AVCodec ff_msmpeg4v3_encoder = {
4199     .name           = "msmpeg4",
4200     .type           = AVMEDIA_TYPE_VIDEO,
4201     .id             = AV_CODEC_ID_MSMPEG4V3,
4202     .priv_data_size = sizeof(MpegEncContext),
4203     .init           = ff_MPV_encode_init,
4204     .encode2        = ff_MPV_encode_picture,
4205     .close          = ff_MPV_encode_end,
4206     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4207     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 3"),
4208     .priv_class     = &msmpeg4v3_class,
4209 };
4210
4211 FF_MPV_GENERIC_CLASS(wmv1)
4212
4213 AVCodec ff_wmv1_encoder = {
4214     .name           = "wmv1",
4215     .type           = AVMEDIA_TYPE_VIDEO,
4216     .id             = AV_CODEC_ID_WMV1,
4217     .priv_data_size = sizeof(MpegEncContext),
4218     .init           = ff_MPV_encode_init,
4219     .encode2        = ff_MPV_encode_picture,
4220     .close          = ff_MPV_encode_end,
4221     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4222     .long_name      = NULL_IF_CONFIG_SMALL("Windows Media Video 7"),
4223     .priv_class     = &wmv1_class,
4224 };