]> git.sesse.net Git - ffmpeg/blob - libavcodec/mpegvideo_enc.c
dxva2: Keep code shared between dxva2 and d3d11va under the correct #if
[ffmpeg] / libavcodec / mpegvideo_enc.c
1 /*
2  * The simplest mpeg encoder (well, it was the simplest!)
3  * Copyright (c) 2000,2001 Fabrice Bellard
4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5  *
6  * 4MV & hq & B-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
7  *
8  * This file is part of Libav.
9  *
10  * Libav is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * Libav is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with Libav; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23  */
24
25 /**
26  * @file
27  * The simplest mpeg encoder (well, it was the simplest!).
28  */
29
30 #include <stdint.h>
31
32 #include "libavutil/internal.h"
33 #include "libavutil/intmath.h"
34 #include "libavutil/mathematics.h"
35 #include "libavutil/pixdesc.h"
36 #include "libavutil/opt.h"
37 #include "libavutil/timer.h"
38 #include "avcodec.h"
39 #include "dct.h"
40 #include "idctdsp.h"
41 #include "mpeg12.h"
42 #include "mpegvideo.h"
43 #include "mpegvideodata.h"
44 #include "h261.h"
45 #include "h263.h"
46 #include "h263data.h"
47 #include "mjpegenc_common.h"
48 #include "mathops.h"
49 #include "mpegutils.h"
50 #include "mjpegenc.h"
51 #include "msmpeg4.h"
52 #include "pixblockdsp.h"
53 #include "qpeldsp.h"
54 #include "faandct.h"
55 #include "thread.h"
56 #include "aandcttab.h"
57 #include "flv.h"
58 #include "mpeg4video.h"
59 #include "internal.h"
60 #include "bytestream.h"
61 #include "wmv2.h"
62 #include "rv10.h"
63 #include <limits.h>
64
/* Number of fractional bits carried by the intra/inter quantizer bias values
 * (e.g. 3 << (QUANT_BIAS_SHIFT - 3) represents 3/8). */
#define QUANT_BIAS_SHIFT 8

/* Fixed-point precision of the reciprocal tables built by ff_convert_matrix():
 * QMAT_SHIFT_MMX for the 16-bit qmat16 tables, QMAT_SHIFT for the int qmat
 * tables. */
#define QMAT_SHIFT_MMX 16
#define QMAT_SHIFT 22

/* Forward declarations of file-local helpers that are referenced before their
 * definitions. */
static int encode_picture(MpegEncContext *s, int picture_number);
static int dct_quantize_refine(MpegEncContext *s, int16_t *block, int16_t *weight, int16_t *orig, int n, int qscale);
static int sse_mb(MpegEncContext *s);
static void denoise_dct_c(MpegEncContext *s, int16_t *block);
static int dct_quantize_trellis_c(MpegEncContext *s, int16_t *block, int n, int qscale, int *overflow);

/* Shared default tables; mpv_encode_defaults() installs them as
 * s->me.mv_penalty and s->fcode_tab. */
static uint8_t default_mv_penalty[MAX_FCODE + 1][MAX_MV * 2 + 1];
static uint8_t default_fcode_tab[MAX_MV * 2 + 1];

/* Option table made of the common mpegvideo encoder options only,
 * terminated by a NULL entry. */
const AVOption ff_mpv_generic_options[] = {
    FF_MPV_COMMON_OPTS
    { NULL },
};
83
84 void ff_convert_matrix(MpegEncContext *s, int (*qmat)[64],
85                        uint16_t (*qmat16)[2][64],
86                        const uint16_t *quant_matrix,
87                        int bias, int qmin, int qmax, int intra)
88 {
89     FDCTDSPContext *fdsp = &s->fdsp;
90     int qscale;
91     int shift = 0;
92
93     for (qscale = qmin; qscale <= qmax; qscale++) {
94         int i;
95         if (fdsp->fdct == ff_jpeg_fdct_islow_8  ||
96 #if CONFIG_FAANDCT
97             fdsp->fdct == ff_faandct            ||
98 #endif /* CONFIG_FAANDCT */
99             fdsp->fdct == ff_jpeg_fdct_islow_10) {
100             for (i = 0; i < 64; i++) {
101                 const int j = s->idsp.idct_permutation[i];
102                 int64_t den = (int64_t) qscale * quant_matrix[j];
103                 /* 16 <= qscale * quant_matrix[i] <= 7905
104                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
105                  *             19952 <=              x  <= 249205026
106                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
107                  *           3444240 >= (1 << 36) / (x) >= 275 */
108
109                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) / den);
110             }
111         } else if (fdsp->fdct == ff_fdct_ifast) {
112             for (i = 0; i < 64; i++) {
113                 const int j = s->idsp.idct_permutation[i];
114                 int64_t den = ff_aanscales[i] * (int64_t) qscale * quant_matrix[j];
115                 /* 16 <= qscale * quant_matrix[i] <= 7905
116                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
117                  *             19952 <=              x  <= 249205026
118                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
119                  *           3444240 >= (1 << 36) / (x) >= 275 */
120
121                 qmat[qscale][i] = (int)((UINT64_C(1) << (QMAT_SHIFT + 14)) / den);
122             }
123         } else {
124             for (i = 0; i < 64; i++) {
125                 const int j = s->idsp.idct_permutation[i];
126                 int64_t den = (int64_t) qscale * quant_matrix[j];
127                 /* We can safely suppose that 16 <= quant_matrix[i] <= 255
128                  * Assume x = qscale * quant_matrix[i]
129                  * So             16 <=              x  <= 7905
130                  * so (1 << 19) / 16 >= (1 << 19) / (x) >= (1 << 19) / 7905
131                  * so          32768 >= (1 << 19) / (x) >= 67 */
132                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) / den);
133                 //qmat  [qscale][i] = (1 << QMAT_SHIFT_MMX) /
134                 //                    (qscale * quant_matrix[i]);
135                 qmat16[qscale][0][i] = (1 << QMAT_SHIFT_MMX) / den;
136
137                 if (qmat16[qscale][0][i] == 0 ||
138                     qmat16[qscale][0][i] == 128 * 256)
139                     qmat16[qscale][0][i] = 128 * 256 - 1;
140                 qmat16[qscale][1][i] =
141                     ROUNDED_DIV(bias << (16 - QUANT_BIAS_SHIFT),
142                                 qmat16[qscale][0][i]);
143             }
144         }
145
146         for (i = intra; i < 64; i++) {
147             int64_t max = 8191;
148             if (fdsp->fdct == ff_fdct_ifast) {
149                 max = (8191LL * ff_aanscales[i]) >> 14;
150             }
151             while (((max * qmat[qscale][i]) >> shift) > INT_MAX) {
152                 shift++;
153             }
154         }
155     }
156     if (shift) {
157         av_log(NULL, AV_LOG_INFO,
158                "Warning, QMAT_SHIFT is larger than %d, overflows possible\n",
159                QMAT_SHIFT - shift);
160     }
161 }
162
163 static inline void update_qscale(MpegEncContext *s)
164 {
165     s->qscale = (s->lambda * 139 + FF_LAMBDA_SCALE * 64) >>
166                 (FF_LAMBDA_SHIFT + 7);
167     s->qscale = av_clip(s->qscale, s->avctx->qmin, s->avctx->qmax);
168
169     s->lambda2 = (s->lambda * s->lambda + FF_LAMBDA_SCALE / 2) >>
170                  FF_LAMBDA_SHIFT;
171 }
172
173 void ff_write_quant_matrix(PutBitContext *pb, uint16_t *matrix)
174 {
175     int i;
176
177     if (matrix) {
178         put_bits(pb, 1, 1);
179         for (i = 0; i < 64; i++) {
180             put_bits(pb, 8, matrix[ff_zigzag_direct[i]]);
181         }
182     } else
183         put_bits(pb, 1, 0);
184 }
185
186 /**
187  * init s->current_picture.qscale_table from s->lambda_table
188  */
189 void ff_init_qscale_tab(MpegEncContext *s)
190 {
191     int8_t * const qscale_table = s->current_picture.qscale_table;
192     int i;
193
194     for (i = 0; i < s->mb_num; i++) {
195         unsigned int lam = s->lambda_table[s->mb_index2xy[i]];
196         int qp = (lam * 139 + FF_LAMBDA_SCALE * 64) >> (FF_LAMBDA_SHIFT + 7);
197         qscale_table[s->mb_index2xy[i]] = av_clip(qp, s->avctx->qmin,
198                                                   s->avctx->qmax);
199     }
200 }
201
202 static void update_duplicate_context_after_me(MpegEncContext *dst,
203                                               MpegEncContext *src)
204 {
205 #define COPY(a) dst->a= src->a
206     COPY(pict_type);
207     COPY(current_picture);
208     COPY(f_code);
209     COPY(b_code);
210     COPY(qscale);
211     COPY(lambda);
212     COPY(lambda2);
213     COPY(picture_in_gop_number);
214     COPY(gop_picture_number);
215     COPY(frame_pred_frame_dct); // FIXME don't set in encode_header
216     COPY(progressive_frame);    // FIXME don't set in encode_header
217     COPY(partitioned_frame);    // FIXME don't set in encode_header
218 #undef COPY
219 }
220
221 /**
222  * Set the given MpegEncContext to defaults for encoding.
223  * the changed fields will not depend upon the prior state of the MpegEncContext.
224  */
225 static void mpv_encode_defaults(MpegEncContext *s)
226 {
227     int i;
228     ff_mpv_common_defaults(s);
229
230     for (i = -16; i < 16; i++) {
231         default_fcode_tab[i + MAX_MV] = 1;
232     }
233     s->me.mv_penalty = default_mv_penalty;
234     s->fcode_tab     = default_fcode_tab;
235
236     s->input_picture_number  = 0;
237     s->picture_in_gop_number = 0;
238 }
239
240 /* init video encoder */
241 av_cold int ff_mpv_encode_init(AVCodecContext *avctx)
242 {
243     MpegEncContext *s = avctx->priv_data;
244     AVCPBProperties *cpb_props;
245     int i, ret, format_supported;
246
247     mpv_encode_defaults(s);
248
249     switch (avctx->codec_id) {
250     case AV_CODEC_ID_MPEG2VIDEO:
251         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P &&
252             avctx->pix_fmt != AV_PIX_FMT_YUV422P) {
253             av_log(avctx, AV_LOG_ERROR,
254                    "only YUV420 and YUV422 are supported\n");
255             return -1;
256         }
257         break;
258     case AV_CODEC_ID_MJPEG:
259         format_supported = 0;
260         /* JPEG color space */
261         if (avctx->pix_fmt == AV_PIX_FMT_YUVJ420P ||
262             avctx->pix_fmt == AV_PIX_FMT_YUVJ422P ||
263             (avctx->color_range == AVCOL_RANGE_JPEG &&
264              (avctx->pix_fmt == AV_PIX_FMT_YUV420P ||
265               avctx->pix_fmt == AV_PIX_FMT_YUV422P)))
266             format_supported = 1;
267         /* MPEG color space */
268         else if (avctx->strict_std_compliance <= FF_COMPLIANCE_UNOFFICIAL &&
269                  (avctx->pix_fmt == AV_PIX_FMT_YUV420P ||
270                   avctx->pix_fmt == AV_PIX_FMT_YUV422P))
271             format_supported = 1;
272
273         if (!format_supported) {
274             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in jpeg\n");
275             return -1;
276         }
277         break;
278     default:
279         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P) {
280             av_log(avctx, AV_LOG_ERROR, "only YUV420 is supported\n");
281             return -1;
282         }
283     }
284
285     switch (avctx->pix_fmt) {
286     case AV_PIX_FMT_YUVJ422P:
287     case AV_PIX_FMT_YUV422P:
288         s->chroma_format = CHROMA_422;
289         break;
290     case AV_PIX_FMT_YUVJ420P:
291     case AV_PIX_FMT_YUV420P:
292     default:
293         s->chroma_format = CHROMA_420;
294         break;
295     }
296
297 #if FF_API_PRIVATE_OPT
298 FF_DISABLE_DEPRECATION_WARNINGS
299     if (avctx->rtp_payload_size)
300         s->rtp_payload_size = avctx->rtp_payload_size;
301     if (avctx->me_penalty_compensation)
302         s->me_penalty_compensation = avctx->me_penalty_compensation;
303     if (avctx->pre_me)
304         s->me_pre = avctx->pre_me;
305 FF_ENABLE_DEPRECATION_WARNINGS
306 #endif
307
308     s->bit_rate = avctx->bit_rate;
309     s->width    = avctx->width;
310     s->height   = avctx->height;
311     if (avctx->gop_size > 600 &&
312         avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL) {
313         av_log(avctx, AV_LOG_ERROR,
314                "Warning keyframe interval too large! reducing it ...\n");
315         avctx->gop_size = 600;
316     }
317     s->gop_size     = avctx->gop_size;
318     s->avctx        = avctx;
319     if (avctx->max_b_frames > MAX_B_FRAMES) {
320         av_log(avctx, AV_LOG_ERROR, "Too many B-frames requested, maximum "
321                "is %d.\n", MAX_B_FRAMES);
322     }
323     s->max_b_frames = avctx->max_b_frames;
324     s->codec_id     = avctx->codec->id;
325     s->strict_std_compliance = avctx->strict_std_compliance;
326     s->quarter_sample     = (avctx->flags & AV_CODEC_FLAG_QPEL) != 0;
327     s->rtp_mode           = !!s->rtp_payload_size;
328     s->intra_dc_precision = avctx->intra_dc_precision;
329     s->user_specified_pts = AV_NOPTS_VALUE;
330
331     if (s->gop_size <= 1) {
332         s->intra_only = 1;
333         s->gop_size   = 12;
334     } else {
335         s->intra_only = 0;
336     }
337
338 #if FF_API_MOTION_EST
339 FF_DISABLE_DEPRECATION_WARNINGS
340     s->me_method = avctx->me_method;
341 FF_ENABLE_DEPRECATION_WARNINGS
342 #endif
343
344     /* Fixed QSCALE */
345     s->fixed_qscale = !!(avctx->flags & AV_CODEC_FLAG_QSCALE);
346
347 #if FF_API_MPV_OPT
348     FF_DISABLE_DEPRECATION_WARNINGS
349     if (avctx->border_masking != 0.0)
350         s->border_masking = avctx->border_masking;
351     FF_ENABLE_DEPRECATION_WARNINGS
352 #endif
353
354     s->adaptive_quant = (s->avctx->lumi_masking ||
355                          s->avctx->dark_masking ||
356                          s->avctx->temporal_cplx_masking ||
357                          s->avctx->spatial_cplx_masking  ||
358                          s->avctx->p_masking      ||
359                          s->border_masking ||
360                          (s->mpv_flags & FF_MPV_FLAG_QP_RD)) &&
361                         !s->fixed_qscale;
362
363     s->loop_filter = !!(s->avctx->flags & AV_CODEC_FLAG_LOOP_FILTER);
364
365     if (avctx->rc_max_rate && !avctx->rc_buffer_size) {
366         av_log(avctx, AV_LOG_ERROR,
367                "a vbv buffer size is needed, "
368                "for encoding with a maximum bitrate\n");
369         return -1;
370     }
371
372     if (avctx->rc_min_rate && avctx->rc_max_rate != avctx->rc_min_rate) {
373         av_log(avctx, AV_LOG_INFO,
374                "Warning min_rate > 0 but min_rate != max_rate isn't recommended!\n");
375     }
376
377     if (avctx->rc_min_rate && avctx->rc_min_rate > avctx->bit_rate) {
378         av_log(avctx, AV_LOG_ERROR, "bitrate below min bitrate\n");
379         return -1;
380     }
381
382     if (avctx->rc_max_rate && avctx->rc_max_rate < avctx->bit_rate) {
383         av_log(avctx, AV_LOG_INFO, "bitrate above max bitrate\n");
384         return -1;
385     }
386
387     if (avctx->rc_max_rate &&
388         avctx->rc_max_rate == avctx->bit_rate &&
389         avctx->rc_max_rate != avctx->rc_min_rate) {
390         av_log(avctx, AV_LOG_INFO,
391                "impossible bitrate constraints, this will fail\n");
392     }
393
394     if (avctx->rc_buffer_size &&
395         avctx->bit_rate * (int64_t)avctx->time_base.num >
396             avctx->rc_buffer_size * (int64_t)avctx->time_base.den) {
397         av_log(avctx, AV_LOG_ERROR, "VBV buffer too small for bitrate\n");
398         return -1;
399     }
400
401     if (!s->fixed_qscale &&
402         avctx->bit_rate * av_q2d(avctx->time_base) >
403             avctx->bit_rate_tolerance) {
404         av_log(avctx, AV_LOG_ERROR,
405                "bitrate tolerance too small for bitrate\n");
406         return -1;
407     }
408
409     if (s->avctx->rc_max_rate &&
410         s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
411         (s->codec_id == AV_CODEC_ID_MPEG1VIDEO ||
412          s->codec_id == AV_CODEC_ID_MPEG2VIDEO) &&
413         90000LL * (avctx->rc_buffer_size - 1) >
414             s->avctx->rc_max_rate * 0xFFFFLL) {
415         av_log(avctx, AV_LOG_INFO,
416                "Warning vbv_delay will be set to 0xFFFF (=VBR) as the "
417                "specified vbv buffer is too large for the given bitrate!\n");
418     }
419
420     if ((s->avctx->flags & AV_CODEC_FLAG_4MV) && s->codec_id != AV_CODEC_ID_MPEG4 &&
421         s->codec_id != AV_CODEC_ID_H263 && s->codec_id != AV_CODEC_ID_H263P &&
422         s->codec_id != AV_CODEC_ID_FLV1) {
423         av_log(avctx, AV_LOG_ERROR, "4MV not supported by codec\n");
424         return -1;
425     }
426
427     if (s->obmc && s->avctx->mb_decision != FF_MB_DECISION_SIMPLE) {
428         av_log(avctx, AV_LOG_ERROR,
429                "OBMC is only supported with simple mb decision\n");
430         return -1;
431     }
432
433     if (s->quarter_sample && s->codec_id != AV_CODEC_ID_MPEG4) {
434         av_log(avctx, AV_LOG_ERROR, "qpel not supported by codec\n");
435         return -1;
436     }
437
438     if (s->max_b_frames                    &&
439         s->codec_id != AV_CODEC_ID_MPEG4      &&
440         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
441         s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
442         av_log(avctx, AV_LOG_ERROR, "B-frames not supported by codec\n");
443         return -1;
444     }
445
446     if ((s->codec_id == AV_CODEC_ID_MPEG4 ||
447          s->codec_id == AV_CODEC_ID_H263  ||
448          s->codec_id == AV_CODEC_ID_H263P) &&
449         (avctx->sample_aspect_ratio.num > 255 ||
450          avctx->sample_aspect_ratio.den > 255)) {
451         av_log(avctx, AV_LOG_ERROR,
452                "Invalid pixel aspect ratio %i/%i, limit is 255/255\n",
453                avctx->sample_aspect_ratio.num, avctx->sample_aspect_ratio.den);
454         return -1;
455     }
456
457     if ((s->avctx->flags & (AV_CODEC_FLAG_INTERLACED_DCT | AV_CODEC_FLAG_INTERLACED_ME)) &&
458         s->codec_id != AV_CODEC_ID_MPEG4 && s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
459         av_log(avctx, AV_LOG_ERROR, "interlacing not supported by codec\n");
460         return -1;
461     }
462
463 #if FF_API_PRIVATE_OPT
464     FF_DISABLE_DEPRECATION_WARNINGS
465     if (avctx->mpeg_quant)
466         s->mpeg_quant = avctx->mpeg_quant;
467     FF_ENABLE_DEPRECATION_WARNINGS
468 #endif
469
470     // FIXME mpeg2 uses that too
471     if (s->mpeg_quant && s->codec_id != AV_CODEC_ID_MPEG4) {
472         av_log(avctx, AV_LOG_ERROR,
473                "mpeg2 style quantization not supported by codec\n");
474         return -1;
475     }
476
477     if ((s->mpv_flags & FF_MPV_FLAG_CBP_RD) && !avctx->trellis) {
478         av_log(avctx, AV_LOG_ERROR, "CBP RD needs trellis quant\n");
479         return -1;
480     }
481
482     if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) &&
483         s->avctx->mb_decision != FF_MB_DECISION_RD) {
484         av_log(avctx, AV_LOG_ERROR, "QP RD needs mbd=2\n");
485         return -1;
486     }
487
488 #if FF_API_PRIVATE_OPT
489 FF_DISABLE_DEPRECATION_WARNINGS
490     if (avctx->scenechange_threshold)
491         s->scenechange_threshold = avctx->scenechange_threshold;
492 FF_ENABLE_DEPRECATION_WARNINGS
493 #endif
494
495     if (s->scenechange_threshold < 1000000000 &&
496         (s->avctx->flags & AV_CODEC_FLAG_CLOSED_GOP)) {
497         av_log(avctx, AV_LOG_ERROR,
498                "closed gop with scene change detection are not supported yet, "
499                "set threshold to 1000000000\n");
500         return -1;
501     }
502
503     if (s->avctx->flags & AV_CODEC_FLAG_LOW_DELAY) {
504         if (s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
505             av_log(avctx, AV_LOG_ERROR,
506                   "low delay forcing is only available for mpeg2\n");
507             return -1;
508         }
509         if (s->max_b_frames != 0) {
510             av_log(avctx, AV_LOG_ERROR,
511                    "B-frames cannot be used with low delay\n");
512             return -1;
513         }
514     }
515
516     if (s->q_scale_type == 1) {
517         if (avctx->qmax > 12) {
518             av_log(avctx, AV_LOG_ERROR,
519                    "non linear quant only supports qmax <= 12 currently\n");
520             return -1;
521         }
522     }
523
524     if (avctx->slices > 1 &&
525         (avctx->codec_id == AV_CODEC_ID_FLV1 || avctx->codec_id == AV_CODEC_ID_H261)) {
526         av_log(avctx, AV_LOG_ERROR, "Multiple slices are not supported by this codec\n");
527         return AVERROR(EINVAL);
528     }
529
530     if (s->avctx->thread_count > 1         &&
531         s->codec_id != AV_CODEC_ID_MPEG4      &&
532         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
533         s->codec_id != AV_CODEC_ID_MPEG2VIDEO &&
534         (s->codec_id != AV_CODEC_ID_H263P)) {
535         av_log(avctx, AV_LOG_ERROR,
536                "multi threaded encoding not supported by codec\n");
537         return -1;
538     }
539
540     if (s->avctx->thread_count < 1) {
541         av_log(avctx, AV_LOG_ERROR,
542                "automatic thread number detection not supported by codec,"
543                "patch welcome\n");
544         return -1;
545     }
546
547     if (!avctx->time_base.den || !avctx->time_base.num) {
548         av_log(avctx, AV_LOG_ERROR, "framerate not set\n");
549         return -1;
550     }
551
552 #if FF_API_PRIVATE_OPT
553 FF_DISABLE_DEPRECATION_WARNINGS
554     if (avctx->b_frame_strategy)
555         s->b_frame_strategy = avctx->b_frame_strategy;
556     if (avctx->b_sensitivity != 40)
557         s->b_sensitivity = avctx->b_sensitivity;
558 FF_ENABLE_DEPRECATION_WARNINGS
559 #endif
560
561     if (s->b_frame_strategy && (avctx->flags & AV_CODEC_FLAG_PASS2)) {
562         av_log(avctx, AV_LOG_INFO,
563                "notice: b_frame_strategy only affects the first pass\n");
564         s->b_frame_strategy = 0;
565     }
566
567     i = av_gcd(avctx->time_base.den, avctx->time_base.num);
568     if (i > 1) {
569         av_log(avctx, AV_LOG_INFO, "removing common factors from framerate\n");
570         avctx->time_base.den /= i;
571         avctx->time_base.num /= i;
572         //return -1;
573     }
574
575     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG1VIDEO ||
576         s->codec_id == AV_CODEC_ID_MPEG2VIDEO || s->codec_id == AV_CODEC_ID_MJPEG) {
577         // (a + x * 3 / 8) / x
578         s->intra_quant_bias = 3 << (QUANT_BIAS_SHIFT - 3);
579         s->inter_quant_bias = 0;
580     } else {
581         s->intra_quant_bias = 0;
582         // (a - x / 4) / x
583         s->inter_quant_bias = -(1 << (QUANT_BIAS_SHIFT - 2));
584     }
585
586 #if FF_API_QUANT_BIAS
587 FF_DISABLE_DEPRECATION_WARNINGS
588     if (avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
589         s->intra_quant_bias = avctx->intra_quant_bias;
590     if (avctx->inter_quant_bias != FF_DEFAULT_QUANT_BIAS)
591         s->inter_quant_bias = avctx->inter_quant_bias;
592 FF_ENABLE_DEPRECATION_WARNINGS
593 #endif
594
595     if (avctx->codec_id == AV_CODEC_ID_MPEG4 &&
596         s->avctx->time_base.den > (1 << 16) - 1) {
597         av_log(avctx, AV_LOG_ERROR,
598                "timebase %d/%d not supported by MPEG 4 standard, "
599                "the maximum admitted value for the timebase denominator "
600                "is %d\n", s->avctx->time_base.num, s->avctx->time_base.den,
601                (1 << 16) - 1);
602         return -1;
603     }
604     s->time_increment_bits = av_log2(s->avctx->time_base.den - 1) + 1;
605
606     switch (avctx->codec->id) {
607     case AV_CODEC_ID_MPEG1VIDEO:
608         s->out_format = FMT_MPEG1;
609         s->low_delay  = !!(s->avctx->flags & AV_CODEC_FLAG_LOW_DELAY);
610         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
611         break;
612     case AV_CODEC_ID_MPEG2VIDEO:
613         s->out_format = FMT_MPEG1;
614         s->low_delay  = !!(s->avctx->flags & AV_CODEC_FLAG_LOW_DELAY);
615         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
616         s->rtp_mode   = 1;
617         break;
618     case AV_CODEC_ID_MJPEG:
619         s->out_format = FMT_MJPEG;
620         s->intra_only = 1; /* force intra only for jpeg */
621         if (!CONFIG_MJPEG_ENCODER ||
622             ff_mjpeg_encode_init(s) < 0)
623             return -1;
624         avctx->delay = 0;
625         s->low_delay = 1;
626         break;
627     case AV_CODEC_ID_H261:
628         if (!CONFIG_H261_ENCODER)
629             return -1;
630         if (ff_h261_get_picture_format(s->width, s->height) < 0) {
631             av_log(avctx, AV_LOG_ERROR,
632                    "The specified picture size of %dx%d is not valid for the "
633                    "H.261 codec.\nValid sizes are 176x144, 352x288\n",
634                     s->width, s->height);
635             return -1;
636         }
637         s->out_format = FMT_H261;
638         avctx->delay  = 0;
639         s->low_delay  = 1;
640         s->rtp_mode   = 0; /* Sliced encoding not supported */
641         break;
642     case AV_CODEC_ID_H263:
643         if (!CONFIG_H263_ENCODER)
644         return -1;
645         if (ff_match_2uint16(ff_h263_format, FF_ARRAY_ELEMS(ff_h263_format),
646                              s->width, s->height) == 8) {
647             av_log(avctx, AV_LOG_INFO,
648                    "The specified picture size of %dx%d is not valid for "
649                    "the H.263 codec.\nValid sizes are 128x96, 176x144, "
650                    "352x288, 704x576, and 1408x1152."
651                    "Try H.263+.\n", s->width, s->height);
652             return -1;
653         }
654         s->out_format = FMT_H263;
655         avctx->delay  = 0;
656         s->low_delay  = 1;
657         break;
658     case AV_CODEC_ID_H263P:
659         s->out_format = FMT_H263;
660         s->h263_plus  = 1;
661         /* Fx */
662         s->h263_aic        = (avctx->flags & AV_CODEC_FLAG_AC_PRED) ? 1 : 0;
663         s->modified_quant  = s->h263_aic;
664         s->loop_filter     = (avctx->flags & AV_CODEC_FLAG_LOOP_FILTER) ? 1 : 0;
665         s->unrestricted_mv = s->obmc || s->loop_filter || s->umvplus;
666
667         /* /Fx */
668         /* These are just to be sure */
669         avctx->delay = 0;
670         s->low_delay = 1;
671         break;
672     case AV_CODEC_ID_FLV1:
673         s->out_format      = FMT_H263;
674         s->h263_flv        = 2; /* format = 1; 11-bit codes */
675         s->unrestricted_mv = 1;
676         s->rtp_mode  = 0; /* don't allow GOB */
677         avctx->delay = 0;
678         s->low_delay = 1;
679         break;
680     case AV_CODEC_ID_RV10:
681         s->out_format = FMT_H263;
682         avctx->delay  = 0;
683         s->low_delay  = 1;
684         break;
685     case AV_CODEC_ID_RV20:
686         s->out_format      = FMT_H263;
687         avctx->delay       = 0;
688         s->low_delay       = 1;
689         s->modified_quant  = 1;
690         s->h263_aic        = 1;
691         s->h263_plus       = 1;
692         s->loop_filter     = 1;
693         s->unrestricted_mv = 0;
694         break;
695     case AV_CODEC_ID_MPEG4:
696         s->out_format      = FMT_H263;
697         s->h263_pred       = 1;
698         s->unrestricted_mv = 1;
699         s->low_delay       = s->max_b_frames ? 0 : 1;
700         avctx->delay       = s->low_delay ? 0 : (s->max_b_frames + 1);
701         break;
702     case AV_CODEC_ID_MSMPEG4V2:
703         s->out_format      = FMT_H263;
704         s->h263_pred       = 1;
705         s->unrestricted_mv = 1;
706         s->msmpeg4_version = 2;
707         avctx->delay       = 0;
708         s->low_delay       = 1;
709         break;
710     case AV_CODEC_ID_MSMPEG4V3:
711         s->out_format        = FMT_H263;
712         s->h263_pred         = 1;
713         s->unrestricted_mv   = 1;
714         s->msmpeg4_version   = 3;
715         s->flipflop_rounding = 1;
716         avctx->delay         = 0;
717         s->low_delay         = 1;
718         break;
719     case AV_CODEC_ID_WMV1:
720         s->out_format        = FMT_H263;
721         s->h263_pred         = 1;
722         s->unrestricted_mv   = 1;
723         s->msmpeg4_version   = 4;
724         s->flipflop_rounding = 1;
725         avctx->delay         = 0;
726         s->low_delay         = 1;
727         break;
728     case AV_CODEC_ID_WMV2:
729         s->out_format        = FMT_H263;
730         s->h263_pred         = 1;
731         s->unrestricted_mv   = 1;
732         s->msmpeg4_version   = 5;
733         s->flipflop_rounding = 1;
734         avctx->delay         = 0;
735         s->low_delay         = 1;
736         break;
737     default:
738         return -1;
739     }
740
741 #if FF_API_PRIVATE_OPT
742     FF_DISABLE_DEPRECATION_WARNINGS
743     if (avctx->noise_reduction)
744         s->noise_reduction = avctx->noise_reduction;
745     FF_ENABLE_DEPRECATION_WARNINGS
746 #endif
747
748     avctx->has_b_frames = !s->low_delay;
749
750     s->encoding = 1;
751
752     s->progressive_frame    =
753     s->progressive_sequence = !(avctx->flags & (AV_CODEC_FLAG_INTERLACED_DCT |
754                                                 AV_CODEC_FLAG_INTERLACED_ME) ||
755                                 s->alternate_scan);
756
757     /* init */
758     ff_mpv_idct_init(s);
759     if (ff_mpv_common_init(s) < 0)
760         return -1;
761
762     if (ARCH_X86)
763         ff_mpv_encode_init_x86(s);
764
765     ff_fdctdsp_init(&s->fdsp, avctx);
766     ff_me_cmp_init(&s->mecc, avctx);
767     ff_mpegvideoencdsp_init(&s->mpvencdsp, avctx);
768     ff_pixblockdsp_init(&s->pdsp, avctx);
769     ff_qpeldsp_init(&s->qdsp);
770
771     if (s->msmpeg4_version) {
772         FF_ALLOCZ_OR_GOTO(s->avctx, s->ac_stats,
773                           2 * 2 * (MAX_LEVEL + 1) *
774                           (MAX_RUN + 1) * 2 * sizeof(int), fail);
775     }
776     FF_ALLOCZ_OR_GOTO(s->avctx, s->avctx->stats_out, 256, fail);
777
778     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix,   64 * 32 * sizeof(int), fail);
779     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix,   64 * 32 * sizeof(int), fail);
780     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
781     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
782     FF_ALLOCZ_OR_GOTO(s->avctx, s->input_picture,
783                       MAX_PICTURE_COUNT * sizeof(Picture *), fail);
784     FF_ALLOCZ_OR_GOTO(s->avctx, s->reordered_input_picture,
785                       MAX_PICTURE_COUNT * sizeof(Picture *), fail);
786
787
788     if (s->noise_reduction) {
789         FF_ALLOCZ_OR_GOTO(s->avctx, s->dct_offset,
790                           2 * 64 * sizeof(uint16_t), fail);
791     }
792
793     if (CONFIG_H263_ENCODER)
794         ff_h263dsp_init(&s->h263dsp);
795     if (!s->dct_quantize)
796         s->dct_quantize = ff_dct_quantize_c;
797     if (!s->denoise_dct)
798         s->denoise_dct  = denoise_dct_c;
799     s->fast_dct_quantize = s->dct_quantize;
800     if (avctx->trellis)
801         s->dct_quantize  = dct_quantize_trellis_c;
802
803     if ((CONFIG_H263P_ENCODER || CONFIG_RV20_ENCODER) && s->modified_quant)
804         s->chroma_qscale_table = ff_h263_chroma_qscale_table;
805
806     if (s->slice_context_count > 1) {
807         s->rtp_mode = 1;
808
809         if (avctx->codec_id == AV_CODEC_ID_H263 || avctx->codec_id == AV_CODEC_ID_H263P)
810             s->h263_slice_structured = 1;
811     }
812
813     s->quant_precision = 5;
814
815 #if FF_API_PRIVATE_OPT
816 FF_DISABLE_DEPRECATION_WARNINGS
817     if (avctx->frame_skip_threshold)
818         s->frame_skip_threshold = avctx->frame_skip_threshold;
819     if (avctx->frame_skip_factor)
820         s->frame_skip_factor = avctx->frame_skip_factor;
821     if (avctx->frame_skip_exp)
822         s->frame_skip_exp = avctx->frame_skip_exp;
823     if (avctx->frame_skip_cmp != FF_CMP_DCTMAX)
824         s->frame_skip_cmp = avctx->frame_skip_cmp;
825 FF_ENABLE_DEPRECATION_WARNINGS
826 #endif
827
828     ff_set_cmp(&s->mecc, s->mecc.ildct_cmp,      s->avctx->ildct_cmp);
829     ff_set_cmp(&s->mecc, s->mecc.frame_skip_cmp, s->frame_skip_cmp);
830
831     if (CONFIG_H261_ENCODER && s->out_format == FMT_H261)
832         ff_h261_encode_init(s);
833     if (CONFIG_H263_ENCODER && s->out_format == FMT_H263)
834         ff_h263_encode_init(s);
835     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
836         if ((ret = ff_msmpeg4_encode_init(s)) < 0)
837             return ret;
838     if ((CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
839         && s->out_format == FMT_MPEG1)
840         ff_mpeg1_encode_init(s);
841
842     /* init q matrix */
843     for (i = 0; i < 64; i++) {
844         int j = s->idsp.idct_permutation[i];
845         if (CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4 &&
846             s->mpeg_quant) {
847             s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
848             s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
849         } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
850             s->intra_matrix[j] =
851             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
852         } else {
853             /* MPEG-1/2 */
854             s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
855             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
856         }
857         if (s->avctx->intra_matrix)
858             s->intra_matrix[j] = s->avctx->intra_matrix[i];
859         if (s->avctx->inter_matrix)
860             s->inter_matrix[j] = s->avctx->inter_matrix[i];
861     }
862
863     /* precompute matrix */
864     /* for mjpeg, we do include qscale in the matrix */
865     if (s->out_format != FMT_MJPEG) {
866         ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
867                           s->intra_matrix, s->intra_quant_bias, avctx->qmin,
868                           31, 1);
869         ff_convert_matrix(s, s->q_inter_matrix, s->q_inter_matrix16,
870                           s->inter_matrix, s->inter_quant_bias, avctx->qmin,
871                           31, 0);
872     }
873
874     if (ff_rate_control_init(s) < 0)
875         return -1;
876
877 #if FF_API_ERROR_RATE
878     FF_DISABLE_DEPRECATION_WARNINGS
879     if (avctx->error_rate)
880         s->error_rate = avctx->error_rate;
881     FF_ENABLE_DEPRECATION_WARNINGS;
882 #endif
883
884 #if FF_API_NORMALIZE_AQP
885     FF_DISABLE_DEPRECATION_WARNINGS
886     if (avctx->flags & CODEC_FLAG_NORMALIZE_AQP)
887         s->mpv_flags |= FF_MPV_FLAG_NAQ;
888     FF_ENABLE_DEPRECATION_WARNINGS;
889 #endif
890
891 #if FF_API_MV0
892     FF_DISABLE_DEPRECATION_WARNINGS
893     if (avctx->flags & CODEC_FLAG_MV0)
894         s->mpv_flags |= FF_MPV_FLAG_MV0;
895     FF_ENABLE_DEPRECATION_WARNINGS
896 #endif
897
898 #if FF_API_MPV_OPT
899     FF_DISABLE_DEPRECATION_WARNINGS
900     if (avctx->rc_qsquish != 0.0)
901         s->rc_qsquish = avctx->rc_qsquish;
902     if (avctx->rc_qmod_amp != 0.0)
903         s->rc_qmod_amp = avctx->rc_qmod_amp;
904     if (avctx->rc_qmod_freq)
905         s->rc_qmod_freq = avctx->rc_qmod_freq;
906     if (avctx->rc_buffer_aggressivity != 1.0)
907         s->rc_buffer_aggressivity = avctx->rc_buffer_aggressivity;
908     if (avctx->rc_initial_cplx != 0.0)
909         s->rc_initial_cplx = avctx->rc_initial_cplx;
910     if (avctx->lmin)
911         s->lmin = avctx->lmin;
912     if (avctx->lmax)
913         s->lmax = avctx->lmax;
914
915     if (avctx->rc_eq) {
916         av_freep(&s->rc_eq);
917         s->rc_eq = av_strdup(avctx->rc_eq);
918         if (!s->rc_eq)
919             return AVERROR(ENOMEM);
920     }
921     FF_ENABLE_DEPRECATION_WARNINGS
922 #endif
923
924 #if FF_API_PRIVATE_OPT
925     FF_DISABLE_DEPRECATION_WARNINGS
926     if (avctx->brd_scale)
927         s->brd_scale = avctx->brd_scale;
928
929     if (avctx->prediction_method)
930         s->pred = avctx->prediction_method + 1;
931     FF_ENABLE_DEPRECATION_WARNINGS
932 #endif
933
934     if (s->b_frame_strategy == 2) {
935         for (i = 0; i < s->max_b_frames + 2; i++) {
936             s->tmp_frames[i] = av_frame_alloc();
937             if (!s->tmp_frames[i])
938                 return AVERROR(ENOMEM);
939
940             s->tmp_frames[i]->format = AV_PIX_FMT_YUV420P;
941             s->tmp_frames[i]->width  = s->width  >> s->brd_scale;
942             s->tmp_frames[i]->height = s->height >> s->brd_scale;
943
944             ret = av_frame_get_buffer(s->tmp_frames[i], 32);
945             if (ret < 0)
946                 return ret;
947         }
948     }
949
950     cpb_props = ff_add_cpb_side_data(avctx);
951     if (!cpb_props)
952         return AVERROR(ENOMEM);
953     cpb_props->max_bitrate = avctx->rc_max_rate;
954     cpb_props->min_bitrate = avctx->rc_min_rate;
955     cpb_props->avg_bitrate = avctx->bit_rate;
956     cpb_props->buffer_size = avctx->rc_buffer_size;
957
958     return 0;
959 fail:
960     ff_mpv_encode_end(avctx);
961     return AVERROR_UNKNOWN;
962 }
963
964 av_cold int ff_mpv_encode_end(AVCodecContext *avctx)
965 {
966     MpegEncContext *s = avctx->priv_data;
967     int i;
968
969     ff_rate_control_uninit(s);
970     ff_mpv_common_end(s);
971     if (CONFIG_MJPEG_ENCODER &&
972         s->out_format == FMT_MJPEG)
973         ff_mjpeg_encode_close(s);
974
975     av_freep(&avctx->extradata);
976
977     for (i = 0; i < FF_ARRAY_ELEMS(s->tmp_frames); i++)
978         av_frame_free(&s->tmp_frames[i]);
979
980     ff_free_picture_tables(&s->new_picture);
981     ff_mpeg_unref_picture(s->avctx, &s->new_picture);
982
983     av_freep(&s->avctx->stats_out);
984     av_freep(&s->ac_stats);
985
986     av_freep(&s->q_intra_matrix);
987     av_freep(&s->q_inter_matrix);
988     av_freep(&s->q_intra_matrix16);
989     av_freep(&s->q_inter_matrix16);
990     av_freep(&s->input_picture);
991     av_freep(&s->reordered_input_picture);
992     av_freep(&s->dct_offset);
993
994     return 0;
995 }
996
/**
 * Sum of absolute differences between a 16x16 block and a constant value.
 *
 * @param src    top-left sample of the block
 * @param ref    value every sample is compared against
 * @param stride distance in bytes between two consecutive rows of src
 * @return sum over the 256 samples of |sample - ref|
 */
static int get_sae(uint8_t *src, int ref, int stride)
{
    int total = 0;
    int row, col;

    for (row = 0; row < 16; row++) {
        for (col = 0; col < 16; col++) {
            const int diff = src[col + row * stride] - ref;

            total += diff >= 0 ? diff : -diff;
        }
    }

    return total;
}
1010
1011 static int get_intra_count(MpegEncContext *s, uint8_t *src,
1012                            uint8_t *ref, int stride)
1013 {
1014     int x, y, w, h;
1015     int acc = 0;
1016
1017     w = s->width  & ~15;
1018     h = s->height & ~15;
1019
1020     for (y = 0; y < h; y += 16) {
1021         for (x = 0; x < w; x += 16) {
1022             int offset = x + y * stride;
1023             int sad  = s->mecc.sad[0](NULL, src + offset, ref + offset,
1024                                       stride, 16);
1025             int mean = (s->mpvencdsp.pix_sum(src + offset, stride) + 128) >> 8;
1026             int sae  = get_sae(src + offset, mean, stride);
1027
1028             acc += sae + 500 < sad;
1029         }
1030     }
1031     return acc;
1032 }
1033
1034 static int alloc_picture(MpegEncContext *s, Picture *pic, int shared)
1035 {
1036     return ff_alloc_picture(s->avctx, pic, &s->me, &s->sc, shared, 1,
1037                             s->chroma_x_shift, s->chroma_y_shift, s->out_format,
1038                             s->mb_stride, s->mb_height, s->b8_stride,
1039                             &s->linesize, &s->uvlinesize);
1040 }
1041
1042 static int load_input_picture(MpegEncContext *s, const AVFrame *pic_arg)
1043 {
1044     Picture *pic = NULL;
1045     int64_t pts;
1046     int i, display_picture_number = 0, ret;
1047     int encoding_delay = s->max_b_frames ? s->max_b_frames
1048                                          : (s->low_delay ? 0 : 1);
1049     int flush_offset = 1;
1050     int direct = 1;
1051
1052     if (pic_arg) {
1053         pts = pic_arg->pts;
1054         display_picture_number = s->input_picture_number++;
1055
1056         if (pts != AV_NOPTS_VALUE) {
1057             if (s->user_specified_pts != AV_NOPTS_VALUE) {
1058                 int64_t time = pts;
1059                 int64_t last = s->user_specified_pts;
1060
1061                 if (time <= last) {
1062                     av_log(s->avctx, AV_LOG_ERROR,
1063                            "Error, Invalid timestamp=%"PRId64", "
1064                            "last=%"PRId64"\n", pts, s->user_specified_pts);
1065                     return -1;
1066                 }
1067
1068                 if (!s->low_delay && display_picture_number == 1)
1069                     s->dts_delta = time - last;
1070             }
1071             s->user_specified_pts = pts;
1072         } else {
1073             if (s->user_specified_pts != AV_NOPTS_VALUE) {
1074                 s->user_specified_pts =
1075                 pts = s->user_specified_pts + 1;
1076                 av_log(s->avctx, AV_LOG_INFO,
1077                        "Warning: AVFrame.pts=? trying to guess (%"PRId64")\n",
1078                        pts);
1079             } else {
1080                 pts = display_picture_number;
1081             }
1082         }
1083
1084         if (!pic_arg->buf[0] ||
1085             pic_arg->linesize[0] != s->linesize ||
1086             pic_arg->linesize[1] != s->uvlinesize ||
1087             pic_arg->linesize[2] != s->uvlinesize)
1088             direct = 0;
1089         if ((s->width & 15) || (s->height & 15))
1090             direct = 0;
1091
1092         ff_dlog(s->avctx, "%d %d %td %td\n", pic_arg->linesize[0],
1093                 pic_arg->linesize[1], s->linesize, s->uvlinesize);
1094
1095         i = ff_find_unused_picture(s->avctx, s->picture, direct);
1096         if (i < 0)
1097             return i;
1098
1099         pic = &s->picture[i];
1100         pic->reference = 3;
1101
1102         if (direct) {
1103             if ((ret = av_frame_ref(pic->f, pic_arg)) < 0)
1104                 return ret;
1105         }
1106         ret = alloc_picture(s, pic, direct);
1107         if (ret < 0)
1108             return ret;
1109
1110         if (!direct) {
1111             if (pic->f->data[0] + INPLACE_OFFSET == pic_arg->data[0] &&
1112                 pic->f->data[1] + INPLACE_OFFSET == pic_arg->data[1] &&
1113                 pic->f->data[2] + INPLACE_OFFSET == pic_arg->data[2]) {
1114                 // empty
1115             } else {
1116                 int h_chroma_shift, v_chroma_shift;
1117                 av_pix_fmt_get_chroma_sub_sample(s->avctx->pix_fmt,
1118                                                  &h_chroma_shift,
1119                                                  &v_chroma_shift);
1120
1121                 for (i = 0; i < 3; i++) {
1122                     int src_stride = pic_arg->linesize[i];
1123                     int dst_stride = i ? s->uvlinesize : s->linesize;
1124                     int h_shift = i ? h_chroma_shift : 0;
1125                     int v_shift = i ? v_chroma_shift : 0;
1126                     int w = s->width  >> h_shift;
1127                     int h = s->height >> v_shift;
1128                     uint8_t *src = pic_arg->data[i];
1129                     uint8_t *dst = pic->f->data[i];
1130
1131                     if (!s->avctx->rc_buffer_size)
1132                         dst += INPLACE_OFFSET;
1133
1134                     if (src_stride == dst_stride)
1135                         memcpy(dst, src, src_stride * h);
1136                     else {
1137                         int h2 = h;
1138                         uint8_t *dst2 = dst;
1139                         while (h2--) {
1140                             memcpy(dst2, src, w);
1141                             dst2 += dst_stride;
1142                             src += src_stride;
1143                         }
1144                     }
1145                     if ((s->width & 15) || (s->height & 15)) {
1146                         s->mpvencdsp.draw_edges(dst, dst_stride,
1147                                                 w, h,
1148                                                 16 >> h_shift,
1149                                                 16 >> v_shift,
1150                                                 EDGE_BOTTOM);
1151                     }
1152                 }
1153             }
1154         }
1155         ret = av_frame_copy_props(pic->f, pic_arg);
1156         if (ret < 0)
1157             return ret;
1158
1159         pic->f->display_picture_number = display_picture_number;
1160         pic->f->pts = pts; // we set this here to avoid modifying pic_arg
1161     } else {
1162         /* Flushing: When we have not received enough input frames,
1163          * ensure s->input_picture[0] contains the first picture */
1164         for (flush_offset = 0; flush_offset < encoding_delay + 1; flush_offset++)
1165             if (s->input_picture[flush_offset])
1166                 break;
1167
1168         if (flush_offset <= 1)
1169             flush_offset = 1;
1170         else
1171             encoding_delay = encoding_delay - flush_offset + 1;
1172     }
1173
1174     /* shift buffer entries */
1175     for (i = flush_offset; i < MAX_PICTURE_COUNT /*s->encoding_delay + 1*/; i++)
1176         s->input_picture[i - flush_offset] = s->input_picture[i];
1177
1178     s->input_picture[encoding_delay] = (Picture*) pic;
1179
1180     return 0;
1181 }
1182
1183 static int skip_check(MpegEncContext *s, Picture *p, Picture *ref)
1184 {
1185     int x, y, plane;
1186     int score = 0;
1187     int64_t score64 = 0;
1188
1189     for (plane = 0; plane < 3; plane++) {
1190         const int stride = p->f->linesize[plane];
1191         const int bw = plane ? 1 : 2;
1192         for (y = 0; y < s->mb_height * bw; y++) {
1193             for (x = 0; x < s->mb_width * bw; x++) {
1194                 int off = p->shared ? 0 : 16;
1195                 uint8_t *dptr = p->f->data[plane] + 8 * (x + y * stride) + off;
1196                 uint8_t *rptr = ref->f->data[plane] + 8 * (x + y * stride);
1197                 int v = s->mecc.frame_skip_cmp[1](s, dptr, rptr, stride, 8);
1198
1199                 switch (s->frame_skip_exp) {
1200                 case 0: score    =  FFMAX(score, v);          break;
1201                 case 1: score   += FFABS(v);                  break;
1202                 case 2: score   += v * v;                     break;
1203                 case 3: score64 += FFABS(v * v * (int64_t)v); break;
1204                 case 4: score64 += v * v * (int64_t)(v * v);  break;
1205                 }
1206             }
1207         }
1208     }
1209
1210     if (score)
1211         score64 = score;
1212
1213     if (score64 < s->frame_skip_threshold)
1214         return 1;
1215     if (score64 < ((s->frame_skip_factor * (int64_t) s->lambda) >> 8))
1216         return 1;
1217     return 0;
1218 }
1219
1220 static int encode_frame(AVCodecContext *c, AVFrame *frame)
1221 {
1222     AVPacket pkt = { 0 };
1223     int ret;
1224     int size = 0;
1225
1226     av_init_packet(&pkt);
1227
1228     ret = avcodec_send_frame(c, frame);
1229     if (ret < 0)
1230         return ret;
1231
1232     do {
1233         ret = avcodec_receive_packet(c, &pkt);
1234         if (ret >= 0) {
1235             size += pkt.size;
1236             av_packet_unref(&pkt);
1237         } else if (ret < 0 && ret != AVERROR(EAGAIN) && ret != AVERROR_EOF)
1238             return ret;
1239     } while (ret >= 0);
1240
1241     return size;
1242 }
1243
1244 static int estimate_best_b_count(MpegEncContext *s)
1245 {
1246     const AVCodec *codec = avcodec_find_encoder(s->avctx->codec_id);
1247     const int scale = s->brd_scale;
1248     int width  = s->width  >> scale;
1249     int height = s->height >> scale;
1250     int i, j, out_size, p_lambda, b_lambda, lambda2;
1251     int64_t best_rd  = INT64_MAX;
1252     int best_b_count = -1;
1253     int ret = 0;
1254
1255     assert(scale >= 0 && scale <= 3);
1256
1257     //emms_c();
1258     //s->next_picture_ptr->quality;
1259     p_lambda = s->last_lambda_for[AV_PICTURE_TYPE_P];
1260     //p_lambda * FFABS(s->avctx->b_quant_factor) + s->avctx->b_quant_offset;
1261     b_lambda = s->last_lambda_for[AV_PICTURE_TYPE_B];
1262     if (!b_lambda) // FIXME we should do this somewhere else
1263         b_lambda = p_lambda;
1264     lambda2  = (b_lambda * b_lambda + (1 << FF_LAMBDA_SHIFT) / 2) >>
1265                FF_LAMBDA_SHIFT;
1266
1267     for (i = 0; i < s->max_b_frames + 2; i++) {
1268         Picture pre_input, *pre_input_ptr = i ? s->input_picture[i - 1] :
1269                                                 s->next_picture_ptr;
1270
1271         if (pre_input_ptr && (!i || s->input_picture[i - 1])) {
1272             pre_input = *pre_input_ptr;
1273
1274             if (!pre_input.shared && i) {
1275                 pre_input.f->data[0] += INPLACE_OFFSET;
1276                 pre_input.f->data[1] += INPLACE_OFFSET;
1277                 pre_input.f->data[2] += INPLACE_OFFSET;
1278             }
1279
1280             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[0],
1281                                        s->tmp_frames[i]->linesize[0],
1282                                        pre_input.f->data[0],
1283                                        pre_input.f->linesize[0],
1284                                        width, height);
1285             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[1],
1286                                        s->tmp_frames[i]->linesize[1],
1287                                        pre_input.f->data[1],
1288                                        pre_input.f->linesize[1],
1289                                        width >> 1, height >> 1);
1290             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[2],
1291                                        s->tmp_frames[i]->linesize[2],
1292                                        pre_input.f->data[2],
1293                                        pre_input.f->linesize[2],
1294                                        width >> 1, height >> 1);
1295         }
1296     }
1297
1298     for (j = 0; j < s->max_b_frames + 1; j++) {
1299         AVCodecContext *c;
1300         int64_t rd = 0;
1301
1302         if (!s->input_picture[j])
1303             break;
1304
1305         c = avcodec_alloc_context3(NULL);
1306         if (!c)
1307             return AVERROR(ENOMEM);
1308
1309         c->width        = width;
1310         c->height       = height;
1311         c->flags        = AV_CODEC_FLAG_QSCALE | AV_CODEC_FLAG_PSNR;
1312         c->flags       |= s->avctx->flags & AV_CODEC_FLAG_QPEL;
1313         c->mb_decision  = s->avctx->mb_decision;
1314         c->me_cmp       = s->avctx->me_cmp;
1315         c->mb_cmp       = s->avctx->mb_cmp;
1316         c->me_sub_cmp   = s->avctx->me_sub_cmp;
1317         c->pix_fmt      = AV_PIX_FMT_YUV420P;
1318         c->time_base    = s->avctx->time_base;
1319         c->max_b_frames = s->max_b_frames;
1320
1321         ret = avcodec_open2(c, codec, NULL);
1322         if (ret < 0)
1323             goto fail;
1324
1325         s->tmp_frames[0]->pict_type = AV_PICTURE_TYPE_I;
1326         s->tmp_frames[0]->quality   = 1 * FF_QP2LAMBDA;
1327
1328         out_size = encode_frame(c, s->tmp_frames[0]);
1329         if (out_size < 0) {
1330             ret = out_size;
1331             goto fail;
1332         }
1333
1334         //rd += (out_size * lambda2) >> FF_LAMBDA_SHIFT;
1335
1336         for (i = 0; i < s->max_b_frames + 1; i++) {
1337             int is_p = i % (j + 1) == j || i == s->max_b_frames;
1338
1339             s->tmp_frames[i + 1]->pict_type = is_p ?
1340                                      AV_PICTURE_TYPE_P : AV_PICTURE_TYPE_B;
1341             s->tmp_frames[i + 1]->quality   = is_p ? p_lambda : b_lambda;
1342
1343             out_size = encode_frame(c, s->tmp_frames[i + 1]);
1344             if (out_size < 0) {
1345                 ret = out_size;
1346                 goto fail;
1347             }
1348
1349             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1350         }
1351
1352         /* get the delayed frames */
1353         out_size = encode_frame(c, NULL);
1354         if (out_size < 0) {
1355             ret = out_size;
1356             goto fail;
1357         }
1358         rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1359
1360         rd += c->error[0] + c->error[1] + c->error[2];
1361
1362         if (rd < best_rd) {
1363             best_rd = rd;
1364             best_b_count = j;
1365         }
1366
1367 fail:
1368         avcodec_free_context(&c);
1369         if (ret < 0)
1370             return ret;
1371     }
1372
1373     return best_b_count;
1374 }
1375
1376 static int select_input_picture(MpegEncContext *s)
1377 {
1378     int i, ret;
1379
1380     for (i = 1; i < MAX_PICTURE_COUNT; i++)
1381         s->reordered_input_picture[i - 1] = s->reordered_input_picture[i];
1382     s->reordered_input_picture[MAX_PICTURE_COUNT - 1] = NULL;
1383
1384     /* set next picture type & ordering */
1385     if (!s->reordered_input_picture[0] && s->input_picture[0]) {
1386         if (/*s->picture_in_gop_number >= s->gop_size ||*/
1387             !s->next_picture_ptr || s->intra_only) {
1388             s->reordered_input_picture[0] = s->input_picture[0];
1389             s->reordered_input_picture[0]->f->pict_type = AV_PICTURE_TYPE_I;
1390             s->reordered_input_picture[0]->f->coded_picture_number =
1391                 s->coded_picture_number++;
1392         } else {
1393             int b_frames = 0;
1394
1395             if (s->frame_skip_threshold || s->frame_skip_factor) {
1396                 if (s->picture_in_gop_number < s->gop_size &&
1397                     skip_check(s, s->input_picture[0], s->next_picture_ptr)) {
1398                     // FIXME check that the gop check above is +-1 correct
1399                     av_frame_unref(s->input_picture[0]->f);
1400
1401                     emms_c();
1402                     ff_vbv_update(s, 0);
1403
1404                     goto no_output_pic;
1405                 }
1406             }
1407
1408             if (s->avctx->flags & AV_CODEC_FLAG_PASS2) {
1409                 for (i = 0; i < s->max_b_frames + 1; i++) {
1410                     int pict_num = s->input_picture[0]->f->display_picture_number + i;
1411
1412                     if (pict_num >= s->rc_context.num_entries)
1413                         break;
1414                     if (!s->input_picture[i]) {
1415                         s->rc_context.entry[pict_num - 1].new_pict_type = AV_PICTURE_TYPE_P;
1416                         break;
1417                     }
1418
1419                     s->input_picture[i]->f->pict_type =
1420                         s->rc_context.entry[pict_num].new_pict_type;
1421                 }
1422             }
1423
1424             if (s->b_frame_strategy == 0) {
1425                 b_frames = s->max_b_frames;
1426                 while (b_frames && !s->input_picture[b_frames])
1427                     b_frames--;
1428             } else if (s->b_frame_strategy == 1) {
1429                 for (i = 1; i < s->max_b_frames + 1; i++) {
1430                     if (s->input_picture[i] &&
1431                         s->input_picture[i]->b_frame_score == 0) {
1432                         s->input_picture[i]->b_frame_score =
1433                             get_intra_count(s,
1434                                             s->input_picture[i    ]->f->data[0],
1435                                             s->input_picture[i - 1]->f->data[0],
1436                                             s->linesize) + 1;
1437                     }
1438                 }
1439                 for (i = 0; i < s->max_b_frames + 1; i++) {
1440                     if (!s->input_picture[i] ||
1441                         s->input_picture[i]->b_frame_score - 1 >
1442                             s->mb_num / s->b_sensitivity)
1443                         break;
1444                 }
1445
1446                 b_frames = FFMAX(0, i - 1);
1447
1448                 /* reset scores */
1449                 for (i = 0; i < b_frames + 1; i++) {
1450                     s->input_picture[i]->b_frame_score = 0;
1451                 }
1452             } else if (s->b_frame_strategy == 2) {
1453                 b_frames = estimate_best_b_count(s);
1454                 if (b_frames < 0)
1455                     return b_frames;
1456             }
1457
1458             emms_c();
1459
1460             for (i = b_frames - 1; i >= 0; i--) {
1461                 int type = s->input_picture[i]->f->pict_type;
1462                 if (type && type != AV_PICTURE_TYPE_B)
1463                     b_frames = i;
1464             }
1465             if (s->input_picture[b_frames]->f->pict_type == AV_PICTURE_TYPE_B &&
1466                 b_frames == s->max_b_frames) {
1467                 av_log(s->avctx, AV_LOG_ERROR,
1468                        "warning, too many B-frames in a row\n");
1469             }
1470
1471             if (s->picture_in_gop_number + b_frames >= s->gop_size) {
1472                 if ((s->mpv_flags & FF_MPV_FLAG_STRICT_GOP) &&
1473                     s->gop_size > s->picture_in_gop_number) {
1474                     b_frames = s->gop_size - s->picture_in_gop_number - 1;
1475                 } else {
1476                     if (s->avctx->flags & AV_CODEC_FLAG_CLOSED_GOP)
1477                         b_frames = 0;
1478                     s->input_picture[b_frames]->f->pict_type = AV_PICTURE_TYPE_I;
1479                 }
1480             }
1481
1482             if ((s->avctx->flags & AV_CODEC_FLAG_CLOSED_GOP) && b_frames &&
1483                 s->input_picture[b_frames]->f->pict_type == AV_PICTURE_TYPE_I)
1484                 b_frames--;
1485
1486             s->reordered_input_picture[0] = s->input_picture[b_frames];
1487             if (s->reordered_input_picture[0]->f->pict_type != AV_PICTURE_TYPE_I)
1488                 s->reordered_input_picture[0]->f->pict_type = AV_PICTURE_TYPE_P;
1489             s->reordered_input_picture[0]->f->coded_picture_number =
1490                 s->coded_picture_number++;
1491             for (i = 0; i < b_frames; i++) {
1492                 s->reordered_input_picture[i + 1] = s->input_picture[i];
1493                 s->reordered_input_picture[i + 1]->f->pict_type =
1494                     AV_PICTURE_TYPE_B;
1495                 s->reordered_input_picture[i + 1]->f->coded_picture_number =
1496                     s->coded_picture_number++;
1497             }
1498         }
1499     }
1500 no_output_pic:
1501     ff_mpeg_unref_picture(s->avctx, &s->new_picture);
1502
1503     if (s->reordered_input_picture[0]) {
1504         s->reordered_input_picture[0]->reference =
1505            s->reordered_input_picture[0]->f->pict_type !=
1506                AV_PICTURE_TYPE_B ? 3 : 0;
1507
1508         if ((ret = ff_mpeg_ref_picture(s->avctx, &s->new_picture, s->reordered_input_picture[0])))
1509             return ret;
1510
1511         if (s->reordered_input_picture[0]->shared || s->avctx->rc_buffer_size) {
1512             // input is a shared pix, so we can't modify it -> allocate a new
1513             // one & ensure that the shared one is reuseable
1514
1515             Picture *pic;
1516             int i = ff_find_unused_picture(s->avctx, s->picture, 0);
1517             if (i < 0)
1518                 return i;
1519             pic = &s->picture[i];
1520
1521             pic->reference = s->reordered_input_picture[0]->reference;
1522             if (alloc_picture(s, pic, 0) < 0) {
1523                 return -1;
1524             }
1525
1526             ret = av_frame_copy_props(pic->f, s->reordered_input_picture[0]->f);
1527             if (ret < 0)
1528                 return ret;
1529
1530             /* mark us unused / free shared pic */
1531             av_frame_unref(s->reordered_input_picture[0]->f);
1532             s->reordered_input_picture[0]->shared = 0;
1533
1534             s->current_picture_ptr = pic;
1535         } else {
1536             // input is not a shared pix -> reuse buffer for current_pix
1537             s->current_picture_ptr = s->reordered_input_picture[0];
1538             for (i = 0; i < 4; i++) {
1539                 s->new_picture.f->data[i] += INPLACE_OFFSET;
1540             }
1541         }
1542         ff_mpeg_unref_picture(s->avctx, &s->current_picture);
1543         if ((ret = ff_mpeg_ref_picture(s->avctx, &s->current_picture,
1544                                        s->current_picture_ptr)) < 0)
1545             return ret;
1546
1547         s->picture_number = s->new_picture.f->display_picture_number;
1548     }
1549     return 0;
1550 }
1551
1552 static void frame_end(MpegEncContext *s)
1553 {
1554     int i;
1555
1556     if (s->unrestricted_mv &&
1557         s->current_picture.reference &&
1558         !s->intra_only) {
1559         const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(s->avctx->pix_fmt);
1560         int hshift = desc->log2_chroma_w;
1561         int vshift = desc->log2_chroma_h;
1562         s->mpvencdsp.draw_edges(s->current_picture.f->data[0], s->linesize,
1563                                 s->h_edge_pos, s->v_edge_pos,
1564                                 EDGE_WIDTH, EDGE_WIDTH,
1565                                 EDGE_TOP | EDGE_BOTTOM);
1566         s->mpvencdsp.draw_edges(s->current_picture.f->data[1], s->uvlinesize,
1567                                 s->h_edge_pos >> hshift,
1568                                 s->v_edge_pos >> vshift,
1569                                 EDGE_WIDTH >> hshift,
1570                                 EDGE_WIDTH >> vshift,
1571                                 EDGE_TOP | EDGE_BOTTOM);
1572         s->mpvencdsp.draw_edges(s->current_picture.f->data[2], s->uvlinesize,
1573                                 s->h_edge_pos >> hshift,
1574                                 s->v_edge_pos >> vshift,
1575                                 EDGE_WIDTH >> hshift,
1576                                 EDGE_WIDTH >> vshift,
1577                                 EDGE_TOP | EDGE_BOTTOM);
1578     }
1579
1580     emms_c();
1581
1582     s->last_pict_type                 = s->pict_type;
1583     s->last_lambda_for [s->pict_type] = s->current_picture_ptr->f->quality;
1584     if (s->pict_type!= AV_PICTURE_TYPE_B)
1585         s->last_non_b_pict_type = s->pict_type;
1586
1587     if (s->encoding) {
1588         /* release non-reference frames */
1589         for (i = 0; i < MAX_PICTURE_COUNT; i++) {
1590             if (!s->picture[i].reference)
1591                 ff_mpeg_unref_picture(s->avctx, &s->picture[i]);
1592         }
1593     }
1594
1595 #if FF_API_CODED_FRAME
1596 FF_DISABLE_DEPRECATION_WARNINGS
1597     av_frame_copy_props(s->avctx->coded_frame, s->current_picture.f);
1598 FF_ENABLE_DEPRECATION_WARNINGS
1599 #endif
1600 #if FF_API_ERROR_FRAME
1601 FF_DISABLE_DEPRECATION_WARNINGS
1602     memcpy(s->current_picture.f->error, s->current_picture.encoding_error,
1603            sizeof(s->current_picture.encoding_error));
1604 FF_ENABLE_DEPRECATION_WARNINGS
1605 #endif
1606 }
1607
1608 static void update_noise_reduction(MpegEncContext *s)
1609 {
1610     int intra, i;
1611
1612     for (intra = 0; intra < 2; intra++) {
1613         if (s->dct_count[intra] > (1 << 16)) {
1614             for (i = 0; i < 64; i++) {
1615                 s->dct_error_sum[intra][i] >>= 1;
1616             }
1617             s->dct_count[intra] >>= 1;
1618         }
1619
1620         for (i = 0; i < 64; i++) {
1621             s->dct_offset[intra][i] = (s->noise_reduction *
1622                                        s->dct_count[intra] +
1623                                        s->dct_error_sum[intra][i] / 2) /
1624                                       (s->dct_error_sum[intra][i] + 1);
1625         }
1626     }
1627 }
1628
/**
 * Set up the picture state of the context before the current frame is coded.
 *
 * Releases the previous "last" picture when it is no longer needed, stamps
 * the picture type and key-frame flag on the current picture, refreshes the
 * current/last/next picture references, adjusts data pointers and line sizes
 * for field pictures, selects the dequantizer pair and, when noise-reduction
 * statistics are present, recomputes the noise-reduction offsets.
 *
 * @param s encoder context
 * @return 0 on success, otherwise the negative value returned by
 *         ff_mpeg_ref_picture()
 */
1629 static int frame_start(MpegEncContext *s)
1630 {
1631     int ret;
1632
1633     /* mark & release old frames */
1634     if (s->pict_type != AV_PICTURE_TYPE_B && s->last_picture_ptr &&
1635         s->last_picture_ptr != s->next_picture_ptr &&
1636         s->last_picture_ptr->f->buf[0]) {
1637         ff_mpeg_unref_picture(s->avctx, s->last_picture_ptr);
1638     }
1639
         /* Record the coding type on the frame; only I pictures are flagged
          * as key frames. */
1640     s->current_picture_ptr->f->pict_type = s->pict_type;
1641     s->current_picture_ptr->f->key_frame = s->pict_type == AV_PICTURE_TYPE_I;
1642
         /* Drop the old reference held in current_picture and take a new one
          * on the picture current_picture_ptr points to. */
1643     ff_mpeg_unref_picture(s->avctx, &s->current_picture);
1644     if ((ret = ff_mpeg_ref_picture(s->avctx, &s->current_picture,
1645                                    s->current_picture_ptr)) < 0)
1646         return ret;
1647
         /* For non-B pictures the former "next" picture becomes "last"; the
          * current picture becomes "next" unless it is droppable. */
1648     if (s->pict_type != AV_PICTURE_TYPE_B) {
1649         s->last_picture_ptr = s->next_picture_ptr;
1650         if (!s->droppable)
1651             s->next_picture_ptr = s->current_picture_ptr;
1652     }
1653
         /* Re-reference last_picture / next_picture from their pointers; a
          * picture without buf[0] is left unreferenced. */
1654     if (s->last_picture_ptr) {
1655         ff_mpeg_unref_picture(s->avctx, &s->last_picture);
1656         if (s->last_picture_ptr->f->buf[0] &&
1657             (ret = ff_mpeg_ref_picture(s->avctx, &s->last_picture,
1658                                        s->last_picture_ptr)) < 0)
1659             return ret;
1660     }
1661     if (s->next_picture_ptr) {
1662         ff_mpeg_unref_picture(s->avctx, &s->next_picture);
1663         if (s->next_picture_ptr->f->buf[0] &&
1664             (ret = ff_mpeg_ref_picture(s->avctx, &s->next_picture,
1665                                        s->next_picture_ptr)) < 0)
1666             return ret;
1667     }
1668
         /* Field pictures: double every line size so that only every second
          * line is addressed; the bottom field additionally starts one line
          * further down. */
1669     if (s->picture_structure!= PICT_FRAME) {
1670         int i;
1671         for (i = 0; i < 4; i++) {
1672             if (s->picture_structure == PICT_BOTTOM_FIELD) {
1673                 s->current_picture.f->data[i] +=
1674                     s->current_picture.f->linesize[i];
1675             }
1676             s->current_picture.f->linesize[i] *= 2;
1677             s->last_picture.f->linesize[i]    *= 2;
1678             s->next_picture.f->linesize[i]    *= 2;
1679         }
1680     }
1681
         /* Choose the intra/inter dequantizer pair: MPEG-2 style when
          * mpeg_quant is set or the codec is MPEG-2, H.263 style for the
          * H.263/H.261 output formats, MPEG-1 style otherwise. */
1682     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG2VIDEO) {
1683         s->dct_unquantize_intra = s->dct_unquantize_mpeg2_intra;
1684         s->dct_unquantize_inter = s->dct_unquantize_mpeg2_inter;
1685     } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
1686         s->dct_unquantize_intra = s->dct_unquantize_h263_intra;
1687         s->dct_unquantize_inter = s->dct_unquantize_h263_inter;
1688     } else {
1689         s->dct_unquantize_intra = s->dct_unquantize_mpeg1_intra;
1690         s->dct_unquantize_inter = s->dct_unquantize_mpeg1_inter;
1691     }
1692
         /* dct_error_sum being set is expected to imply that noise reduction
          * is enabled on an encoding context (see the assert). */
1693     if (s->dct_error_sum) {
1694         assert(s->noise_reduction && s->encoding);
1695         update_noise_reduction(s);
1696     }
1697
1698     return 0;
1699 }
1700
1701 static void write_pass1_stats(MpegEncContext *s)
1702 {
1703     snprintf(s->avctx->stats_out, 256,
1704              "in:%d out:%d type:%d q:%d itex:%d ptex:%d mv:%d misc:%d "
1705              "fcode:%d bcode:%d mc-var:%d var:%d icount:%d skipcount:%d "
1706              "hbits:%d;\n",
1707              s->current_picture_ptr->f->display_picture_number,
1708              s->current_picture_ptr->f->coded_picture_number,
1709              s->pict_type,
1710              s->current_picture.f->quality,
1711              s->i_tex_bits,
1712              s->p_tex_bits,
1713              s->mv_bits,
1714              s->misc_bits,
1715              s->f_code,
1716              s->b_code,
1717              s->current_picture.mc_mb_var_sum,
1718              s->current_picture.mb_var_sum,
1719              s->i_count, s->skip_count,
1720              s->header_bits);
1721 }
1722
1723 int ff_mpv_encode_picture(AVCodecContext *avctx, AVPacket *pkt,
1724                           const AVFrame *pic_arg, int *got_packet)
1725 {
1726     MpegEncContext *s = avctx->priv_data;
1727     int i, stuffing_count, ret;
1728     int context_count = s->slice_context_count;
1729
1730     s->picture_in_gop_number++;
1731
1732     if (load_input_picture(s, pic_arg) < 0)
1733         return -1;
1734
1735     if (select_input_picture(s) < 0) {
1736         return -1;
1737     }
1738
1739     /* output? */
1740     if (s->new_picture.f->data[0]) {
1741         uint8_t *sd;
1742         if (!pkt->data &&
1743             (ret = ff_alloc_packet(pkt, s->mb_width*s->mb_height*MAX_MB_BYTES)) < 0)
1744             return ret;
1745         if (s->mb_info) {
1746             s->mb_info_ptr = av_packet_new_side_data(pkt,
1747                                  AV_PKT_DATA_H263_MB_INFO,
1748                                  s->mb_width*s->mb_height*12);
1749             s->prev_mb_info = s->last_mb_info = s->mb_info_size = 0;
1750         }
1751
1752         for (i = 0; i < context_count; i++) {
1753             int start_y = s->thread_context[i]->start_mb_y;
1754             int   end_y = s->thread_context[i]->  end_mb_y;
1755             int h       = s->mb_height;
1756             uint8_t *start = pkt->data + (size_t)(((int64_t) pkt->size) * start_y / h);
1757             uint8_t *end   = pkt->data + (size_t)(((int64_t) pkt->size) *   end_y / h);
1758
1759             init_put_bits(&s->thread_context[i]->pb, start, end - start);
1760         }
1761
1762         s->pict_type = s->new_picture.f->pict_type;
1763         //emms_c();
1764         ret = frame_start(s);
1765         if (ret < 0)
1766             return ret;
1767 vbv_retry:
1768         if (encode_picture(s, s->picture_number) < 0)
1769             return -1;
1770
1771 #if FF_API_STAT_BITS
1772 FF_DISABLE_DEPRECATION_WARNINGS
1773         avctx->header_bits = s->header_bits;
1774         avctx->mv_bits     = s->mv_bits;
1775         avctx->misc_bits   = s->misc_bits;
1776         avctx->i_tex_bits  = s->i_tex_bits;
1777         avctx->p_tex_bits  = s->p_tex_bits;
1778         avctx->i_count     = s->i_count;
1779         // FIXME f/b_count in avctx
1780         avctx->p_count     = s->mb_num - s->i_count - s->skip_count;
1781         avctx->skip_count  = s->skip_count;
1782 FF_ENABLE_DEPRECATION_WARNINGS
1783 #endif
1784
1785         frame_end(s);
1786
1787         sd = av_packet_new_side_data(pkt, AV_PKT_DATA_QUALITY_FACTOR,
1788                                      sizeof(int));
1789         if (!sd)
1790             return AVERROR(ENOMEM);
1791         *(int *)sd = s->current_picture.f->quality;
1792
1793         if (CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG)
1794             ff_mjpeg_encode_picture_trailer(&s->pb, s->header_bits);
1795
1796         if (avctx->rc_buffer_size) {
1797             RateControlContext *rcc = &s->rc_context;
1798             int max_size = rcc->buffer_index * avctx->rc_max_available_vbv_use;
1799
1800             if (put_bits_count(&s->pb) > max_size &&
1801                 s->lambda < s->lmax) {
1802                 s->next_lambda = FFMAX(s->lambda + 1, s->lambda *
1803                                        (s->qscale + 1) / s->qscale);
1804                 if (s->adaptive_quant) {
1805                     int i;
1806                     for (i = 0; i < s->mb_height * s->mb_stride; i++)
1807                         s->lambda_table[i] =
1808                             FFMAX(s->lambda_table[i] + 1,
1809                                   s->lambda_table[i] * (s->qscale + 1) /
1810                                   s->qscale);
1811                 }
1812                 s->mb_skipped = 0;        // done in frame_start()
1813                 // done in encode_picture() so we must undo it
1814                 if (s->pict_type == AV_PICTURE_TYPE_P) {
1815                     if (s->flipflop_rounding          ||
1816                         s->codec_id == AV_CODEC_ID_H263P ||
1817                         s->codec_id == AV_CODEC_ID_MPEG4)
1818                         s->no_rounding ^= 1;
1819                 }
1820                 if (s->pict_type != AV_PICTURE_TYPE_B) {
1821                     s->time_base       = s->last_time_base;
1822                     s->last_non_b_time = s->time - s->pp_time;
1823                 }
1824                 for (i = 0; i < context_count; i++) {
1825                     PutBitContext *pb = &s->thread_context[i]->pb;
1826                     init_put_bits(pb, pb->buf, pb->buf_end - pb->buf);
1827                 }
1828                 goto vbv_retry;
1829             }
1830
1831             assert(s->avctx->rc_max_rate);
1832         }
1833
1834         if (s->avctx->flags & AV_CODEC_FLAG_PASS1)
1835             write_pass1_stats(s);
1836
1837         for (i = 0; i < 4; i++) {
1838             s->current_picture_ptr->encoding_error[i] = s->current_picture.encoding_error[i];
1839             avctx->error[i] += s->current_picture_ptr->encoding_error[i];
1840         }
1841
1842         if (s->avctx->flags & AV_CODEC_FLAG_PASS1)
1843             assert(put_bits_count(&s->pb) == s->header_bits + s->mv_bits +
1844                                              s->misc_bits + s->i_tex_bits +
1845                                              s->p_tex_bits);
1846         flush_put_bits(&s->pb);
1847         s->frame_bits  = put_bits_count(&s->pb);
1848
1849         stuffing_count = ff_vbv_update(s, s->frame_bits);
1850         if (stuffing_count) {
1851             if (s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb) >> 3) <
1852                     stuffing_count + 50) {
1853                 av_log(s->avctx, AV_LOG_ERROR, "stuffing too large\n");
1854                 return -1;
1855             }
1856
1857             switch (s->codec_id) {
1858             case AV_CODEC_ID_MPEG1VIDEO:
1859             case AV_CODEC_ID_MPEG2VIDEO:
1860                 while (stuffing_count--) {
1861                     put_bits(&s->pb, 8, 0);
1862                 }
1863             break;
1864             case AV_CODEC_ID_MPEG4:
1865                 put_bits(&s->pb, 16, 0);
1866                 put_bits(&s->pb, 16, 0x1C3);
1867                 stuffing_count -= 4;
1868                 while (stuffing_count--) {
1869                     put_bits(&s->pb, 8, 0xFF);
1870                 }
1871             break;
1872             default:
1873                 av_log(s->avctx, AV_LOG_ERROR, "vbv buffer overflow\n");
1874             }
1875             flush_put_bits(&s->pb);
1876             s->frame_bits  = put_bits_count(&s->pb);
1877         }
1878
1879         /* update MPEG-1/2 vbv_delay for CBR */
1880         if (s->avctx->rc_max_rate                          &&
1881             s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
1882             s->out_format == FMT_MPEG1                     &&
1883             90000LL * (avctx->rc_buffer_size - 1) <=
1884                 s->avctx->rc_max_rate * 0xFFFFLL) {
1885             AVCPBProperties *props;
1886             size_t props_size;
1887
1888             int vbv_delay, min_delay;
1889             double inbits  = s->avctx->rc_max_rate *
1890                              av_q2d(s->avctx->time_base);
1891             int    minbits = s->frame_bits - 8 *
1892                              (s->vbv_delay_ptr - s->pb.buf - 1);
1893             double bits    = s->rc_context.buffer_index + minbits - inbits;
1894
1895             if (bits < 0)
1896                 av_log(s->avctx, AV_LOG_ERROR,
1897                        "Internal error, negative bits\n");
1898
1899             assert(s->repeat_first_field == 0);
1900
1901             vbv_delay = bits * 90000 / s->avctx->rc_max_rate;
1902             min_delay = (minbits * 90000LL + s->avctx->rc_max_rate - 1) /
1903                         s->avctx->rc_max_rate;
1904
1905             vbv_delay = FFMAX(vbv_delay, min_delay);
1906
1907             assert(vbv_delay < 0xFFFF);
1908
1909             s->vbv_delay_ptr[0] &= 0xF8;
1910             s->vbv_delay_ptr[0] |= vbv_delay >> 13;
1911             s->vbv_delay_ptr[1]  = vbv_delay >> 5;
1912             s->vbv_delay_ptr[2] &= 0x07;
1913             s->vbv_delay_ptr[2] |= vbv_delay << 3;
1914
1915             props = av_cpb_properties_alloc(&props_size);
1916             if (!props)
1917                 return AVERROR(ENOMEM);
1918             props->vbv_delay = vbv_delay * 300;
1919
1920             ret = av_packet_add_side_data(pkt, AV_PKT_DATA_CPB_PROPERTIES,
1921                                           (uint8_t*)props, props_size);
1922             if (ret < 0) {
1923                 av_freep(&props);
1924                 return ret;
1925             }
1926
1927 #if FF_API_VBV_DELAY
1928 FF_DISABLE_DEPRECATION_WARNINGS
1929             avctx->vbv_delay     = vbv_delay * 300;
1930 FF_ENABLE_DEPRECATION_WARNINGS
1931 #endif
1932         }
1933         s->total_bits     += s->frame_bits;
1934 #if FF_API_STAT_BITS
1935 FF_DISABLE_DEPRECATION_WARNINGS
1936         avctx->frame_bits  = s->frame_bits;
1937 FF_ENABLE_DEPRECATION_WARNINGS
1938 #endif
1939
1940
1941         pkt->pts = s->current_picture.f->pts;
1942         if (!s->low_delay && s->pict_type != AV_PICTURE_TYPE_B) {
1943             if (!s->current_picture.f->coded_picture_number)
1944                 pkt->dts = pkt->pts - s->dts_delta;
1945             else
1946                 pkt->dts = s->reordered_pts;
1947             s->reordered_pts = pkt->pts;
1948         } else
1949             pkt->dts = pkt->pts;
1950         if (s->current_picture.f->key_frame)
1951             pkt->flags |= AV_PKT_FLAG_KEY;
1952         if (s->mb_info)
1953             av_packet_shrink_side_data(pkt, AV_PKT_DATA_H263_MB_INFO, s->mb_info_size);
1954     } else {
1955         s->frame_bits = 0;
1956     }
1957     assert((s->frame_bits & 7) == 0);
1958
1959     pkt->size = s->frame_bits / 8;
1960     *got_packet = !!pkt->size;
1961     return 0;
1962 }
1963
1964 static inline void dct_single_coeff_elimination(MpegEncContext *s,
1965                                                 int n, int threshold)
1966 {
1967     static const char tab[64] = {
1968         3, 2, 2, 1, 1, 1, 1, 1,
1969         1, 1, 1, 1, 1, 1, 1, 1,
1970         1, 1, 1, 1, 1, 1, 1, 1,
1971         0, 0, 0, 0, 0, 0, 0, 0,
1972         0, 0, 0, 0, 0, 0, 0, 0,
1973         0, 0, 0, 0, 0, 0, 0, 0,
1974         0, 0, 0, 0, 0, 0, 0, 0,
1975         0, 0, 0, 0, 0, 0, 0, 0
1976     };
1977     int score = 0;
1978     int run = 0;
1979     int i;
1980     int16_t *block = s->block[n];
1981     const int last_index = s->block_last_index[n];
1982     int skip_dc;
1983
1984     if (threshold < 0) {
1985         skip_dc = 0;
1986         threshold = -threshold;
1987     } else
1988         skip_dc = 1;
1989
1990     /* Are all we could set to zero already zero? */
1991     if (last_index <= skip_dc - 1)
1992         return;
1993
1994     for (i = 0; i <= last_index; i++) {
1995         const int j = s->intra_scantable.permutated[i];
1996         const int level = FFABS(block[j]);
1997         if (level == 1) {
1998             if (skip_dc && i == 0)
1999                 continue;
2000             score += tab[run];
2001             run = 0;
2002         } else if (level > 1) {
2003             return;
2004         } else {
2005             run++;
2006         }
2007     }
2008     if (score >= threshold)
2009         return;
2010     for (i = skip_dc; i <= last_index; i++) {
2011         const int j = s->intra_scantable.permutated[i];
2012         block[j] = 0;
2013     }
2014     if (block[0])
2015         s->block_last_index[n] = 0;
2016     else
2017         s->block_last_index[n] = -1;
2018 }
2019
2020 static inline void clip_coeffs(MpegEncContext *s, int16_t *block,
2021                                int last_index)
2022 {
2023     int i;
2024     const int maxlevel = s->max_qcoeff;
2025     const int minlevel = s->min_qcoeff;
2026     int overflow = 0;
2027
2028     if (s->mb_intra) {
2029         i = 1; // skip clipping of intra dc
2030     } else
2031         i = 0;
2032
2033     for (; i <= last_index; i++) {
2034         const int j = s->intra_scantable.permutated[i];
2035         int level = block[j];
2036
2037         if (level > maxlevel) {
2038             level = maxlevel;
2039             overflow++;
2040         } else if (level < minlevel) {
2041             level = minlevel;
2042             overflow++;
2043         }
2044
2045         block[j] = level;
2046     }
2047
2048     if (overflow && s->avctx->mb_decision == FF_MB_DECISION_SIMPLE)
2049         av_log(s->avctx, AV_LOG_INFO,
2050                "warning, clipping %d dct coefficients to %d..%d\n",
2051                overflow, minlevel, maxlevel);
2052 }
2053
/**
 * Compute a per-pixel weight for an 8x8 block from the local pixel spread.
 *
 * For every pixel the neighbourhood of up to 3x3 samples, clipped to the
 * 8x8 block, is examined; the weight is
 * 36 * ff_sqrt(count * sum_of_squares - sum * sum) / count.
 *
 * @param weight output, 64 entries stored row by row (x + 8 * y)
 * @param ptr    top-left sample of the 8x8 block
 * @param stride distance between two rows of ptr
 */
static void get_visual_weight(int16_t *weight, uint8_t *ptr, int stride)
{
    int row, col;

    // FIXME optimize
    for (row = 0; row < 8; row++) {
        const int top    = FFMAX(row - 1, 0);
        const int bottom = FFMIN(8, row + 2);      /* exclusive */

        for (col = 0; col < 8; col++) {
            const int left    = FFMAX(col - 1, 0);
            const int right   = FFMIN(8, col + 2); /* exclusive */
            const int samples = (bottom - top) * (right - left);
            int total   = 0;
            int squares = 0;
            int ny, nx;

            for (ny = top; ny < bottom; ny++) {
                const uint8_t *line = ptr + ny * stride;

                for (nx = left; nx < right; nx++) {
                    const int v = line[nx];

                    total   += v;
                    squares += v * v;
                }
            }

            weight[col + 8 * row] =
                (36 * ff_sqrt(samples * squares - total * total)) / samples;
        }
    }
}
2077
/**
 * Encode the macroblock at (s->mb_x, s->mb_y).
 *
 * Steps, in order: optional per-macroblock quantizer update, source pixel
 * addressing (through the edge emulation buffer when the macroblock reaches
 * past the picture), pixel fetch for intra macroblocks or motion compensation
 * plus difference for inter macroblocks (including the optional
 * progressive/interlaced DCT decision), DCT and quantization with optional
 * noise-shaping refinement and single-coefficient elimination, and finally
 * the codec-specific macroblock writer selected by s->codec_id.
 *
 * @param s               encoder context
 * @param motion_x        forwarded to the codec-specific macroblock writer
 * @param motion_y        forwarded to the codec-specific macroblock writer
 * @param mb_block_height chroma height used for chroma addressing and
 *                        chroma edge emulation
 * @param mb_block_count  number of 8x8 blocks handled per macroblock
 */
2078 static av_always_inline void encode_mb_internal(MpegEncContext *s,
2079                                                 int motion_x, int motion_y,
2080                                                 int mb_block_height,
2081                                                 int mb_block_count)
2082 {
         /* weight and orig are only filled when quantizer_noise_shaping is
          * enabled. */
2083     int16_t weight[8][64];
2084     int16_t orig[8][64];
2085     const int mb_x = s->mb_x;
2086     const int mb_y = s->mb_y;
2087     int i;
2088     int skip_dct[8];
2089     int dct_offset = s->linesize * 8; // default for progressive frames
2090     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
2091     ptrdiff_t wrap_y, wrap_c;
2092
2093     for (i = 0; i < mb_block_count; i++)
2094         skip_dct[i] = s->skipdct;
2095
         /* Per-macroblock quantizer: take lambda from the table and, unless
          * QP_RD is active, derive dquant from the qscale table with the
          * restrictions applied below for H.263-format / MPEG-4 output. */
2096     if (s->adaptive_quant) {
2097         const int last_qp = s->qscale;
2098         const int mb_xy = mb_x + mb_y * s->mb_stride;
2099
2100         s->lambda = s->lambda_table[mb_xy];
2101         update_qscale(s);
2102
2103         if (!(s->mpv_flags & FF_MPV_FLAG_QP_RD)) {
2104             s->qscale = s->current_picture_ptr->qscale_table[mb_xy];
2105             s->dquant = s->qscale - last_qp;
2106
2107             if (s->out_format == FMT_H263) {
2108                 s->dquant = av_clip(s->dquant, -2, 2);
2109
2110                 if (s->codec_id == AV_CODEC_ID_MPEG4) {
2111                     if (!s->mb_intra) {
2112                         if (s->pict_type == AV_PICTURE_TYPE_B) {
2113                             if (s->dquant & 1 || s->mv_dir & MV_DIRECT)
2114                                 s->dquant = 0;
2115                         }
2116                         if (s->mv_type == MV_TYPE_8X8)
2117                             s->dquant = 0;
2118                     }
2119                 }
2120             }
2121         }
2122         ff_set_qscale(s, last_qp + s->dquant);
2123     } else if (s->mpv_flags & FF_MPV_FLAG_QP_RD)
2124         ff_set_qscale(s, s->qscale + s->dquant);
2125
         /* Locate the source samples of this macroblock in new_picture. */
2126     wrap_y = s->linesize;
2127     wrap_c = s->uvlinesize;
2128     ptr_y  = s->new_picture.f->data[0] +
2129              (mb_y * 16 * wrap_y)              + mb_x * 16;
2130     ptr_cb = s->new_picture.f->data[1] +
2131              (mb_y * mb_block_height * wrap_c) + mb_x * 8;
2132     ptr_cr = s->new_picture.f->data[2] +
2133              (mb_y * mb_block_height * wrap_c) + mb_x * 8;
2134
         /* The macroblock reaches past the right or bottom picture border:
          * read luma and both chroma planes through the edge emulation
          * buffer instead. */
2135     if (mb_x * 16 + 16 > s->width || mb_y * 16 + 16 > s->height) {
2136         uint8_t *ebuf = s->sc.edge_emu_buffer + 32;
2137         s->vdsp.emulated_edge_mc(ebuf, ptr_y,
2138                                  wrap_y, wrap_y,
2139                                  16, 16, mb_x * 16, mb_y * 16,
2140                                  s->width, s->height);
2141         ptr_y = ebuf;
2142         s->vdsp.emulated_edge_mc(ebuf + 18 * wrap_y, ptr_cb,
2143                                  wrap_c, wrap_c,
2144                                  8, mb_block_height, mb_x * 8, mb_y * 8,
2145                                  s->width >> 1, s->height >> 1);
2146         ptr_cb = ebuf + 18 * wrap_y;
2147         s->vdsp.emulated_edge_mc(ebuf + 18 * wrap_y + 8, ptr_cr,
2148                                  wrap_c, wrap_c,
2149                                  8, mb_block_height, mb_x * 8, mb_y * 8,
2150                                  s->width >> 1, s->height >> 1);
2151         ptr_cr = ebuf + 18 * wrap_y + 8;
2152     }
2153
2154     if (s->mb_intra) {
             /* Intra: optionally pick interlaced DCT by comparing a
              * progressive score (biased by -400) with an interlaced score;
              * choosing interlaced changes dct_offset and doubles wrap_y
              * (and wrap_c for 4:2:2) for everything that follows. */
2155         if (s->avctx->flags & AV_CODEC_FLAG_INTERLACED_DCT) {
2156             int progressive_score, interlaced_score;
2157
2158             s->interlaced_dct = 0;
2159             progressive_score = s->mecc.ildct_cmp[4](s, ptr_y, NULL, wrap_y, 8) +
2160                                 s->mecc.ildct_cmp[4](s, ptr_y + wrap_y * 8,
2161                                                      NULL, wrap_y, 8) - 400;
2162
2163             if (progressive_score > 0) {
2164                 interlaced_score = s->mecc.ildct_cmp[4](s, ptr_y,
2165                                                         NULL, wrap_y * 2, 8) +
2166                                    s->mecc.ildct_cmp[4](s, ptr_y + wrap_y,
2167                                                         NULL, wrap_y * 2, 8);
2168                 if (progressive_score > interlaced_score) {
2169                     s->interlaced_dct = 1;
2170
2171                     dct_offset = wrap_y;
2172                     wrap_y <<= 1;
2173                     if (s->chroma_format == CHROMA_422)
2174                         wrap_c <<= 1;
2175                 }
2176             }
2177         }
2178
             /* Load the four luma blocks straight from the source. */
2179         s->pdsp.get_pixels(s->block[0], ptr_y,                  wrap_y);
2180         s->pdsp.get_pixels(s->block[1], ptr_y + 8,              wrap_y);
2181         s->pdsp.get_pixels(s->block[2], ptr_y + dct_offset,     wrap_y);
2182         s->pdsp.get_pixels(s->block[3], ptr_y + dct_offset + 8, wrap_y);
2183
2184         if (s->avctx->flags & AV_CODEC_FLAG_GRAY) {
2185             skip_dct[4] = 1;
2186             skip_dct[5] = 1;
2187         } else {
2188             s->pdsp.get_pixels(s->block[4], ptr_cb, wrap_c);
2189             s->pdsp.get_pixels(s->block[5], ptr_cr, wrap_c);
2190             if (!s->chroma_y_shift) { /* 422 */
2191                 s->pdsp.get_pixels(s->block[6],
2192                                    ptr_cb + (dct_offset >> 1), wrap_c);
2193                 s->pdsp.get_pixels(s->block[7],
2194                                    ptr_cr + (dct_offset >> 1), wrap_c);
2195             }
2196         }
2197     } else {
             /* Inter: build the prediction in s->dest[] and code the
              * difference between source and prediction. */
2198         op_pixels_func (*op_pix)[4];
2199         qpel_mc_func (*op_qpix)[16];
2200         uint8_t *dest_y, *dest_cb, *dest_cr;
2201
2202         dest_y  = s->dest[0];
2203         dest_cb = s->dest[1];
2204         dest_cr = s->dest[2];
2205
2206         if ((!s->no_rounding) || s->pict_type == AV_PICTURE_TYPE_B) {
2207             op_pix  = s->hdsp.put_pixels_tab;
2208             op_qpix = s->qdsp.put_qpel_pixels_tab;
2209         } else {
2210             op_pix  = s->hdsp.put_no_rnd_pixels_tab;
2211             op_qpix = s->qdsp.put_no_rnd_qpel_pixels_tab;
2212         }
2213
             /* After a forward prediction the operators are switched to the
              * averaging variants, so a following backward prediction is
              * averaged with it. */
2214         if (s->mv_dir & MV_DIR_FORWARD) {
2215             ff_mpv_motion(s, dest_y, dest_cb, dest_cr, 0,
2216                           s->last_picture.f->data,
2217                           op_pix, op_qpix);
2218             op_pix  = s->hdsp.avg_pixels_tab;
2219             op_qpix = s->qdsp.avg_qpel_pixels_tab;
2220         }
2221         if (s->mv_dir & MV_DIR_BACKWARD) {
2222             ff_mpv_motion(s, dest_y, dest_cb, dest_cr, 1,
2223                           s->next_picture.f->data,
2224                           op_pix, op_qpix);
2225         }
2226
             /* Same progressive/interlaced decision as in the intra path,
              * but scored on prediction versus source, with an extra -400
              * bias when ildct_cmp is FF_CMP_VSSE. */
2227         if (s->avctx->flags & AV_CODEC_FLAG_INTERLACED_DCT) {
2228             int progressive_score, interlaced_score;
2229
2230             s->interlaced_dct = 0;
2231             progressive_score = s->mecc.ildct_cmp[0](s, dest_y, ptr_y, wrap_y, 8) +
2232                                 s->mecc.ildct_cmp[0](s, dest_y + wrap_y * 8,
2233                                                      ptr_y + wrap_y * 8,
2234                                                      wrap_y, 8) - 400;
2235
2236             if (s->avctx->ildct_cmp == FF_CMP_VSSE)
2237                 progressive_score -= 400;
2238
2239             if (progressive_score > 0) {
2240                 interlaced_score = s->mecc.ildct_cmp[0](s, dest_y, ptr_y,
2241                                                         wrap_y * 2, 8) +
2242                                    s->mecc.ildct_cmp[0](s, dest_y + wrap_y,
2243                                                         ptr_y + wrap_y,
2244                                                         wrap_y * 2, 8);
2245
2246                 if (progressive_score > interlaced_score) {
2247                     s->interlaced_dct = 1;
2248
2249                     dct_offset = wrap_y;
2250                     wrap_y <<= 1;
2251                     if (s->chroma_format == CHROMA_422)
2252                         wrap_c <<= 1;
2253                 }
2254             }
2255         }
2256
2257         s->pdsp.diff_pixels(s->block[0], ptr_y, dest_y, wrap_y);
2258         s->pdsp.diff_pixels(s->block[1], ptr_y + 8, dest_y + 8, wrap_y);
2259         s->pdsp.diff_pixels(s->block[2], ptr_y + dct_offset,
2260                             dest_y + dct_offset, wrap_y);
2261         s->pdsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8,
2262                             dest_y + dct_offset + 8, wrap_y);
2263
2264         if (s->avctx->flags & AV_CODEC_FLAG_GRAY) {
2265             skip_dct[4] = 1;
2266             skip_dct[5] = 1;
2267         } else {
2268             s->pdsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
2269             s->pdsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
2270             if (!s->chroma_y_shift) { /* 422 */
2271                 s->pdsp.diff_pixels(s->block[6], ptr_cb + (dct_offset >> 1),
2272                                     dest_cb + (dct_offset >> 1), wrap_c);
2273                 s->pdsp.diff_pixels(s->block[7], ptr_cr + (dct_offset >> 1),
2274                                     dest_cr + (dct_offset >> 1), wrap_c);
2275             }
2276         }
2277         /* pre quantization */
             /* When the stored motion-compensated variance of this macroblock
              * is below 2 * qscale^2, each block whose SAD against the
              * prediction is under 20 * qscale is marked to skip the DCT. */
2278         if (s->current_picture.mc_mb_var[s->mb_stride * mb_y + mb_x] <
2279                 2 * s->qscale * s->qscale) {
2280             // FIXME optimize
2281             if (s->mecc.sad[1](NULL, ptr_y, dest_y, wrap_y, 8) < 20 * s->qscale)
2282                 skip_dct[0] = 1;
2283             if (s->mecc.sad[1](NULL, ptr_y + 8, dest_y + 8, wrap_y, 8) < 20 * s->qscale)
2284                 skip_dct[1] = 1;
2285             if (s->mecc.sad[1](NULL, ptr_y + dct_offset, dest_y + dct_offset,
2286                                wrap_y, 8) < 20 * s->qscale)
2287                 skip_dct[2] = 1;
2288             if (s->mecc.sad[1](NULL, ptr_y + dct_offset + 8, dest_y + dct_offset + 8,
2289                                wrap_y, 8) < 20 * s->qscale)
2290                 skip_dct[3] = 1;
2291             if (s->mecc.sad[1](NULL, ptr_cb, dest_cb, wrap_c, 8) < 20 * s->qscale)
2292                 skip_dct[4] = 1;
2293             if (s->mecc.sad[1](NULL, ptr_cr, dest_cr, wrap_c, 8) < 20 * s->qscale)
2294                 skip_dct[5] = 1;
2295             if (!s->chroma_y_shift) { /* 422 */
2296                 if (s->mecc.sad[1](NULL, ptr_cb + (dct_offset >> 1),
2297                                    dest_cb + (dct_offset >> 1),
2298                                    wrap_c, 8) < 20 * s->qscale)
2299                     skip_dct[6] = 1;
2300                 if (s->mecc.sad[1](NULL, ptr_cr + (dct_offset >> 1),
2301                                    dest_cr + (dct_offset >> 1),
2302                                    wrap_c, 8) < 20 * s->qscale)
2303                     skip_dct[7] = 1;
2304             }
2305         }
2306     }
2307
         /* Noise shaping: compute visual weights from the source pixels for
          * every block that will be transformed and keep a copy of the
          * unquantized blocks for the refinement pass. */
2308     if (s->quantizer_noise_shaping) {
2309         if (!skip_dct[0])
2310             get_visual_weight(weight[0], ptr_y                 , wrap_y);
2311         if (!skip_dct[1])
2312             get_visual_weight(weight[1], ptr_y              + 8, wrap_y);
2313         if (!skip_dct[2])
2314             get_visual_weight(weight[2], ptr_y + dct_offset    , wrap_y);
2315         if (!skip_dct[3])
2316             get_visual_weight(weight[3], ptr_y + dct_offset + 8, wrap_y);
2317         if (!skip_dct[4])
2318             get_visual_weight(weight[4], ptr_cb                , wrap_c);
2319         if (!skip_dct[5])
2320             get_visual_weight(weight[5], ptr_cr                , wrap_c);
2321         if (!s->chroma_y_shift) { /* 422 */
2322             if (!skip_dct[6])
2323                 get_visual_weight(weight[6], ptr_cb + (dct_offset >> 1),
2324                                   wrap_c);
2325             if (!skip_dct[7])
2326                 get_visual_weight(weight[7], ptr_cr + (dct_offset >> 1),
2327                                   wrap_c);
2328         }
2329         memcpy(orig[0], s->block[0], sizeof(int16_t) * 64 * mb_block_count);
2330     }
2331
2332     /* DCT & quantize */
2333     assert(s->out_format != FMT_MJPEG || s->qscale == 8);
2334     {
2335         for (i = 0; i < mb_block_count; i++) {
2336             if (!skip_dct[i]) {
2337                 int overflow;
2338                 s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow);
2339                 // FIXME we could decide to change to quantizer instead of
2340                 // clipping
2341                 // JS: I don't think that would be a good idea it could lower
2342                 //     quality instead of improve it. Just INTRADC clipping
2343                 //     deserves changes in quantizer
2344                 if (overflow)
2345                     clip_coeffs(s, s->block[i], s->block_last_index[i]);
2346             } else
2347                 s->block_last_index[i] = -1;
2348         }
2349         if (s->quantizer_noise_shaping) {
2350             for (i = 0; i < mb_block_count; i++) {
2351                 if (!skip_dct[i]) {
2352                     s->block_last_index[i] =
2353                         dct_quantize_refine(s, s->block[i], weight[i],
2354                                             orig[i], i, s->qscale);
2355                 }
2356             }
2357         }
2358
             /* Single-coefficient elimination applies to inter macroblocks
              * only: blocks 0..3 use the luma threshold, the remaining
              * blocks the chroma threshold. */
2359         if (s->luma_elim_threshold && !s->mb_intra)
2360             for (i = 0; i < 4; i++)
2361                 dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
2362         if (s->chroma_elim_threshold && !s->mb_intra)
2363             for (i = 4; i < mb_block_count; i++)
2364                 dct_single_coeff_elimination(s, i, s->chroma_elim_threshold);
2365
2366         if (s->mpv_flags & FF_MPV_FLAG_CBP_RD) {
2367             for (i = 0; i < mb_block_count; i++) {
2368                 if (s->block_last_index[i] == -1)
2369                     s->coded_score[i] = INT_MAX / 256;
2370             }
2371         }
2372     }
2373
         /* Gray-only intra macroblocks: blocks 4 and 5 get a single DC
          * coefficient computed from 1024 and c_dc_scale. */
2374     if ((s->avctx->flags & AV_CODEC_FLAG_GRAY) && s->mb_intra) {
2375         s->block_last_index[4] =
2376         s->block_last_index[5] = 0;
2377         s->block[4][0] =
2378         s->block[5][0] = (1024 + s->c_dc_scale / 2) / s->c_dc_scale;
2379     }
2380
2381     // non c quantize code returns incorrect block_last_index FIXME
         /* Recompute the last index by searching backwards for the last
          * non-zero coefficient in scan order. */
2382     if (s->alternate_scan && s->dct_quantize != ff_dct_quantize_c) {
2383         for (i = 0; i < mb_block_count; i++) {
2384             int j;
2385             if (s->block_last_index[i] > 0) {
2386                 for (j = 63; j > 0; j--) {
2387                     if (s->block[i][s->intra_scantable.permutated[j]])
2388                         break;
2389                 }
2390                 s->block_last_index[i] = j;
2391             }
2392         }
2393     }
2394
2395     /* huffman encode */
2396     switch(s->codec_id){ //FIXME funct ptr could be slightly faster
2397     case AV_CODEC_ID_MPEG1VIDEO:
2398     case AV_CODEC_ID_MPEG2VIDEO:
2399         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
2400             ff_mpeg1_encode_mb(s, s->block, motion_x, motion_y);
2401         break;
2402     case AV_CODEC_ID_MPEG4:
2403         if (CONFIG_MPEG4_ENCODER)
2404             ff_mpeg4_encode_mb(s, s->block, motion_x, motion_y);
2405         break;
2406     case AV_CODEC_ID_MSMPEG4V2:
2407     case AV_CODEC_ID_MSMPEG4V3:
2408     case AV_CODEC_ID_WMV1:
2409         if (CONFIG_MSMPEG4_ENCODER)
2410             ff_msmpeg4_encode_mb(s, s->block, motion_x, motion_y);
2411         break;
2412     case AV_CODEC_ID_WMV2:
2413         if (CONFIG_WMV2_ENCODER)
2414             ff_wmv2_encode_mb(s, s->block, motion_x, motion_y);
2415         break;
2416     case AV_CODEC_ID_H261:
2417         if (CONFIG_H261_ENCODER)
2418             ff_h261_encode_mb(s, s->block, motion_x, motion_y);
2419         break;
2420     case AV_CODEC_ID_H263:
2421     case AV_CODEC_ID_H263P:
2422     case AV_CODEC_ID_FLV1:
2423     case AV_CODEC_ID_RV10:
2424     case AV_CODEC_ID_RV20:
2425         if (CONFIG_H263_ENCODER)
2426             ff_h263_encode_mb(s, s->block, motion_x, motion_y);
2427         break;
2428     case AV_CODEC_ID_MJPEG:
2429         if (CONFIG_MJPEG_ENCODER)
2430             ff_mjpeg_encode_mb(s, s->block);
2431         break;
2432     default:
2433         assert(0);
2434     }
2435 }
2436
2437 static av_always_inline void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
2438 {
2439     if (s->chroma_format == CHROMA_420) encode_mb_internal(s, motion_x, motion_y,  8, 6);
2440     else                                encode_mb_internal(s, motion_x, motion_y, 16, 8);
2441 }
2442
2443 static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int type){
2444     int i;
2445
2446     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2447
2448     /* MPEG-1 */
2449     d->mb_skip_run= s->mb_skip_run;
2450     for(i=0; i<3; i++)
2451         d->last_dc[i] = s->last_dc[i];
2452
2453     /* statistics */
2454     d->mv_bits= s->mv_bits;
2455     d->i_tex_bits= s->i_tex_bits;
2456     d->p_tex_bits= s->p_tex_bits;
2457     d->i_count= s->i_count;
2458     d->f_count= s->f_count;
2459     d->b_count= s->b_count;
2460     d->skip_count= s->skip_count;
2461     d->misc_bits= s->misc_bits;
2462     d->last_bits= 0;
2463
2464     d->mb_skipped= 0;
2465     d->qscale= s->qscale;
2466     d->dquant= s->dquant;
2467
2468     d->esc3_level_length= s->esc3_level_length;
2469 }
2470
2471 static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){
2472     int i;
2473
2474     memcpy(d->mv, s->mv, 2*4*2*sizeof(int));
2475     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2476
2477     /* MPEG-1 */
2478     d->mb_skip_run= s->mb_skip_run;
2479     for(i=0; i<3; i++)
2480         d->last_dc[i] = s->last_dc[i];
2481
2482     /* statistics */
2483     d->mv_bits= s->mv_bits;
2484     d->i_tex_bits= s->i_tex_bits;
2485     d->p_tex_bits= s->p_tex_bits;
2486     d->i_count= s->i_count;
2487     d->f_count= s->f_count;
2488     d->b_count= s->b_count;
2489     d->skip_count= s->skip_count;
2490     d->misc_bits= s->misc_bits;
2491
2492     d->mb_intra= s->mb_intra;
2493     d->mb_skipped= s->mb_skipped;
2494     d->mv_type= s->mv_type;
2495     d->mv_dir= s->mv_dir;
2496     d->pb= s->pb;
2497     if(s->data_partitioning){
2498         d->pb2= s->pb2;
2499         d->tex_pb= s->tex_pb;
2500     }
2501     d->block= s->block;
2502     for(i=0; i<8; i++)
2503         d->block_last_index[i]= s->block_last_index[i];
2504     d->interlaced_dct= s->interlaced_dct;
2505     d->qscale= s->qscale;
2506
2507     d->esc3_level_length= s->esc3_level_length;
2508 }
2509
2510 static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type,
2511                            PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2],
2512                            int *dmin, int *next_block, int motion_x, int motion_y)
2513 {
2514     int score;
2515     uint8_t *dest_backup[3];
2516
2517     copy_context_before_encode(s, backup, type);
2518
2519     s->block= s->blocks[*next_block];
2520     s->pb= pb[*next_block];
2521     if(s->data_partitioning){
2522         s->pb2   = pb2   [*next_block];
2523         s->tex_pb= tex_pb[*next_block];
2524     }
2525
2526     if(*next_block){
2527         memcpy(dest_backup, s->dest, sizeof(s->dest));
2528         s->dest[0] = s->sc.rd_scratchpad;
2529         s->dest[1] = s->sc.rd_scratchpad + 16*s->linesize;
2530         s->dest[2] = s->sc.rd_scratchpad + 16*s->linesize + 8;
2531         assert(s->linesize >= 32); //FIXME
2532     }
2533
2534     encode_mb(s, motion_x, motion_y);
2535
2536     score= put_bits_count(&s->pb);
2537     if(s->data_partitioning){
2538         score+= put_bits_count(&s->pb2);
2539         score+= put_bits_count(&s->tex_pb);
2540     }
2541
2542     if(s->avctx->mb_decision == FF_MB_DECISION_RD){
2543         ff_mpv_decode_mb(s, s->block);
2544
2545         score *= s->lambda2;
2546         score += sse_mb(s) << FF_LAMBDA_SHIFT;
2547     }
2548
2549     if(*next_block){
2550         memcpy(s->dest, dest_backup, sizeof(s->dest));
2551     }
2552
2553     if(score<*dmin){
2554         *dmin= score;
2555         *next_block^=1;
2556
2557         copy_context_after_encode(best, s, type);
2558     }
2559 }
2560
2561 static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride){
2562     uint32_t *sq = ff_square_tab + 256;
2563     int acc=0;
2564     int x,y;
2565
2566     if(w==16 && h==16)
2567         return s->mecc.sse[0](NULL, src1, src2, stride, 16);
2568     else if(w==8 && h==8)
2569         return s->mecc.sse[1](NULL, src1, src2, stride, 8);
2570
2571     for(y=0; y<h; y++){
2572         for(x=0; x<w; x++){
2573             acc+= sq[src1[x + y*stride] - src2[x + y*stride]];
2574         }
2575     }
2576
2577     assert(acc>=0);
2578
2579     return acc;
2580 }
2581
2582 static int sse_mb(MpegEncContext *s){
2583     int w= 16;
2584     int h= 16;
2585
2586     if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
2587     if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
2588
2589     if(w==16 && h==16)
2590       if(s->avctx->mb_cmp == FF_CMP_NSSE){
2591         return s->mecc.nsse[0](s, s->new_picture.f->data[0] + s->mb_x * 16 + s->mb_y * s->linesize   * 16, s->dest[0], s->linesize,   16) +
2592                s->mecc.nsse[1](s, s->new_picture.f->data[1] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[1], s->uvlinesize,  8) +
2593                s->mecc.nsse[1](s, s->new_picture.f->data[2] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[2], s->uvlinesize,  8);
2594       }else{
2595         return s->mecc.sse[0](NULL, s->new_picture.f->data[0] + s->mb_x * 16 + s->mb_y * s->linesize   * 16, s->dest[0], s->linesize,   16) +
2596                s->mecc.sse[1](NULL, s->new_picture.f->data[1] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[1], s->uvlinesize,  8) +
2597                s->mecc.sse[1](NULL, s->new_picture.f->data[2] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[2], s->uvlinesize,  8);
2598       }
2599     else
2600         return  sse(s, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], w, h, s->linesize)
2601                +sse(s, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize)
2602                +sse(s, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize);
2603 }
2604
2605 static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){
2606     MpegEncContext *s= *(void**)arg;
2607
2608
2609     s->me.pre_pass=1;
2610     s->me.dia_size= s->avctx->pre_dia_size;
2611     s->first_slice_line=1;
2612     for(s->mb_y= s->end_mb_y-1; s->mb_y >= s->start_mb_y; s->mb_y--) {
2613         for(s->mb_x=s->mb_width-1; s->mb_x >=0 ;s->mb_x--) {
2614             ff_pre_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2615         }
2616         s->first_slice_line=0;
2617     }
2618
2619     s->me.pre_pass=0;
2620
2621     return 0;
2622 }
2623
2624 static int estimate_motion_thread(AVCodecContext *c, void *arg){
2625     MpegEncContext *s= *(void**)arg;
2626
2627     s->me.dia_size= s->avctx->dia_size;
2628     s->first_slice_line=1;
2629     for(s->mb_y= s->start_mb_y; s->mb_y < s->end_mb_y; s->mb_y++) {
2630         s->mb_x=0; //for block init below
2631         ff_init_block_index(s);
2632         for(s->mb_x=0; s->mb_x < s->mb_width; s->mb_x++) {
2633             s->block_index[0]+=2;
2634             s->block_index[1]+=2;
2635             s->block_index[2]+=2;
2636             s->block_index[3]+=2;
2637
2638             /* compute motion vector & mb_type and store in context */
2639             if(s->pict_type==AV_PICTURE_TYPE_B)
2640                 ff_estimate_b_frame_motion(s, s->mb_x, s->mb_y);
2641             else
2642                 ff_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2643         }
2644         s->first_slice_line=0;
2645     }
2646     return 0;
2647 }
2648
2649 static int mb_var_thread(AVCodecContext *c, void *arg){
2650     MpegEncContext *s= *(void**)arg;
2651     int mb_x, mb_y;
2652
2653     for(mb_y=s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2654         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2655             int xx = mb_x * 16;
2656             int yy = mb_y * 16;
2657             uint8_t *pix = s->new_picture.f->data[0] + (yy * s->linesize) + xx;
2658             int varc;
2659             int sum = s->mpvencdsp.pix_sum(pix, s->linesize);
2660
2661             varc = (s->mpvencdsp.pix_norm1(pix, s->linesize) -
2662                     (((unsigned) sum * sum) >> 8) + 500 + 128) >> 8;
2663
2664             s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc;
2665             s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
2666             s->me.mb_var_sum_temp    += varc;
2667         }
2668     }
2669     return 0;
2670 }
2671
2672 static void write_slice_end(MpegEncContext *s){
2673     if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4){
2674         if(s->partitioned_frame){
2675             ff_mpeg4_merge_partitions(s);
2676         }
2677
2678         ff_mpeg4_stuffing(&s->pb);
2679     }else if(CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG){
2680         ff_mjpeg_encode_stuffing(&s->pb);
2681     }
2682
2683     avpriv_align_put_bits(&s->pb);
2684     flush_put_bits(&s->pb);
2685
2686     if ((s->avctx->flags & AV_CODEC_FLAG_PASS1) && !s->partitioned_frame)
2687         s->misc_bits+= get_bits_diff(s);
2688 }
2689
2690 static void write_mb_info(MpegEncContext *s)
2691 {
2692     uint8_t *ptr = s->mb_info_ptr + s->mb_info_size - 12;
2693     int offset = put_bits_count(&s->pb);
2694     int mba  = s->mb_x + s->mb_width * (s->mb_y % s->gob_index);
2695     int gobn = s->mb_y / s->gob_index;
2696     int pred_x, pred_y;
2697     if (CONFIG_H263_ENCODER)
2698         ff_h263_pred_motion(s, 0, 0, &pred_x, &pred_y);
2699     bytestream_put_le32(&ptr, offset);
2700     bytestream_put_byte(&ptr, s->qscale);
2701     bytestream_put_byte(&ptr, gobn);
2702     bytestream_put_le16(&ptr, mba);
2703     bytestream_put_byte(&ptr, pred_x); /* hmv1 */
2704     bytestream_put_byte(&ptr, pred_y); /* vmv1 */
2705     /* 4MV not implemented */
2706     bytestream_put_byte(&ptr, 0); /* hmv2 */
2707     bytestream_put_byte(&ptr, 0); /* vmv2 */
2708 }
2709
2710 static void update_mb_info(MpegEncContext *s, int startcode)
2711 {
2712     if (!s->mb_info)
2713         return;
2714     if (put_bits_count(&s->pb) - s->prev_mb_info*8 >= s->mb_info*8) {
2715         s->mb_info_size += 12;
2716         s->prev_mb_info = s->last_mb_info;
2717     }
2718     if (startcode) {
2719         s->prev_mb_info = put_bits_count(&s->pb)/8;
2720         /* This might have incremented mb_info_size above, and we return without
2721          * actually writing any info into that slot yet. But in that case,
2722          * this will be called again at the start of the after writing the
2723          * start code, actually writing the mb info. */
2724         return;
2725     }
2726
2727     s->last_mb_info = put_bits_count(&s->pb)/8;
2728     if (!s->mb_info_size)
2729         s->mb_info_size += 12;
2730     write_mb_info(s);
2731 }
2732
2733 static int encode_thread(AVCodecContext *c, void *arg){
2734     MpegEncContext *s= *(void**)arg;
2735     int mb_x, mb_y;
2736     int chr_h= 16>>s->chroma_y_shift;
2737     int i, j;
2738     MpegEncContext best_s = { 0 }, backup_s;
2739     uint8_t bit_buf[2][MAX_MB_BYTES];
2740     uint8_t bit_buf2[2][MAX_MB_BYTES];
2741     uint8_t bit_buf_tex[2][MAX_MB_BYTES];
2742     PutBitContext pb[2], pb2[2], tex_pb[2];
2743
2744     for(i=0; i<2; i++){
2745         init_put_bits(&pb    [i], bit_buf    [i], MAX_MB_BYTES);
2746         init_put_bits(&pb2   [i], bit_buf2   [i], MAX_MB_BYTES);
2747         init_put_bits(&tex_pb[i], bit_buf_tex[i], MAX_MB_BYTES);
2748     }
2749
2750     s->last_bits= put_bits_count(&s->pb);
2751     s->mv_bits=0;
2752     s->misc_bits=0;
2753     s->i_tex_bits=0;
2754     s->p_tex_bits=0;
2755     s->i_count=0;
2756     s->f_count=0;
2757     s->b_count=0;
2758     s->skip_count=0;
2759
2760     for(i=0; i<3; i++){
2761         /* init last dc values */
2762         /* note: quant matrix value (8) is implied here */
2763         s->last_dc[i] = 128 << s->intra_dc_precision;
2764
2765         s->current_picture.encoding_error[i] = 0;
2766     }
2767     s->mb_skip_run = 0;
2768     memset(s->last_mv, 0, sizeof(s->last_mv));
2769
2770     s->last_mv_dir = 0;
2771
2772     switch(s->codec_id){
2773     case AV_CODEC_ID_H263:
2774     case AV_CODEC_ID_H263P:
2775     case AV_CODEC_ID_FLV1:
2776         if (CONFIG_H263_ENCODER)
2777             s->gob_index = H263_GOB_HEIGHT(s->height);
2778         break;
2779     case AV_CODEC_ID_MPEG4:
2780         if(CONFIG_MPEG4_ENCODER && s->partitioned_frame)
2781             ff_mpeg4_init_partitions(s);
2782         break;
2783     }
2784
2785     s->resync_mb_x=0;
2786     s->resync_mb_y=0;
2787     s->first_slice_line = 1;
2788     s->ptr_lastgob = s->pb.buf;
2789     for(mb_y= s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2790         s->mb_x=0;
2791         s->mb_y= mb_y;
2792
2793         ff_set_qscale(s, s->qscale);
2794         ff_init_block_index(s);
2795
2796         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2797             int xy= mb_y*s->mb_stride + mb_x; // removed const, H261 needs to adjust this
2798             int mb_type= s->mb_type[xy];
2799 //            int d;
2800             int dmin= INT_MAX;
2801             int dir;
2802
2803             if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < MAX_MB_BYTES){
2804                 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2805                 return -1;
2806             }
2807             if(s->data_partitioning){
2808                 if(   s->pb2   .buf_end - s->pb2   .buf - (put_bits_count(&s->    pb2)>>3) < MAX_MB_BYTES
2809                    || s->tex_pb.buf_end - s->tex_pb.buf - (put_bits_count(&s->tex_pb )>>3) < MAX_MB_BYTES){
2810                     av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2811                     return -1;
2812                 }
2813             }
2814
2815             s->mb_x = mb_x;
2816             s->mb_y = mb_y;  // moved into loop, can get changed by H.261
2817             ff_update_block_index(s);
2818
2819             if(CONFIG_H261_ENCODER && s->codec_id == AV_CODEC_ID_H261){
2820                 ff_h261_reorder_mb_index(s);
2821                 xy= s->mb_y*s->mb_stride + s->mb_x;
2822                 mb_type= s->mb_type[xy];
2823             }
2824
2825             /* write gob / video packet header  */
2826             if(s->rtp_mode){
2827                 int current_packet_size, is_gob_start;
2828
2829                 current_packet_size= ((put_bits_count(&s->pb)+7)>>3) - (s->ptr_lastgob - s->pb.buf);
2830
2831                 is_gob_start = s->rtp_payload_size &&
2832                                current_packet_size >= s->rtp_payload_size &&
2833                                mb_y + mb_x > 0;
2834
2835                 if(s->start_mb_y == mb_y && mb_y > 0 && mb_x==0) is_gob_start=1;
2836
2837                 switch(s->codec_id){
2838                 case AV_CODEC_ID_H263:
2839                 case AV_CODEC_ID_H263P:
2840                     if(!s->h263_slice_structured)
2841                         if(s->mb_x || s->mb_y%s->gob_index) is_gob_start=0;
2842                     break;
2843                 case AV_CODEC_ID_MPEG2VIDEO:
2844                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
2845                 case AV_CODEC_ID_MPEG1VIDEO:
2846                     if(s->mb_skip_run) is_gob_start=0;
2847                     break;
2848                 }
2849
2850                 if(is_gob_start){
2851                     if(s->start_mb_y != mb_y || mb_x!=0){
2852                         write_slice_end(s);
2853
2854                         if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4 && s->partitioned_frame){
2855                             ff_mpeg4_init_partitions(s);
2856                         }
2857                     }
2858
2859                     assert((put_bits_count(&s->pb)&7) == 0);
2860                     current_packet_size= put_bits_ptr(&s->pb) - s->ptr_lastgob;
2861
2862                     if (s->error_rate && s->resync_mb_x + s->resync_mb_y > 0) {
2863                         int r= put_bits_count(&s->pb)/8 + s->picture_number + 16 + s->mb_x + s->mb_y;
2864                         int d = 100 / s->error_rate;
2865                         if(r % d == 0){
2866                             current_packet_size=0;
2867                             s->pb.buf_ptr= s->ptr_lastgob;
2868                             assert(put_bits_ptr(&s->pb) == s->ptr_lastgob);
2869                         }
2870                     }
2871
2872 #if FF_API_RTP_CALLBACK
2873 FF_DISABLE_DEPRECATION_WARNINGS
2874                     if (s->avctx->rtp_callback){
2875                         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width + mb_x - s->resync_mb_x;
2876                         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, current_packet_size, number_mb);
2877                     }
2878 FF_ENABLE_DEPRECATION_WARNINGS
2879 #endif
2880                     update_mb_info(s, 1);
2881
2882                     switch(s->codec_id){
2883                     case AV_CODEC_ID_MPEG4:
2884                         if (CONFIG_MPEG4_ENCODER) {
2885                             ff_mpeg4_encode_video_packet_header(s);
2886                             ff_mpeg4_clean_buffers(s);
2887                         }
2888                     break;
2889                     case AV_CODEC_ID_MPEG1VIDEO:
2890                     case AV_CODEC_ID_MPEG2VIDEO:
2891                         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER) {
2892                             ff_mpeg1_encode_slice_header(s);
2893                             ff_mpeg1_clean_buffers(s);
2894                         }
2895                     break;
2896                     case AV_CODEC_ID_H263:
2897                     case AV_CODEC_ID_H263P:
2898                         if (CONFIG_H263_ENCODER)
2899                             ff_h263_encode_gob_header(s, mb_y);
2900                     break;
2901                     }
2902
2903                     if (s->avctx->flags & AV_CODEC_FLAG_PASS1) {
2904                         int bits= put_bits_count(&s->pb);
2905                         s->misc_bits+= bits - s->last_bits;
2906                         s->last_bits= bits;
2907                     }
2908
2909                     s->ptr_lastgob += current_packet_size;
2910                     s->first_slice_line=1;
2911                     s->resync_mb_x=mb_x;
2912                     s->resync_mb_y=mb_y;
2913                 }
2914             }
2915
2916             if(  (s->resync_mb_x   == s->mb_x)
2917                && s->resync_mb_y+1 == s->mb_y){
2918                 s->first_slice_line=0;
2919             }
2920
2921             s->mb_skipped=0;
2922             s->dquant=0; //only for QP_RD
2923
2924             update_mb_info(s, 0);
2925
2926             if (mb_type & (mb_type-1) || (s->mpv_flags & FF_MPV_FLAG_QP_RD)) { // more than 1 MB type possible or FF_MPV_FLAG_QP_RD
2927                 int next_block=0;
2928                 int pb_bits_count, pb2_bits_count, tex_pb_bits_count;
2929
2930                 copy_context_before_encode(&backup_s, s, -1);
2931                 backup_s.pb= s->pb;
2932                 best_s.data_partitioning= s->data_partitioning;
2933                 best_s.partitioned_frame= s->partitioned_frame;
2934                 if(s->data_partitioning){
2935                     backup_s.pb2= s->pb2;
2936                     backup_s.tex_pb= s->tex_pb;
2937                 }
2938
2939                 if(mb_type&CANDIDATE_MB_TYPE_INTER){
2940                     s->mv_dir = MV_DIR_FORWARD;
2941                     s->mv_type = MV_TYPE_16X16;
2942                     s->mb_intra= 0;
2943                     s->mv[0][0][0] = s->p_mv_table[xy][0];
2944                     s->mv[0][0][1] = s->p_mv_table[xy][1];
2945                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER, pb, pb2, tex_pb,
2946                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2947                 }
2948                 if(mb_type&CANDIDATE_MB_TYPE_INTER_I){
2949                     s->mv_dir = MV_DIR_FORWARD;
2950                     s->mv_type = MV_TYPE_FIELD;
2951                     s->mb_intra= 0;
2952                     for(i=0; i<2; i++){
2953                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
2954                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
2955                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
2956                     }
2957                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER_I, pb, pb2, tex_pb,
2958                                  &dmin, &next_block, 0, 0);
2959                 }
2960                 if(mb_type&CANDIDATE_MB_TYPE_SKIPPED){
2961                     s->mv_dir = MV_DIR_FORWARD;
2962                     s->mv_type = MV_TYPE_16X16;
2963                     s->mb_intra= 0;
2964                     s->mv[0][0][0] = 0;
2965                     s->mv[0][0][1] = 0;
2966                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_SKIPPED, pb, pb2, tex_pb,
2967                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2968                 }
2969                 if(mb_type&CANDIDATE_MB_TYPE_INTER4V){
2970                     s->mv_dir = MV_DIR_FORWARD;
2971                     s->mv_type = MV_TYPE_8X8;
2972                     s->mb_intra= 0;
2973                     for(i=0; i<4; i++){
2974                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
2975                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
2976                     }
2977                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER4V, pb, pb2, tex_pb,
2978                                  &dmin, &next_block, 0, 0);
2979                 }
2980                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD){
2981                     s->mv_dir = MV_DIR_FORWARD;
2982                     s->mv_type = MV_TYPE_16X16;
2983                     s->mb_intra= 0;
2984                     s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
2985                     s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
2986                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD, pb, pb2, tex_pb,
2987                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2988                 }
2989                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD){
2990                     s->mv_dir = MV_DIR_BACKWARD;
2991                     s->mv_type = MV_TYPE_16X16;
2992                     s->mb_intra= 0;
2993                     s->mv[1][0][0] = s->b_back_mv_table[xy][0];
2994                     s->mv[1][0][1] = s->b_back_mv_table[xy][1];
2995                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD, pb, pb2, tex_pb,
2996                                  &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]);
2997                 }
2998                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR){
2999                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3000                     s->mv_type = MV_TYPE_16X16;
3001                     s->mb_intra= 0;
3002                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
3003                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
3004                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
3005                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
3006                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR, pb, pb2, tex_pb,
3007                                  &dmin, &next_block, 0, 0);
3008                 }
3009                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD_I){
3010                     s->mv_dir = MV_DIR_FORWARD;
3011                     s->mv_type = MV_TYPE_FIELD;
3012                     s->mb_intra= 0;
3013                     for(i=0; i<2; i++){
3014                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
3015                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
3016                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
3017                     }
3018                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD_I, pb, pb2, tex_pb,
3019                                  &dmin, &next_block, 0, 0);
3020                 }
3021                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD_I){
3022                     s->mv_dir = MV_DIR_BACKWARD;
3023                     s->mv_type = MV_TYPE_FIELD;
3024                     s->mb_intra= 0;
3025                     for(i=0; i<2; i++){
3026                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
3027                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
3028                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
3029                     }
3030                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD_I, pb, pb2, tex_pb,
3031                                  &dmin, &next_block, 0, 0);
3032                 }
3033                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR_I){
3034                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3035                     s->mv_type = MV_TYPE_FIELD;
3036                     s->mb_intra= 0;
3037                     for(dir=0; dir<2; dir++){
3038                         for(i=0; i<2; i++){
3039                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
3040                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
3041                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
3042                         }
3043                     }
3044                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR_I, pb, pb2, tex_pb,
3045                                  &dmin, &next_block, 0, 0);
3046                 }
3047                 if(mb_type&CANDIDATE_MB_TYPE_INTRA){
3048                     s->mv_dir = 0;
3049                     s->mv_type = MV_TYPE_16X16;
3050                     s->mb_intra= 1;
3051                     s->mv[0][0][0] = 0;
3052                     s->mv[0][0][1] = 0;
3053                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTRA, pb, pb2, tex_pb,
3054                                  &dmin, &next_block, 0, 0);
3055                     if(s->h263_pred || s->h263_aic){
3056                         if(best_s.mb_intra)
3057                             s->mbintra_table[mb_x + mb_y*s->mb_stride]=1;
3058                         else
3059                             ff_clean_intra_table_entries(s); //old mode?
3060                     }
3061                 }
3062
3063                 if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) && dmin < INT_MAX) {
3064                     if(best_s.mv_type==MV_TYPE_16X16){ //FIXME move 4mv after QPRD
3065                         const int last_qp= backup_s.qscale;
3066                         int qpi, qp, dc[6];
3067                         int16_t ac[6][16];
3068                         const int mvdir= (best_s.mv_dir&MV_DIR_BACKWARD) ? 1 : 0;
3069                         static const int dquant_tab[4]={-1,1,-2,2};
3070
3071                         assert(backup_s.dquant == 0);
3072
3073                         //FIXME intra
3074                         s->mv_dir= best_s.mv_dir;
3075                         s->mv_type = MV_TYPE_16X16;
3076                         s->mb_intra= best_s.mb_intra;
3077                         s->mv[0][0][0] = best_s.mv[0][0][0];
3078                         s->mv[0][0][1] = best_s.mv[0][0][1];
3079                         s->mv[1][0][0] = best_s.mv[1][0][0];
3080                         s->mv[1][0][1] = best_s.mv[1][0][1];
3081
3082                         qpi = s->pict_type == AV_PICTURE_TYPE_B ? 2 : 0;
3083                         for(; qpi<4; qpi++){
3084                             int dquant= dquant_tab[qpi];
3085                             qp= last_qp + dquant;
3086                             if(qp < s->avctx->qmin || qp > s->avctx->qmax)
3087                                 continue;
3088                             backup_s.dquant= dquant;
3089                             if(s->mb_intra && s->dc_val[0]){
3090                                 for(i=0; i<6; i++){
3091                                     dc[i]= s->dc_val[0][ s->block_index[i] ];
3092                                     memcpy(ac[i], s->ac_val[0][s->block_index[i]], sizeof(int16_t)*16);
3093                                 }
3094                             }
3095
3096                             encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
3097                                          &dmin, &next_block, s->mv[mvdir][0][0], s->mv[mvdir][0][1]);
3098                             if(best_s.qscale != qp){
3099                                 if(s->mb_intra && s->dc_val[0]){
3100                                     for(i=0; i<6; i++){
3101                                         s->dc_val[0][ s->block_index[i] ]= dc[i];
3102                                         memcpy(s->ac_val[0][s->block_index[i]], ac[i], sizeof(int16_t)*16);
3103                                     }
3104                                 }
3105                             }
3106                         }
3107                     }
3108                 }
3109                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT){
3110                     int mx= s->b_direct_mv_table[xy][0];
3111                     int my= s->b_direct_mv_table[xy][1];
3112
3113                     backup_s.dquant = 0;
3114                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
3115                     s->mb_intra= 0;
3116                     ff_mpeg4_set_direct_mv(s, mx, my);
3117                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
3118                                  &dmin, &next_block, mx, my);
3119                 }
3120                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT0){
3121                     backup_s.dquant = 0;
3122                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
3123                     s->mb_intra= 0;
3124                     ff_mpeg4_set_direct_mv(s, 0, 0);
3125                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
3126                                  &dmin, &next_block, 0, 0);
3127                 }
3128                 if (!best_s.mb_intra && s->mpv_flags & FF_MPV_FLAG_SKIP_RD) {
3129                     int coded=0;
3130                     for(i=0; i<6; i++)
3131                         coded |= s->block_last_index[i];
3132                     if(coded){
3133                         int mx,my;
3134                         memcpy(s->mv, best_s.mv, sizeof(s->mv));
3135                         if(CONFIG_MPEG4_ENCODER && best_s.mv_dir & MV_DIRECT){
3136                             mx=my=0; //FIXME find the one we actually used
3137                             ff_mpeg4_set_direct_mv(s, mx, my);
3138                         }else if(best_s.mv_dir&MV_DIR_BACKWARD){
3139                             mx= s->mv[1][0][0];
3140                             my= s->mv[1][0][1];
3141                         }else{
3142                             mx= s->mv[0][0][0];
3143                             my= s->mv[0][0][1];
3144                         }
3145
3146                         s->mv_dir= best_s.mv_dir;
3147                         s->mv_type = best_s.mv_type;
3148                         s->mb_intra= 0;
3149 /*                        s->mv[0][0][0] = best_s.mv[0][0][0];
3150                         s->mv[0][0][1] = best_s.mv[0][0][1];
3151                         s->mv[1][0][0] = best_s.mv[1][0][0];
3152                         s->mv[1][0][1] = best_s.mv[1][0][1];*/
3153                         backup_s.dquant= 0;
3154                         s->skipdct=1;
3155                         encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
3156                                         &dmin, &next_block, mx, my);
3157                         s->skipdct=0;
3158                     }
3159                 }
3160
3161                 s->current_picture.qscale_table[xy] = best_s.qscale;
3162
3163                 copy_context_after_encode(s, &best_s, -1);
3164
3165                 pb_bits_count= put_bits_count(&s->pb);
3166                 flush_put_bits(&s->pb);
3167                 avpriv_copy_bits(&backup_s.pb, bit_buf[next_block^1], pb_bits_count);
3168                 s->pb= backup_s.pb;
3169
3170                 if(s->data_partitioning){
3171                     pb2_bits_count= put_bits_count(&s->pb2);
3172                     flush_put_bits(&s->pb2);
3173                     avpriv_copy_bits(&backup_s.pb2, bit_buf2[next_block^1], pb2_bits_count);
3174                     s->pb2= backup_s.pb2;
3175
3176                     tex_pb_bits_count= put_bits_count(&s->tex_pb);
3177                     flush_put_bits(&s->tex_pb);
3178                     avpriv_copy_bits(&backup_s.tex_pb, bit_buf_tex[next_block^1], tex_pb_bits_count);
3179                     s->tex_pb= backup_s.tex_pb;
3180                 }
3181                 s->last_bits= put_bits_count(&s->pb);
3182
3183                 if (CONFIG_H263_ENCODER &&
3184                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
3185                     ff_h263_update_motion_val(s);
3186
3187                 if(next_block==0){ //FIXME 16 vs linesize16
3188                     s->hdsp.put_pixels_tab[0][0](s->dest[0], s->sc.rd_scratchpad                     , s->linesize  ,16);
3189                     s->hdsp.put_pixels_tab[1][0](s->dest[1], s->sc.rd_scratchpad + 16*s->linesize    , s->uvlinesize, 8);
3190                     s->hdsp.put_pixels_tab[1][0](s->dest[2], s->sc.rd_scratchpad + 16*s->linesize + 8, s->uvlinesize, 8);
3191                 }
3192
3193                 if(s->avctx->mb_decision == FF_MB_DECISION_BITS)
3194                     ff_mpv_decode_mb(s, s->block);
3195             } else {
3196                 int motion_x = 0, motion_y = 0;
3197                 s->mv_type=MV_TYPE_16X16;
3198                 // only one MB-Type possible
3199
3200                 switch(mb_type){
3201                 case CANDIDATE_MB_TYPE_INTRA:
3202                     s->mv_dir = 0;
3203                     s->mb_intra= 1;
3204                     motion_x= s->mv[0][0][0] = 0;
3205                     motion_y= s->mv[0][0][1] = 0;
3206                     break;
3207                 case CANDIDATE_MB_TYPE_INTER:
3208                     s->mv_dir = MV_DIR_FORWARD;
3209                     s->mb_intra= 0;
3210                     motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
3211                     motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
3212                     break;
3213                 case CANDIDATE_MB_TYPE_INTER_I:
3214                     s->mv_dir = MV_DIR_FORWARD;
3215                     s->mv_type = MV_TYPE_FIELD;
3216                     s->mb_intra= 0;
3217                     for(i=0; i<2; i++){
3218                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
3219                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
3220                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
3221                     }
3222                     break;
3223                 case CANDIDATE_MB_TYPE_INTER4V:
3224                     s->mv_dir = MV_DIR_FORWARD;
3225                     s->mv_type = MV_TYPE_8X8;
3226                     s->mb_intra= 0;
3227                     for(i=0; i<4; i++){
3228                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
3229                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
3230                     }
3231                     break;
3232                 case CANDIDATE_MB_TYPE_DIRECT:
3233                     if (CONFIG_MPEG4_ENCODER) {
3234                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
3235                         s->mb_intra= 0;
3236                         motion_x=s->b_direct_mv_table[xy][0];
3237                         motion_y=s->b_direct_mv_table[xy][1];
3238                         ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
3239                     }
3240                     break;
3241                 case CANDIDATE_MB_TYPE_DIRECT0:
3242                     if (CONFIG_MPEG4_ENCODER) {
3243                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
3244                         s->mb_intra= 0;
3245                         ff_mpeg4_set_direct_mv(s, 0, 0);
3246                     }
3247                     break;
3248                 case CANDIDATE_MB_TYPE_BIDIR:
3249                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3250                     s->mb_intra= 0;
3251                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
3252                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
3253                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
3254                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
3255                     break;
3256                 case CANDIDATE_MB_TYPE_BACKWARD:
3257                     s->mv_dir = MV_DIR_BACKWARD;
3258                     s->mb_intra= 0;
3259                     motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
3260                     motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
3261                     break;
3262                 case CANDIDATE_MB_TYPE_FORWARD:
3263                     s->mv_dir = MV_DIR_FORWARD;
3264                     s->mb_intra= 0;
3265                     motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
3266                     motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
3267                     break;
3268                 case CANDIDATE_MB_TYPE_FORWARD_I:
3269                     s->mv_dir = MV_DIR_FORWARD;
3270                     s->mv_type = MV_TYPE_FIELD;
3271                     s->mb_intra= 0;
3272                     for(i=0; i<2; i++){
3273                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
3274                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
3275                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
3276                     }
3277                     break;
3278                 case CANDIDATE_MB_TYPE_BACKWARD_I:
3279                     s->mv_dir = MV_DIR_BACKWARD;
3280                     s->mv_type = MV_TYPE_FIELD;
3281                     s->mb_intra= 0;
3282                     for(i=0; i<2; i++){
3283                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
3284                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
3285                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
3286                     }
3287                     break;
3288                 case CANDIDATE_MB_TYPE_BIDIR_I:
3289                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3290                     s->mv_type = MV_TYPE_FIELD;
3291                     s->mb_intra= 0;
3292                     for(dir=0; dir<2; dir++){
3293                         for(i=0; i<2; i++){
3294                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
3295                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
3296                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
3297                         }
3298                     }
3299                     break;
3300                 default:
3301                     av_log(s->avctx, AV_LOG_ERROR, "illegal MB type\n");
3302                 }
3303
3304                 encode_mb(s, motion_x, motion_y);
3305
3306                 // RAL: Update last macroblock type
3307                 s->last_mv_dir = s->mv_dir;
3308
3309                 if (CONFIG_H263_ENCODER &&
3310                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
3311                     ff_h263_update_motion_val(s);
3312
3313                 ff_mpv_decode_mb(s, s->block);
3314             }
3315
3316             /* clean the MV table in IPS frames for direct mode in B-frames */
3317             if(s->mb_intra /* && I,P,S_TYPE */){
3318                 s->p_mv_table[xy][0]=0;
3319                 s->p_mv_table[xy][1]=0;
3320             }
3321
3322             if (s->avctx->flags & AV_CODEC_FLAG_PSNR) {
3323                 int w= 16;
3324                 int h= 16;
3325
3326                 if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
3327                 if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
3328
3329                 s->current_picture.encoding_error[0] += sse(
3330                     s, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
3331                     s->dest[0], w, h, s->linesize);
3332                 s->current_picture.encoding_error[1] += sse(
3333                     s, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3334                     s->dest[1], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3335                 s->current_picture.encoding_error[2] += sse(
3336                     s, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3337                     s->dest[2], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3338             }
3339             if(s->loop_filter){
3340                 if(CONFIG_H263_ENCODER && s->out_format == FMT_H263)
3341                     ff_h263_loop_filter(s);
3342             }
3343             ff_dlog(s->avctx, "MB %d %d bits\n",
3344                     s->mb_x + s->mb_y * s->mb_stride, put_bits_count(&s->pb));
3345         }
3346     }
3347
3348     //not beautiful here but we must write it before flushing so it has to be here
3349     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type == AV_PICTURE_TYPE_I)
3350         ff_msmpeg4_encode_ext_header(s);
3351
3352     write_slice_end(s);
3353
3354 #if FF_API_RTP_CALLBACK
3355 FF_DISABLE_DEPRECATION_WARNINGS
3356     /* Send the last GOB if RTP */
3357     if (s->avctx->rtp_callback) {
3358         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width - s->resync_mb_x;
3359         int pdif = put_bits_ptr(&s->pb) - s->ptr_lastgob;
3360         /* Call the RTP callback to send the last GOB */
3361         emms_c();
3362         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, pdif, number_mb);
3363     }
3364 FF_ENABLE_DEPRECATION_WARNINGS
3365 #endif
3366
3367     return 0;
3368 }
3369
3370 #define MERGE(field) dst->field += src->field; src->field=0
3371 static void merge_context_after_me(MpegEncContext *dst, MpegEncContext *src){
3372     MERGE(me.scene_change_score);
3373     MERGE(me.mc_mb_var_sum_temp);
3374     MERGE(me.mb_var_sum_temp);
3375 }
3376
3377 static void merge_context_after_encode(MpegEncContext *dst, MpegEncContext *src){
3378     int i;
3379
3380     MERGE(dct_count[0]); //note, the other dct vars are not part of the context
3381     MERGE(dct_count[1]);
3382     MERGE(mv_bits);
3383     MERGE(i_tex_bits);
3384     MERGE(p_tex_bits);
3385     MERGE(i_count);
3386     MERGE(f_count);
3387     MERGE(b_count);
3388     MERGE(skip_count);
3389     MERGE(misc_bits);
3390     MERGE(er.error_count);
3391     MERGE(padding_bug_score);
3392     MERGE(current_picture.encoding_error[0]);
3393     MERGE(current_picture.encoding_error[1]);
3394     MERGE(current_picture.encoding_error[2]);
3395
3396     if (dst->noise_reduction){
3397         for(i=0; i<64; i++){
3398             MERGE(dct_error_sum[0][i]);
3399             MERGE(dct_error_sum[1][i]);
3400         }
3401     }
3402
3403     assert(put_bits_count(&src->pb) % 8 ==0);
3404     assert(put_bits_count(&dst->pb) % 8 ==0);
3405     avpriv_copy_bits(&dst->pb, src->pb.buf, put_bits_count(&src->pb));
3406     flush_put_bits(&dst->pb);
3407 }
3408
3409 static int estimate_qp(MpegEncContext *s, int dry_run){
3410     if (s->next_lambda){
3411         s->current_picture_ptr->f->quality =
3412         s->current_picture.f->quality = s->next_lambda;
3413         if(!dry_run) s->next_lambda= 0;
3414     } else if (!s->fixed_qscale) {
3415         int quality;
3416         quality = ff_rate_estimate_qscale(s, dry_run);
3417         s->current_picture_ptr->f->quality =
3418         s->current_picture.f->quality = quality;
3419         if (s->current_picture.f->quality < 0)
3420             return -1;
3421     }
3422
3423     if(s->adaptive_quant){
3424         switch(s->codec_id){
3425         case AV_CODEC_ID_MPEG4:
3426             if (CONFIG_MPEG4_ENCODER)
3427                 ff_clean_mpeg4_qscales(s);
3428             break;
3429         case AV_CODEC_ID_H263:
3430         case AV_CODEC_ID_H263P:
3431         case AV_CODEC_ID_FLV1:
3432             if (CONFIG_H263_ENCODER)
3433                 ff_clean_h263_qscales(s);
3434             break;
3435         default:
3436             ff_init_qscale_tab(s);
3437         }
3438
3439         s->lambda= s->lambda_table[0];
3440         //FIXME broken
3441     }else
3442         s->lambda = s->current_picture.f->quality;
3443     update_qscale(s);
3444     return 0;
3445 }
3446
3447 /* must be called before writing the header */
3448 static void set_frame_distances(MpegEncContext * s){
3449     assert(s->current_picture_ptr->f->pts != AV_NOPTS_VALUE);
3450     s->time = s->current_picture_ptr->f->pts * s->avctx->time_base.num;
3451
3452     if(s->pict_type==AV_PICTURE_TYPE_B){
3453         s->pb_time= s->pp_time - (s->last_non_b_time - s->time);
3454         assert(s->pb_time > 0 && s->pb_time < s->pp_time);
3455     }else{
3456         s->pp_time= s->time - s->last_non_b_time;
3457         s->last_non_b_time= s->time;
3458         assert(s->picture_number==0 || s->pp_time > 0);
3459     }
3460 }
3461
3462 static int encode_picture(MpegEncContext *s, int picture_number)
3463 {
3464     int i, ret;
3465     int bits;
3466     int context_count = s->slice_context_count;
3467
3468     s->picture_number = picture_number;
3469
3470     /* Reset the average MB variance */
3471     s->me.mb_var_sum_temp    =
3472     s->me.mc_mb_var_sum_temp = 0;
3473
3474     /* we need to initialize some time vars before we can encode B-frames */
3475     // RAL: Condition added for MPEG1VIDEO
3476     if (s->codec_id == AV_CODEC_ID_MPEG1VIDEO || s->codec_id == AV_CODEC_ID_MPEG2VIDEO || (s->h263_pred && !s->msmpeg4_version))
3477         set_frame_distances(s);
3478     if(CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4)
3479         ff_set_mpeg4_time(s);
3480
3481     s->me.scene_change_score=0;
3482
3483 //    s->lambda= s->current_picture_ptr->quality; //FIXME qscale / ... stuff for ME rate distortion
3484
3485     if(s->pict_type==AV_PICTURE_TYPE_I){
3486         if(s->msmpeg4_version >= 3) s->no_rounding=1;
3487         else                        s->no_rounding=0;
3488     }else if(s->pict_type!=AV_PICTURE_TYPE_B){
3489         if(s->flipflop_rounding || s->codec_id == AV_CODEC_ID_H263P || s->codec_id == AV_CODEC_ID_MPEG4)
3490             s->no_rounding ^= 1;
3491     }
3492
3493     if (s->avctx->flags & AV_CODEC_FLAG_PASS2) {
3494         if (estimate_qp(s,1) < 0)
3495             return -1;
3496         ff_get_2pass_fcode(s);
3497     } else if (!(s->avctx->flags & AV_CODEC_FLAG_QSCALE)) {
3498         if(s->pict_type==AV_PICTURE_TYPE_B)
3499             s->lambda= s->last_lambda_for[s->pict_type];
3500         else
3501             s->lambda= s->last_lambda_for[s->last_non_b_pict_type];
3502         update_qscale(s);
3503     }
3504
3505     s->mb_intra=0; //for the rate distortion & bit compare functions
3506     for(i=1; i<context_count; i++){
3507         ret = ff_update_duplicate_context(s->thread_context[i], s);
3508         if (ret < 0)
3509             return ret;
3510     }
3511
3512     if(ff_init_me(s)<0)
3513         return -1;
3514
3515     /* Estimate motion for every MB */
3516     if(s->pict_type != AV_PICTURE_TYPE_I){
3517         s->lambda  = (s->lambda  * s->me_penalty_compensation + 128) >> 8;
3518         s->lambda2 = (s->lambda2 * (int64_t) s->me_penalty_compensation + 128) >> 8;
3519         if (s->pict_type != AV_PICTURE_TYPE_B) {
3520             if ((s->me_pre && s->last_non_b_pict_type == AV_PICTURE_TYPE_I) ||
3521                 s->me_pre == 2) {
3522                 s->avctx->execute(s->avctx, pre_estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3523             }
3524         }
3525
3526         s->avctx->execute(s->avctx, estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3527     }else /* if(s->pict_type == AV_PICTURE_TYPE_I) */{
3528         /* I-Frame */
3529         for(i=0; i<s->mb_stride*s->mb_height; i++)
3530             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3531
3532         if(!s->fixed_qscale){
3533             /* finding spatial complexity for I-frame rate control */
3534             s->avctx->execute(s->avctx, mb_var_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3535         }
3536     }
3537     for(i=1; i<context_count; i++){
3538         merge_context_after_me(s, s->thread_context[i]);
3539     }
3540     s->current_picture.mc_mb_var_sum= s->current_picture_ptr->mc_mb_var_sum= s->me.mc_mb_var_sum_temp;
3541     s->current_picture.   mb_var_sum= s->current_picture_ptr->   mb_var_sum= s->me.   mb_var_sum_temp;
3542     emms_c();
3543
3544     if (s->me.scene_change_score > s->scenechange_threshold &&
3545         s->pict_type == AV_PICTURE_TYPE_P) {
3546         s->pict_type= AV_PICTURE_TYPE_I;
3547         for(i=0; i<s->mb_stride*s->mb_height; i++)
3548             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3549         ff_dlog(s, "Scene change detected, encoding as I Frame %d %d\n",
3550                 s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
3551     }
3552
3553     if(!s->umvplus){
3554         if(s->pict_type==AV_PICTURE_TYPE_P || s->pict_type==AV_PICTURE_TYPE_S) {
3555             s->f_code= ff_get_best_fcode(s, s->p_mv_table, CANDIDATE_MB_TYPE_INTER);
3556
3557             if (s->avctx->flags & AV_CODEC_FLAG_INTERLACED_ME) {
3558                 int a,b;
3559                 a= ff_get_best_fcode(s, s->p_field_mv_table[0][0], CANDIDATE_MB_TYPE_INTER_I); //FIXME field_select
3560                 b= ff_get_best_fcode(s, s->p_field_mv_table[1][1], CANDIDATE_MB_TYPE_INTER_I);
3561                 s->f_code= FFMAX3(s->f_code, a, b);
3562             }
3563
3564             ff_fix_long_p_mvs(s);
3565             ff_fix_long_mvs(s, NULL, 0, s->p_mv_table, s->f_code, CANDIDATE_MB_TYPE_INTER, 0);
3566             if (s->avctx->flags & AV_CODEC_FLAG_INTERLACED_ME) {
3567                 int j;
3568                 for(i=0; i<2; i++){
3569                     for(j=0; j<2; j++)
3570                         ff_fix_long_mvs(s, s->p_field_select_table[i], j,
3571                                         s->p_field_mv_table[i][j], s->f_code, CANDIDATE_MB_TYPE_INTER_I, 0);
3572                 }
3573             }
3574         }
3575
3576         if(s->pict_type==AV_PICTURE_TYPE_B){
3577             int a, b;
3578
3579             a = ff_get_best_fcode(s, s->b_forw_mv_table, CANDIDATE_MB_TYPE_FORWARD);
3580             b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3581             s->f_code = FFMAX(a, b);
3582
3583             a = ff_get_best_fcode(s, s->b_back_mv_table, CANDIDATE_MB_TYPE_BACKWARD);
3584             b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3585             s->b_code = FFMAX(a, b);
3586
3587             ff_fix_long_mvs(s, NULL, 0, s->b_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_FORWARD, 1);
3588             ff_fix_long_mvs(s, NULL, 0, s->b_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BACKWARD, 1);
3589             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3590             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3591             if (s->avctx->flags & AV_CODEC_FLAG_INTERLACED_ME) {
3592                 int dir, j;
3593                 for(dir=0; dir<2; dir++){
3594                     for(i=0; i<2; i++){
3595                         for(j=0; j<2; j++){
3596                             int type= dir ? (CANDIDATE_MB_TYPE_BACKWARD_I|CANDIDATE_MB_TYPE_BIDIR_I)
3597                                           : (CANDIDATE_MB_TYPE_FORWARD_I |CANDIDATE_MB_TYPE_BIDIR_I);
3598                             ff_fix_long_mvs(s, s->b_field_select_table[dir][i], j,
3599                                             s->b_field_mv_table[dir][i][j], dir ? s->b_code : s->f_code, type, 1);
3600                         }
3601                     }
3602                 }
3603             }
3604         }
3605     }
3606
3607     if (estimate_qp(s, 0) < 0)
3608         return -1;
3609
3610     if (s->qscale < 3 && s->max_qcoeff <= 128 &&
3611         s->pict_type == AV_PICTURE_TYPE_I &&
3612         !(s->avctx->flags & AV_CODEC_FLAG_QSCALE))
3613         s->qscale= 3; //reduce clipping problems
3614
3615     if (s->out_format == FMT_MJPEG) {
3616         /* for mjpeg, we do include qscale in the matrix */
3617         for(i=1;i<64;i++){
3618             int j = s->idsp.idct_permutation[i];
3619
3620             s->intra_matrix[j] = av_clip_uint8((ff_mpeg1_default_intra_matrix[i] * s->qscale) >> 3);
3621         }
3622         s->y_dc_scale_table=
3623         s->c_dc_scale_table= ff_mpeg2_dc_scale_table[s->intra_dc_precision];
3624         s->intra_matrix[0] = ff_mpeg2_dc_scale_table[s->intra_dc_precision][8];
3625         ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
3626                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
3627         s->qscale= 8;
3628     }
3629
3630     //FIXME var duplication
3631     s->current_picture_ptr->f->key_frame =
3632     s->current_picture.f->key_frame = s->pict_type == AV_PICTURE_TYPE_I; //FIXME pic_ptr
3633     s->current_picture_ptr->f->pict_type =
3634     s->current_picture.f->pict_type = s->pict_type;
3635
3636     if (s->current_picture.f->key_frame)
3637         s->picture_in_gop_number=0;
3638
3639     s->last_bits= put_bits_count(&s->pb);
3640     switch(s->out_format) {
3641     case FMT_MJPEG:
3642         if (CONFIG_MJPEG_ENCODER)
3643             ff_mjpeg_encode_picture_header(s->avctx, &s->pb, &s->intra_scantable,
3644                                            s->pred, s->intra_matrix);
3645         break;
3646     case FMT_H261:
3647         if (CONFIG_H261_ENCODER)
3648             ff_h261_encode_picture_header(s, picture_number);
3649         break;
3650     case FMT_H263:
3651         if (CONFIG_WMV2_ENCODER && s->codec_id == AV_CODEC_ID_WMV2)
3652             ff_wmv2_encode_picture_header(s, picture_number);
3653         else if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
3654             ff_msmpeg4_encode_picture_header(s, picture_number);
3655         else if (CONFIG_MPEG4_ENCODER && s->h263_pred)
3656             ff_mpeg4_encode_picture_header(s, picture_number);
3657         else if (CONFIG_RV10_ENCODER && s->codec_id == AV_CODEC_ID_RV10) {
3658             ret = ff_rv10_encode_picture_header(s, picture_number);
3659             if (ret < 0)
3660                 return ret;
3661         }
3662         else if (CONFIG_RV20_ENCODER && s->codec_id == AV_CODEC_ID_RV20)
3663             ff_rv20_encode_picture_header(s, picture_number);
3664         else if (CONFIG_FLV_ENCODER && s->codec_id == AV_CODEC_ID_FLV1)
3665             ff_flv_encode_picture_header(s, picture_number);
3666         else if (CONFIG_H263_ENCODER)
3667             ff_h263_encode_picture_header(s, picture_number);
3668         break;
3669     case FMT_MPEG1:
3670         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
3671             ff_mpeg1_encode_picture_header(s, picture_number);
3672         break;
3673     default:
3674         assert(0);
3675     }
3676     bits= put_bits_count(&s->pb);
3677     s->header_bits= bits - s->last_bits;
3678
3679     for(i=1; i<context_count; i++){
3680         update_duplicate_context_after_me(s->thread_context[i], s);
3681     }
3682     s->avctx->execute(s->avctx, encode_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3683     for(i=1; i<context_count; i++){
3684         merge_context_after_encode(s, s->thread_context[i]);
3685     }
3686     emms_c();
3687     return 0;
3688 }
3689
3690 static void denoise_dct_c(MpegEncContext *s, int16_t *block){
3691     const int intra= s->mb_intra;
3692     int i;
3693
3694     s->dct_count[intra]++;
3695
3696     for(i=0; i<64; i++){
3697         int level= block[i];
3698
3699         if(level){
3700             if(level>0){
3701                 s->dct_error_sum[intra][i] += level;
3702                 level -= s->dct_offset[intra][i];
3703                 if(level<0) level=0;
3704             }else{
3705                 s->dct_error_sum[intra][i] -= level;
3706                 level += s->dct_offset[intra][i];
3707                 if(level>0) level=0;
3708             }
3709             block[i]= level;
3710         }
3711     }
3712 }
3713
3714 static int dct_quantize_trellis_c(MpegEncContext *s,
3715                                   int16_t *block, int n,
3716                                   int qscale, int *overflow){
3717     const int *qmat;
3718     const uint8_t *scantable= s->intra_scantable.scantable;
3719     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3720     int max=0;
3721     unsigned int threshold1, threshold2;
3722     int bias=0;
3723     int run_tab[65];
3724     int level_tab[65];
3725     int score_tab[65];
3726     int survivor[65];
3727     int survivor_count;
3728     int last_run=0;
3729     int last_level=0;
3730     int last_score= 0;
3731     int last_i;
3732     int coeff[2][64];
3733     int coeff_count[64];
3734     int qmul, qadd, start_i, last_non_zero, i, dc;
3735     const int esc_length= s->ac_esc_length;
3736     uint8_t * length;
3737     uint8_t * last_length;
3738     const int lambda= s->lambda2 >> (FF_LAMBDA_SHIFT - 6);
3739
3740     s->fdsp.fdct(block);
3741
3742     if(s->dct_error_sum)
3743         s->denoise_dct(s, block);
3744     qmul= qscale*16;
3745     qadd= ((qscale-1)|1)*8;
3746
3747     if (s->mb_intra) {
3748         int q;
3749         if (!s->h263_aic) {
3750             if (n < 4)
3751                 q = s->y_dc_scale;
3752             else
3753                 q = s->c_dc_scale;
3754             q = q << 3;
3755         } else{
3756             /* For AIC we skip quant/dequant of INTRADC */
3757             q = 1 << 3;
3758             qadd=0;
3759         }
3760
3761         /* note: block[0] is assumed to be positive */
3762         block[0] = (block[0] + (q >> 1)) / q;
3763         start_i = 1;
3764         last_non_zero = 0;
3765         qmat = s->q_intra_matrix[qscale];
3766         if(s->mpeg_quant || s->out_format == FMT_MPEG1)
3767             bias= 1<<(QMAT_SHIFT-1);
3768         length     = s->intra_ac_vlc_length;
3769         last_length= s->intra_ac_vlc_last_length;
3770     } else {
3771         start_i = 0;
3772         last_non_zero = -1;
3773         qmat = s->q_inter_matrix[qscale];
3774         length     = s->inter_ac_vlc_length;
3775         last_length= s->inter_ac_vlc_last_length;
3776     }
3777     last_i= start_i;
3778
3779     threshold1= (1<<QMAT_SHIFT) - bias - 1;
3780     threshold2= (threshold1<<1);
3781
3782     for(i=63; i>=start_i; i--) {
3783         const int j = scantable[i];
3784         int level = block[j] * qmat[j];
3785
3786         if(((unsigned)(level+threshold1))>threshold2){
3787             last_non_zero = i;
3788             break;
3789         }
3790     }
3791
3792     for(i=start_i; i<=last_non_zero; i++) {
3793         const int j = scantable[i];
3794         int level = block[j] * qmat[j];
3795
3796 //        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
3797 //           || bias-level >= (1<<(QMAT_SHIFT - 3))){
3798         if(((unsigned)(level+threshold1))>threshold2){
3799             if(level>0){
3800                 level= (bias + level)>>QMAT_SHIFT;
3801                 coeff[0][i]= level;
3802                 coeff[1][i]= level-1;
3803 //                coeff[2][k]= level-2;
3804             }else{
3805                 level= (bias - level)>>QMAT_SHIFT;
3806                 coeff[0][i]= -level;
3807                 coeff[1][i]= -level+1;
3808 //                coeff[2][k]= -level+2;
3809             }
3810             coeff_count[i]= FFMIN(level, 2);
3811             assert(coeff_count[i]);
3812             max |=level;
3813         }else{
3814             coeff[0][i]= (level>>31)|1;
3815             coeff_count[i]= 1;
3816         }
3817     }
3818
3819     *overflow= s->max_qcoeff < max; //overflow might have happened
3820
3821     if(last_non_zero < start_i){
3822         memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
3823         return last_non_zero;
3824     }
3825
3826     score_tab[start_i]= 0;
3827     survivor[0]= start_i;
3828     survivor_count= 1;
3829
3830     for(i=start_i; i<=last_non_zero; i++){
3831         int level_index, j, zero_distortion;
3832         int dct_coeff= FFABS(block[ scantable[i] ]);
3833         int best_score=256*256*256*120;
3834
3835         if (s->fdsp.fdct == ff_fdct_ifast)
3836             dct_coeff= (dct_coeff*ff_inv_aanscales[ scantable[i] ]) >> 12;
3837         zero_distortion= dct_coeff*dct_coeff;
3838
3839         for(level_index=0; level_index < coeff_count[i]; level_index++){
3840             int distortion;
3841             int level= coeff[level_index][i];
3842             const int alevel= FFABS(level);
3843             int unquant_coeff;
3844
3845             assert(level);
3846
3847             if(s->out_format == FMT_H263){
3848                 unquant_coeff= alevel*qmul + qadd;
3849             } else { // MPEG-1
3850                 j = s->idsp.idct_permutation[scantable[i]]; // FIXME: optimize
3851                 if(s->mb_intra){
3852                         unquant_coeff = (int)(  alevel  * qscale * s->intra_matrix[j]) >> 3;
3853                         unquant_coeff =   (unquant_coeff - 1) | 1;
3854                 }else{
3855                         unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[j])) >> 4;
3856                         unquant_coeff =   (unquant_coeff - 1) | 1;
3857                 }
3858                 unquant_coeff<<= 3;
3859             }
3860
3861             distortion= (unquant_coeff - dct_coeff) * (unquant_coeff - dct_coeff) - zero_distortion;
3862             level+=64;
3863             if((level&(~127)) == 0){
3864                 for(j=survivor_count-1; j>=0; j--){
3865                     int run= i - survivor[j];
3866                     int score= distortion + length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3867                     score += score_tab[i-run];
3868
3869                     if(score < best_score){
3870                         best_score= score;
3871                         run_tab[i+1]= run;
3872                         level_tab[i+1]= level-64;
3873                     }
3874                 }
3875
3876                 if(s->out_format == FMT_H263){
3877                     for(j=survivor_count-1; j>=0; j--){
3878                         int run= i - survivor[j];
3879                         int score= distortion + last_length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3880                         score += score_tab[i-run];
3881                         if(score < last_score){
3882                             last_score= score;
3883                             last_run= run;
3884                             last_level= level-64;
3885                             last_i= i+1;
3886                         }
3887                     }
3888                 }
3889             }else{
3890                 distortion += esc_length*lambda;
3891                 for(j=survivor_count-1; j>=0; j--){
3892                     int run= i - survivor[j];
3893                     int score= distortion + score_tab[i-run];
3894
3895                     if(score < best_score){
3896                         best_score= score;
3897                         run_tab[i+1]= run;
3898                         level_tab[i+1]= level-64;
3899                     }
3900                 }
3901
3902                 if(s->out_format == FMT_H263){
3903                   for(j=survivor_count-1; j>=0; j--){
3904                         int run= i - survivor[j];
3905                         int score= distortion + score_tab[i-run];
3906                         if(score < last_score){
3907                             last_score= score;
3908                             last_run= run;
3909                             last_level= level-64;
3910                             last_i= i+1;
3911                         }
3912                     }
3913                 }
3914             }
3915         }
3916
3917         score_tab[i+1]= best_score;
3918
3919         // Note: there is a vlc code in MPEG-4 which is 1 bit shorter then another one with a shorter run and the same level
3920         if(last_non_zero <= 27){
3921             for(; survivor_count; survivor_count--){
3922                 if(score_tab[ survivor[survivor_count-1] ] <= best_score)
3923                     break;
3924             }
3925         }else{
3926             for(; survivor_count; survivor_count--){
3927                 if(score_tab[ survivor[survivor_count-1] ] <= best_score + lambda)
3928                     break;
3929             }
3930         }
3931
3932         survivor[ survivor_count++ ]= i+1;
3933     }
3934
3935     if(s->out_format != FMT_H263){
3936         last_score= 256*256*256*120;
3937         for(i= survivor[0]; i<=last_non_zero + 1; i++){
3938             int score= score_tab[i];
3939             if (i)
3940                 score += lambda * 2; // FIXME more exact?
3941
3942             if(score < last_score){
3943                 last_score= score;
3944                 last_i= i;
3945                 last_level= level_tab[i];
3946                 last_run= run_tab[i];
3947             }
3948         }
3949     }
3950
3951     s->coded_score[n] = last_score;
3952
3953     dc= FFABS(block[0]);
3954     last_non_zero= last_i - 1;
3955     memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
3956
3957     if(last_non_zero < start_i)
3958         return last_non_zero;
3959
3960     if(last_non_zero == 0 && start_i == 0){
3961         int best_level= 0;
3962         int best_score= dc * dc;
3963
3964         for(i=0; i<coeff_count[0]; i++){
3965             int level= coeff[i][0];
3966             int alevel= FFABS(level);
3967             int unquant_coeff, score, distortion;
3968
3969             if(s->out_format == FMT_H263){
3970                     unquant_coeff= (alevel*qmul + qadd)>>3;
3971             } else { // MPEG-1
3972                     unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[0])) >> 4;
3973                     unquant_coeff =   (unquant_coeff - 1) | 1;
3974             }
3975             unquant_coeff = (unquant_coeff + 4) >> 3;
3976             unquant_coeff<<= 3 + 3;
3977
3978             distortion= (unquant_coeff - dc) * (unquant_coeff - dc);
3979             level+=64;
3980             if((level&(~127)) == 0) score= distortion + last_length[UNI_AC_ENC_INDEX(0, level)]*lambda;
3981             else                    score= distortion + esc_length*lambda;
3982
3983             if(score < best_score){
3984                 best_score= score;
3985                 best_level= level - 64;
3986             }
3987         }
3988         block[0]= best_level;
3989         s->coded_score[n] = best_score - dc*dc;
3990         if(best_level == 0) return -1;
3991         else                return last_non_zero;
3992     }
3993
3994     i= last_i;
3995     assert(last_level);
3996
3997     block[ perm_scantable[last_non_zero] ]= last_level;
3998     i -= last_run + 1;
3999
4000     for(; i>start_i; i -= run_tab[i] + 1){
4001         block[ perm_scantable[i-1] ]= level_tab[i];
4002     }
4003
4004     return last_non_zero;
4005 }
4006
4007 //#define REFINE_STATS 1
4008 static int16_t basis[64][64];
4009
4010 static void build_basis(uint8_t *perm){
4011     int i, j, x, y;
4012     emms_c();
4013     for(i=0; i<8; i++){
4014         for(j=0; j<8; j++){
4015             for(y=0; y<8; y++){
4016                 for(x=0; x<8; x++){
4017                     double s= 0.25*(1<<BASIS_SHIFT);
4018                     int index= 8*i + j;
4019                     int perm_index= perm[index];
4020                     if(i==0) s*= sqrt(0.5);
4021                     if(j==0) s*= sqrt(0.5);
4022                     basis[perm_index][8*x + y]= lrintf(s * cos((M_PI/8.0)*i*(x+0.5)) * cos((M_PI/8.0)*j*(y+0.5)));
4023                 }
4024             }
4025         }
4026     }
4027 }
4028
4029 static int dct_quantize_refine(MpegEncContext *s, //FIXME breaks denoise?
4030                         int16_t *block, int16_t *weight, int16_t *orig,
4031                         int n, int qscale){
4032     int16_t rem[64];
4033     LOCAL_ALIGNED_16(int16_t, d1, [64]);
4034     const uint8_t *scantable= s->intra_scantable.scantable;
4035     const uint8_t *perm_scantable= s->intra_scantable.permutated;
4036 //    unsigned int threshold1, threshold2;
4037 //    int bias=0;
4038     int run_tab[65];
4039     int prev_run=0;
4040     int prev_level=0;
4041     int qmul, qadd, start_i, last_non_zero, i, dc;
4042     uint8_t * length;
4043     uint8_t * last_length;
4044     int lambda;
4045     int rle_index, run, q = 1, sum; //q is only used when s->mb_intra is true
4046 #ifdef REFINE_STATS
4047 static int count=0;
4048 static int after_last=0;
4049 static int to_zero=0;
4050 static int from_zero=0;
4051 static int raise=0;
4052 static int lower=0;
4053 static int messed_sign=0;
4054 #endif
4055
4056     if(basis[0][0] == 0)
4057         build_basis(s->idsp.idct_permutation);
4058
4059     qmul= qscale*2;
4060     qadd= (qscale-1)|1;
4061     if (s->mb_intra) {
4062         if (!s->h263_aic) {
4063             if (n < 4)
4064                 q = s->y_dc_scale;
4065             else
4066                 q = s->c_dc_scale;
4067         } else{
4068             /* For AIC we skip quant/dequant of INTRADC */
4069             q = 1;
4070             qadd=0;
4071         }
4072         q <<= RECON_SHIFT-3;
4073         /* note: block[0] is assumed to be positive */
4074         dc= block[0]*q;
4075 //        block[0] = (block[0] + (q >> 1)) / q;
4076         start_i = 1;
4077 //        if(s->mpeg_quant || s->out_format == FMT_MPEG1)
4078 //            bias= 1<<(QMAT_SHIFT-1);
4079         length     = s->intra_ac_vlc_length;
4080         last_length= s->intra_ac_vlc_last_length;
4081     } else {
4082         dc= 0;
4083         start_i = 0;
4084         length     = s->inter_ac_vlc_length;
4085         last_length= s->inter_ac_vlc_last_length;
4086     }
4087     last_non_zero = s->block_last_index[n];
4088
4089 #ifdef REFINE_STATS
4090 {START_TIMER
4091 #endif
4092     dc += (1<<(RECON_SHIFT-1));
4093     for(i=0; i<64; i++){
4094         rem[i] = dc - (orig[i] << RECON_SHIFT); // FIXME use orig directly instead of copying to rem[]
4095     }
4096 #ifdef REFINE_STATS
4097 STOP_TIMER("memset rem[]")}
4098 #endif
4099     sum=0;
4100     for(i=0; i<64; i++){
4101         int one= 36;
4102         int qns=4;
4103         int w;
4104
4105         w= FFABS(weight[i]) + qns*one;
4106         w= 15 + (48*qns*one + w/2)/w; // 16 .. 63
4107
4108         weight[i] = w;
4109 //        w=weight[i] = (63*qns + (w/2)) / w;
4110
4111         assert(w>0);
4112         assert(w<(1<<6));
4113         sum += w*w;
4114     }
4115     lambda= sum*(uint64_t)s->lambda2 >> (FF_LAMBDA_SHIFT - 6 + 6 + 6 + 6);
4116 #ifdef REFINE_STATS
4117 {START_TIMER
4118 #endif
4119     run=0;
4120     rle_index=0;
4121     for(i=start_i; i<=last_non_zero; i++){
4122         int j= perm_scantable[i];
4123         const int level= block[j];
4124         int coeff;
4125
4126         if(level){
4127             if(level<0) coeff= qmul*level - qadd;
4128             else        coeff= qmul*level + qadd;
4129             run_tab[rle_index++]=run;
4130             run=0;
4131
4132             s->mpvencdsp.add_8x8basis(rem, basis[j], coeff);
4133         }else{
4134             run++;
4135         }
4136     }
4137 #ifdef REFINE_STATS
4138 if(last_non_zero>0){
4139 STOP_TIMER("init rem[]")
4140 }
4141 }
4142
4143 {START_TIMER
4144 #endif
4145     for(;;){
4146         int best_score = s->mpvencdsp.try_8x8basis(rem, weight, basis[0], 0);
4147         int best_coeff=0;
4148         int best_change=0;
4149         int run2, best_unquant_change=0, analyze_gradient;
4150 #ifdef REFINE_STATS
4151 {START_TIMER
4152 #endif
4153         analyze_gradient = last_non_zero > 2 || s->quantizer_noise_shaping >= 3;
4154
4155         if(analyze_gradient){
4156 #ifdef REFINE_STATS
4157 {START_TIMER
4158 #endif
4159             for(i=0; i<64; i++){
4160                 int w= weight[i];
4161
4162                 d1[i] = (rem[i]*w*w + (1<<(RECON_SHIFT+12-1)))>>(RECON_SHIFT+12);
4163             }
4164 #ifdef REFINE_STATS
4165 STOP_TIMER("rem*w*w")}
4166 {START_TIMER
4167 #endif
4168             s->fdsp.fdct(d1);
4169 #ifdef REFINE_STATS
4170 STOP_TIMER("dct")}
4171 #endif
4172         }
4173
4174         if(start_i){
4175             const int level= block[0];
4176             int change, old_coeff;
4177
4178             assert(s->mb_intra);
4179
4180             old_coeff= q*level;
4181
4182             for(change=-1; change<=1; change+=2){
4183                 int new_level= level + change;
4184                 int score, new_coeff;
4185
4186                 new_coeff= q*new_level;
4187                 if(new_coeff >= 2048 || new_coeff < 0)
4188                     continue;
4189
4190                 score = s->mpvencdsp.try_8x8basis(rem, weight, basis[0],
4191                                                   new_coeff - old_coeff);
4192                 if(score<best_score){
4193                     best_score= score;
4194                     best_coeff= 0;
4195                     best_change= change;
4196                     best_unquant_change= new_coeff - old_coeff;
4197                 }
4198             }
4199         }
4200
4201         run=0;
4202         rle_index=0;
4203         run2= run_tab[rle_index++];
4204         prev_level=0;
4205         prev_run=0;
4206
4207         for(i=start_i; i<64; i++){
4208             int j= perm_scantable[i];
4209             const int level= block[j];
4210             int change, old_coeff;
4211
4212             if(s->quantizer_noise_shaping < 3 && i > last_non_zero + 1)
4213                 break;
4214
4215             if(level){
4216                 if(level<0) old_coeff= qmul*level - qadd;
4217                 else        old_coeff= qmul*level + qadd;
4218                 run2= run_tab[rle_index++]; //FIXME ! maybe after last
4219             }else{
4220                 old_coeff=0;
4221                 run2--;
4222                 assert(run2>=0 || i >= last_non_zero );
4223             }
4224
4225             for(change=-1; change<=1; change+=2){
4226                 int new_level= level + change;
4227                 int score, new_coeff, unquant_change;
4228
4229                 score=0;
4230                 if(s->quantizer_noise_shaping < 2 && FFABS(new_level) > FFABS(level))
4231                    continue;
4232
4233                 if(new_level){
4234                     if(new_level<0) new_coeff= qmul*new_level - qadd;
4235                     else            new_coeff= qmul*new_level + qadd;
4236                     if(new_coeff >= 2048 || new_coeff <= -2048)
4237                         continue;
4238                     //FIXME check for overflow
4239
4240                     if(level){
4241                         if(level < 63 && level > -63){
4242                             if(i < last_non_zero)
4243                                 score +=   length[UNI_AC_ENC_INDEX(run, new_level+64)]
4244                                          - length[UNI_AC_ENC_INDEX(run, level+64)];
4245                             else
4246                                 score +=   last_length[UNI_AC_ENC_INDEX(run, new_level+64)]
4247                                          - last_length[UNI_AC_ENC_INDEX(run, level+64)];
4248                         }
4249                     }else{
4250                         assert(FFABS(new_level)==1);
4251
4252                         if(analyze_gradient){
4253                             int g= d1[ scantable[i] ];
4254                             if(g && (g^new_level) >= 0)
4255                                 continue;
4256                         }
4257
4258                         if(i < last_non_zero){
4259                             int next_i= i + run2 + 1;
4260                             int next_level= block[ perm_scantable[next_i] ] + 64;
4261
4262                             if(next_level&(~127))
4263                                 next_level= 0;
4264
4265                             if(next_i < last_non_zero)
4266                                 score +=   length[UNI_AC_ENC_INDEX(run, 65)]
4267                                          + length[UNI_AC_ENC_INDEX(run2, next_level)]
4268                                          - length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
4269                             else
4270                                 score +=  length[UNI_AC_ENC_INDEX(run, 65)]
4271                                         + last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4272                                         - last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
4273                         }else{
4274                             score += last_length[UNI_AC_ENC_INDEX(run, 65)];
4275                             if(prev_level){
4276                                 score +=  length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4277                                         - last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4278                             }
4279                         }
4280                     }
4281                 }else{
4282                     new_coeff=0;
4283                     assert(FFABS(level)==1);
4284
4285                     if(i < last_non_zero){
4286                         int next_i= i + run2 + 1;
4287                         int next_level= block[ perm_scantable[next_i] ] + 64;
4288
4289                         if(next_level&(~127))
4290                             next_level= 0;
4291
4292                         if(next_i < last_non_zero)
4293                             score +=   length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4294                                      - length[UNI_AC_ENC_INDEX(run2, next_level)]
4295                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4296                         else
4297                             score +=   last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4298                                      - last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4299                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4300                     }else{
4301                         score += -last_length[UNI_AC_ENC_INDEX(run, 65)];
4302                         if(prev_level){
4303                             score +=  last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4304                                     - length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4305                         }
4306                     }
4307                 }
4308
4309                 score *= lambda;
4310
4311                 unquant_change= new_coeff - old_coeff;
4312                 assert((score < 100*lambda && score > -100*lambda) || lambda==0);
4313
4314                 score += s->mpvencdsp.try_8x8basis(rem, weight, basis[j],
4315                                                    unquant_change);
4316                 if(score<best_score){
4317                     best_score= score;
4318                     best_coeff= i;
4319                     best_change= change;
4320                     best_unquant_change= unquant_change;
4321                 }
4322             }
4323             if(level){
4324                 prev_level= level + 64;
4325                 if(prev_level&(~127))
4326                     prev_level= 0;
4327                 prev_run= run;
4328                 run=0;
4329             }else{
4330                 run++;
4331             }
4332         }
4333 #ifdef REFINE_STATS
4334 STOP_TIMER("iterative step")}
4335 #endif
4336
4337         if(best_change){
4338             int j= perm_scantable[ best_coeff ];
4339
4340             block[j] += best_change;
4341
4342             if(best_coeff > last_non_zero){
4343                 last_non_zero= best_coeff;
4344                 assert(block[j]);
4345 #ifdef REFINE_STATS
4346 after_last++;
4347 #endif
4348             }else{
4349 #ifdef REFINE_STATS
4350 if(block[j]){
4351     if(block[j] - best_change){
4352         if(FFABS(block[j]) > FFABS(block[j] - best_change)){
4353             raise++;
4354         }else{
4355             lower++;
4356         }
4357     }else{
4358         from_zero++;
4359     }
4360 }else{
4361     to_zero++;
4362 }
4363 #endif
4364                 for(; last_non_zero>=start_i; last_non_zero--){
4365                     if(block[perm_scantable[last_non_zero]])
4366                         break;
4367                 }
4368             }
4369 #ifdef REFINE_STATS
4370 count++;
4371 if(256*256*256*64 % count == 0){
4372     printf("after_last:%d to_zero:%d from_zero:%d raise:%d lower:%d sign:%d xyp:%d/%d/%d\n", after_last, to_zero, from_zero, raise, lower, messed_sign, s->mb_x, s->mb_y, s->picture_number);
4373 }
4374 #endif
4375             run=0;
4376             rle_index=0;
4377             for(i=start_i; i<=last_non_zero; i++){
4378                 int j= perm_scantable[i];
4379                 const int level= block[j];
4380
4381                  if(level){
4382                      run_tab[rle_index++]=run;
4383                      run=0;
4384                  }else{
4385                      run++;
4386                  }
4387             }
4388
4389             s->mpvencdsp.add_8x8basis(rem, basis[j], best_unquant_change);
4390         }else{
4391             break;
4392         }
4393     }
4394 #ifdef REFINE_STATS
4395 if(last_non_zero>0){
4396 STOP_TIMER("iterative search")
4397 }
4398 }
4399 #endif
4400
4401     return last_non_zero;
4402 }
4403
/**
 * Apply a coefficient permutation to an 8x8 block in place.
 *
 * Only the positions reachable through the first last+1 entries of
 * scantable are moved; everything else in the block is left untouched.
 * Nothing is done when last <= 0.
 *
 * @param block       block to permute; entry j is moved to permutation[j]
 * @param permutation the permutation vector
 * @param last        index (in scantable order) of the last non-zero
 *                    coefficient, used to limit the work
 * @param scantable   scan order used to enumerate the positions to move;
 *                    it only speeds the permutation up, the block is not
 *                    (inverse) permuted to scantable order!
 */
static void block_permute(int16_t *block, uint8_t *permutation,
                          const uint8_t *scantable, int last)
{
    int16_t saved[64];
    const uint8_t *scan_end;
    const uint8_t *scan_pos;

    if (last < 1)
        return;
    //FIXME it is ok but not clean and might fail for some permutations
    // if (permutation[1] == 1)
    // return;

    scan_end = scantable + last;

    /* pass 1: stash every visited coefficient and clear its old slot */
    for (scan_pos = scantable; scan_pos <= scan_end; scan_pos++) {
        const int src = *scan_pos;

        saved[src] = block[src];
        block[src] = 0;
    }

    /* pass 2: drop each stashed coefficient at its permuted position */
    for (scan_pos = scantable; scan_pos <= scan_end; scan_pos++) {
        const int src = *scan_pos;

        block[permutation[src]] = saved[src];
    }
}
4439
4440 int ff_dct_quantize_c(MpegEncContext *s,
4441                         int16_t *block, int n,
4442                         int qscale, int *overflow)
4443 {
4444     int i, j, level, last_non_zero, q, start_i;
4445     const int *qmat;
4446     const uint8_t *scantable= s->intra_scantable.scantable;
4447     int bias;
4448     int max=0;
4449     unsigned int threshold1, threshold2;
4450
4451     s->fdsp.fdct(block);
4452
4453     if(s->dct_error_sum)
4454         s->denoise_dct(s, block);
4455
4456     if (s->mb_intra) {
4457         if (!s->h263_aic) {
4458             if (n < 4)
4459                 q = s->y_dc_scale;
4460             else
4461                 q = s->c_dc_scale;
4462             q = q << 3;
4463         } else
4464             /* For AIC we skip quant/dequant of INTRADC */
4465             q = 1 << 3;
4466
4467         /* note: block[0] is assumed to be positive */
4468         block[0] = (block[0] + (q >> 1)) / q;
4469         start_i = 1;
4470         last_non_zero = 0;
4471         qmat = s->q_intra_matrix[qscale];
4472         bias= s->intra_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4473     } else {
4474         start_i = 0;
4475         last_non_zero = -1;
4476         qmat = s->q_inter_matrix[qscale];
4477         bias= s->inter_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4478     }
4479     threshold1= (1<<QMAT_SHIFT) - bias - 1;
4480     threshold2= (threshold1<<1);
4481     for(i=63;i>=start_i;i--) {
4482         j = scantable[i];
4483         level = block[j] * qmat[j];
4484
4485         if(((unsigned)(level+threshold1))>threshold2){
4486             last_non_zero = i;
4487             break;
4488         }else{
4489             block[j]=0;
4490         }
4491     }
4492     for(i=start_i; i<=last_non_zero; i++) {
4493         j = scantable[i];
4494         level = block[j] * qmat[j];
4495
4496 //        if(   bias+level >= (1<<QMAT_SHIFT)
4497 //           || bias-level >= (1<<QMAT_SHIFT)){
4498         if(((unsigned)(level+threshold1))>threshold2){
4499             if(level>0){
4500                 level= (bias + level)>>QMAT_SHIFT;
4501                 block[j]= level;
4502             }else{
4503                 level= (bias - level)>>QMAT_SHIFT;
4504                 block[j]= -level;
4505             }
4506             max |=level;
4507         }else{
4508             block[j]=0;
4509         }
4510     }
4511     *overflow= s->max_qcoeff < max; //overflow might have happened
4512
4513     /* we need this permutation so that we correct the IDCT, we only permute the !=0 elements */
4514     if (s->idsp.perm_type != FF_IDCT_PERM_NONE)
4515         block_permute(block, s->idsp.idct_permutation,
4516                       scantable, last_non_zero);
4517
4518     return last_non_zero;
4519 }
4520
4521 #define OFFSET(x) offsetof(MpegEncContext, x)
4522 #define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
4523 static const AVOption h263_options[] = {
4524     { "obmc",         "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4525     { "structured_slices","Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4526     { "mb_info",      "emit macroblock info for RFC 2190 packetization, the parameter value is the maximum payload size", OFFSET(mb_info), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VE },
4527     FF_MPV_COMMON_OPTS
4528     { NULL },
4529 };
4530
4531 static const AVClass h263_class = {
4532     .class_name = "H.263 encoder",
4533     .item_name  = av_default_item_name,
4534     .option     = h263_options,
4535     .version    = LIBAVUTIL_VERSION_INT,
4536 };
4537
4538 AVCodec ff_h263_encoder = {
4539     .name           = "h263",
4540     .long_name      = NULL_IF_CONFIG_SMALL("H.263 / H.263-1996"),
4541     .type           = AVMEDIA_TYPE_VIDEO,
4542     .id             = AV_CODEC_ID_H263,
4543     .priv_data_size = sizeof(MpegEncContext),
4544     .init           = ff_mpv_encode_init,
4545     .encode2        = ff_mpv_encode_picture,
4546     .close          = ff_mpv_encode_end,
4547     .pix_fmts= (const enum AVPixelFormat[]){AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE},
4548     .priv_class     = &h263_class,
4549 };
4550
4551 static const AVOption h263p_options[] = {
4552     { "umv",        "Use unlimited motion vectors.",    OFFSET(umvplus), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4553     { "aiv",        "Use alternative inter VLC.",       OFFSET(alt_inter_vlc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4554     { "obmc",       "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4555     { "structured_slices", "Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4556     FF_MPV_COMMON_OPTS
4557     { NULL },
4558 };
4559 static const AVClass h263p_class = {
4560     .class_name = "H.263p encoder",
4561     .item_name  = av_default_item_name,
4562     .option     = h263p_options,
4563     .version    = LIBAVUTIL_VERSION_INT,
4564 };
4565
4566 AVCodec ff_h263p_encoder = {
4567     .name           = "h263p",
4568     .long_name      = NULL_IF_CONFIG_SMALL("H.263+ / H.263-1998 / H.263 version 2"),
4569     .type           = AVMEDIA_TYPE_VIDEO,
4570     .id             = AV_CODEC_ID_H263P,
4571     .priv_data_size = sizeof(MpegEncContext),
4572     .init           = ff_mpv_encode_init,
4573     .encode2        = ff_mpv_encode_picture,
4574     .close          = ff_mpv_encode_end,
4575     .capabilities   = AV_CODEC_CAP_SLICE_THREADS,
4576     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4577     .priv_class     = &h263p_class,
4578 };
4579
4580 static const AVClass msmpeg4v2_class = {
4581     .class_name = "msmpeg4v2 encoder",
4582     .item_name  = av_default_item_name,
4583     .option     = ff_mpv_generic_options,
4584     .version    = LIBAVUTIL_VERSION_INT,
4585 };
4586
4587 AVCodec ff_msmpeg4v2_encoder = {
4588     .name           = "msmpeg4v2",
4589     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 2"),
4590     .type           = AVMEDIA_TYPE_VIDEO,
4591     .id             = AV_CODEC_ID_MSMPEG4V2,
4592     .priv_data_size = sizeof(MpegEncContext),
4593     .init           = ff_mpv_encode_init,
4594     .encode2        = ff_mpv_encode_picture,
4595     .close          = ff_mpv_encode_end,
4596     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4597     .priv_class     = &msmpeg4v2_class,
4598 };
4599
4600 static const AVClass msmpeg4v3_class = {
4601     .class_name = "msmpeg4v3 encoder",
4602     .item_name  = av_default_item_name,
4603     .option     = ff_mpv_generic_options,
4604     .version    = LIBAVUTIL_VERSION_INT,
4605 };
4606
4607 AVCodec ff_msmpeg4v3_encoder = {
4608     .name           = "msmpeg4",
4609     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 3"),
4610     .type           = AVMEDIA_TYPE_VIDEO,
4611     .id             = AV_CODEC_ID_MSMPEG4V3,
4612     .priv_data_size = sizeof(MpegEncContext),
4613     .init           = ff_mpv_encode_init,
4614     .encode2        = ff_mpv_encode_picture,
4615     .close          = ff_mpv_encode_end,
4616     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4617     .priv_class     = &msmpeg4v3_class,
4618 };
4619
4620 static const AVClass wmv1_class = {
4621     .class_name = "wmv1 encoder",
4622     .item_name  = av_default_item_name,
4623     .option     = ff_mpv_generic_options,
4624     .version    = LIBAVUTIL_VERSION_INT,
4625 };
4626
4627 AVCodec ff_wmv1_encoder = {
4628     .name           = "wmv1",
4629     .long_name      = NULL_IF_CONFIG_SMALL("Windows Media Video 7"),
4630     .type           = AVMEDIA_TYPE_VIDEO,
4631     .id             = AV_CODEC_ID_WMV1,
4632     .priv_data_size = sizeof(MpegEncContext),
4633     .init           = ff_mpv_encode_init,
4634     .encode2        = ff_mpv_encode_picture,
4635     .close          = ff_mpv_encode_end,
4636     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4637     .priv_class     = &wmv1_class,
4638 };