]> git.sesse.net Git - ffmpeg/blob - libavcodec/mpegvideo_enc.c
mpeg4videoenc: remove forgotten return -1
[ffmpeg] / libavcodec / mpegvideo_enc.c
1 /*
2  * The simplest mpeg encoder (well, it was the simplest!)
3  * Copyright (c) 2000,2001 Fabrice Bellard
4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5  *
6  * 4MV & hq & B-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
7  *
8  * This file is part of FFmpeg.
9  *
10  * FFmpeg is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * FFmpeg is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with FFmpeg; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23  */
24
25 /**
26  * @file
27  * The simplest mpeg encoder (well, it was the simplest!).
28  */
29
30 #include "libavutil/intmath.h"
31 #include "libavutil/mathematics.h"
32 #include "libavutil/opt.h"
33 #include "avcodec.h"
34 #include "dsputil.h"
35 #include "mpegvideo.h"
36 #include "mpegvideo_common.h"
37 #include "h263.h"
38 #include "mjpegenc.h"
39 #include "msmpeg4.h"
40 #include "faandct.h"
41 #include "thread.h"
42 #include "aandcttab.h"
43 #include "flv.h"
44 #include "mpeg4video.h"
45 #include "internal.h"
46 #include <limits.h>
47
48 //#undef NDEBUG
49 //#include <assert.h>
50
51 static int encode_picture(MpegEncContext *s, int picture_number);
52 static int dct_quantize_refine(MpegEncContext *s, DCTELEM *block, int16_t *weight, DCTELEM *orig, int n, int qscale);
53 static int sse_mb(MpegEncContext *s);
54 static void denoise_dct_c(MpegEncContext *s, DCTELEM *block);
55 static int dct_quantize_trellis_c(MpegEncContext *s, DCTELEM *block, int n, int qscale, int *overflow);
56
57 /* enable all paranoid tests for rounding, overflows, etc... */
58 //#define PARANOID
59
60 //#define DEBUG
61
62 static uint8_t default_mv_penalty[MAX_FCODE+1][MAX_MV*2+1];
63 static uint8_t default_fcode_tab[MAX_MV*2+1];
64
65 void ff_convert_matrix(DSPContext *dsp, int (*qmat)[64], uint16_t (*qmat16)[2][64],
66                            const uint16_t *quant_matrix, int bias, int qmin, int qmax, int intra)
67 {
68     int qscale;
69     int shift=0;
70
71     for(qscale=qmin; qscale<=qmax; qscale++){
72         int i;
73         if (dsp->fdct == ff_jpeg_fdct_islow_8 ||
74             dsp->fdct == ff_jpeg_fdct_islow_10
75 #ifdef FAAN_POSTSCALE
76             || dsp->fdct == ff_faandct
77 #endif
78             ) {
79             for(i=0;i<64;i++) {
80                 const int j= dsp->idct_permutation[i];
81                 /* 16 <= qscale * quant_matrix[i] <= 7905 */
82                 /* 19952             <= ff_aanscales[i] * qscale * quant_matrix[i]               <= 249205026 */
83                 /* (1 << 36) / 19952 >= (1 << 36) / (ff_aanscales[i] * qscale * quant_matrix[i]) >= (1 << 36) / 249205026 */
84                 /* 3444240           >= (1 << 36) / (ff_aanscales[i] * qscale * quant_matrix[i]) >= 275 */
85
86                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
87                                 (qscale * quant_matrix[j]));
88             }
89         } else if (dsp->fdct == fdct_ifast
90 #ifndef FAAN_POSTSCALE
91                    || dsp->fdct == ff_faandct
92 #endif
93                    ) {
94             for(i=0;i<64;i++) {
95                 const int j= dsp->idct_permutation[i];
96                 /* 16 <= qscale * quant_matrix[i] <= 7905 */
97                 /* 19952             <= ff_aanscales[i] * qscale * quant_matrix[i]               <= 249205026 */
98                 /* (1 << 36) / 19952 >= (1 << 36) / (ff_aanscales[i] * qscale * quant_matrix[i]) >= (1<<36)/249205026 */
99                 /* 3444240           >= (1 << 36) / (ff_aanscales[i] * qscale * quant_matrix[i]) >= 275 */
100
101                 qmat[qscale][i] = (int)((UINT64_C(1) << (QMAT_SHIFT + 14)) /
102                                 (ff_aanscales[i] * qscale * quant_matrix[j]));
103             }
104         } else {
105             for(i=0;i<64;i++) {
106                 const int j= dsp->idct_permutation[i];
107                 /* We can safely suppose that 16 <= quant_matrix[i] <= 255
108                    So 16           <= qscale * quant_matrix[i]             <= 7905
109                    so (1<<19) / 16 >= (1<<19) / (qscale * quant_matrix[i]) >= (1<<19) / 7905
110                    so 32768        >= (1<<19) / (qscale * quant_matrix[i]) >= 67
111                 */
112                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) / (qscale * quant_matrix[j]));
113 //                qmat  [qscale][i] = (1 << QMAT_SHIFT_MMX) / (qscale * quant_matrix[i]);
114                 qmat16[qscale][0][i] = (1 << QMAT_SHIFT_MMX) / (qscale * quant_matrix[j]);
115
116                 if(qmat16[qscale][0][i]==0 || qmat16[qscale][0][i]==128*256) qmat16[qscale][0][i]=128*256-1;
117                 qmat16[qscale][1][i]= ROUNDED_DIV(bias<<(16-QUANT_BIAS_SHIFT), qmat16[qscale][0][i]);
118             }
119         }
120
121         for(i=intra; i<64; i++){
122             int64_t max= 8191;
123             if (dsp->fdct == fdct_ifast
124 #ifndef FAAN_POSTSCALE
125                    || dsp->fdct == ff_faandct
126 #endif
127                    ) {
128                 max = (8191LL*ff_aanscales[i]) >> 14;
129             }
130             while(((max * qmat[qscale][i]) >> shift) > INT_MAX){
131                 shift++;
132             }
133         }
134     }
135     if(shift){
136         av_log(NULL, AV_LOG_INFO, "Warning, QMAT_SHIFT is larger than %d, overflows possible\n", QMAT_SHIFT - shift);
137     }
138 }
139
140 static inline void update_qscale(MpegEncContext *s){
141     s->qscale= (s->lambda*139 + FF_LAMBDA_SCALE*64) >> (FF_LAMBDA_SHIFT + 7);
142     s->qscale= av_clip(s->qscale, s->avctx->qmin, s->avctx->qmax);
143
144     s->lambda2= (s->lambda*s->lambda + FF_LAMBDA_SCALE/2) >> FF_LAMBDA_SHIFT;
145 }
146
147 void ff_write_quant_matrix(PutBitContext *pb, uint16_t *matrix){
148     int i;
149
150     if(matrix){
151         put_bits(pb, 1, 1);
152         for(i=0;i<64;i++) {
153             put_bits(pb, 8, matrix[ ff_zigzag_direct[i] ]);
154         }
155     }else
156         put_bits(pb, 1, 0);
157 }
158
159 /**
160  * init s->current_picture.qscale_table from s->lambda_table
161  */
162 void ff_init_qscale_tab(MpegEncContext *s){
163     int8_t * const qscale_table = s->current_picture.f.qscale_table;
164     int i;
165
166     for(i=0; i<s->mb_num; i++){
167         unsigned int lam= s->lambda_table[ s->mb_index2xy[i] ];
168         int qp= (lam*139 + FF_LAMBDA_SCALE*64) >> (FF_LAMBDA_SHIFT + 7);
169         qscale_table[ s->mb_index2xy[i] ]= av_clip(qp, s->avctx->qmin, s->avctx->qmax);
170     }
171 }
172
173 static void copy_picture_attributes(MpegEncContext *s, AVFrame *dst, AVFrame *src){
174     int i;
175
176     dst->pict_type              = src->pict_type;
177     dst->quality                = src->quality;
178     dst->coded_picture_number   = src->coded_picture_number;
179     dst->display_picture_number = src->display_picture_number;
180 //    dst->reference              = src->reference;
181     dst->pts                    = src->pts;
182     dst->interlaced_frame       = src->interlaced_frame;
183     dst->top_field_first        = src->top_field_first;
184
185     if(s->avctx->me_threshold){
186         if(!src->motion_val[0])
187             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.motion_val not set!\n");
188         if(!src->mb_type)
189             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.mb_type not set!\n");
190         if(!src->ref_index[0])
191             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.ref_index not set!\n");
192         if(src->motion_subsample_log2 != dst->motion_subsample_log2)
193             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.motion_subsample_log2 doesn't match! (%d!=%d)\n",
194             src->motion_subsample_log2, dst->motion_subsample_log2);
195
196         memcpy(dst->mb_type, src->mb_type, s->mb_stride * s->mb_height * sizeof(dst->mb_type[0]));
197
198         for(i=0; i<2; i++){
199             int stride= ((16*s->mb_width )>>src->motion_subsample_log2) + 1;
200             int height= ((16*s->mb_height)>>src->motion_subsample_log2);
201
202             if(src->motion_val[i] && src->motion_val[i] != dst->motion_val[i]){
203                 memcpy(dst->motion_val[i], src->motion_val[i], 2*stride*height*sizeof(int16_t));
204             }
205             if(src->ref_index[i] && src->ref_index[i] != dst->ref_index[i]){
206                 memcpy(dst->ref_index[i], src->ref_index[i], s->mb_stride*4*s->mb_height*sizeof(int8_t));
207             }
208         }
209     }
210 }
211
212 static void update_duplicate_context_after_me(MpegEncContext *dst, MpegEncContext *src){
213 #define COPY(a) dst->a= src->a
214     COPY(pict_type);
215     COPY(current_picture);
216     COPY(f_code);
217     COPY(b_code);
218     COPY(qscale);
219     COPY(lambda);
220     COPY(lambda2);
221     COPY(picture_in_gop_number);
222     COPY(gop_picture_number);
223     COPY(frame_pred_frame_dct); //FIXME don't set in encode_header
224     COPY(progressive_frame); //FIXME don't set in encode_header
225     COPY(partitioned_frame); //FIXME don't set in encode_header
226 #undef COPY
227 }
228
229 /**
230  * sets the given MpegEncContext to defaults for encoding.
231  * the changed fields will not depend upon the prior state of the MpegEncContext.
232  */
233 static void MPV_encode_defaults(MpegEncContext *s){
234     int i;
235     MPV_common_defaults(s);
236
237     for(i=-16; i<16; i++){
238         default_fcode_tab[i + MAX_MV]= 1;
239     }
240     s->me.mv_penalty= default_mv_penalty;
241     s->fcode_tab= default_fcode_tab;
242 }
243
244 /* init video encoder: validates the AVCodecContext settings, copies them into the MpegEncContext stored in avctx->priv_data, applies per-codec setup, calls MPV_common_init(), builds the quantization matrices and initializes rate control. Returns 0 on success, -1 when a setting is rejected or a sub-initialization fails. */
245 av_cold int MPV_encode_init(AVCodecContext *avctx)
246 {
247     MpegEncContext *s = avctx->priv_data;
248     int i;
249     int chroma_h_shift, chroma_v_shift;
250     /* start from the encoder defaults before reading any user setting */
251     MPV_encode_defaults(s);
252     /* reject pixel formats the selected codec cannot encode */
253     switch (avctx->codec_id) {
254     case CODEC_ID_MPEG2VIDEO:
255         if(avctx->pix_fmt != PIX_FMT_YUV420P && avctx->pix_fmt != PIX_FMT_YUV422P){
256             av_log(avctx, AV_LOG_ERROR, "only YUV420 and YUV422 are supported\n");
257             return -1;
258         }
259         break;
260     case CODEC_ID_LJPEG:
261         if(avctx->pix_fmt != PIX_FMT_YUVJ420P && avctx->pix_fmt != PIX_FMT_YUVJ422P && avctx->pix_fmt != PIX_FMT_YUVJ444P && avctx->pix_fmt != PIX_FMT_BGRA &&
262            ((avctx->pix_fmt != PIX_FMT_YUV420P && avctx->pix_fmt != PIX_FMT_YUV422P && avctx->pix_fmt != PIX_FMT_YUV444P) || avctx->strict_std_compliance>FF_COMPLIANCE_UNOFFICIAL)){
263             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in LJPEG\n");
264             return -1;
265         }
266         break;
267     case CODEC_ID_MJPEG:
268         if(avctx->pix_fmt != PIX_FMT_YUVJ420P && avctx->pix_fmt != PIX_FMT_YUVJ422P &&
269            ((avctx->pix_fmt != PIX_FMT_YUV420P && avctx->pix_fmt != PIX_FMT_YUV422P) || avctx->strict_std_compliance>FF_COMPLIANCE_UNOFFICIAL)){
270             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in jpeg\n");
271             return -1;
272         }
273         break;
274     default:
275         if(avctx->pix_fmt != PIX_FMT_YUV420P){
276             av_log(avctx, AV_LOG_ERROR, "only YUV420 is supported\n");
277             return -1;
278         }
279     }
280     /* derive the chroma format; every pixel format other than the two 4:2:2 ones is handled as 4:2:0 */
281     switch (avctx->pix_fmt) {
282     case PIX_FMT_YUVJ422P:
283     case PIX_FMT_YUV422P:
284         s->chroma_format = CHROMA_422;
285         break;
286     case PIX_FMT_YUVJ420P:
287     case PIX_FMT_YUV420P:
288     default:
289         s->chroma_format = CHROMA_420;
290         break;
291     }
292     /* copy the basic user settings into the encoder context */
293     s->bit_rate = avctx->bit_rate;
294     s->width = avctx->width;
295     s->height = avctx->height;
296     if(avctx->gop_size > 600 && avctx->strict_std_compliance>FF_COMPLIANCE_EXPERIMENTAL){
297         av_log(avctx, AV_LOG_ERROR, "Warning keyframe interval too large! reducing it ...\n");
298         avctx->gop_size=600;
299     }
300     s->gop_size = avctx->gop_size;
301     s->avctx = avctx;
302     s->flags= avctx->flags;
303     s->flags2= avctx->flags2;
304     s->max_b_frames= avctx->max_b_frames;
305     s->codec_id= avctx->codec->id;
306     s->luma_elim_threshold  = avctx->luma_elim_threshold;
307     s->chroma_elim_threshold= avctx->chroma_elim_threshold;
308     s->strict_std_compliance= avctx->strict_std_compliance;
309 #if FF_API_MPEGVIDEO_GLOBAL_OPTS
310     if (avctx->flags & CODEC_FLAG_PART)
311         s->data_partitioning = 1;
312 #endif
313     s->quarter_sample= (avctx->flags & CODEC_FLAG_QPEL)!=0;
314     s->mpeg_quant= avctx->mpeg_quant;
315     s->rtp_mode= !!avctx->rtp_payload_size;
316     s->intra_dc_precision= avctx->intra_dc_precision;
317     s->user_specified_pts = AV_NOPTS_VALUE;
318     /* a gop_size of 1 or less selects intra-only coding; gop_size itself is then set to 12 */
319     if (s->gop_size <= 1) {
320         s->intra_only = 1;
321         s->gop_size = 12;
322     } else {
323         s->intra_only = 0;
324     }
325
326     s->me_method = avctx->me_method;
327
328     /* Fixed QSCALE */
329     s->fixed_qscale = !!(avctx->flags & CODEC_FLAG_QSCALE);
330     /* adaptive quantization: enabled by any masking option or QP_RD, but never together with fixed qscale */
331     s->adaptive_quant= (   s->avctx->lumi_masking
332                         || s->avctx->dark_masking
333                         || s->avctx->temporal_cplx_masking
334                         || s->avctx->spatial_cplx_masking
335                         || s->avctx->p_masking
336                         || s->avctx->border_masking
337                         || (s->flags&CODEC_FLAG_QP_RD))
338                        && !s->fixed_qscale;
339
340     s->loop_filter= !!(s->flags & CODEC_FLAG_LOOP_FILTER);
341 #if FF_API_MPEGVIDEO_GLOBAL_OPTS
342     s->alternate_scan= !!(s->flags & CODEC_FLAG_ALT_SCAN);
343     s->intra_vlc_format= !!(s->flags2 & CODEC_FLAG2_INTRA_VLC);
344     s->q_scale_type= !!(s->flags2 & CODEC_FLAG2_NON_LINEAR_QUANT);
345     s->obmc= !!(s->flags & CODEC_FLAG_OBMC);
346 #endif
347     /* sanity checks on the rate control parameters; some only log, others abort init */
348     if(avctx->rc_max_rate && !avctx->rc_buffer_size){
349         av_log(avctx, AV_LOG_ERROR, "a vbv buffer size is needed, for encoding with a maximum bitrate\n");
350         return -1;
351     }
352
353     if(avctx->rc_min_rate && avctx->rc_max_rate != avctx->rc_min_rate){
354         av_log(avctx, AV_LOG_INFO, "Warning min_rate > 0 but min_rate != max_rate isn't recommended!\n");
355     }
356
357     if(avctx->rc_min_rate && avctx->rc_min_rate > avctx->bit_rate){
358         av_log(avctx, AV_LOG_ERROR, "bitrate below min bitrate\n");
359         return -1;
360     }
361
362     if(avctx->rc_max_rate && avctx->rc_max_rate < avctx->bit_rate){
363         av_log(avctx, AV_LOG_ERROR, "bitrate above max bitrate\n");
364         return -1;
365     }
366
367     if(avctx->rc_max_rate && avctx->rc_max_rate == avctx->bit_rate && avctx->rc_max_rate != avctx->rc_min_rate){
368         av_log(avctx, AV_LOG_INFO, "impossible bitrate constraints, this will fail\n");
369     }
370     /* NOTE(review): time_base is used here and in the next check before it is validated as non-zero further below */
371     if(avctx->rc_buffer_size && avctx->bit_rate*(int64_t)avctx->time_base.num > avctx->rc_buffer_size * (int64_t)avctx->time_base.den){
372         av_log(avctx, AV_LOG_ERROR, "VBV buffer too small for bitrate\n");
373         return -1;
374     }
375
376     if(!s->fixed_qscale && avctx->bit_rate*av_q2d(avctx->time_base) > avctx->bit_rate_tolerance){
377         av_log(avctx, AV_LOG_ERROR, "bitrate tolerance too small for bitrate\n");
378         return -1;
379     }
380
381     if(   s->avctx->rc_max_rate && s->avctx->rc_min_rate == s->avctx->rc_max_rate
382        && (s->codec_id == CODEC_ID_MPEG1VIDEO || s->codec_id == CODEC_ID_MPEG2VIDEO)
383        && 90000LL * (avctx->rc_buffer_size-1) > s->avctx->rc_max_rate*0xFFFFLL){
384
385         av_log(avctx, AV_LOG_INFO, "Warning vbv_delay will be set to 0xFFFF (=VBR) as the specified vbv buffer is too large for the given bitrate!\n");
386     }
387     /* reject feature flags and options that the selected codec does not support */
388     if((s->flags & CODEC_FLAG_4MV) && s->codec_id != CODEC_ID_MPEG4
389        && s->codec_id != CODEC_ID_H263 && s->codec_id != CODEC_ID_H263P && s->codec_id != CODEC_ID_FLV1){
390         av_log(avctx, AV_LOG_ERROR, "4MV not supported by codec\n");
391         return -1;
392     }
393
394     if(s->obmc && s->avctx->mb_decision != FF_MB_DECISION_SIMPLE){
395         av_log(avctx, AV_LOG_ERROR, "OBMC is only supported with simple mb decision\n");
396         return -1;
397     }
398
399 #if FF_API_MPEGVIDEO_GLOBAL_OPTS
400     if(s->obmc && s->codec_id != CODEC_ID_H263 && s->codec_id != CODEC_ID_H263P){
401         av_log(avctx, AV_LOG_ERROR, "OBMC is only supported with H263(+)\n");
402         return -1;
403     }
404 #endif
405
406     if(s->quarter_sample && s->codec_id != CODEC_ID_MPEG4){
407         av_log(avctx, AV_LOG_ERROR, "qpel not supported by codec\n");
408         return -1;
409     }
410
411 #if FF_API_MPEGVIDEO_GLOBAL_OPTS
412     if(s->data_partitioning && s->codec_id != CODEC_ID_MPEG4){
413         av_log(avctx, AV_LOG_ERROR, "data partitioning not supported by codec\n");
414         return -1;
415     }
416 #endif
417
418     if(s->max_b_frames && s->codec_id != CODEC_ID_MPEG4 && s->codec_id != CODEC_ID_MPEG1VIDEO && s->codec_id != CODEC_ID_MPEG2VIDEO){
419         av_log(avctx, AV_LOG_ERROR, "b frames not supported by codec\n");
420         return -1;
421     }
422     /* an aspect ratio with a term above 255 is not an error for these codecs: it is reduced in place with av_reduce() */
423     if ((s->codec_id == CODEC_ID_MPEG4 || s->codec_id == CODEC_ID_H263 ||
424          s->codec_id == CODEC_ID_H263P) &&
425         (avctx->sample_aspect_ratio.num > 255 || avctx->sample_aspect_ratio.den > 255)) {
426         av_log(avctx, AV_LOG_WARNING, "Invalid pixel aspect ratio %i/%i, limit is 255/255 reducing\n",
427                avctx->sample_aspect_ratio.num, avctx->sample_aspect_ratio.den);
428         av_reduce(&avctx->sample_aspect_ratio.num, &avctx->sample_aspect_ratio.den,
429                    avctx->sample_aspect_ratio.num,  avctx->sample_aspect_ratio.den, 255);
430     }
431
432     if((s->flags & (CODEC_FLAG_INTERLACED_DCT|CODEC_FLAG_INTERLACED_ME|CODEC_FLAG_ALT_SCAN))
433        && s->codec_id != CODEC_ID_MPEG4 && s->codec_id != CODEC_ID_MPEG2VIDEO){
434         av_log(avctx, AV_LOG_ERROR, "interlacing not supported by codec\n");
435         return -1;
436     }
437
438     if(s->mpeg_quant && s->codec_id != CODEC_ID_MPEG4){ //FIXME mpeg2 uses that too
439         av_log(avctx, AV_LOG_ERROR, "mpeg2 style quantization not supported by codec\n");
440         return -1;
441     }
442
443     if((s->flags & CODEC_FLAG_CBP_RD) && !avctx->trellis){
444         av_log(avctx, AV_LOG_ERROR, "CBP RD needs trellis quant\n");
445         return -1;
446     }
447
448     if((s->flags & CODEC_FLAG_QP_RD) && s->avctx->mb_decision != FF_MB_DECISION_RD){
449         av_log(avctx, AV_LOG_ERROR, "QP RD needs mbd=2\n");
450         return -1;
451     }
452
453     if(s->avctx->scenechange_threshold < 1000000000 && (s->flags & CODEC_FLAG_CLOSED_GOP)){
454         av_log(avctx, AV_LOG_ERROR, "closed gop with scene change detection are not supported yet, set threshold to 1000000000\n");
455         return -1;
456     }
457
458     if((s->flags2 & CODEC_FLAG2_INTRA_VLC) && s->codec_id != CODEC_ID_MPEG2VIDEO){
459         av_log(avctx, AV_LOG_ERROR, "intra vlc table not supported by codec\n");
460         return -1;
461     }
462
463     if(s->flags & CODEC_FLAG_LOW_DELAY){
464         if (s->codec_id != CODEC_ID_MPEG2VIDEO){
465             av_log(avctx, AV_LOG_ERROR, "low delay forcing is only available for mpeg2\n");
466             return -1;
467         }
468         if (s->max_b_frames != 0){
469             av_log(avctx, AV_LOG_ERROR, "b frames cannot be used with low delay\n");
470             return -1;
471         }
472     }
473
474     if(s->q_scale_type == 1){
475 #if FF_API_MPEGVIDEO_GLOBAL_OPTS
476         if(s->codec_id != CODEC_ID_MPEG2VIDEO){
477             av_log(avctx, AV_LOG_ERROR, "non linear quant is only available for mpeg2\n");
478             return -1;
479         }
480 #endif
481         if(avctx->qmax > 12){
482             av_log(avctx, AV_LOG_ERROR, "non linear quant only supports qmax <= 12 currently\n");
483             return -1;
484         }
485     }
486     /* threading constraints: multiple threads need codec support, and thread_count must be at least 1 */
487     if(s->avctx->thread_count > 1 && s->codec_id != CODEC_ID_MPEG4
488        && s->codec_id != CODEC_ID_MPEG1VIDEO && s->codec_id != CODEC_ID_MPEG2VIDEO
489        && (s->codec_id != CODEC_ID_H263P || !(s->flags & CODEC_FLAG_H263P_SLICE_STRUCT))){
490         av_log(avctx, AV_LOG_ERROR, "multi threaded encoding not supported by codec\n");
491         return -1;
492     }
493
494     if(s->avctx->thread_count < 1){
495         av_log(avctx, AV_LOG_ERROR, "automatic thread number detection not supported by codec, patch welcome\n");
496         return -1;
497     }
498     /* more than one thread forces rtp_mode on */
499     if(s->avctx->thread_count > 1)
500         s->rtp_mode= 1;
501
502     if(!avctx->time_base.den || !avctx->time_base.num){
503         av_log(avctx, AV_LOG_ERROR, "framerate not set\n");
504         return -1;
505     }
506     /* me_threshold and mb_threshold must both stay below (INT_MAX/2+128)>>8 */
507     i= (INT_MAX/2+128)>>8;
508     if(avctx->me_threshold >= i){
509         av_log(avctx, AV_LOG_ERROR, "me_threshold too large, max is %d\n", i - 1);
510         return -1;
511     }
512     if(avctx->mb_threshold >= i){
513         av_log(avctx, AV_LOG_ERROR, "mb_threshold too large, max is %d\n", i - 1);
514         return -1;
515     }
516
517     if(avctx->b_frame_strategy && (avctx->flags&CODEC_FLAG_PASS2)){
518         av_log(avctx, AV_LOG_INFO, "notice: b_frame_strategy only affects the first pass\n");
519         avctx->b_frame_strategy = 0;
520     }
521     /* reduce time_base to lowest terms (modifies avctx) */
522     i= av_gcd(avctx->time_base.den, avctx->time_base.num);
523     if(i > 1){
524         av_log(avctx, AV_LOG_INFO, "removing common factors from framerate\n");
525         avctx->time_base.den /= i;
526         avctx->time_base.num /= i;
527 //        return -1;
528     }
529     /* default quantizer rounding bias depends on the codec family; user supplied values override it just below */
530     if(s->mpeg_quant || s->codec_id==CODEC_ID_MPEG1VIDEO || s->codec_id==CODEC_ID_MPEG2VIDEO || s->codec_id==CODEC_ID_MJPEG){
531         s->intra_quant_bias= 3<<(QUANT_BIAS_SHIFT-3); //(a + x*3/8)/x
532         s->inter_quant_bias= 0;
533     }else{
534         s->intra_quant_bias=0;
535         s->inter_quant_bias=-(1<<(QUANT_BIAS_SHIFT-2)); //(a - x/4)/x
536     }
537
538     if(avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
539         s->intra_quant_bias= avctx->intra_quant_bias;
540     if(avctx->inter_quant_bias != FF_DEFAULT_QUANT_BIAS)
541         s->inter_quant_bias= avctx->inter_quant_bias;
542     /* the chroma shifts are used below for the MJPEG/LJPEG sampling factors */
543     avcodec_get_chroma_sub_sample(avctx->pix_fmt, &chroma_h_shift, &chroma_v_shift);
544
545     if(avctx->codec_id == CODEC_ID_MPEG4 && s->avctx->time_base.den > (1<<16)-1){
546         av_log(avctx, AV_LOG_ERROR, "timebase %d/%d not supported by MPEG 4 standard, "
547                "the maximum admitted value for the timebase denominator is %d\n",
548                s->avctx->time_base.num, s->avctx->time_base.den, (1<<16)-1);
549         return -1;
550     }
551     s->time_increment_bits = av_log2(s->avctx->time_base.den - 1) + 1;
552     /* per-codec setup: output format, delay / low_delay and codec specific feature switches */
553     switch(avctx->codec->id) {
554     case CODEC_ID_MPEG1VIDEO:
555         s->out_format = FMT_MPEG1;
556         s->low_delay= !!(s->flags & CODEC_FLAG_LOW_DELAY);
557         avctx->delay= s->low_delay ? 0 : (s->max_b_frames + 1);
558         break;
559     case CODEC_ID_MPEG2VIDEO:
560         s->out_format = FMT_MPEG1;
561         s->low_delay= !!(s->flags & CODEC_FLAG_LOW_DELAY);
562         avctx->delay= s->low_delay ? 0 : (s->max_b_frames + 1);
563         s->rtp_mode= 1;
564         break;
565     case CODEC_ID_LJPEG:
566     case CODEC_ID_MJPEG:
567         s->out_format = FMT_MJPEG;
568         s->intra_only = 1; /* force intra only for jpeg */
569         if(avctx->codec->id == CODEC_ID_LJPEG && avctx->pix_fmt == PIX_FMT_BGRA){
570             s->mjpeg_vsample[0] = s->mjpeg_hsample[0] =
571             s->mjpeg_vsample[1] = s->mjpeg_hsample[1] =
572             s->mjpeg_vsample[2] = s->mjpeg_hsample[2] = 1;
573         }else{
574             s->mjpeg_vsample[0] = 2;
575             s->mjpeg_vsample[1] = 2>>chroma_v_shift;
576             s->mjpeg_vsample[2] = 2>>chroma_v_shift;
577             s->mjpeg_hsample[0] = 2;
578             s->mjpeg_hsample[1] = 2>>chroma_h_shift;
579             s->mjpeg_hsample[2] = 2>>chroma_h_shift;
580         }
581         if (!(CONFIG_MJPEG_ENCODER || CONFIG_LJPEG_ENCODER)
582             || ff_mjpeg_encode_init(s) < 0)
583             return -1;
584         avctx->delay=0;
585         s->low_delay=1;
586         break;
587     case CODEC_ID_H261:
588         if (!CONFIG_H261_ENCODER)  return -1;
589         if (ff_h261_get_picture_format(s->width, s->height) < 0) {
590             av_log(avctx, AV_LOG_ERROR, "The specified picture size of %dx%d is not valid for the H.261 codec.\nValid sizes are 176x144, 352x288\n", s->width, s->height);
591             return -1;
592         }
593         s->out_format = FMT_H261;
594         avctx->delay=0;
595         s->low_delay=1;
596         break;
597     case CODEC_ID_H263:
598         if (!CONFIG_H263_ENCODER)  return -1;
599         if (ff_match_2uint16(h263_format, FF_ARRAY_ELEMS(h263_format), s->width, s->height) == 8) {
600             av_log(avctx, AV_LOG_ERROR, "The specified picture size of %dx%d is not valid for the H.263 codec.\nValid sizes are 128x96, 176x144, 352x288, 704x576, and 1408x1152. Try H.263+.\n", s->width, s->height);
601             return -1;
602         }
603         s->out_format = FMT_H263;
604         avctx->delay=0;
605         s->low_delay=1;
606         break;
607     case CODEC_ID_H263P:
608         s->out_format = FMT_H263;
609         s->h263_plus = 1;
610         /* Fx */
611 #if FF_API_MPEGVIDEO_GLOBAL_OPTS
612         if (avctx->flags & CODEC_FLAG_H263P_UMV)
613             s->umvplus = 1;
614         if (avctx->flags & CODEC_FLAG_H263P_AIV)
615             s->alt_inter_vlc = 1;
616         if (avctx->flags & CODEC_FLAG_H263P_SLICE_STRUCT)
617             s->h263_slice_structured = 1;
618 #endif
619         s->h263_aic= (avctx->flags & CODEC_FLAG_AC_PRED) ? 1:0;
620         s->modified_quant= s->h263_aic;
621         s->loop_filter= (avctx->flags & CODEC_FLAG_LOOP_FILTER) ? 1:0;
622         s->unrestricted_mv= s->obmc || s->loop_filter || s->umvplus;
623
624         /* /Fx */
625         /* These are just to be sure */
626         avctx->delay=0;
627         s->low_delay=1;
628         break;
629     case CODEC_ID_FLV1:
630         s->out_format = FMT_H263;
631         s->h263_flv = 2; /* format = 1; 11-bit codes */
632         s->unrestricted_mv = 1;
633         s->rtp_mode=0; /* don't allow GOB */
634         avctx->delay=0;
635         s->low_delay=1;
636         break;
637     case CODEC_ID_RV10:
638         s->out_format = FMT_H263;
639         avctx->delay=0;
640         s->low_delay=1;
641         break;
642     case CODEC_ID_RV20:
643         s->out_format = FMT_H263;
644         avctx->delay=0;
645         s->low_delay=1;
646         s->modified_quant=1;
647         s->h263_aic=1;
648         s->h263_plus=1;
649         s->loop_filter=1;
650         s->unrestricted_mv= 0;
651         break;
652     case CODEC_ID_MPEG4:
653         s->out_format = FMT_H263;
654         s->h263_pred = 1;
655         s->unrestricted_mv = 1;
656         s->low_delay= s->max_b_frames ? 0 : 1;
657         avctx->delay= s->low_delay ? 0 : (s->max_b_frames + 1);
658         break;
659     case CODEC_ID_MSMPEG4V2:
660         s->out_format = FMT_H263;
661         s->h263_pred = 1;
662         s->unrestricted_mv = 1;
663         s->msmpeg4_version= 2;
664         avctx->delay=0;
665         s->low_delay=1;
666         break;
667     case CODEC_ID_MSMPEG4V3:
668         s->out_format = FMT_H263;
669         s->h263_pred = 1;
670         s->unrestricted_mv = 1;
671         s->msmpeg4_version= 3;
672         s->flipflop_rounding=1;
673         avctx->delay=0;
674         s->low_delay=1;
675         break;
676     case CODEC_ID_WMV1:
677         s->out_format = FMT_H263;
678         s->h263_pred = 1;
679         s->unrestricted_mv = 1;
680         s->msmpeg4_version= 4;
681         s->flipflop_rounding=1;
682         avctx->delay=0;
683         s->low_delay=1;
684         break;
685     case CODEC_ID_WMV2:
686         s->out_format = FMT_H263;
687         s->h263_pred = 1;
688         s->unrestricted_mv = 1;
689         s->msmpeg4_version= 5;
690         s->flipflop_rounding=1;
691         avctx->delay=0;
692         s->low_delay=1;
693         break;
694     default:
695         return -1;
696     }
697
698     avctx->has_b_frames= !s->low_delay;
699
700     s->encoding = 1;
701     /* progressive unless one of the interlacing / alternate scan flags is set */
702     s->progressive_frame=
703     s->progressive_sequence= !(avctx->flags & (CODEC_FLAG_INTERLACED_DCT|CODEC_FLAG_INTERLACED_ME|CODEC_FLAG_ALT_SCAN));
704
705     /* init */
706     if (MPV_common_init(s) < 0)
707         return -1;
708     /* install the C fallbacks where none is set; with trellis, dct_quantize is replaced while fast_dct_quantize keeps the non-trellis function */
709     if(!s->dct_quantize)
710         s->dct_quantize = dct_quantize_c;
711     if(!s->denoise_dct)
712         s->denoise_dct = denoise_dct_c;
713     s->fast_dct_quantize = s->dct_quantize;
714     if(avctx->trellis)
715         s->dct_quantize = dct_quantize_trellis_c;
716
717     if((CONFIG_H263P_ENCODER || CONFIG_RV20_ENCODER) && s->modified_quant)
718         s->chroma_qscale_table= ff_h263_chroma_qscale_table;
719
720     s->quant_precision=5;
721
722     ff_set_cmp(&s->dsp, s->dsp.ildct_cmp, s->avctx->ildct_cmp);
723     ff_set_cmp(&s->dsp, s->dsp.frame_skip_cmp, s->avctx->frame_skip_cmp);
724     /* run the format specific encoder initializers that are compiled in */
725     if (CONFIG_H261_ENCODER && s->out_format == FMT_H261)
726         ff_h261_encode_init(s);
727     if (CONFIG_H263_ENCODER && s->out_format == FMT_H263)
728         h263_encode_init(s);
729     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
730         ff_msmpeg4_encode_init(s);
731     if ((CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
732         && s->out_format == FMT_MPEG1)
733         ff_mpeg1_encode_init(s);
734
735     /* init q matrix: codec dependent defaults, stored in idct_permutation order, then overridden by user supplied matrices */
736     for(i=0;i<64;i++) {
737         int j= s->dsp.idct_permutation[i];
738         if(CONFIG_MPEG4_ENCODER && s->codec_id==CODEC_ID_MPEG4 && s->mpeg_quant){
739             s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
740             s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
741         }else if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
742             s->intra_matrix[j] =
743             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
744         }else
745         { /* mpeg1/2 */
746             s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
747             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
748         }
749         if(s->avctx->intra_matrix)
750             s->intra_matrix[j] = s->avctx->intra_matrix[i];
751         if(s->avctx->inter_matrix)
752             s->inter_matrix[j] = s->avctx->inter_matrix[i];
753     }
754
755     /* precompute matrix */
756     /* for mjpeg, we do include qscale in the matrix */
757     if (s->out_format != FMT_MJPEG) {
758         ff_convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
759                        s->intra_matrix, s->intra_quant_bias, avctx->qmin, 31, 1);
760         ff_convert_matrix(&s->dsp, s->q_inter_matrix, s->q_inter_matrix16,
761                        s->inter_matrix, s->inter_quant_bias, avctx->qmin, 31, 0);
762     }
763     /* rate control is set up last; its failure aborts init */
764     if(ff_rate_control_init(s) < 0)
765         return -1;
766
767     return 0;
768 }
769
770 av_cold int MPV_encode_end(AVCodecContext *avctx)
771 {
772     MpegEncContext *s = avctx->priv_data;
773
774     ff_rate_control_uninit(s);
775
776     MPV_common_end(s);
777     if ((CONFIG_MJPEG_ENCODER || CONFIG_LJPEG_ENCODER) && s->out_format == FMT_MJPEG)
778         ff_mjpeg_encode_close(s);
779
780     av_freep(&avctx->extradata);
781
782     return 0;
783 }
784
/**
 * Sum of absolute differences between a 16x16 block and a constant value.
 *
 * @param src    top-left sample of the block
 * @param ref    value every sample is compared against
 * @param stride distance in bytes between two consecutive rows of src
 * @return sum over the 256 samples of |sample - ref|
 */
static int get_sae(uint8_t *src, int ref, int stride)
{
    int row, col;
    int sum = 0;

    for (row = 0; row < 16; row++) {
        const uint8_t *line = src + row * stride;

        for (col = 0; col < 16; col++) {
            const int diff = line[col] - ref;

            sum += diff >= 0 ? diff : -diff;
        }
    }

    return sum;
}
797
798 static int get_intra_count(MpegEncContext *s, uint8_t *src, uint8_t *ref, int stride){
799     int x, y, w, h;
800     int acc=0;
801
802     w= s->width &~15;
803     h= s->height&~15;
804
805     for(y=0; y<h; y+=16){
806         for(x=0; x<w; x+=16){
807             int offset= x + y*stride;
808             int sad = s->dsp.sad[0](NULL, src + offset, ref + offset, stride, 16);
809             int mean= (s->dsp.pix_sum(src + offset, stride) + 128)>>8;
810             int sae = get_sae(src + offset, mean, stride);
811
812             acc+= sae + 500 < sad;
813         }
814     }
815     return acc;
816 }
817
818
819 static int load_input_picture(MpegEncContext *s, AVFrame *pic_arg){
820     AVFrame *pic=NULL;
821     int64_t pts;
822     int i;
823     const int encoding_delay= s->max_b_frames;
824     int direct=1;
825
826     if(pic_arg){
827         pts= pic_arg->pts;
828         pic_arg->display_picture_number= s->input_picture_number++;
829
830         if(pts != AV_NOPTS_VALUE){
831             if(s->user_specified_pts != AV_NOPTS_VALUE){
832                 int64_t time= pts;
833                 int64_t last= s->user_specified_pts;
834
835                 if(time <= last){
836                     av_log(s->avctx, AV_LOG_ERROR, "Error, Invalid timestamp=%"PRId64", last=%"PRId64"\n", pts, s->user_specified_pts);
837                     return -1;
838                 }
839             }
840             s->user_specified_pts= pts;
841         }else{
842             if(s->user_specified_pts != AV_NOPTS_VALUE){
843                 s->user_specified_pts=
844                 pts= s->user_specified_pts + 1;
845                 av_log(s->avctx, AV_LOG_INFO, "Warning: AVFrame.pts=? trying to guess (%"PRId64")\n", pts);
846             }else{
847                 pts= pic_arg->display_picture_number;
848             }
849         }
850     }
851
852   if(pic_arg){
853     if(encoding_delay && !(s->flags&CODEC_FLAG_INPUT_PRESERVED)) direct=0;
854     if(pic_arg->linesize[0] != s->linesize) direct=0;
855     if(pic_arg->linesize[1] != s->uvlinesize) direct=0;
856     if(pic_arg->linesize[2] != s->uvlinesize) direct=0;
857
858 //    av_log(AV_LOG_DEBUG, "%d %d %d %d\n",pic_arg->linesize[0], pic_arg->linesize[1], s->linesize, s->uvlinesize);
859
860     if(direct){
861         i= ff_find_unused_picture(s, 1);
862
863         pic= (AVFrame*)&s->picture[i];
864         pic->reference= 3;
865
866         for(i=0; i<4; i++){
867             pic->data[i]= pic_arg->data[i];
868             pic->linesize[i]= pic_arg->linesize[i];
869         }
870         if(ff_alloc_picture(s, (Picture*)pic, 1) < 0){
871             return -1;
872         }
873     }else{
874         i= ff_find_unused_picture(s, 0);
875
876         pic= (AVFrame*)&s->picture[i];
877         pic->reference= 3;
878
879         if(ff_alloc_picture(s, (Picture*)pic, 0) < 0){
880             return -1;
881         }
882
883         if(   pic->data[0] + INPLACE_OFFSET == pic_arg->data[0]
884            && pic->data[1] + INPLACE_OFFSET == pic_arg->data[1]
885            && pic->data[2] + INPLACE_OFFSET == pic_arg->data[2]){
886        // empty
887         }else{
888             int h_chroma_shift, v_chroma_shift;
889             avcodec_get_chroma_sub_sample(s->avctx->pix_fmt, &h_chroma_shift, &v_chroma_shift);
890
891             for(i=0; i<3; i++){
892                 int src_stride= pic_arg->linesize[i];
893                 int dst_stride= i ? s->uvlinesize : s->linesize;
894                 int h_shift= i ? h_chroma_shift : 0;
895                 int v_shift= i ? v_chroma_shift : 0;
896                 int w= s->width >>h_shift;
897                 int h= s->height>>v_shift;
898                 uint8_t *src= pic_arg->data[i];
899                 uint8_t *dst= pic->data[i];
900
901                 if(!s->avctx->rc_buffer_size)
902                     dst +=INPLACE_OFFSET;
903
904                 if(src_stride==dst_stride)
905                     memcpy(dst, src, src_stride*h);
906                 else{
907                     while(h--){
908                         memcpy(dst, src, w);
909                         dst += dst_stride;
910                         src += src_stride;
911                     }
912                 }
913             }
914         }
915     }
916     copy_picture_attributes(s, pic, pic_arg);
917     pic->pts= pts; //we set this here to avoid modifiying pic_arg
918   }
919
920     /* shift buffer entries */
921     for(i=1; i<MAX_PICTURE_COUNT /*s->encoding_delay+1*/; i++)
922         s->input_picture[i-1]= s->input_picture[i];
923
924     s->input_picture[encoding_delay]= (Picture*)pic;
925
926     return 0;
927 }
928
929 static int skip_check(MpegEncContext *s, Picture *p, Picture *ref){
930     int x, y, plane;
931     int score=0;
932     int64_t score64=0;
933
934     for(plane=0; plane<3; plane++){
935         const int stride = p->f.linesize[plane];
936         const int bw= plane ? 1 : 2;
937         for(y=0; y<s->mb_height*bw; y++){
938             for(x=0; x<s->mb_width*bw; x++){
939                 int off = p->f.type == FF_BUFFER_TYPE_SHARED ? 0: 16;
940                 int v   = s->dsp.frame_skip_cmp[1](s, p->f.data[plane] + 8*(x + y*stride)+off, ref->f.data[plane] + 8*(x + y*stride), stride, 8);
941
942                 switch(s->avctx->frame_skip_exp){
943                     case 0: score= FFMAX(score, v); break;
944                     case 1: score+= FFABS(v);break;
945                     case 2: score+= v*v;break;
946                     case 3: score64+= FFABS(v*v*(int64_t)v);break;
947                     case 4: score64+= v*v*(int64_t)(v*v);break;
948                 }
949             }
950         }
951     }
952
953     if(score) score64= score;
954
955     if(score64 < s->avctx->frame_skip_threshold)
956         return 1;
957     if(score64 < ((s->avctx->frame_skip_factor * (int64_t)s->lambda)>>8))
958         return 1;
959     return 0;
960 }
961
962 static int estimate_best_b_count(MpegEncContext *s){
963     AVCodec *codec= avcodec_find_encoder(s->avctx->codec_id);
964     AVCodecContext *c = avcodec_alloc_context3(NULL);
965     AVFrame input[FF_MAX_B_FRAMES+2];
966     const int scale= s->avctx->brd_scale;
967     int i, j, out_size, p_lambda, b_lambda, lambda2;
968     int outbuf_size= s->width * s->height; //FIXME
969     uint8_t *outbuf= av_malloc(outbuf_size);
970     int64_t best_rd= INT64_MAX;
971     int best_b_count= -1;
972
973     assert(scale>=0 && scale <=3);
974
975 //    emms_c();
976     p_lambda= s->last_lambda_for[AV_PICTURE_TYPE_P]; //s->next_picture_ptr->quality;
977     b_lambda= s->last_lambda_for[AV_PICTURE_TYPE_B]; //p_lambda *FFABS(s->avctx->b_quant_factor) + s->avctx->b_quant_offset;
978     if(!b_lambda) b_lambda= p_lambda; //FIXME we should do this somewhere else
979     lambda2= (b_lambda*b_lambda + (1<<FF_LAMBDA_SHIFT)/2 ) >> FF_LAMBDA_SHIFT;
980
981     c->width = s->width >> scale;
982     c->height= s->height>> scale;
983     c->flags= CODEC_FLAG_QSCALE | CODEC_FLAG_PSNR | CODEC_FLAG_INPUT_PRESERVED /*| CODEC_FLAG_EMU_EDGE*/;
984     c->flags|= s->avctx->flags & CODEC_FLAG_QPEL;
985     c->mb_decision= s->avctx->mb_decision;
986     c->me_cmp= s->avctx->me_cmp;
987     c->mb_cmp= s->avctx->mb_cmp;
988     c->me_sub_cmp= s->avctx->me_sub_cmp;
989     c->pix_fmt = PIX_FMT_YUV420P;
990     c->time_base= s->avctx->time_base;
991     c->max_b_frames= s->max_b_frames;
992
993     if (avcodec_open2(c, codec, NULL) < 0)
994         return -1;
995
996     for(i=0; i<s->max_b_frames+2; i++){
997         int ysize= c->width*c->height;
998         int csize= (c->width/2)*(c->height/2);
999         Picture pre_input, *pre_input_ptr= i ? s->input_picture[i-1] : s->next_picture_ptr;
1000
1001         avcodec_get_frame_defaults(&input[i]);
1002         input[i].data[0]= av_malloc(ysize + 2*csize);
1003         input[i].data[1]= input[i].data[0] + ysize;
1004         input[i].data[2]= input[i].data[1] + csize;
1005         input[i].linesize[0]= c->width;
1006         input[i].linesize[1]=
1007         input[i].linesize[2]= c->width/2;
1008
1009         if(pre_input_ptr && (!i || s->input_picture[i-1])) {
1010             pre_input= *pre_input_ptr;
1011
1012             if (pre_input.f.type != FF_BUFFER_TYPE_SHARED && i) {
1013                 pre_input.f.data[0] += INPLACE_OFFSET;
1014                 pre_input.f.data[1] += INPLACE_OFFSET;
1015                 pre_input.f.data[2] += INPLACE_OFFSET;
1016             }
1017
1018             s->dsp.shrink[scale](input[i].data[0], input[i].linesize[0], pre_input.f.data[0], pre_input.f.linesize[0], c->width,      c->height);
1019             s->dsp.shrink[scale](input[i].data[1], input[i].linesize[1], pre_input.f.data[1], pre_input.f.linesize[1], c->width >> 1, c->height >> 1);
1020             s->dsp.shrink[scale](input[i].data[2], input[i].linesize[2], pre_input.f.data[2], pre_input.f.linesize[2], c->width >> 1, c->height >> 1);
1021         }
1022     }
1023
1024     for(j=0; j<s->max_b_frames+1; j++){
1025         int64_t rd=0;
1026
1027         if(!s->input_picture[j])
1028             break;
1029
1030         c->error[0]= c->error[1]= c->error[2]= 0;
1031
1032         input[0].pict_type= AV_PICTURE_TYPE_I;
1033         input[0].quality= 1 * FF_QP2LAMBDA;
1034         out_size = avcodec_encode_video(c, outbuf, outbuf_size, &input[0]);
1035 //        rd += (out_size * lambda2) >> FF_LAMBDA_SHIFT;
1036
1037         for(i=0; i<s->max_b_frames+1; i++){
1038             int is_p= i % (j+1) == j || i==s->max_b_frames;
1039
1040             input[i+1].pict_type= is_p ? AV_PICTURE_TYPE_P : AV_PICTURE_TYPE_B;
1041             input[i+1].quality= is_p ? p_lambda : b_lambda;
1042             out_size = avcodec_encode_video(c, outbuf, outbuf_size, &input[i+1]);
1043             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1044         }
1045
1046         /* get the delayed frames */
1047         while(out_size){
1048             out_size = avcodec_encode_video(c, outbuf, outbuf_size, NULL);
1049             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1050         }
1051
1052         rd += c->error[0] + c->error[1] + c->error[2];
1053
1054         if(rd < best_rd){
1055             best_rd= rd;
1056             best_b_count= j;
1057         }
1058     }
1059
1060     av_freep(&outbuf);
1061     avcodec_close(c);
1062     av_freep(&c);
1063
1064     for(i=0; i<s->max_b_frames+2; i++){
1065         av_freep(&input[i].data[0]);
1066     }
1067
1068     return best_b_count;
1069 }
1070
1071 static int select_input_picture(MpegEncContext *s){
1072     int i;
1073
1074     for(i=1; i<MAX_PICTURE_COUNT; i++)
1075         s->reordered_input_picture[i-1]= s->reordered_input_picture[i];
1076     s->reordered_input_picture[MAX_PICTURE_COUNT-1]= NULL;
1077
1078     /* set next picture type & ordering */
1079     if(s->reordered_input_picture[0]==NULL && s->input_picture[0]){
1080         if(/*s->picture_in_gop_number >= s->gop_size ||*/ s->next_picture_ptr==NULL || s->intra_only){
1081             s->reordered_input_picture[0]= s->input_picture[0];
1082             s->reordered_input_picture[0]->f.pict_type = AV_PICTURE_TYPE_I;
1083             s->reordered_input_picture[0]->f.coded_picture_number = s->coded_picture_number++;
1084         }else{
1085             int b_frames;
1086
1087             if(s->avctx->frame_skip_threshold || s->avctx->frame_skip_factor){
1088                 if(s->picture_in_gop_number < s->gop_size && skip_check(s, s->input_picture[0], s->next_picture_ptr)){
1089                 //FIXME check that te gop check above is +-1 correct
1090 //av_log(NULL, AV_LOG_DEBUG, "skip %p %"PRId64"\n", s->input_picture[0]->f.data[0], s->input_picture[0]->pts);
1091
1092                     if (s->input_picture[0]->f.type == FF_BUFFER_TYPE_SHARED) {
1093                         for(i=0; i<4; i++)
1094                             s->input_picture[0]->f.data[i] = NULL;
1095                         s->input_picture[0]->f.type = 0;
1096                     }else{
1097                         assert(   s->input_picture[0]->f.type == FF_BUFFER_TYPE_USER
1098                                || s->input_picture[0]->f.type == FF_BUFFER_TYPE_INTERNAL);
1099
1100                         s->avctx->release_buffer(s->avctx, (AVFrame*)s->input_picture[0]);
1101                     }
1102
1103                     emms_c();
1104                     ff_vbv_update(s, 0);
1105
1106                     goto no_output_pic;
1107                 }
1108             }
1109
1110             if(s->flags&CODEC_FLAG_PASS2){
1111                 for(i=0; i<s->max_b_frames+1; i++){
1112                     int pict_num = s->input_picture[0]->f.display_picture_number + i;
1113
1114                     if(pict_num >= s->rc_context.num_entries)
1115                         break;
1116                     if(!s->input_picture[i]){
1117                         s->rc_context.entry[pict_num-1].new_pict_type = AV_PICTURE_TYPE_P;
1118                         break;
1119                     }
1120
1121                     s->input_picture[i]->f.pict_type =
1122                         s->rc_context.entry[pict_num].new_pict_type;
1123                 }
1124             }
1125
1126             if(s->avctx->b_frame_strategy==0){
1127                 b_frames= s->max_b_frames;
1128                 while(b_frames && !s->input_picture[b_frames]) b_frames--;
1129             }else if(s->avctx->b_frame_strategy==1){
1130                 for(i=1; i<s->max_b_frames+1; i++){
1131                     if(s->input_picture[i] && s->input_picture[i]->b_frame_score==0){
1132                         s->input_picture[i]->b_frame_score=
1133                             get_intra_count(s, s->input_picture[i  ]->f.data[0],
1134                                                s->input_picture[i-1]->f.data[0], s->linesize) + 1;
1135                     }
1136                 }
1137                 for(i=0; i<s->max_b_frames+1; i++){
1138                     if(s->input_picture[i]==NULL || s->input_picture[i]->b_frame_score - 1 > s->mb_num/s->avctx->b_sensitivity) break;
1139                 }
1140
1141                 b_frames= FFMAX(0, i-1);
1142
1143                 /* reset scores */
1144                 for(i=0; i<b_frames+1; i++){
1145                     s->input_picture[i]->b_frame_score=0;
1146                 }
1147             }else if(s->avctx->b_frame_strategy==2){
1148                 b_frames= estimate_best_b_count(s);
1149             }else{
1150                 av_log(s->avctx, AV_LOG_ERROR, "illegal b frame strategy\n");
1151                 b_frames=0;
1152             }
1153
1154             emms_c();
1155 //static int b_count=0;
1156 //b_count+= b_frames;
1157 //av_log(s->avctx, AV_LOG_DEBUG, "b_frames: %d\n", b_count);
1158
1159             for(i= b_frames - 1; i>=0; i--){
1160                 int type = s->input_picture[i]->f.pict_type;
1161                 if(type && type != AV_PICTURE_TYPE_B)
1162                     b_frames= i;
1163             }
1164             if (s->input_picture[b_frames]->f.pict_type == AV_PICTURE_TYPE_B && b_frames == s->max_b_frames){
1165                 av_log(s->avctx, AV_LOG_ERROR, "warning, too many b frames in a row\n");
1166             }
1167
1168             if(s->picture_in_gop_number + b_frames >= s->gop_size){
1169               if((s->flags2 & CODEC_FLAG2_STRICT_GOP) && s->gop_size > s->picture_in_gop_number){
1170                     b_frames= s->gop_size - s->picture_in_gop_number - 1;
1171               }else{
1172                 if(s->flags & CODEC_FLAG_CLOSED_GOP)
1173                     b_frames=0;
1174                 s->input_picture[b_frames]->f.pict_type = AV_PICTURE_TYPE_I;
1175               }
1176             }
1177
1178             if(   (s->flags & CODEC_FLAG_CLOSED_GOP)
1179                && b_frames
1180                && s->input_picture[b_frames]->f.pict_type== AV_PICTURE_TYPE_I)
1181                 b_frames--;
1182
1183             s->reordered_input_picture[0]= s->input_picture[b_frames];
1184             if (s->reordered_input_picture[0]->f.pict_type != AV_PICTURE_TYPE_I)
1185                 s->reordered_input_picture[0]->f.pict_type = AV_PICTURE_TYPE_P;
1186             s->reordered_input_picture[0]->f.coded_picture_number = s->coded_picture_number++;
1187             for(i=0; i<b_frames; i++){
1188                 s->reordered_input_picture[i + 1] = s->input_picture[i];
1189                 s->reordered_input_picture[i + 1]->f.pict_type = AV_PICTURE_TYPE_B;
1190                 s->reordered_input_picture[i + 1]->f.coded_picture_number = s->coded_picture_number++;
1191             }
1192         }
1193     }
1194 no_output_pic:
1195     if(s->reordered_input_picture[0]){
1196         s->reordered_input_picture[0]->f.reference = s->reordered_input_picture[0]->f.pict_type!=AV_PICTURE_TYPE_B ? 3 : 0;
1197
1198         ff_copy_picture(&s->new_picture, s->reordered_input_picture[0]);
1199
1200         if (s->reordered_input_picture[0]->f.type == FF_BUFFER_TYPE_SHARED || s->avctx->rc_buffer_size) {
1201             // input is a shared pix, so we can't modifiy it -> alloc a new one & ensure that the shared one is reuseable
1202
1203             int i= ff_find_unused_picture(s, 0);
1204             Picture *pic= &s->picture[i];
1205
1206             pic->f.reference = s->reordered_input_picture[0]->f.reference;
1207             if(ff_alloc_picture(s, pic, 0) < 0){
1208                 return -1;
1209             }
1210
1211             /* mark us unused / free shared pic */
1212             if (s->reordered_input_picture[0]->f.type == FF_BUFFER_TYPE_INTERNAL)
1213                 s->avctx->release_buffer(s->avctx, (AVFrame*)s->reordered_input_picture[0]);
1214             for(i=0; i<4; i++)
1215                 s->reordered_input_picture[0]->f.data[i] = NULL;
1216             s->reordered_input_picture[0]->f.type = 0;
1217
1218             copy_picture_attributes(s, (AVFrame*)pic, (AVFrame*)s->reordered_input_picture[0]);
1219
1220             s->current_picture_ptr= pic;
1221         }else{
1222             // input is not a shared pix -> reuse buffer for current_pix
1223
1224             assert(   s->reordered_input_picture[0]->f.type == FF_BUFFER_TYPE_USER
1225                    || s->reordered_input_picture[0]->f.type == FF_BUFFER_TYPE_INTERNAL);
1226
1227             s->current_picture_ptr= s->reordered_input_picture[0];
1228             for(i=0; i<4; i++){
1229                 s->new_picture.f.data[i] += INPLACE_OFFSET;
1230             }
1231         }
1232         ff_copy_picture(&s->current_picture, s->current_picture_ptr);
1233
1234         s->picture_number = s->new_picture.f.display_picture_number;
1235 //printf("dpn:%d\n", s->picture_number);
1236     }else{
1237        memset(&s->new_picture, 0, sizeof(Picture));
1238     }
1239     return 0;
1240 }
1241
1242 int MPV_encode_picture(AVCodecContext *avctx,
1243                        unsigned char *buf, int buf_size, void *data)
1244 {
1245     MpegEncContext *s = avctx->priv_data;
1246     AVFrame *pic_arg = data;
1247     int i, stuffing_count, context_count = avctx->thread_count;
1248
1249     for(i=0; i<context_count; i++){
1250         int start_y= s->thread_context[i]->start_mb_y;
1251         int   end_y= s->thread_context[i]->  end_mb_y;
1252         int h= s->mb_height;
1253         uint8_t *start= buf + (size_t)(((int64_t) buf_size)*start_y/h);
1254         uint8_t *end  = buf + (size_t)(((int64_t) buf_size)*  end_y/h);
1255
1256         init_put_bits(&s->thread_context[i]->pb, start, end - start);
1257     }
1258
1259     s->picture_in_gop_number++;
1260
1261     if(load_input_picture(s, pic_arg) < 0)
1262         return -1;
1263
1264     if(select_input_picture(s) < 0){
1265         return -1;
1266     }
1267
1268     /* output? */
1269     if (s->new_picture.f.data[0]) {
1270         s->pict_type = s->new_picture.f.pict_type;
1271 //emms_c();
1272 //printf("qs:%f %f %d\n", s->new_picture.quality, s->current_picture.quality, s->qscale);
1273         MPV_frame_start(s, avctx);
1274 vbv_retry:
1275         if (encode_picture(s, s->picture_number) < 0)
1276             return -1;
1277
1278         avctx->header_bits = s->header_bits;
1279         avctx->mv_bits     = s->mv_bits;
1280         avctx->misc_bits   = s->misc_bits;
1281         avctx->i_tex_bits  = s->i_tex_bits;
1282         avctx->p_tex_bits  = s->p_tex_bits;
1283         avctx->i_count     = s->i_count;
1284         avctx->p_count     = s->mb_num - s->i_count - s->skip_count; //FIXME f/b_count in avctx
1285         avctx->skip_count  = s->skip_count;
1286
1287         MPV_frame_end(s);
1288
1289         if (CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG)
1290             ff_mjpeg_encode_picture_trailer(s);
1291
1292         if(avctx->rc_buffer_size){
1293             RateControlContext *rcc= &s->rc_context;
1294             int max_size= rcc->buffer_index * avctx->rc_max_available_vbv_use;
1295
1296             if(put_bits_count(&s->pb) > max_size && s->lambda < s->avctx->lmax){
1297                 s->next_lambda= FFMAX(s->lambda+1, s->lambda*(s->qscale+1) / s->qscale);
1298                 if(s->adaptive_quant){
1299                     int i;
1300                     for(i=0; i<s->mb_height*s->mb_stride; i++)
1301                         s->lambda_table[i]= FFMAX(s->lambda_table[i]+1, s->lambda_table[i]*(s->qscale+1) / s->qscale);
1302                 }
1303                 s->mb_skipped = 0;        //done in MPV_frame_start()
1304                 if(s->pict_type==AV_PICTURE_TYPE_P){ //done in encode_picture() so we must undo it
1305                     if(s->flipflop_rounding || s->codec_id == CODEC_ID_H263P || s->codec_id == CODEC_ID_MPEG4)
1306                         s->no_rounding ^= 1;
1307                 }
1308                 if(s->pict_type!=AV_PICTURE_TYPE_B){
1309                     s->time_base= s->last_time_base;
1310                     s->last_non_b_time= s->time - s->pp_time;
1311                 }
1312 //                av_log(NULL, AV_LOG_ERROR, "R:%d ", s->next_lambda);
1313                 for(i=0; i<context_count; i++){
1314                     PutBitContext *pb= &s->thread_context[i]->pb;
1315                     init_put_bits(pb, pb->buf, pb->buf_end - pb->buf);
1316                 }
1317                 goto vbv_retry;
1318             }
1319
1320             assert(s->avctx->rc_max_rate);
1321         }
1322
1323         if(s->flags&CODEC_FLAG_PASS1)
1324             ff_write_pass1_stats(s);
1325
1326         for(i=0; i<4; i++){
1327             s->current_picture_ptr->f.error[i]  = s->current_picture.f.error[i];
1328             avctx->error[i]                        += s->current_picture_ptr->f.error[i];
1329         }
1330
1331         if(s->flags&CODEC_FLAG_PASS1)
1332             assert(avctx->header_bits + avctx->mv_bits + avctx->misc_bits + avctx->i_tex_bits + avctx->p_tex_bits == put_bits_count(&s->pb));
1333         flush_put_bits(&s->pb);
1334         s->frame_bits  = put_bits_count(&s->pb);
1335
1336         stuffing_count= ff_vbv_update(s, s->frame_bits);
1337         if(stuffing_count){
1338             if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < stuffing_count + 50){
1339                 av_log(s->avctx, AV_LOG_ERROR, "stuffing too large\n");
1340                 return -1;
1341             }
1342
1343             switch(s->codec_id){
1344             case CODEC_ID_MPEG1VIDEO:
1345             case CODEC_ID_MPEG2VIDEO:
1346                 while(stuffing_count--){
1347                     put_bits(&s->pb, 8, 0);
1348                 }
1349             break;
1350             case CODEC_ID_MPEG4:
1351                 put_bits(&s->pb, 16, 0);
1352                 put_bits(&s->pb, 16, 0x1C3);
1353                 stuffing_count -= 4;
1354                 while(stuffing_count--){
1355                     put_bits(&s->pb, 8, 0xFF);
1356                 }
1357             break;
1358             default:
1359                 av_log(s->avctx, AV_LOG_ERROR, "vbv buffer overflow\n");
1360             }
1361             flush_put_bits(&s->pb);
1362             s->frame_bits  = put_bits_count(&s->pb);
1363         }
1364
1365         /* update mpeg1/2 vbv_delay for CBR */
1366         if(s->avctx->rc_max_rate && s->avctx->rc_min_rate == s->avctx->rc_max_rate && s->out_format == FMT_MPEG1
1367            && 90000LL * (avctx->rc_buffer_size-1) <= s->avctx->rc_max_rate*0xFFFFLL){
1368             int vbv_delay, min_delay;
1369             double inbits = s->avctx->rc_max_rate*av_q2d(s->avctx->time_base);
1370             int    minbits= s->frame_bits - 8*(s->vbv_delay_ptr - s->pb.buf - 1);
1371             double bits   = s->rc_context.buffer_index + minbits - inbits;
1372
1373             if(bits<0)
1374                 av_log(s->avctx, AV_LOG_ERROR, "Internal error, negative bits\n");
1375
1376             assert(s->repeat_first_field==0);
1377
1378             vbv_delay=     bits * 90000                               / s->avctx->rc_max_rate;
1379             min_delay= (minbits * 90000LL + s->avctx->rc_max_rate - 1)/ s->avctx->rc_max_rate;
1380
1381             vbv_delay= FFMAX(vbv_delay, min_delay);
1382
1383             assert(vbv_delay < 0xFFFF);
1384
1385             s->vbv_delay_ptr[0] &= 0xF8;
1386             s->vbv_delay_ptr[0] |= vbv_delay>>13;
1387             s->vbv_delay_ptr[1]  = vbv_delay>>5;
1388             s->vbv_delay_ptr[2] &= 0x07;
1389             s->vbv_delay_ptr[2] |= vbv_delay<<3;
1390             avctx->vbv_delay = vbv_delay*300;
1391         }
1392         s->total_bits += s->frame_bits;
1393         avctx->frame_bits  = s->frame_bits;
1394     }else{
1395         assert((put_bits_ptr(&s->pb) == s->pb.buf));
1396         s->frame_bits=0;
1397     }
1398     assert((s->frame_bits&7)==0);
1399
1400     return s->frame_bits/8;
1401 }
1402
1403 static inline void dct_single_coeff_elimination(MpegEncContext *s, int n, int threshold)
1404 {
1405     static const char tab[64]=
1406         {3,2,2,1,1,1,1,1,
1407          1,1,1,1,1,1,1,1,
1408          1,1,1,1,1,1,1,1,
1409          0,0,0,0,0,0,0,0,
1410          0,0,0,0,0,0,0,0,
1411          0,0,0,0,0,0,0,0,
1412          0,0,0,0,0,0,0,0,
1413          0,0,0,0,0,0,0,0};
1414     int score=0;
1415     int run=0;
1416     int i;
1417     DCTELEM *block= s->block[n];
1418     const int last_index= s->block_last_index[n];
1419     int skip_dc;
1420
1421     if(threshold<0){
1422         skip_dc=0;
1423         threshold= -threshold;
1424     }else
1425         skip_dc=1;
1426
1427     /* Are all we could set to zero already zero? */
1428     if(last_index<=skip_dc - 1) return;
1429
1430     for(i=0; i<=last_index; i++){
1431         const int j = s->intra_scantable.permutated[i];
1432         const int level = FFABS(block[j]);
1433         if(level==1){
1434             if(skip_dc && i==0) continue;
1435             score+= tab[run];
1436             run=0;
1437         }else if(level>1){
1438             return;
1439         }else{
1440             run++;
1441         }
1442     }
1443     if(score >= threshold) return;
1444     for(i=skip_dc; i<=last_index; i++){
1445         const int j = s->intra_scantable.permutated[i];
1446         block[j]=0;
1447     }
1448     if(block[0]) s->block_last_index[n]= 0;
1449     else         s->block_last_index[n]= -1;
1450 }
1451
1452 static inline void clip_coeffs(MpegEncContext *s, DCTELEM *block, int last_index)
1453 {
1454     int i;
1455     const int maxlevel= s->max_qcoeff;
1456     const int minlevel= s->min_qcoeff;
1457     int overflow=0;
1458
1459     if(s->mb_intra){
1460         i=1; //skip clipping of intra dc
1461     }else
1462         i=0;
1463
1464     for(;i<=last_index; i++){
1465         const int j= s->intra_scantable.permutated[i];
1466         int level = block[j];
1467
1468         if     (level>maxlevel){
1469             level=maxlevel;
1470             overflow++;
1471         }else if(level<minlevel){
1472             level=minlevel;
1473             overflow++;
1474         }
1475
1476         block[j]= level;
1477     }
1478
1479     if(overflow && s->avctx->mb_decision == FF_MB_DECISION_SIMPLE)
1480         av_log(s->avctx, AV_LOG_INFO, "warning, clipping %d dct coefficients to %d..%d\n", overflow, minlevel, maxlevel);
1481 }
1482
/**
 * Compute a per-sample weight for an 8x8 block from its local activity.
 *
 * For every sample the neighbourhood of up to 3x3 samples around it (cut off
 * at the block border) is examined and
 *     36 * ff_sqrt(count*sum(v^2) - sum(v)^2) / count
 * is stored, where count is the number of samples in that neighbourhood.
 *
 * @param weight receives the 64 weights, row by row
 * @param ptr    top-left sample of the block
 * @param stride distance in bytes between two rows of ptr
 */
static void get_visual_weight(int16_t *weight, uint8_t *ptr, int stride)
{
    int row, col;
//FIXME optimize
    for (row = 0; row < 8; row++) {
        const int top    = FFMAX(row - 1, 0);
        const int bottom = FFMIN(8, row + 2);

        for (col = 0; col < 8; col++) {
            const int left  = FFMAX(col - 1, 0);
            const int right = FFMIN(8, col + 2);
            const int count = (bottom - top) * (right - left);
            int total = 0;
            int total_sq = 0;
            int ny, nx;

            for (ny = top; ny < bottom; ny++) {
                for (nx = left; nx < right; nx++) {
                    const int v = ptr[nx + ny * stride];

                    total    += v;
                    total_sq += v * v;
                }
            }
            weight[col + 8 * row] = (36 * ff_sqrt(count * total_sq - total * total)) / count;
        }
    }
}
1505
1506 static av_always_inline void encode_mb_internal(MpegEncContext *s, int motion_x, int motion_y, int mb_block_height, int mb_block_count)
1507 {
1508     int16_t weight[8][64];
1509     DCTELEM orig[8][64];
1510     const int mb_x= s->mb_x;
1511     const int mb_y= s->mb_y;
1512     int i;
1513     int skip_dct[8];
1514     int dct_offset   = s->linesize*8; //default for progressive frames
1515     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
1516     int wrap_y, wrap_c;
1517
1518     for(i=0; i<mb_block_count; i++) skip_dct[i]=s->skipdct;
1519
1520     if(s->adaptive_quant){
1521         const int last_qp= s->qscale;
1522         const int mb_xy= mb_x + mb_y*s->mb_stride;
1523
1524         s->lambda= s->lambda_table[mb_xy];
1525         update_qscale(s);
1526
1527         if(!(s->flags&CODEC_FLAG_QP_RD)){
1528             s->qscale = s->current_picture_ptr->f.qscale_table[mb_xy];
1529             s->dquant= s->qscale - last_qp;
1530
1531             if(s->out_format==FMT_H263){
1532                 s->dquant= av_clip(s->dquant, -2, 2);
1533
1534                 if(s->codec_id==CODEC_ID_MPEG4){
1535                     if(!s->mb_intra){
1536                         if(s->pict_type == AV_PICTURE_TYPE_B){
1537                             if(s->dquant&1 || s->mv_dir&MV_DIRECT)
1538                                 s->dquant= 0;
1539                         }
1540                         if(s->mv_type==MV_TYPE_8X8)
1541                             s->dquant=0;
1542                     }
1543                 }
1544             }
1545         }
1546         ff_set_qscale(s, last_qp + s->dquant);
1547     }else if(s->flags&CODEC_FLAG_QP_RD)
1548         ff_set_qscale(s, s->qscale + s->dquant);
1549
1550     wrap_y = s->linesize;
1551     wrap_c = s->uvlinesize;
1552     ptr_y  = s->new_picture.f.data[0] + (mb_y * 16 * wrap_y) + mb_x * 16;
1553     ptr_cb = s->new_picture.f.data[1] + (mb_y * mb_block_height * wrap_c) + mb_x * 8;
1554     ptr_cr = s->new_picture.f.data[2] + (mb_y * mb_block_height * wrap_c) + mb_x * 8;
1555
1556     if(mb_x*16+16 > s->width || mb_y*16+16 > s->height){
1557         uint8_t *ebuf= s->edge_emu_buffer + 32;
1558         s->dsp.emulated_edge_mc(ebuf            , ptr_y , wrap_y,16,16,mb_x*16,mb_y*16, s->width   , s->height);
1559         ptr_y= ebuf;
1560         s->dsp.emulated_edge_mc(ebuf+18*wrap_y  , ptr_cb, wrap_c, 8, mb_block_height, mb_x*8, mb_y*8, s->width>>1, s->height>>1);
1561         ptr_cb= ebuf+18*wrap_y;
1562         s->dsp.emulated_edge_mc(ebuf+18*wrap_y+8, ptr_cr, wrap_c, 8, mb_block_height, mb_x*8, mb_y*8, s->width>>1, s->height>>1);
1563         ptr_cr= ebuf+18*wrap_y+8;
1564     }
1565
1566     if (s->mb_intra) {
1567         if(s->flags&CODEC_FLAG_INTERLACED_DCT){
1568             int progressive_score, interlaced_score;
1569
1570             s->interlaced_dct=0;
1571             progressive_score= s->dsp.ildct_cmp[4](s, ptr_y           , NULL, wrap_y, 8)
1572                               +s->dsp.ildct_cmp[4](s, ptr_y + wrap_y*8, NULL, wrap_y, 8) - 400;
1573
1574             if(progressive_score > 0){
1575                 interlaced_score = s->dsp.ildct_cmp[4](s, ptr_y           , NULL, wrap_y*2, 8)
1576                                   +s->dsp.ildct_cmp[4](s, ptr_y + wrap_y  , NULL, wrap_y*2, 8);
1577                 if(progressive_score > interlaced_score){
1578                     s->interlaced_dct=1;
1579
1580                     dct_offset= wrap_y;
1581                     wrap_y<<=1;
1582                     if (s->chroma_format == CHROMA_422)
1583                         wrap_c<<=1;
1584                 }
1585             }
1586         }
1587
1588         s->dsp.get_pixels(s->block[0], ptr_y                 , wrap_y);
1589         s->dsp.get_pixels(s->block[1], ptr_y              + 8, wrap_y);
1590         s->dsp.get_pixels(s->block[2], ptr_y + dct_offset    , wrap_y);
1591         s->dsp.get_pixels(s->block[3], ptr_y + dct_offset + 8, wrap_y);
1592
1593         if(s->flags&CODEC_FLAG_GRAY){
1594             skip_dct[4]= 1;
1595             skip_dct[5]= 1;
1596         }else{
1597             s->dsp.get_pixels(s->block[4], ptr_cb, wrap_c);
1598             s->dsp.get_pixels(s->block[5], ptr_cr, wrap_c);
1599             if(!s->chroma_y_shift){ /* 422 */
1600                 s->dsp.get_pixels(s->block[6], ptr_cb + (dct_offset>>1), wrap_c);
1601                 s->dsp.get_pixels(s->block[7], ptr_cr + (dct_offset>>1), wrap_c);
1602             }
1603         }
1604     }else{
1605         op_pixels_func (*op_pix)[4];
1606         qpel_mc_func (*op_qpix)[16];
1607         uint8_t *dest_y, *dest_cb, *dest_cr;
1608
1609         dest_y  = s->dest[0];
1610         dest_cb = s->dest[1];
1611         dest_cr = s->dest[2];
1612
1613         if ((!s->no_rounding) || s->pict_type==AV_PICTURE_TYPE_B){
1614             op_pix = s->dsp.put_pixels_tab;
1615             op_qpix= s->dsp.put_qpel_pixels_tab;
1616         }else{
1617             op_pix = s->dsp.put_no_rnd_pixels_tab;
1618             op_qpix= s->dsp.put_no_rnd_qpel_pixels_tab;
1619         }
1620
1621         if (s->mv_dir & MV_DIR_FORWARD) {
1622             MPV_motion(s, dest_y, dest_cb, dest_cr, 0, s->last_picture.f.data, op_pix, op_qpix);
1623             op_pix = s->dsp.avg_pixels_tab;
1624             op_qpix= s->dsp.avg_qpel_pixels_tab;
1625         }
1626         if (s->mv_dir & MV_DIR_BACKWARD) {
1627             MPV_motion(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.f.data, op_pix, op_qpix);
1628         }
1629
1630         if(s->flags&CODEC_FLAG_INTERLACED_DCT){
1631             int progressive_score, interlaced_score;
1632
1633             s->interlaced_dct=0;
1634             progressive_score= s->dsp.ildct_cmp[0](s, dest_y           , ptr_y           , wrap_y, 8)
1635                               +s->dsp.ildct_cmp[0](s, dest_y + wrap_y*8, ptr_y + wrap_y*8, wrap_y, 8) - 400;
1636
1637             if(s->avctx->ildct_cmp == FF_CMP_VSSE) progressive_score -= 400;
1638
1639             if(progressive_score>0){
1640                 interlaced_score = s->dsp.ildct_cmp[0](s, dest_y           , ptr_y           , wrap_y*2, 8)
1641                                   +s->dsp.ildct_cmp[0](s, dest_y + wrap_y  , ptr_y + wrap_y  , wrap_y*2, 8);
1642
1643                 if(progressive_score > interlaced_score){
1644                     s->interlaced_dct=1;
1645
1646                     dct_offset= wrap_y;
1647                     wrap_y<<=1;
1648                     if (s->chroma_format == CHROMA_422)
1649                         wrap_c<<=1;
1650                 }
1651             }
1652         }
1653
1654         s->dsp.diff_pixels(s->block[0], ptr_y                 , dest_y                 , wrap_y);
1655         s->dsp.diff_pixels(s->block[1], ptr_y              + 8, dest_y              + 8, wrap_y);
1656         s->dsp.diff_pixels(s->block[2], ptr_y + dct_offset    , dest_y + dct_offset    , wrap_y);
1657         s->dsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8, dest_y + dct_offset + 8, wrap_y);
1658
1659         if(s->flags&CODEC_FLAG_GRAY){
1660             skip_dct[4]= 1;
1661             skip_dct[5]= 1;
1662         }else{
1663             s->dsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
1664             s->dsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
1665             if(!s->chroma_y_shift){ /* 422 */
1666                 s->dsp.diff_pixels(s->block[6], ptr_cb + (dct_offset>>1), dest_cb + (dct_offset>>1), wrap_c);
1667                 s->dsp.diff_pixels(s->block[7], ptr_cr + (dct_offset>>1), dest_cr + (dct_offset>>1), wrap_c);
1668             }
1669         }
1670         /* pre quantization */
1671         if(s->current_picture.mc_mb_var[s->mb_stride*mb_y+ mb_x]<2*s->qscale*s->qscale){
1672             //FIXME optimize
1673             if(s->dsp.sad[1](NULL, ptr_y               , dest_y               , wrap_y, 8) < 20*s->qscale) skip_dct[0]= 1;
1674             if(s->dsp.sad[1](NULL, ptr_y            + 8, dest_y            + 8, wrap_y, 8) < 20*s->qscale) skip_dct[1]= 1;
1675             if(s->dsp.sad[1](NULL, ptr_y +dct_offset   , dest_y +dct_offset   , wrap_y, 8) < 20*s->qscale) skip_dct[2]= 1;
1676             if(s->dsp.sad[1](NULL, ptr_y +dct_offset+ 8, dest_y +dct_offset+ 8, wrap_y, 8) < 20*s->qscale) skip_dct[3]= 1;
1677             if(s->dsp.sad[1](NULL, ptr_cb              , dest_cb              , wrap_c, 8) < 20*s->qscale) skip_dct[4]= 1;
1678             if(s->dsp.sad[1](NULL, ptr_cr              , dest_cr              , wrap_c, 8) < 20*s->qscale) skip_dct[5]= 1;
1679             if(!s->chroma_y_shift){ /* 422 */
1680                 if(s->dsp.sad[1](NULL, ptr_cb +(dct_offset>>1), dest_cb +(dct_offset>>1), wrap_c, 8) < 20*s->qscale) skip_dct[6]= 1;
1681                 if(s->dsp.sad[1](NULL, ptr_cr +(dct_offset>>1), dest_cr +(dct_offset>>1), wrap_c, 8) < 20*s->qscale) skip_dct[7]= 1;
1682             }
1683         }
1684     }
1685
1686     if(s->avctx->quantizer_noise_shaping){
1687         if(!skip_dct[0]) get_visual_weight(weight[0], ptr_y                 , wrap_y);
1688         if(!skip_dct[1]) get_visual_weight(weight[1], ptr_y              + 8, wrap_y);
1689         if(!skip_dct[2]) get_visual_weight(weight[2], ptr_y + dct_offset    , wrap_y);
1690         if(!skip_dct[3]) get_visual_weight(weight[3], ptr_y + dct_offset + 8, wrap_y);
1691         if(!skip_dct[4]) get_visual_weight(weight[4], ptr_cb                , wrap_c);
1692         if(!skip_dct[5]) get_visual_weight(weight[5], ptr_cr                , wrap_c);
1693         if(!s->chroma_y_shift){ /* 422 */
1694             if(!skip_dct[6]) get_visual_weight(weight[6], ptr_cb + (dct_offset>>1), wrap_c);
1695             if(!skip_dct[7]) get_visual_weight(weight[7], ptr_cr + (dct_offset>>1), wrap_c);
1696         }
1697         memcpy(orig[0], s->block[0], sizeof(DCTELEM)*64*mb_block_count);
1698     }
1699
1700     /* DCT & quantize */
1701     assert(s->out_format!=FMT_MJPEG || s->qscale==8);
1702     {
1703         for(i=0;i<mb_block_count;i++) {
1704             if(!skip_dct[i]){
1705                 int overflow;
1706                 s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow);
1707             // FIXME we could decide to change to quantizer instead of clipping
1708             // JS: I don't think that would be a good idea it could lower quality instead
1709             //     of improve it. Just INTRADC clipping deserves changes in quantizer
1710                 if (overflow) clip_coeffs(s, s->block[i], s->block_last_index[i]);
1711             }else
1712                 s->block_last_index[i]= -1;
1713         }
1714         if(s->avctx->quantizer_noise_shaping){
1715             for(i=0;i<mb_block_count;i++) {
1716                 if(!skip_dct[i]){
1717                     s->block_last_index[i] = dct_quantize_refine(s, s->block[i], weight[i], orig[i], i, s->qscale);
1718                 }
1719             }
1720         }
1721
1722         if(s->luma_elim_threshold && !s->mb_intra)
1723             for(i=0; i<4; i++)
1724                 dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
1725         if(s->chroma_elim_threshold && !s->mb_intra)
1726             for(i=4; i<mb_block_count; i++)
1727                 dct_single_coeff_elimination(s, i, s->chroma_elim_threshold);
1728
1729         if(s->flags & CODEC_FLAG_CBP_RD){
1730             for(i=0;i<mb_block_count;i++) {
1731                 if(s->block_last_index[i] == -1)
1732                     s->coded_score[i]= INT_MAX/256;
1733             }
1734         }
1735     }
1736
1737     if((s->flags&CODEC_FLAG_GRAY) && s->mb_intra){
1738         s->block_last_index[4]=
1739         s->block_last_index[5]= 0;
1740         s->block[4][0]=
1741         s->block[5][0]= (1024 + s->c_dc_scale/2)/ s->c_dc_scale;
1742     }
1743
1744     //non c quantize code returns incorrect block_last_index FIXME
1745     if(s->alternate_scan && s->dct_quantize != dct_quantize_c){
1746         for(i=0; i<mb_block_count; i++){
1747             int j;
1748             if(s->block_last_index[i]>0){
1749                 for(j=63; j>0; j--){
1750                     if(s->block[i][ s->intra_scantable.permutated[j] ]) break;
1751                 }
1752                 s->block_last_index[i]= j;
1753             }
1754         }
1755     }
1756
1757     /* huffman encode */
1758     switch(s->codec_id){ //FIXME funct ptr could be slightly faster
1759     case CODEC_ID_MPEG1VIDEO:
1760     case CODEC_ID_MPEG2VIDEO:
1761         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
1762             mpeg1_encode_mb(s, s->block, motion_x, motion_y);
1763         break;
1764     case CODEC_ID_MPEG4:
1765         if (CONFIG_MPEG4_ENCODER)
1766             mpeg4_encode_mb(s, s->block, motion_x, motion_y);
1767         break;
1768     case CODEC_ID_MSMPEG4V2:
1769     case CODEC_ID_MSMPEG4V3:
1770     case CODEC_ID_WMV1:
1771         if (CONFIG_MSMPEG4_ENCODER)
1772             msmpeg4_encode_mb(s, s->block, motion_x, motion_y);
1773         break;
1774     case CODEC_ID_WMV2:
1775         if (CONFIG_WMV2_ENCODER)
1776             ff_wmv2_encode_mb(s, s->block, motion_x, motion_y);
1777         break;
1778     case CODEC_ID_H261:
1779         if (CONFIG_H261_ENCODER)
1780             ff_h261_encode_mb(s, s->block, motion_x, motion_y);
1781         break;
1782     case CODEC_ID_H263:
1783     case CODEC_ID_H263P:
1784     case CODEC_ID_FLV1:
1785     case CODEC_ID_RV10:
1786     case CODEC_ID_RV20:
1787         if (CONFIG_H263_ENCODER)
1788             h263_encode_mb(s, s->block, motion_x, motion_y);
1789         break;
1790     case CODEC_ID_MJPEG:
1791         if (CONFIG_MJPEG_ENCODER)
1792             ff_mjpeg_encode_mb(s, s->block);
1793         break;
1794     default:
1795         assert(0);
1796     }
1797 }
1798
1799 static av_always_inline void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
1800 {
1801     if (s->chroma_format == CHROMA_420) encode_mb_internal(s, motion_x, motion_y,  8, 6);
1802     else                                encode_mb_internal(s, motion_x, motion_y, 16, 8);
1803 }
1804
1805 static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int type){
1806     int i;
1807
1808     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
1809
1810     /* mpeg1 */
1811     d->mb_skip_run= s->mb_skip_run;
1812     for(i=0; i<3; i++)
1813         d->last_dc[i]= s->last_dc[i];
1814
1815     /* statistics */
1816     d->mv_bits= s->mv_bits;
1817     d->i_tex_bits= s->i_tex_bits;
1818     d->p_tex_bits= s->p_tex_bits;
1819     d->i_count= s->i_count;
1820     d->f_count= s->f_count;
1821     d->b_count= s->b_count;
1822     d->skip_count= s->skip_count;
1823     d->misc_bits= s->misc_bits;
1824     d->last_bits= 0;
1825
1826     d->mb_skipped= 0;
1827     d->qscale= s->qscale;
1828     d->dquant= s->dquant;
1829
1830     d->esc3_level_length= s->esc3_level_length;
1831 }
1832
1833 static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){
1834     int i;
1835
1836     memcpy(d->mv, s->mv, 2*4*2*sizeof(int));
1837     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
1838
1839     /* mpeg1 */
1840     d->mb_skip_run= s->mb_skip_run;
1841     for(i=0; i<3; i++)
1842         d->last_dc[i]= s->last_dc[i];
1843
1844     /* statistics */
1845     d->mv_bits= s->mv_bits;
1846     d->i_tex_bits= s->i_tex_bits;
1847     d->p_tex_bits= s->p_tex_bits;
1848     d->i_count= s->i_count;
1849     d->f_count= s->f_count;
1850     d->b_count= s->b_count;
1851     d->skip_count= s->skip_count;
1852     d->misc_bits= s->misc_bits;
1853
1854     d->mb_intra= s->mb_intra;
1855     d->mb_skipped= s->mb_skipped;
1856     d->mv_type= s->mv_type;
1857     d->mv_dir= s->mv_dir;
1858     d->pb= s->pb;
1859     if(s->data_partitioning){
1860         d->pb2= s->pb2;
1861         d->tex_pb= s->tex_pb;
1862     }
1863     d->block= s->block;
1864     for(i=0; i<8; i++)
1865         d->block_last_index[i]= s->block_last_index[i];
1866     d->interlaced_dct= s->interlaced_dct;
1867     d->qscale= s->qscale;
1868
1869     d->esc3_level_length= s->esc3_level_length;
1870 }
1871
1872 static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type,
1873                            PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2],
1874                            int *dmin, int *next_block, int motion_x, int motion_y)
1875 {
1876     int score;
1877     uint8_t *dest_backup[3];
1878
1879     copy_context_before_encode(s, backup, type);
1880
1881     s->block= s->blocks[*next_block];
1882     s->pb= pb[*next_block];
1883     if(s->data_partitioning){
1884         s->pb2   = pb2   [*next_block];
1885         s->tex_pb= tex_pb[*next_block];
1886     }
1887
1888     if(*next_block){
1889         memcpy(dest_backup, s->dest, sizeof(s->dest));
1890         s->dest[0] = s->rd_scratchpad;
1891         s->dest[1] = s->rd_scratchpad + 16*s->linesize;
1892         s->dest[2] = s->rd_scratchpad + 16*s->linesize + 8;
1893         assert(s->linesize >= 32); //FIXME
1894     }
1895
1896     encode_mb(s, motion_x, motion_y);
1897
1898     score= put_bits_count(&s->pb);
1899     if(s->data_partitioning){
1900         score+= put_bits_count(&s->pb2);
1901         score+= put_bits_count(&s->tex_pb);
1902     }
1903
1904     if(s->avctx->mb_decision == FF_MB_DECISION_RD){
1905         MPV_decode_mb(s, s->block);
1906
1907         score *= s->lambda2;
1908         score += sse_mb(s) << FF_LAMBDA_SHIFT;
1909     }
1910
1911     if(*next_block){
1912         memcpy(s->dest, dest_backup, sizeof(s->dest));
1913     }
1914
1915     if(score<*dmin){
1916         *dmin= score;
1917         *next_block^=1;
1918
1919         copy_context_after_encode(best, s, type);
1920     }
1921 }
1922
1923 static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride){
1924     uint32_t *sq = ff_squareTbl + 256;
1925     int acc=0;
1926     int x,y;
1927
1928     if(w==16 && h==16)
1929         return s->dsp.sse[0](NULL, src1, src2, stride, 16);
1930     else if(w==8 && h==8)
1931         return s->dsp.sse[1](NULL, src1, src2, stride, 8);
1932
1933     for(y=0; y<h; y++){
1934         for(x=0; x<w; x++){
1935             acc+= sq[src1[x + y*stride] - src2[x + y*stride]];
1936         }
1937     }
1938
1939     assert(acc>=0);
1940
1941     return acc;
1942 }
1943
1944 static int sse_mb(MpegEncContext *s){
1945     int w= 16;
1946     int h= 16;
1947
1948     if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
1949     if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
1950
1951     if(w==16 && h==16)
1952       if(s->avctx->mb_cmp == FF_CMP_NSSE){
1953         return  s->dsp.nsse[0](s, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
1954                +s->dsp.nsse[1](s, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
1955                +s->dsp.nsse[1](s, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
1956       }else{
1957         return  s->dsp.sse[0](NULL, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
1958                +s->dsp.sse[1](NULL, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
1959                +s->dsp.sse[1](NULL, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
1960       }
1961     else
1962         return  sse(s, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], w, h, s->linesize)
1963                +sse(s, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize)
1964                +sse(s, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize);
1965 }
1966
1967 static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){
1968     MpegEncContext *s= *(void**)arg;
1969
1970
1971     s->me.pre_pass=1;
1972     s->me.dia_size= s->avctx->pre_dia_size;
1973     s->first_slice_line=1;
1974     for(s->mb_y= s->end_mb_y-1; s->mb_y >= s->start_mb_y; s->mb_y--) {
1975         for(s->mb_x=s->mb_width-1; s->mb_x >=0 ;s->mb_x--) {
1976             ff_pre_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
1977         }
1978         s->first_slice_line=0;
1979     }
1980
1981     s->me.pre_pass=0;
1982
1983     return 0;
1984 }
1985
1986 static int estimate_motion_thread(AVCodecContext *c, void *arg){
1987     MpegEncContext *s= *(void**)arg;
1988
1989     ff_check_alignment();
1990
1991     s->me.dia_size= s->avctx->dia_size;
1992     s->first_slice_line=1;
1993     for(s->mb_y= s->start_mb_y; s->mb_y < s->end_mb_y; s->mb_y++) {
1994         s->mb_x=0; //for block init below
1995         ff_init_block_index(s);
1996         for(s->mb_x=0; s->mb_x < s->mb_width; s->mb_x++) {
1997             s->block_index[0]+=2;
1998             s->block_index[1]+=2;
1999             s->block_index[2]+=2;
2000             s->block_index[3]+=2;
2001
2002             /* compute motion vector & mb_type and store in context */
2003             if(s->pict_type==AV_PICTURE_TYPE_B)
2004                 ff_estimate_b_frame_motion(s, s->mb_x, s->mb_y);
2005             else
2006                 ff_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2007         }
2008         s->first_slice_line=0;
2009     }
2010     return 0;
2011 }
2012
2013 static int mb_var_thread(AVCodecContext *c, void *arg){
2014     MpegEncContext *s= *(void**)arg;
2015     int mb_x, mb_y;
2016
2017     ff_check_alignment();
2018
2019     for(mb_y=s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2020         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2021             int xx = mb_x * 16;
2022             int yy = mb_y * 16;
2023             uint8_t *pix = s->new_picture.f.data[0] + (yy * s->linesize) + xx;
2024             int varc;
2025             int sum = s->dsp.pix_sum(pix, s->linesize);
2026
2027             varc = (s->dsp.pix_norm1(pix, s->linesize) - (((unsigned)(sum*sum))>>8) + 500 + 128)>>8;
2028
2029             s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc;
2030             s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
2031             s->me.mb_var_sum_temp    += varc;
2032         }
2033     }
2034     return 0;
2035 }
2036
2037 static void write_slice_end(MpegEncContext *s){
2038     if(CONFIG_MPEG4_ENCODER && s->codec_id==CODEC_ID_MPEG4){
2039         if(s->partitioned_frame){
2040             ff_mpeg4_merge_partitions(s);
2041         }
2042
2043         ff_mpeg4_stuffing(&s->pb);
2044     }else if(CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG){
2045         ff_mjpeg_encode_stuffing(&s->pb);
2046     }
2047
2048     align_put_bits(&s->pb);
2049     flush_put_bits(&s->pb);
2050
2051     if((s->flags&CODEC_FLAG_PASS1) && !s->partitioned_frame)
2052         s->misc_bits+= get_bits_diff(s);
2053 }
2054
2055 static int encode_thread(AVCodecContext *c, void *arg){
2056     MpegEncContext *s= *(void**)arg;
2057     int mb_x, mb_y, pdif = 0;
2058     int chr_h= 16>>s->chroma_y_shift;
2059     int i, j;
2060     MpegEncContext best_s, backup_s;
2061     uint8_t bit_buf[2][MAX_MB_BYTES];
2062     uint8_t bit_buf2[2][MAX_MB_BYTES];
2063     uint8_t bit_buf_tex[2][MAX_MB_BYTES];
2064     PutBitContext pb[2], pb2[2], tex_pb[2];
2065 //printf("%d->%d\n", s->resync_mb_y, s->end_mb_y);
2066
2067     ff_check_alignment();
2068
2069     for(i=0; i<2; i++){
2070         init_put_bits(&pb    [i], bit_buf    [i], MAX_MB_BYTES);
2071         init_put_bits(&pb2   [i], bit_buf2   [i], MAX_MB_BYTES);
2072         init_put_bits(&tex_pb[i], bit_buf_tex[i], MAX_MB_BYTES);
2073     }
2074
2075     s->last_bits= put_bits_count(&s->pb);
2076     s->mv_bits=0;
2077     s->misc_bits=0;
2078     s->i_tex_bits=0;
2079     s->p_tex_bits=0;
2080     s->i_count=0;
2081     s->f_count=0;
2082     s->b_count=0;
2083     s->skip_count=0;
2084
2085     for(i=0; i<3; i++){
2086         /* init last dc values */
2087         /* note: quant matrix value (8) is implied here */
2088         s->last_dc[i] = 128 << s->intra_dc_precision;
2089
2090         s->current_picture.f.error[i] = 0;
2091     }
2092     s->mb_skip_run = 0;
2093     memset(s->last_mv, 0, sizeof(s->last_mv));
2094
2095     s->last_mv_dir = 0;
2096
2097     switch(s->codec_id){
2098     case CODEC_ID_H263:
2099     case CODEC_ID_H263P:
2100     case CODEC_ID_FLV1:
2101         if (CONFIG_H263_ENCODER)
2102             s->gob_index = ff_h263_get_gob_height(s);
2103         break;
2104     case CODEC_ID_MPEG4:
2105         if(CONFIG_MPEG4_ENCODER && s->partitioned_frame)
2106             ff_mpeg4_init_partitions(s);
2107         break;
2108     }
2109
2110     s->resync_mb_x=0;
2111     s->resync_mb_y=0;
2112     s->first_slice_line = 1;
2113     s->ptr_lastgob = s->pb.buf;
2114     for(mb_y= s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2115 //    printf("row %d at %X\n", s->mb_y, (int)s);
2116         s->mb_x=0;
2117         s->mb_y= mb_y;
2118
2119         ff_set_qscale(s, s->qscale);
2120         ff_init_block_index(s);
2121
2122         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2123             int xy= mb_y*s->mb_stride + mb_x; // removed const, H261 needs to adjust this
2124             int mb_type= s->mb_type[xy];
2125 //            int d;
2126             int dmin= INT_MAX;
2127             int dir;
2128
2129             if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < MAX_MB_BYTES){
2130                 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2131                 return -1;
2132             }
2133             if(s->data_partitioning){
2134                 if(   s->pb2   .buf_end - s->pb2   .buf - (put_bits_count(&s->    pb2)>>3) < MAX_MB_BYTES
2135                    || s->tex_pb.buf_end - s->tex_pb.buf - (put_bits_count(&s->tex_pb )>>3) < MAX_MB_BYTES){
2136                     av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2137                     return -1;
2138                 }
2139             }
2140
2141             s->mb_x = mb_x;
2142             s->mb_y = mb_y;  // moved into loop, can get changed by H.261
2143             ff_update_block_index(s);
2144
2145             if(CONFIG_H261_ENCODER && s->codec_id == CODEC_ID_H261){
2146                 ff_h261_reorder_mb_index(s);
2147                 xy= s->mb_y*s->mb_stride + s->mb_x;
2148                 mb_type= s->mb_type[xy];
2149             }
2150
2151             /* write gob / video packet header  */
2152             if(s->rtp_mode){
2153                 int current_packet_size, is_gob_start;
2154
2155                 current_packet_size= ((put_bits_count(&s->pb)+7)>>3) - (s->ptr_lastgob - s->pb.buf);
2156
2157                 is_gob_start= s->avctx->rtp_payload_size && current_packet_size >= s->avctx->rtp_payload_size && mb_y + mb_x>0;
2158
2159                 if(s->start_mb_y == mb_y && mb_y > 0 && mb_x==0) is_gob_start=1;
2160
2161                 switch(s->codec_id){
2162                 case CODEC_ID_H263:
2163                 case CODEC_ID_H263P:
2164                     if(!s->h263_slice_structured)
2165                         if(s->mb_x || s->mb_y%s->gob_index) is_gob_start=0;
2166                     break;
2167                 case CODEC_ID_MPEG2VIDEO:
2168                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
2169                 case CODEC_ID_MPEG1VIDEO:
2170                     if(s->mb_skip_run) is_gob_start=0;
2171                     break;
2172                 }
2173
2174                 if(is_gob_start){
2175                     if(s->start_mb_y != mb_y || mb_x!=0){
2176                         write_slice_end(s);
2177
2178                         if(CONFIG_MPEG4_ENCODER && s->codec_id==CODEC_ID_MPEG4 && s->partitioned_frame){
2179                             ff_mpeg4_init_partitions(s);
2180                         }
2181                     }
2182
2183                     assert((put_bits_count(&s->pb)&7) == 0);
2184                     current_packet_size= put_bits_ptr(&s->pb) - s->ptr_lastgob;
2185
2186                     if(s->avctx->error_rate && s->resync_mb_x + s->resync_mb_y > 0){
2187                         int r= put_bits_count(&s->pb)/8 + s->picture_number + 16 + s->mb_x + s->mb_y;
2188                         int d= 100 / s->avctx->error_rate;
2189                         if(r % d == 0){
2190                             current_packet_size=0;
2191                             s->pb.buf_ptr= s->ptr_lastgob;
2192                             assert(put_bits_ptr(&s->pb) == s->ptr_lastgob);
2193                         }
2194                     }
2195
2196                     if (s->avctx->rtp_callback){
2197                         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width + mb_x - s->resync_mb_x;
2198                         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, current_packet_size, number_mb);
2199                     }
2200
2201                     switch(s->codec_id){
2202                     case CODEC_ID_MPEG4:
2203                         if (CONFIG_MPEG4_ENCODER) {
2204                             ff_mpeg4_encode_video_packet_header(s);
2205                             ff_mpeg4_clean_buffers(s);
2206                         }
2207                     break;
2208                     case CODEC_ID_MPEG1VIDEO:
2209                     case CODEC_ID_MPEG2VIDEO:
2210                         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER) {
2211                             ff_mpeg1_encode_slice_header(s);
2212                             ff_mpeg1_clean_buffers(s);
2213                         }
2214                     break;
2215                     case CODEC_ID_H263:
2216                     case CODEC_ID_H263P:
2217                         if (CONFIG_H263_ENCODER)
2218                             h263_encode_gob_header(s, mb_y);
2219                     break;
2220                     }
2221
2222                     if(s->flags&CODEC_FLAG_PASS1){
2223                         int bits= put_bits_count(&s->pb);
2224                         s->misc_bits+= bits - s->last_bits;
2225                         s->last_bits= bits;
2226                     }
2227
2228                     s->ptr_lastgob += current_packet_size;
2229                     s->first_slice_line=1;
2230                     s->resync_mb_x=mb_x;
2231                     s->resync_mb_y=mb_y;
2232                 }
2233             }
2234
2235             if(  (s->resync_mb_x   == s->mb_x)
2236                && s->resync_mb_y+1 == s->mb_y){
2237                 s->first_slice_line=0;
2238             }
2239
2240             s->mb_skipped=0;
2241             s->dquant=0; //only for QP_RD
2242
2243             if(mb_type & (mb_type-1) || (s->flags & CODEC_FLAG_QP_RD)){ // more than 1 MB type possible or CODEC_FLAG_QP_RD
2244                 int next_block=0;
2245                 int pb_bits_count, pb2_bits_count, tex_pb_bits_count;
2246
2247                 copy_context_before_encode(&backup_s, s, -1);
2248                 backup_s.pb= s->pb;
2249                 best_s.data_partitioning= s->data_partitioning;
2250                 best_s.partitioned_frame= s->partitioned_frame;
2251                 if(s->data_partitioning){
2252                     backup_s.pb2= s->pb2;
2253                     backup_s.tex_pb= s->tex_pb;
2254                 }
2255
2256                 if(mb_type&CANDIDATE_MB_TYPE_INTER){
2257                     s->mv_dir = MV_DIR_FORWARD;
2258                     s->mv_type = MV_TYPE_16X16;
2259                     s->mb_intra= 0;
2260                     s->mv[0][0][0] = s->p_mv_table[xy][0];
2261                     s->mv[0][0][1] = s->p_mv_table[xy][1];
2262                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER, pb, pb2, tex_pb,
2263                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2264                 }
2265                 if(mb_type&CANDIDATE_MB_TYPE_INTER_I){
2266                     s->mv_dir = MV_DIR_FORWARD;
2267                     s->mv_type = MV_TYPE_FIELD;
2268                     s->mb_intra= 0;
2269                     for(i=0; i<2; i++){
2270                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
2271                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
2272                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
2273                     }
2274                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER_I, pb, pb2, tex_pb,
2275                                  &dmin, &next_block, 0, 0);
2276                 }
2277                 if(mb_type&CANDIDATE_MB_TYPE_SKIPPED){
2278                     s->mv_dir = MV_DIR_FORWARD;
2279                     s->mv_type = MV_TYPE_16X16;
2280                     s->mb_intra= 0;
2281                     s->mv[0][0][0] = 0;
2282                     s->mv[0][0][1] = 0;
2283                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_SKIPPED, pb, pb2, tex_pb,
2284                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2285                 }
2286                 if(mb_type&CANDIDATE_MB_TYPE_INTER4V){
2287                     s->mv_dir = MV_DIR_FORWARD;
2288                     s->mv_type = MV_TYPE_8X8;
2289                     s->mb_intra= 0;
2290                     for(i=0; i<4; i++){
2291                         s->mv[0][i][0] = s->current_picture.f.motion_val[0][s->block_index[i]][0];
2292                         s->mv[0][i][1] = s->current_picture.f.motion_val[0][s->block_index[i]][1];
2293                     }
2294                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER4V, pb, pb2, tex_pb,
2295                                  &dmin, &next_block, 0, 0);
2296                 }
2297                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD){
2298                     s->mv_dir = MV_DIR_FORWARD;
2299                     s->mv_type = MV_TYPE_16X16;
2300                     s->mb_intra= 0;
2301                     s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
2302                     s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
2303                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD, pb, pb2, tex_pb,
2304                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2305                 }
2306                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD){
2307                     s->mv_dir = MV_DIR_BACKWARD;
2308                     s->mv_type = MV_TYPE_16X16;
2309                     s->mb_intra= 0;
2310                     s->mv[1][0][0] = s->b_back_mv_table[xy][0];
2311                     s->mv[1][0][1] = s->b_back_mv_table[xy][1];
2312                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD, pb, pb2, tex_pb,
2313                                  &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]);
2314                 }
2315                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR){
2316                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2317                     s->mv_type = MV_TYPE_16X16;
2318                     s->mb_intra= 0;
2319                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
2320                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
2321                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
2322                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
2323                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR, pb, pb2, tex_pb,
2324                                  &dmin, &next_block, 0, 0);
2325                 }
2326                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD_I){
2327                     s->mv_dir = MV_DIR_FORWARD;
2328                     s->mv_type = MV_TYPE_FIELD;
2329                     s->mb_intra= 0;
2330                     for(i=0; i<2; i++){
2331                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
2332                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
2333                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
2334                     }
2335                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD_I, pb, pb2, tex_pb,
2336                                  &dmin, &next_block, 0, 0);
2337                 }
2338                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD_I){
2339                     s->mv_dir = MV_DIR_BACKWARD;
2340                     s->mv_type = MV_TYPE_FIELD;
2341                     s->mb_intra= 0;
2342                     for(i=0; i<2; i++){
2343                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
2344                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
2345                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
2346                     }
2347                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD_I, pb, pb2, tex_pb,
2348                                  &dmin, &next_block, 0, 0);
2349                 }
2350                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR_I){
2351                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2352                     s->mv_type = MV_TYPE_FIELD;
2353                     s->mb_intra= 0;
2354                     for(dir=0; dir<2; dir++){
2355                         for(i=0; i<2; i++){
2356                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
2357                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
2358                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
2359                         }
2360                     }
2361                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR_I, pb, pb2, tex_pb,
2362                                  &dmin, &next_block, 0, 0);
2363                 }
2364                 if(mb_type&CANDIDATE_MB_TYPE_INTRA){
2365                     s->mv_dir = 0;
2366                     s->mv_type = MV_TYPE_16X16;
2367                     s->mb_intra= 1;
2368                     s->mv[0][0][0] = 0;
2369                     s->mv[0][0][1] = 0;
2370                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTRA, pb, pb2, tex_pb,
2371                                  &dmin, &next_block, 0, 0);
2372                     if(s->h263_pred || s->h263_aic){
2373                         if(best_s.mb_intra)
2374                             s->mbintra_table[mb_x + mb_y*s->mb_stride]=1;
2375                         else
2376                             ff_clean_intra_table_entries(s); //old mode?
2377                     }
2378                 }
2379
2380                 if((s->flags & CODEC_FLAG_QP_RD) && dmin < INT_MAX){
2381                     if(best_s.mv_type==MV_TYPE_16X16){ //FIXME move 4mv after QPRD
2382                         const int last_qp= backup_s.qscale;
2383                         int qpi, qp, dc[6];
2384                         DCTELEM ac[6][16];
2385                         const int mvdir= (best_s.mv_dir&MV_DIR_BACKWARD) ? 1 : 0;
2386                         static const int dquant_tab[4]={-1,1,-2,2};
2387
2388                         assert(backup_s.dquant == 0);
2389
2390                         //FIXME intra
2391                         s->mv_dir= best_s.mv_dir;
2392                         s->mv_type = MV_TYPE_16X16;
2393                         s->mb_intra= best_s.mb_intra;
2394                         s->mv[0][0][0] = best_s.mv[0][0][0];
2395                         s->mv[0][0][1] = best_s.mv[0][0][1];
2396                         s->mv[1][0][0] = best_s.mv[1][0][0];
2397                         s->mv[1][0][1] = best_s.mv[1][0][1];
2398
2399                         qpi = s->pict_type == AV_PICTURE_TYPE_B ? 2 : 0;
2400                         for(; qpi<4; qpi++){
2401                             int dquant= dquant_tab[qpi];
2402                             qp= last_qp + dquant;
2403                             if(qp < s->avctx->qmin || qp > s->avctx->qmax)
2404                                 continue;
2405                             backup_s.dquant= dquant;
2406                             if(s->mb_intra && s->dc_val[0]){
2407                                 for(i=0; i<6; i++){
2408                                     dc[i]= s->dc_val[0][ s->block_index[i] ];
2409                                     memcpy(ac[i], s->ac_val[0][s->block_index[i]], sizeof(DCTELEM)*16);
2410                                 }
2411                             }
2412
2413                             encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
2414                                          &dmin, &next_block, s->mv[mvdir][0][0], s->mv[mvdir][0][1]);
2415                             if(best_s.qscale != qp){
2416                                 if(s->mb_intra && s->dc_val[0]){
2417                                     for(i=0; i<6; i++){
2418                                         s->dc_val[0][ s->block_index[i] ]= dc[i];
2419                                         memcpy(s->ac_val[0][s->block_index[i]], ac[i], sizeof(DCTELEM)*16);
2420                                     }
2421                                 }
2422                             }
2423                         }
2424                     }
2425                 }
2426                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT){
2427                     int mx= s->b_direct_mv_table[xy][0];
2428                     int my= s->b_direct_mv_table[xy][1];
2429
2430                     backup_s.dquant = 0;
2431                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
2432                     s->mb_intra= 0;
2433                     ff_mpeg4_set_direct_mv(s, mx, my);
2434                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
2435                                  &dmin, &next_block, mx, my);
2436                 }
2437                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT0){
2438                     backup_s.dquant = 0;
2439                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
2440                     s->mb_intra= 0;
2441                     ff_mpeg4_set_direct_mv(s, 0, 0);
2442                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
2443                                  &dmin, &next_block, 0, 0);
2444                 }
2445                 if(!best_s.mb_intra && s->flags2&CODEC_FLAG2_SKIP_RD){
2446                     int coded=0;
2447                     for(i=0; i<6; i++)
2448                         coded |= s->block_last_index[i];
2449                     if(coded){
2450                         int mx,my;
2451                         memcpy(s->mv, best_s.mv, sizeof(s->mv));
2452                         if(CONFIG_MPEG4_ENCODER && best_s.mv_dir & MV_DIRECT){
2453                             mx=my=0; //FIXME find the one we actually used
2454                             ff_mpeg4_set_direct_mv(s, mx, my);
2455                         }else if(best_s.mv_dir&MV_DIR_BACKWARD){
2456                             mx= s->mv[1][0][0];
2457                             my= s->mv[1][0][1];
2458                         }else{
2459                             mx= s->mv[0][0][0];
2460                             my= s->mv[0][0][1];
2461                         }
2462
2463                         s->mv_dir= best_s.mv_dir;
2464                         s->mv_type = best_s.mv_type;
2465                         s->mb_intra= 0;
2466 /*                        s->mv[0][0][0] = best_s.mv[0][0][0];
2467                         s->mv[0][0][1] = best_s.mv[0][0][1];
2468                         s->mv[1][0][0] = best_s.mv[1][0][0];
2469                         s->mv[1][0][1] = best_s.mv[1][0][1];*/
2470                         backup_s.dquant= 0;
2471                         s->skipdct=1;
2472                         encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
2473                                         &dmin, &next_block, mx, my);
2474                         s->skipdct=0;
2475                     }
2476                 }
2477
2478                 s->current_picture.f.qscale_table[xy] = best_s.qscale;
2479
2480                 copy_context_after_encode(s, &best_s, -1);
2481
2482                 pb_bits_count= put_bits_count(&s->pb);
2483                 flush_put_bits(&s->pb);
2484                 ff_copy_bits(&backup_s.pb, bit_buf[next_block^1], pb_bits_count);
2485                 s->pb= backup_s.pb;
2486
2487                 if(s->data_partitioning){
2488                     pb2_bits_count= put_bits_count(&s->pb2);
2489                     flush_put_bits(&s->pb2);
2490                     ff_copy_bits(&backup_s.pb2, bit_buf2[next_block^1], pb2_bits_count);
2491                     s->pb2= backup_s.pb2;
2492
2493                     tex_pb_bits_count= put_bits_count(&s->tex_pb);
2494                     flush_put_bits(&s->tex_pb);
2495                     ff_copy_bits(&backup_s.tex_pb, bit_buf_tex[next_block^1], tex_pb_bits_count);
2496                     s->tex_pb= backup_s.tex_pb;
2497                 }
2498                 s->last_bits= put_bits_count(&s->pb);
2499
2500                 if (CONFIG_H263_ENCODER &&
2501                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
2502                     ff_h263_update_motion_val(s);
2503
2504                 if(next_block==0){ //FIXME 16 vs linesize16
2505                     s->dsp.put_pixels_tab[0][0](s->dest[0], s->rd_scratchpad                     , s->linesize  ,16);
2506                     s->dsp.put_pixels_tab[1][0](s->dest[1], s->rd_scratchpad + 16*s->linesize    , s->uvlinesize, 8);
2507                     s->dsp.put_pixels_tab[1][0](s->dest[2], s->rd_scratchpad + 16*s->linesize + 8, s->uvlinesize, 8);
2508                 }
2509
2510                 if(s->avctx->mb_decision == FF_MB_DECISION_BITS)
2511                     MPV_decode_mb(s, s->block);
2512             } else {
2513                 int motion_x = 0, motion_y = 0;
2514                 s->mv_type=MV_TYPE_16X16;
2515                 // only one MB-Type possible
2516
2517                 switch(mb_type){
2518                 case CANDIDATE_MB_TYPE_INTRA:
2519                     s->mv_dir = 0;
2520                     s->mb_intra= 1;
2521                     motion_x= s->mv[0][0][0] = 0;
2522                     motion_y= s->mv[0][0][1] = 0;
2523                     break;
2524                 case CANDIDATE_MB_TYPE_INTER:
2525                     s->mv_dir = MV_DIR_FORWARD;
2526                     s->mb_intra= 0;
2527                     motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
2528                     motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
2529                     break;
2530                 case CANDIDATE_MB_TYPE_INTER_I:
2531                     s->mv_dir = MV_DIR_FORWARD;
2532                     s->mv_type = MV_TYPE_FIELD;
2533                     s->mb_intra= 0;
2534                     for(i=0; i<2; i++){
2535                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
2536                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
2537                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
2538                     }
2539                     break;
2540                 case CANDIDATE_MB_TYPE_INTER4V:
2541                     s->mv_dir = MV_DIR_FORWARD;
2542                     s->mv_type = MV_TYPE_8X8;
2543                     s->mb_intra= 0;
2544                     for(i=0; i<4; i++){
2545                         s->mv[0][i][0] = s->current_picture.f.motion_val[0][s->block_index[i]][0];
2546                         s->mv[0][i][1] = s->current_picture.f.motion_val[0][s->block_index[i]][1];
2547                     }
2548                     break;
2549                 case CANDIDATE_MB_TYPE_DIRECT:
2550                     if (CONFIG_MPEG4_ENCODER) {
2551                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
2552                         s->mb_intra= 0;
2553                         motion_x=s->b_direct_mv_table[xy][0];
2554                         motion_y=s->b_direct_mv_table[xy][1];
2555                         ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
2556                     }
2557                     break;
2558                 case CANDIDATE_MB_TYPE_DIRECT0:
2559                     if (CONFIG_MPEG4_ENCODER) {
2560                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
2561                         s->mb_intra= 0;
2562                         ff_mpeg4_set_direct_mv(s, 0, 0);
2563                     }
2564                     break;
2565                 case CANDIDATE_MB_TYPE_BIDIR:
2566                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2567                     s->mb_intra= 0;
2568                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
2569                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
2570                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
2571                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
2572                     break;
2573                 case CANDIDATE_MB_TYPE_BACKWARD:
2574                     s->mv_dir = MV_DIR_BACKWARD;
2575                     s->mb_intra= 0;
2576                     motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
2577                     motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
2578                     break;
2579                 case CANDIDATE_MB_TYPE_FORWARD:
2580                     s->mv_dir = MV_DIR_FORWARD;
2581                     s->mb_intra= 0;
2582                     motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
2583                     motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
2584 //                    printf(" %d %d ", motion_x, motion_y);
2585                     break;
2586                 case CANDIDATE_MB_TYPE_FORWARD_I:
2587                     s->mv_dir = MV_DIR_FORWARD;
2588                     s->mv_type = MV_TYPE_FIELD;
2589                     s->mb_intra= 0;
2590                     for(i=0; i<2; i++){
2591                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
2592                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
2593                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
2594                     }
2595                     break;
2596                 case CANDIDATE_MB_TYPE_BACKWARD_I:
2597                     s->mv_dir = MV_DIR_BACKWARD;
2598                     s->mv_type = MV_TYPE_FIELD;
2599                     s->mb_intra= 0;
2600                     for(i=0; i<2; i++){
2601                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
2602                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
2603                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
2604                     }
2605                     break;
2606                 case CANDIDATE_MB_TYPE_BIDIR_I:
2607                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2608                     s->mv_type = MV_TYPE_FIELD;
2609                     s->mb_intra= 0;
2610                     for(dir=0; dir<2; dir++){
2611                         for(i=0; i<2; i++){
2612                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
2613                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
2614                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
2615                         }
2616                     }
2617                     break;
2618                 default:
2619                     av_log(s->avctx, AV_LOG_ERROR, "illegal MB type\n");
2620                 }
2621
2622                 encode_mb(s, motion_x, motion_y);
2623
2624                 // RAL: Update last macroblock type
2625                 s->last_mv_dir = s->mv_dir;
2626
2627                 if (CONFIG_H263_ENCODER &&
2628                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
2629                     ff_h263_update_motion_val(s);
2630
2631                 MPV_decode_mb(s, s->block);
2632             }
2633
2634             /* clean the MV table in IPS frames for direct mode in B frames */
2635             if(s->mb_intra /* && I,P,S_TYPE */){
2636                 s->p_mv_table[xy][0]=0;
2637                 s->p_mv_table[xy][1]=0;
2638             }
2639
2640             if(s->flags&CODEC_FLAG_PSNR){
2641                 int w= 16;
2642                 int h= 16;
2643
2644                 if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
2645                 if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
2646
2647                 s->current_picture.f.error[0] += sse(
2648                     s, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
2649                     s->dest[0], w, h, s->linesize);
2650                 s->current_picture.f.error[1] += sse(
2651                     s, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
2652                     s->dest[1], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
2653                 s->current_picture.f.error[2] += sse(
2654                     s, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
2655                     s->dest[2], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
2656             }
2657             if(s->loop_filter){
2658                 if(CONFIG_H263_ENCODER && s->out_format == FMT_H263)
2659                     ff_h263_loop_filter(s);
2660             }
2661 //printf("MB %d %d bits\n", s->mb_x+s->mb_y*s->mb_stride, put_bits_count(&s->pb));
2662         }
2663     }
2664
2665     //not beautiful here but we must write it before flushing so it has to be here
2666     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type == AV_PICTURE_TYPE_I)
2667         msmpeg4_encode_ext_header(s);
2668
2669     write_slice_end(s);
2670
2671     /* Send the last GOB if RTP */
2672     if (s->avctx->rtp_callback) {
2673         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width - s->resync_mb_x;
2674         pdif = put_bits_ptr(&s->pb) - s->ptr_lastgob;
2675         /* Call the RTP callback to send the last GOB */
2676         emms_c();
2677         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, pdif, number_mb);
2678     }
2679
2680     return 0;
2681 }
2682
/**
 * Move one accumulated field from src into dst.
 *
 * Adds src->field to dst->field and then resets src->field to 0.
 * Pointers named `dst` and `src` must be in scope where the macro is used.
 *
 * The body is wrapped in do { } while (0) so that the macro expands to a
 * single statement; without it, using MERGE() as the body of an unbraced
 * if/else or loop would guard only the addition and always clear src->field.
 */
#define MERGE(field) do { dst->field += src->field; src->field = 0; } while (0)
2684 static void merge_context_after_me(MpegEncContext *dst, MpegEncContext *src){
2685     MERGE(me.scene_change_score);
2686     MERGE(me.mc_mb_var_sum_temp);
2687     MERGE(me.mb_var_sum_temp);
2688 }
2689
2690 static void merge_context_after_encode(MpegEncContext *dst, MpegEncContext *src){
2691     int i;
2692
2693     MERGE(dct_count[0]); //note, the other dct vars are not part of the context
2694     MERGE(dct_count[1]);
2695     MERGE(mv_bits);
2696     MERGE(i_tex_bits);
2697     MERGE(p_tex_bits);
2698     MERGE(i_count);
2699     MERGE(f_count);
2700     MERGE(b_count);
2701     MERGE(skip_count);
2702     MERGE(misc_bits);
2703     MERGE(error_count);
2704     MERGE(padding_bug_score);
2705     MERGE(current_picture.f.error[0]);
2706     MERGE(current_picture.f.error[1]);
2707     MERGE(current_picture.f.error[2]);
2708
2709     if(dst->avctx->noise_reduction){
2710         for(i=0; i<64; i++){
2711             MERGE(dct_error_sum[0][i]);
2712             MERGE(dct_error_sum[1][i]);
2713         }
2714     }
2715
2716     assert(put_bits_count(&src->pb) % 8 ==0);
2717     assert(put_bits_count(&dst->pb) % 8 ==0);
2718     ff_copy_bits(&dst->pb, src->pb.buf, put_bits_count(&src->pb));
2719     flush_put_bits(&dst->pb);
2720 }
2721
2722 static int estimate_qp(MpegEncContext *s, int dry_run){
2723     if (s->next_lambda){
2724         s->current_picture_ptr->f.quality =
2725         s->current_picture.f.quality = s->next_lambda;
2726         if(!dry_run) s->next_lambda= 0;
2727     } else if (!s->fixed_qscale) {
2728         s->current_picture_ptr->f.quality =
2729         s->current_picture.f.quality = ff_rate_estimate_qscale(s, dry_run);
2730         if (s->current_picture.f.quality < 0)
2731             return -1;
2732     }
2733
2734     if(s->adaptive_quant){
2735         switch(s->codec_id){
2736         case CODEC_ID_MPEG4:
2737             if (CONFIG_MPEG4_ENCODER)
2738                 ff_clean_mpeg4_qscales(s);
2739             break;
2740         case CODEC_ID_H263:
2741         case CODEC_ID_H263P:
2742         case CODEC_ID_FLV1:
2743             if (CONFIG_H263_ENCODER)
2744                 ff_clean_h263_qscales(s);
2745             break;
2746         default:
2747             ff_init_qscale_tab(s);
2748         }
2749
2750         s->lambda= s->lambda_table[0];
2751         //FIXME broken
2752     }else
2753         s->lambda = s->current_picture.f.quality;
2754 //printf("%d %d\n", s->avctx->global_quality, s->current_picture.quality);
2755     update_qscale(s);
2756     return 0;
2757 }
2758
2759 /* must be called before writing the header */
2760 static void set_frame_distances(MpegEncContext * s){
2761     assert(s->current_picture_ptr->f.pts != AV_NOPTS_VALUE);
2762     s->time = s->current_picture_ptr->f.pts * s->avctx->time_base.num;
2763
2764     if(s->pict_type==AV_PICTURE_TYPE_B){
2765         s->pb_time= s->pp_time - (s->last_non_b_time - s->time);
2766         assert(s->pb_time > 0 && s->pb_time < s->pp_time);
2767     }else{
2768         s->pp_time= s->time - s->last_non_b_time;
2769         s->last_non_b_time= s->time;
2770         assert(s->picture_number==0 || s->pp_time > 0);
2771     }
2772 }
2773
2774 static int encode_picture(MpegEncContext *s, int picture_number)
2775 {
2776     int i;
2777     int bits;
2778     int context_count = s->avctx->thread_count;
2779
2780     s->picture_number = picture_number;
2781
2782     /* Reset the average MB variance */
2783     s->me.mb_var_sum_temp    =
2784     s->me.mc_mb_var_sum_temp = 0;
2785
2786     /* we need to initialize some time vars before we can encode b-frames */
2787     // RAL: Condition added for MPEG1VIDEO
2788     if (s->codec_id == CODEC_ID_MPEG1VIDEO || s->codec_id == CODEC_ID_MPEG2VIDEO || (s->h263_pred && !s->msmpeg4_version))
2789         set_frame_distances(s);
2790     if(CONFIG_MPEG4_ENCODER && s->codec_id == CODEC_ID_MPEG4)
2791         ff_set_mpeg4_time(s);
2792
2793     s->me.scene_change_score=0;
2794
2795 //    s->lambda= s->current_picture_ptr->quality; //FIXME qscale / ... stuff for ME rate distortion
2796
2797     if(s->pict_type==AV_PICTURE_TYPE_I){
2798         if(s->msmpeg4_version >= 3) s->no_rounding=1;
2799         else                        s->no_rounding=0;
2800     }else if(s->pict_type!=AV_PICTURE_TYPE_B){
2801         if(s->flipflop_rounding || s->codec_id == CODEC_ID_H263P || s->codec_id == CODEC_ID_MPEG4)
2802             s->no_rounding ^= 1;
2803     }
2804
2805     if(s->flags & CODEC_FLAG_PASS2){
2806         if (estimate_qp(s,1) < 0)
2807             return -1;
2808         ff_get_2pass_fcode(s);
2809     }else if(!(s->flags & CODEC_FLAG_QSCALE)){
2810         if(s->pict_type==AV_PICTURE_TYPE_B)
2811             s->lambda= s->last_lambda_for[s->pict_type];
2812         else
2813             s->lambda= s->last_lambda_for[s->last_non_b_pict_type];
2814         update_qscale(s);
2815     }
2816
2817     s->mb_intra=0; //for the rate distortion & bit compare functions
2818     for(i=1; i<context_count; i++){
2819         ff_update_duplicate_context(s->thread_context[i], s);
2820     }
2821
2822     if(ff_init_me(s)<0)
2823         return -1;
2824
2825     /* Estimate motion for every MB */
2826     if(s->pict_type != AV_PICTURE_TYPE_I){
2827         s->lambda = (s->lambda * s->avctx->me_penalty_compensation + 128)>>8;
2828         s->lambda2= (s->lambda2* (int64_t)s->avctx->me_penalty_compensation + 128)>>8;
2829         if(s->pict_type != AV_PICTURE_TYPE_B && s->avctx->me_threshold==0){
2830             if((s->avctx->pre_me && s->last_non_b_pict_type==AV_PICTURE_TYPE_I) || s->avctx->pre_me==2){
2831                 s->avctx->execute(s->avctx, pre_estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
2832             }
2833         }
2834
2835         s->avctx->execute(s->avctx, estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
2836     }else /* if(s->pict_type == AV_PICTURE_TYPE_I) */{
2837         /* I-Frame */
2838         for(i=0; i<s->mb_stride*s->mb_height; i++)
2839             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
2840
2841         if(!s->fixed_qscale){
2842             /* finding spatial complexity for I-frame rate control */
2843             s->avctx->execute(s->avctx, mb_var_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
2844         }
2845     }
2846     for(i=1; i<context_count; i++){
2847         merge_context_after_me(s, s->thread_context[i]);
2848     }
2849     s->current_picture.mc_mb_var_sum= s->current_picture_ptr->mc_mb_var_sum= s->me.mc_mb_var_sum_temp;
2850     s->current_picture.   mb_var_sum= s->current_picture_ptr->   mb_var_sum= s->me.   mb_var_sum_temp;
2851     emms_c();
2852
2853     if(s->me.scene_change_score > s->avctx->scenechange_threshold && s->pict_type == AV_PICTURE_TYPE_P){
2854         s->pict_type= AV_PICTURE_TYPE_I;
2855         for(i=0; i<s->mb_stride*s->mb_height; i++)
2856             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
2857 //printf("Scene change detected, encoding as I Frame %d %d\n", s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
2858     }
2859
2860     if(!s->umvplus){
2861         if(s->pict_type==AV_PICTURE_TYPE_P || s->pict_type==AV_PICTURE_TYPE_S) {
2862             s->f_code= ff_get_best_fcode(s, s->p_mv_table, CANDIDATE_MB_TYPE_INTER);
2863
2864             if(s->flags & CODEC_FLAG_INTERLACED_ME){
2865                 int a,b;
2866                 a= ff_get_best_fcode(s, s->p_field_mv_table[0][0], CANDIDATE_MB_TYPE_INTER_I); //FIXME field_select
2867                 b= ff_get_best_fcode(s, s->p_field_mv_table[1][1], CANDIDATE_MB_TYPE_INTER_I);
2868                 s->f_code= FFMAX3(s->f_code, a, b);
2869             }
2870
2871             ff_fix_long_p_mvs(s);
2872             ff_fix_long_mvs(s, NULL, 0, s->p_mv_table, s->f_code, CANDIDATE_MB_TYPE_INTER, 0);
2873             if(s->flags & CODEC_FLAG_INTERLACED_ME){
2874                 int j;
2875                 for(i=0; i<2; i++){
2876                     for(j=0; j<2; j++)
2877                         ff_fix_long_mvs(s, s->p_field_select_table[i], j,
2878                                         s->p_field_mv_table[i][j], s->f_code, CANDIDATE_MB_TYPE_INTER_I, 0);
2879                 }
2880             }
2881         }
2882
2883         if(s->pict_type==AV_PICTURE_TYPE_B){
2884             int a, b;
2885
2886             a = ff_get_best_fcode(s, s->b_forw_mv_table, CANDIDATE_MB_TYPE_FORWARD);
2887             b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, CANDIDATE_MB_TYPE_BIDIR);
2888             s->f_code = FFMAX(a, b);
2889
2890             a = ff_get_best_fcode(s, s->b_back_mv_table, CANDIDATE_MB_TYPE_BACKWARD);
2891             b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, CANDIDATE_MB_TYPE_BIDIR);
2892             s->b_code = FFMAX(a, b);
2893
2894             ff_fix_long_mvs(s, NULL, 0, s->b_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_FORWARD, 1);
2895             ff_fix_long_mvs(s, NULL, 0, s->b_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BACKWARD, 1);
2896             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_BIDIR, 1);
2897             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BIDIR, 1);
2898             if(s->flags & CODEC_FLAG_INTERLACED_ME){
2899                 int dir, j;
2900                 for(dir=0; dir<2; dir++){
2901                     for(i=0; i<2; i++){
2902                         for(j=0; j<2; j++){
2903                             int type= dir ? (CANDIDATE_MB_TYPE_BACKWARD_I|CANDIDATE_MB_TYPE_BIDIR_I)
2904                                           : (CANDIDATE_MB_TYPE_FORWARD_I |CANDIDATE_MB_TYPE_BIDIR_I);
2905                             ff_fix_long_mvs(s, s->b_field_select_table[dir][i], j,
2906                                             s->b_field_mv_table[dir][i][j], dir ? s->b_code : s->f_code, type, 1);
2907                         }
2908                     }
2909                 }
2910             }
2911         }
2912     }
2913
2914     if (estimate_qp(s, 0) < 0)
2915         return -1;
2916
2917     if(s->qscale < 3 && s->max_qcoeff<=128 && s->pict_type==AV_PICTURE_TYPE_I && !(s->flags & CODEC_FLAG_QSCALE))
2918         s->qscale= 3; //reduce clipping problems
2919
2920     if (s->out_format == FMT_MJPEG) {
2921         /* for mjpeg, we do include qscale in the matrix */
2922         for(i=1;i<64;i++){
2923             int j= s->dsp.idct_permutation[i];
2924
2925             s->intra_matrix[j] = av_clip_uint8((ff_mpeg1_default_intra_matrix[i] * s->qscale) >> 3);
2926         }
2927         s->y_dc_scale_table=
2928         s->c_dc_scale_table= ff_mpeg2_dc_scale_table[s->intra_dc_precision];
2929         s->intra_matrix[0] = ff_mpeg2_dc_scale_table[s->intra_dc_precision][8];
2930         ff_convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
2931                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
2932         s->qscale= 8;
2933     }
2934
2935     //FIXME var duplication
2936     s->current_picture_ptr->f.key_frame =
2937     s->current_picture.f.key_frame = s->pict_type == AV_PICTURE_TYPE_I; //FIXME pic_ptr
2938     s->current_picture_ptr->f.pict_type =
2939     s->current_picture.f.pict_type = s->pict_type;
2940
2941     if (s->current_picture.f.key_frame)
2942         s->picture_in_gop_number=0;
2943
2944     s->last_bits= put_bits_count(&s->pb);
2945     switch(s->out_format) {
2946     case FMT_MJPEG:
2947         if (CONFIG_MJPEG_ENCODER)
2948             ff_mjpeg_encode_picture_header(s);
2949         break;
2950     case FMT_H261:
2951         if (CONFIG_H261_ENCODER)
2952             ff_h261_encode_picture_header(s, picture_number);
2953         break;
2954     case FMT_H263:
2955         if (CONFIG_WMV2_ENCODER && s->codec_id == CODEC_ID_WMV2)
2956             ff_wmv2_encode_picture_header(s, picture_number);
2957         else if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
2958             msmpeg4_encode_picture_header(s, picture_number);
2959         else if (CONFIG_MPEG4_ENCODER && s->h263_pred)
2960             mpeg4_encode_picture_header(s, picture_number);
2961         else if (CONFIG_RV10_ENCODER && s->codec_id == CODEC_ID_RV10)
2962             rv10_encode_picture_header(s, picture_number);
2963         else if (CONFIG_RV20_ENCODER && s->codec_id == CODEC_ID_RV20)
2964             rv20_encode_picture_header(s, picture_number);
2965         else if (CONFIG_FLV_ENCODER && s->codec_id == CODEC_ID_FLV1)
2966             ff_flv_encode_picture_header(s, picture_number);
2967         else if (CONFIG_H263_ENCODER)
2968             h263_encode_picture_header(s, picture_number);
2969         break;
2970     case FMT_MPEG1:
2971         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
2972             mpeg1_encode_picture_header(s, picture_number);
2973         break;
2974     case FMT_H264:
2975         break;
2976     default:
2977         assert(0);
2978     }
2979     bits= put_bits_count(&s->pb);
2980     s->header_bits= bits - s->last_bits;
2981
2982     for(i=1; i<context_count; i++){
2983         update_duplicate_context_after_me(s->thread_context[i], s);
2984     }
2985     s->avctx->execute(s->avctx, encode_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
2986     for(i=1; i<context_count; i++){
2987         merge_context_after_encode(s, s->thread_context[i]);
2988     }
2989     emms_c();
2990     return 0;
2991 }
2992
2993 static void denoise_dct_c(MpegEncContext *s, DCTELEM *block){
2994     const int intra= s->mb_intra;
2995     int i;
2996
2997     s->dct_count[intra]++;
2998
2999     for(i=0; i<64; i++){
3000         int level= block[i];
3001
3002         if(level){
3003             if(level>0){
3004                 s->dct_error_sum[intra][i] += level;
3005                 level -= s->dct_offset[intra][i];
3006                 if(level<0) level=0;
3007             }else{
3008                 s->dct_error_sum[intra][i] -= level;
3009                 level += s->dct_offset[intra][i];
3010                 if(level>0) level=0;
3011             }
3012             block[i]= level;
3013         }
3014     }
3015 }
3016
/**
 * Quantize one 8x8 block using a rate-distortion search over candidate levels.
 *
 * Runs s->dsp.fdct() on block (followed by s->denoise_dct() when
 * s->dct_error_sum is set). For every scan position up to the last
 * coefficient that survives plain quantization, up to two candidate levels
 * are considered, and the run/level chain with the lowest
 * "distortion + lambda * VLC length" score is kept. The chosen levels are
 * written back into block through s->intra_scantable.permutated, and the
 * resulting score is stored in s->coded_score[n].
 *
 * @param s        encoder context
 * @param block    coefficients; transformed and quantized in place
 * @param n        block index; for intra blocks n < 4 uses s->y_dc_scale,
 *                 otherwise s->c_dc_scale
 * @param qscale   quantizer; also indexes s->q_intra_matrix / s->q_inter_matrix
 * @param overflow set to non-zero when s->max_qcoeff is smaller than the OR of
 *                 all candidate magnitudes
 * @return scan position of the last non-zero coefficient, or a value below the
 *         first coded position (0 for intra, -1 for inter) when nothing is coded
 */
3017 static int dct_quantize_trellis_c(MpegEncContext *s,
3018                                   DCTELEM *block, int n,
3019                                   int qscale, int *overflow){
3020     const int *qmat;
3021     const uint8_t *scantable= s->intra_scantable.scantable;
3022     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3023     int max=0;
3024     unsigned int threshold1, threshold2;
3025     int bias=0;
3026     int run_tab[65];
3027     int level_tab[65];
3028     int score_tab[65];
3029     int survivor[65];
3030     int survivor_count;
3031     int last_run=0;
3032     int last_level=0;
3033     int last_score= 0;
3034     int last_i;
3035     int coeff[2][64];
3036     int coeff_count[64];
3037     int qmul, qadd, start_i, last_non_zero, i, dc;
3038     const int esc_length= s->ac_esc_length;
3039     uint8_t * length;
3040     uint8_t * last_length;
3041     const int lambda= s->lambda2 >> (FF_LAMBDA_SHIFT - 6);
3042
3043     s->dsp.fdct (block);
3044
3045     if(s->dct_error_sum)
3046         s->denoise_dct(s, block);
3047     qmul= qscale*16;
3048     qadd= ((qscale-1)|1)*8;
3049
3050     if (s->mb_intra) {
3051         int q;
3052         if (!s->h263_aic) {
3053             if (n < 4)
3054                 q = s->y_dc_scale;
3055             else
3056                 q = s->c_dc_scale;
3057             q = q << 3;
3058         } else{
3059             /* For AIC we skip quant/dequant of INTRADC */
3060             q = 1 << 3;
3061             qadd=0;
3062         }
3063
3064         /* note: block[0] is assumed to be positive */
3065         block[0] = (block[0] + (q >> 1)) / q;
3066         start_i = 1;
3067         last_non_zero = 0;
3068         qmat = s->q_intra_matrix[qscale];
3069         if(s->mpeg_quant || s->out_format == FMT_MPEG1)
3070             bias= 1<<(QMAT_SHIFT-1);
3071         length     = s->intra_ac_vlc_length;
3072         last_length= s->intra_ac_vlc_last_length;
3073     } else {
3074         start_i = 0;
3075         last_non_zero = -1;
3076         qmat = s->q_inter_matrix[qscale];
3077         length     = s->inter_ac_vlc_length;
3078         last_length= s->inter_ac_vlc_last_length;
3079     }
3080     last_i= start_i;
3081
     /* A scaled coefficient quantizes to non-zero only when its magnitude
      * exceeds threshold1; the unsigned compare below tests both signs at once. */
3082     threshold1= (1<<QMAT_SHIFT) - bias - 1;
3083     threshold2= (threshold1<<1);
3084
     /* Find the last scan position whose coefficient survives quantization. */
3085     for(i=63; i>=start_i; i--) {
3086         const int j = scantable[i];
3087         int level = block[j] * qmat[j];
3088
3089         if(((unsigned)(level+threshold1))>threshold2){
3090             last_non_zero = i;
3091             break;
3092         }
3093     }
3094
     /* Collect the candidate levels per position: the rounded level and, if it
      * is not zero, the next one closer to zero. Positions inside the dead zone
      * get a single candidate of +1 or -1 matching the sign. */
3095     for(i=start_i; i<=last_non_zero; i++) {
3096         const int j = scantable[i];
3097         int level = block[j] * qmat[j];
3098
3099 //        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
3100 //           || bias-level >= (1<<(QMAT_SHIFT - 3))){
3101         if(((unsigned)(level+threshold1))>threshold2){
3102             if(level>0){
3103                 level= (bias + level)>>QMAT_SHIFT;
3104                 coeff[0][i]= level;
3105                 coeff[1][i]= level-1;
3106 //                coeff[2][k]= level-2;
3107             }else{
3108                 level= (bias - level)>>QMAT_SHIFT;
3109                 coeff[0][i]= -level;
3110                 coeff[1][i]= -level+1;
3111 //                coeff[2][k]= -level+2;
3112             }
3113             coeff_count[i]= FFMIN(level, 2);
3114             assert(coeff_count[i]);
3115             max |=level;
3116         }else{
3117             coeff[0][i]= (level>>31)|1;
3118             coeff_count[i]= 1;
3119         }
3120     }
3121
3122     *overflow= s->max_qcoeff < max; //overflow might have happened
3123
     /* Nothing survived quantization: clear the coefficients from start_i on. */
3124     if(last_non_zero < start_i){
3125         memset(block + start_i, 0, (64-start_i)*sizeof(DCTELEM));
3126         return last_non_zero;
3127     }
3128
3129     score_tab[start_i]= 0;
3130     survivor[0]= start_i;
3131     survivor_count= 1;
3132
     /* Forward pass: score_tab[i+1] receives the best score found for placing a
      * non-zero level at position i, with run_tab/level_tab[i+1] recording the
      * choice. survivor[] lists the earlier positions still tried as
      * predecessors. */
3133     for(i=start_i; i<=last_non_zero; i++){
3134         int level_index, j, zero_distortion;
3135         int dct_coeff= FFABS(block[ scantable[i] ]);
3136         int best_score=256*256*256*120;
3137
3138         if (   s->dsp.fdct == fdct_ifast
3139 #ifndef FAAN_POSTSCALE
3140             || s->dsp.fdct == ff_faandct
3141 #endif
3142            )
3143             dct_coeff= (dct_coeff*ff_inv_aanscales[ scantable[i] ]) >> 12;
3144         zero_distortion= dct_coeff*dct_coeff;
3145
3146         for(level_index=0; level_index < coeff_count[i]; level_index++){
3147             int distortion;
3148             int level= coeff[level_index][i];
3149             const int alevel= FFABS(level);
3150             int unquant_coeff;
3151
3152             assert(level);
3153
3154             if(s->out_format == FMT_H263){
3155                 unquant_coeff= alevel*qmul + qadd;
3156             }else{ //MPEG1
3157                 j= s->dsp.idct_permutation[ scantable[i] ]; //FIXME optimize
3158                 if(s->mb_intra){
3159                         unquant_coeff = (int)(  alevel  * qscale * s->intra_matrix[j]) >> 3;
3160                         unquant_coeff =   (unquant_coeff - 1) | 1;
3161                 }else{
3162                         unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[j])) >> 4;
3163                         unquant_coeff =   (unquant_coeff - 1) | 1;
3164                 }
3165                 unquant_coeff<<= 3;
3166             }
3167
             /* distortion is measured relative to coding this coefficient as zero */
3168             distortion= (unquant_coeff - dct_coeff) * (unquant_coeff - dct_coeff) - zero_distortion;
             /* Offset by 64 so that levels -64..63 map to 0..127 for
              * UNI_AC_ENC_INDEX; any other level is costed with esc_length. */
3169             level+=64;
3170             if((level&(~127)) == 0){
3171                 for(j=survivor_count-1; j>=0; j--){
3172                     int run= i - survivor[j];
3173                     int score= distortion + length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3174                     score += score_tab[i-run];
3175
3176                     if(score < best_score){
3177                         best_score= score;
3178                         run_tab[i+1]= run;
3179                         level_tab[i+1]= level-64;
3180                     }
3181                 }
3182
3183                 if(s->out_format == FMT_H263){
3184                     for(j=survivor_count-1; j>=0; j--){
3185                         int run= i - survivor[j];
3186                         int score= distortion + last_length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3187                         score += score_tab[i-run];
3188                         if(score < last_score){
3189                             last_score= score;
3190                             last_run= run;
3191                             last_level= level-64;
3192                             last_i= i+1;
3193                         }
3194                     }
3195                 }
3196             }else{
3197                 distortion += esc_length*lambda;
3198                 for(j=survivor_count-1; j>=0; j--){
3199                     int run= i - survivor[j];
3200                     int score= distortion + score_tab[i-run];
3201
3202                     if(score < best_score){
3203                         best_score= score;
3204                         run_tab[i+1]= run;
3205                         level_tab[i+1]= level-64;
3206                     }
3207                 }
3208
3209                 if(s->out_format == FMT_H263){
3210                   for(j=survivor_count-1; j>=0; j--){
3211                         int run= i - survivor[j];
3212                         int score= distortion + score_tab[i-run];
3213                         if(score < last_score){
3214                             last_score= score;
3215                             last_run= run;
3216                             last_level= level-64;
3217                             last_i= i+1;
3218                         }
3219                     }
3220                 }
3221             }
3222         }
3223
3224         score_tab[i+1]= best_score;
3225
3226         //Note: there is a vlc code in mpeg4 which is 1 bit shorter than another one with a shorter run and the same level
         /* Drop trailing survivors whose score is worse than the new best
          * (with a margin of lambda when last_non_zero > 27). */
3227         if(last_non_zero <= 27){
3228             for(; survivor_count; survivor_count--){
3229                 if(score_tab[ survivor[survivor_count-1] ] <= best_score)
3230                     break;
3231             }
3232         }else{
3233             for(; survivor_count; survivor_count--){
3234                 if(score_tab[ survivor[survivor_count-1] ] <= best_score + lambda)
3235                     break;
3236             }
3237         }
3238
3239         survivor[ survivor_count++ ]= i+1;
3240     }
3241
     /* For formats other than FMT_H263 the end position is chosen here rather
      * than inside the loop above. */
3242     if(s->out_format != FMT_H263){
3243         last_score= 256*256*256*120;
3244         for(i= survivor[0]; i<=last_non_zero + 1; i++){
3245             int score= score_tab[i];
3246             if(i) score += lambda*2; //FIXME exacter?
3247
3248             if(score < last_score){
3249                 last_score= score;
3250                 last_i= i;
3251                 last_level= level_tab[i];
3252                 last_run= run_tab[i];
3253             }
3254         }
3255     }
3256
3257     s->coded_score[n] = last_score;
3258
3259     dc= FFABS(block[0]);
3260     last_non_zero= last_i - 1;
3261     memset(block + start_i, 0, (64-start_i)*sizeof(DCTELEM));
3262
3263     if(last_non_zero < start_i)
3264         return last_non_zero;
3265
     /* Special case: the only coded coefficient is position 0 of a non-intra
      * block; re-evaluate its candidates against coding nothing at all. */
3266     if(last_non_zero == 0 && start_i == 0){
3267         int best_level= 0;
3268         int best_score= dc * dc;
3269
3270         for(i=0; i<coeff_count[0]; i++){
3271             int level= coeff[i][0];
3272             int alevel= FFABS(level);
3273             int unquant_coeff, score, distortion;
3274
3275             if(s->out_format == FMT_H263){
3276                     unquant_coeff= (alevel*qmul + qadd)>>3;
3277             }else{ //MPEG1
3278                     unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[0])) >> 4;
3279                     unquant_coeff =   (unquant_coeff - 1) | 1;
3280             }
3281             unquant_coeff = (unquant_coeff + 4) >> 3;
3282             unquant_coeff<<= 3 + 3;
3283
3284             distortion= (unquant_coeff - dc) * (unquant_coeff - dc);
3285             level+=64;
3286             if((level&(~127)) == 0) score= distortion + last_length[UNI_AC_ENC_INDEX(0, level)]*lambda;
3287             else                    score= distortion + esc_length*lambda;
3288
3289             if(score < best_score){
3290                 best_score= score;
3291                 best_level= level - 64;
3292             }
3293         }
3294         block[0]= best_level;
3295         s->coded_score[n] = best_score - dc*dc;
3296         if(best_level == 0) return -1;
3297         else                return last_non_zero;
3298     }
3299
     /* Write the chosen run/level chain back into block, walking from the last
      * coefficient towards start_i. */
3300     i= last_i;
3301     assert(last_level);
3302
3303     block[ perm_scantable[last_non_zero] ]= last_level;
3304     i -= last_run + 1;
3305
3306     for(; i>start_i; i -= run_tab[i] + 1){
3307         block[ perm_scantable[i-1] ]= level_tab[i];
3308     }
3309
3310     return last_non_zero;
3311 }
3312
/* Uncomment to compile in the REFINE_STATS counters, timers and printf used by dct_quantize_refine(). */
3313 //#define REFINE_STATS 1
/* Table filled by build_basis(); dct_quantize_refine() treats basis[0][0] == 0 as "not built yet". */
3314 static int16_t basis[64][64];
3315
3316 static void build_basis(uint8_t *perm){
3317     int i, j, x, y;
3318     emms_c();
3319     for(i=0; i<8; i++){
3320         for(j=0; j<8; j++){
3321             for(y=0; y<8; y++){
3322                 for(x=0; x<8; x++){
3323                     double s= 0.25*(1<<BASIS_SHIFT);
3324                     int index= 8*i + j;
3325                     int perm_index= perm[index];
3326                     if(i==0) s*= sqrt(0.5);
3327                     if(j==0) s*= sqrt(0.5);
3328                     basis[perm_index][8*x + y]= lrintf(s * cos((M_PI/8.0)*i*(x+0.5)) * cos((M_PI/8.0)*j*(y+0.5)));
3329                 }
3330             }
3331         }
3332     }
3333 }
3334
/**
 * Iteratively refine an already quantized 8x8 block.
 *
 * Starting from the levels in block, each iteration tries changing a single
 * coefficient by +1 or -1. A trial is scored as the value returned by
 * s->dsp.try_8x8basis() on the residual rem[] plus lambda times the change in
 * VLC length. The lowest-scoring change is applied, and the search stops once
 * no change scores below leaving the block untouched. Which changes are
 * tried depends in part on s->avctx->quantizer_noise_shaping.
 *
 * @param s      encoder context; s->block_last_index[n] supplies the initial
 *               last non-zero position
 * @param block  quantized coefficients, modified in place
 * @param weight 64 weights; overwritten with values derived from their
 *               magnitudes before use
 * @param orig   64 reference values subtracted (scaled by RECON_SHIFT) from
 *               the reconstruction to form rem[]
 * @param n      block index; for intra blocks n < 4 uses s->y_dc_scale,
 *               otherwise s->c_dc_scale
 * @param qscale quantizer used to derive qmul and qadd
 * @return scan position of the last non-zero coefficient after refinement
 */
3335 static int dct_quantize_refine(MpegEncContext *s, //FIXME breaks denoise?
3336                         DCTELEM *block, int16_t *weight, DCTELEM *orig,
3337                         int n, int qscale){
3338     int16_t rem[64];
3339     LOCAL_ALIGNED_16(DCTELEM, d1, [64]);
3340     const uint8_t *scantable= s->intra_scantable.scantable;
3341     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3342 //    unsigned int threshold1, threshold2;
3343 //    int bias=0;
3344     int run_tab[65];
3345     int prev_run=0;
3346     int prev_level=0;
3347     int qmul, qadd, start_i, last_non_zero, i, dc;
3348     uint8_t * length;
3349     uint8_t * last_length;
3350     int lambda;
3351     int rle_index, run, q = 1, sum; //q is only used when s->mb_intra is true
3352 #ifdef REFINE_STATS
3353 static int count=0;
3354 static int after_last=0;
3355 static int to_zero=0;
3356 static int from_zero=0;
3357 static int raise=0;
3358 static int lower=0;
3359 static int messed_sign=0;
3360 #endif
3361
     /* basis[0][0] == 0 is used as the "table not built yet" marker */
3362     if(basis[0][0] == 0)
3363         build_basis(s->dsp.idct_permutation);
3364
3365     qmul= qscale*2;
3366     qadd= (qscale-1)|1;
3367     if (s->mb_intra) {
3368         if (!s->h263_aic) {
3369             if (n < 4)
3370                 q = s->y_dc_scale;
3371             else
3372                 q = s->c_dc_scale;
3373         } else{
3374             /* For AIC we skip quant/dequant of INTRADC */
3375             q = 1;
3376             qadd=0;
3377         }
3378         q <<= RECON_SHIFT-3;
3379         /* note: block[0] is assumed to be positive */
3380         dc= block[0]*q;
3381 //        block[0] = (block[0] + (q >> 1)) / q;
3382         start_i = 1;
3383 //        if(s->mpeg_quant || s->out_format == FMT_MPEG1)
3384 //            bias= 1<<(QMAT_SHIFT-1);
3385         length     = s->intra_ac_vlc_length;
3386         last_length= s->intra_ac_vlc_last_length;
3387     } else {
3388         dc= 0;
3389         start_i = 0;
3390         length     = s->inter_ac_vlc_length;
3391         last_length= s->inter_ac_vlc_last_length;
3392     }
3393     last_non_zero = s->block_last_index[n];
3394
3395 #ifdef REFINE_STATS
3396 {START_TIMER
3397 #endif
     /* rem[] starts as (dc + rounding term) minus the scaled reference values */
3398     dc += (1<<(RECON_SHIFT-1));
3399     for(i=0; i<64; i++){
3400         rem[i]= dc - (orig[i]<<RECON_SHIFT); //FIXME  use orig directly instead of copying to rem[]
3401     }
3402 #ifdef REFINE_STATS
3403 STOP_TIMER("memset rem[]")}
3404 #endif
     /* Replace each weight by a value derived from its magnitude and accumulate
      * the sum of squares that scales lambda below. */
3405     sum=0;
3406     for(i=0; i<64; i++){
3407         int one= 36;
3408         int qns=4;
3409         int w;
3410
3411         w= FFABS(weight[i]) + qns*one;
3412         w= 15 + (48*qns*one + w/2)/w; // 16 .. 63
3413
3414         weight[i] = w;
3415 //        w=weight[i] = (63*qns + (w/2)) / w;
3416
3417         assert(w>0);
3418         assert(w<(1<<6));
3419         sum += w*w;
3420     }
3421     lambda= sum*(uint64_t)s->lambda2 >> (FF_LAMBDA_SHIFT - 6 + 6 + 6 + 6);
3422 #ifdef REFINE_STATS
3423 {START_TIMER
3424 #endif
     /* Add the dequantized value of every non-zero coefficient to rem[] and
      * record the zero run preceding each one in run_tab[]. */
3425     run=0;
3426     rle_index=0;
3427     for(i=start_i; i<=last_non_zero; i++){
3428         int j= perm_scantable[i];
3429         const int level= block[j];
3430         int coeff;
3431
3432         if(level){
3433             if(level<0) coeff= qmul*level - qadd;
3434             else        coeff= qmul*level + qadd;
3435             run_tab[rle_index++]=run;
3436             run=0;
3437
3438             s->dsp.add_8x8basis(rem, basis[j], coeff);
3439         }else{
3440             run++;
3441         }
3442     }
3443 #ifdef REFINE_STATS
3444 if(last_non_zero>0){
3445 STOP_TIMER("init rem[]")
3446 }
3447 }
3448
3449 {START_TIMER
3450 #endif
     /* Greedy search: each iteration evaluates every single-step change and
      * applies the best one; the loop ends when none scores below best_score. */
3451     for(;;){
3452         int best_score=s->dsp.try_8x8basis(rem, weight, basis[0], 0);
3453         int best_coeff=0;
3454         int best_change=0;
3455         int run2, best_unquant_change=0, analyze_gradient;
3456 #ifdef REFINE_STATS
3457 {START_TIMER
3458 #endif
3459         analyze_gradient = last_non_zero > 2 || s->avctx->quantizer_noise_shaping >= 3;
3460
         /* d1[] = forward DCT of the weighted residual; its signs are used
          * below to skip some changes that turn a zero coefficient non-zero. */
3461         if(analyze_gradient){
3462 #ifdef REFINE_STATS
3463 {START_TIMER
3464 #endif
3465             for(i=0; i<64; i++){
3466                 int w= weight[i];
3467
3468                 d1[i] = (rem[i]*w*w + (1<<(RECON_SHIFT+12-1)))>>(RECON_SHIFT+12);
3469             }
3470 #ifdef REFINE_STATS
3471 STOP_TIMER("rem*w*w")}
3472 {START_TIMER
3473 #endif
3474             s->dsp.fdct(d1);
3475 #ifdef REFINE_STATS
3476 STOP_TIMER("dct")}
3477 #endif
3478         }
3479
         /* Intra DC coefficient (only when start_i is 1): try +/-1. */
3480         if(start_i){
3481             const int level= block[0];
3482             int change, old_coeff;
3483
3484             assert(s->mb_intra);
3485
3486             old_coeff= q*level;
3487
3488             for(change=-1; change<=1; change+=2){
3489                 int new_level= level + change;
3490                 int score, new_coeff;
3491
3492                 new_coeff= q*new_level;
3493                 if(new_coeff >= 2048 || new_coeff < 0)
3494                     continue;
3495
3496                 score= s->dsp.try_8x8basis(rem, weight, basis[0], new_coeff - old_coeff);
3497                 if(score<best_score){
3498                     best_score= score;
3499                     best_coeff= 0;
3500                     best_change= change;
3501                     best_unquant_change= new_coeff - old_coeff;
3502                 }
3503             }
3504         }
3505
3506         run=0;
3507         rle_index=0;
3508         run2= run_tab[rle_index++];
3509         prev_level=0;
3510         prev_run=0;
3511
         /* Remaining coefficients in scan order: try +/-1 on each. */
3512         for(i=start_i; i<64; i++){
3513             int j= perm_scantable[i];
3514             const int level= block[j];
3515             int change, old_coeff;
3516
3517             if(s->avctx->quantizer_noise_shaping < 3 && i > last_non_zero + 1)
3518                 break;
3519
3520             if(level){
3521                 if(level<0) old_coeff= qmul*level - qadd;
3522                 else        old_coeff= qmul*level + qadd;
3523                 run2= run_tab[rle_index++]; //FIXME ! maybe after last
3524             }else{
3525                 old_coeff=0;
3526                 run2--;
3527                 assert(run2>=0 || i >= last_non_zero );
3528             }
3529
3530             for(change=-1; change<=1; change+=2){
3531                 int new_level= level + change;
3532                 int score, new_coeff, unquant_change;
3533
3534                 score=0;
3535                 if(s->avctx->quantizer_noise_shaping < 2 && FFABS(new_level) > FFABS(level))
3536                    continue;
3537
3538                 if(new_level){
3539                     if(new_level<0) new_coeff= qmul*new_level - qadd;
3540                     else            new_coeff= qmul*new_level + qadd;
3541                     if(new_coeff >= 2048 || new_coeff <= -2048)
3542                         continue;
3543                     //FIXME check for overflow
3544
3545                     if(level){
3546                         if(level < 63 && level > -63){
3547                             if(i < last_non_zero)
3548                                 score +=   length[UNI_AC_ENC_INDEX(run, new_level+64)]
3549                                          - length[UNI_AC_ENC_INDEX(run, level+64)];
3550                             else
3551                                 score +=   last_length[UNI_AC_ENC_INDEX(run, new_level+64)]
3552                                          - last_length[UNI_AC_ENC_INDEX(run, level+64)];
3553                         }
3554                     }else{
3555                         assert(FFABS(new_level)==1);
3556
3557                         if(analyze_gradient){
3558                             int g= d1[ scantable[i] ];
3559                             if(g && (g^new_level) >= 0)
3560                                 continue;
3561                         }
3562
3563                         if(i < last_non_zero){
3564                             int next_i= i + run2 + 1;
3565                             int next_level= block[ perm_scantable[next_i] ] + 64;
3566
3567                             if(next_level&(~127))
3568                                 next_level= 0;
3569
3570                             if(next_i < last_non_zero)
3571                                 score +=   length[UNI_AC_ENC_INDEX(run, 65)]
3572                                          + length[UNI_AC_ENC_INDEX(run2, next_level)]
3573                                          - length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
3574                             else
3575                                 score +=  length[UNI_AC_ENC_INDEX(run, 65)]
3576                                         + last_length[UNI_AC_ENC_INDEX(run2, next_level)]
3577                                         - last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
3578                         }else{
3579                             score += last_length[UNI_AC_ENC_INDEX(run, 65)];
3580                             if(prev_level){
3581                                 score +=  length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
3582                                         - last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
3583                             }
3584                         }
3585                     }
3586                 }else{
3587                     new_coeff=0;
3588                     assert(FFABS(level)==1);
3589
3590                     if(i < last_non_zero){
3591                         int next_i= i + run2 + 1;
3592                         int next_level= block[ perm_scantable[next_i] ] + 64;
3593
3594                         if(next_level&(~127))
3595                             next_level= 0;
3596
3597                         if(next_i < last_non_zero)
3598                             score +=   length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
3599                                      - length[UNI_AC_ENC_INDEX(run2, next_level)]
3600                                      - length[UNI_AC_ENC_INDEX(run, 65)];
3601                         else
3602                             score +=   last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
3603                                      - last_length[UNI_AC_ENC_INDEX(run2, next_level)]
3604                                      - length[UNI_AC_ENC_INDEX(run, 65)];
3605                     }else{
3606                         score += -last_length[UNI_AC_ENC_INDEX(run, 65)];
3607                         if(prev_level){
3608                             score +=  last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
3609                                     - length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
3610                         }
3611                     }
3612                 }
3613
                 /* score so far is a VLC length delta; weight it by lambda
                  * before adding the try_8x8basis() result */
3614                 score *= lambda;
3615
3616                 unquant_change= new_coeff - old_coeff;
3617                 assert((score < 100*lambda && score > -100*lambda) || lambda==0);
3618
3619                 score+= s->dsp.try_8x8basis(rem, weight, basis[j], unquant_change);
3620                 if(score<best_score){
3621                     best_score= score;
3622                     best_coeff= i;
3623                     best_change= change;
3624                     best_unquant_change= unquant_change;
3625                 }
3626             }
3627             if(level){
3628                 prev_level= level + 64;
3629                 if(prev_level&(~127))
3630                     prev_level= 0;
3631                 prev_run= run;
3632                 run=0;
3633             }else{
3634                 run++;
3635             }
3636         }
3637 #ifdef REFINE_STATS
3638 STOP_TIMER("iterative step")}
3639 #endif
3640
         /* Apply the winning change, update last_non_zero, rebuild run_tab[]
          * and fold the change into rem[]; stop when nothing improved. */
3641         if(best_change){
3642             int j= perm_scantable[ best_coeff ];
3643
3644             block[j] += best_change;
3645
3646             if(best_coeff > last_non_zero){
3647                 last_non_zero= best_coeff;
3648                 assert(block[j]);
3649 #ifdef REFINE_STATS
3650 after_last++;
3651 #endif
3652             }else{
3653 #ifdef REFINE_STATS
3654 if(block[j]){
3655     if(block[j] - best_change){
3656         if(FFABS(block[j]) > FFABS(block[j] - best_change)){
3657             raise++;
3658         }else{
3659             lower++;
3660         }
3661     }else{
3662         from_zero++;
3663     }
3664 }else{
3665     to_zero++;
3666 }
3667 #endif
3668                 for(; last_non_zero>=start_i; last_non_zero--){
3669                     if(block[perm_scantable[last_non_zero]])
3670                         break;
3671                 }
3672             }
3673 #ifdef REFINE_STATS
3674 count++;
3675 if(256*256*256*64 % count == 0){
3676     printf("after_last:%d to_zero:%d from_zero:%d raise:%d lower:%d sign:%d xyp:%d/%d/%d\n", after_last, to_zero, from_zero, raise, lower, messed_sign, s->mb_x, s->mb_y, s->picture_number);
3677 }
3678 #endif
3679             run=0;
3680             rle_index=0;
3681             for(i=start_i; i<=last_non_zero; i++){
3682                 int j= perm_scantable[i];
3683                 const int level= block[j];
3684
3685                  if(level){
3686                      run_tab[rle_index++]=run;
3687                      run=0;
3688                  }else{
3689                      run++;
3690                  }
3691             }
3692
3693             s->dsp.add_8x8basis(rem, basis[j], best_unquant_change);
3694         }else{
3695             break;
3696         }
3697     }
3698 #ifdef REFINE_STATS
3699 if(last_non_zero>0){
3700 STOP_TIMER("iterative search")
3701 }
3702 }
3703 #endif
3704
3705     return last_non_zero;
3706 }
3707
3708 int dct_quantize_c(MpegEncContext *s,
3709                         DCTELEM *block, int n,
3710                         int qscale, int *overflow)
3711 {
3712     int i, j, level, last_non_zero, q, start_i;
3713     const int *qmat;
3714     const uint8_t *scantable= s->intra_scantable.scantable;
3715     int bias;
3716     int max=0;
3717     unsigned int threshold1, threshold2;
3718
3719     s->dsp.fdct (block);
3720
3721     if(s->dct_error_sum)
3722         s->denoise_dct(s, block);
3723
3724     if (s->mb_intra) {
3725         if (!s->h263_aic) {
3726             if (n < 4)
3727                 q = s->y_dc_scale;
3728             else
3729                 q = s->c_dc_scale;
3730             q = q << 3;
3731         } else
3732             /* For AIC we skip quant/dequant of INTRADC */
3733             q = 1 << 3;
3734
3735         /* note: block[0] is assumed to be positive */
3736         block[0] = (block[0] + (q >> 1)) / q;
3737         start_i = 1;
3738         last_non_zero = 0;
3739         qmat = s->q_intra_matrix[qscale];
3740         bias= s->intra_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
3741     } else {
3742         start_i = 0;
3743         last_non_zero = -1;
3744         qmat = s->q_inter_matrix[qscale];
3745         bias= s->inter_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
3746     }
3747     threshold1= (1<<QMAT_SHIFT) - bias - 1;
3748     threshold2= (threshold1<<1);
3749     for(i=63;i>=start_i;i--) {
3750         j = scantable[i];
3751         level = block[j] * qmat[j];
3752
3753         if(((unsigned)(level+threshold1))>threshold2){
3754             last_non_zero = i;
3755             break;
3756         }else{
3757             block[j]=0;
3758         }
3759     }
3760     for(i=start_i; i<=last_non_zero; i++) {
3761         j = scantable[i];
3762         level = block[j] * qmat[j];
3763
3764 //        if(   bias+level >= (1<<QMAT_SHIFT)
3765 //           || bias-level >= (1<<QMAT_SHIFT)){
3766         if(((unsigned)(level+threshold1))>threshold2){
3767             if(level>0){
3768                 level= (bias + level)>>QMAT_SHIFT;
3769                 block[j]= level;
3770             }else{
3771                 level= (bias - level)>>QMAT_SHIFT;
3772                 block[j]= -level;
3773             }
3774             max |=level;
3775         }else{
3776             block[j]=0;
3777         }
3778     }
3779     *overflow= s->max_qcoeff < max; //overflow might have happened
3780
3781     /* we need this permutation so that we correct the IDCT, we only permute the !=0 elements */
3782     if (s->dsp.idct_permutation_type != FF_NO_IDCT_PERM)
3783         ff_block_permute(block, s->dsp.idct_permutation, scantable, last_non_zero);
3784
3785     return last_non_zero;
3786 }
3787
3788 #define OFFSET(x) offsetof(MpegEncContext, x)
3789 #define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
3790 static const AVOption h263_options[] = {
3791     { "obmc",         "use overlapped block motion compensation.", OFFSET(obmc), FF_OPT_TYPE_INT, { 0 }, 0, 1, VE },
3792     { "structured_slices","Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), FF_OPT_TYPE_INT, { 0 }, 0, 1, VE},
3793     { NULL },
3794 };
3795
3796 static const AVClass h263_class = {
3797     .class_name = "H.263 encoder",
3798     .item_name  = av_default_item_name,
3799     .option     = h263_options,
3800     .version    = LIBAVUTIL_VERSION_INT,
3801 };
3802
3803 AVCodec ff_h263_encoder = {
3804     .name           = "h263",
3805     .type           = AVMEDIA_TYPE_VIDEO,
3806     .id             = CODEC_ID_H263,
3807     .priv_data_size = sizeof(MpegEncContext),
3808     .init           = MPV_encode_init,
3809     .encode         = MPV_encode_picture,
3810     .close          = MPV_encode_end,
3811     .pix_fmts= (const enum PixelFormat[]){PIX_FMT_YUV420P, PIX_FMT_NONE},
3812     .long_name= NULL_IF_CONFIG_SMALL("H.263 / H.263-1996"),
3813     .priv_class     = &h263_class,
3814 };
3815
3816 static const AVOption h263p_options[] = {
3817     { "umv",        "Use unlimited motion vectors.",    OFFSET(umvplus), FF_OPT_TYPE_INT, { 0 }, 0, 1, VE },
3818     { "aiv",        "Use alternative inter VLC.",       OFFSET(alt_inter_vlc), FF_OPT_TYPE_INT, { 0 }, 0, 1, VE },
3819     { "obmc",       "use overlapped block motion compensation.", OFFSET(obmc), FF_OPT_TYPE_INT, { 0 }, 0, 1, VE },
3820     { "structured_slices", "Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), FF_OPT_TYPE_INT, { 0 }, 0, 1, VE},
3821     { NULL },
3822 };
3823 static const AVClass h263p_class = {
3824     .class_name = "H.263p encoder",
3825     .item_name  = av_default_item_name,
3826     .option     = h263p_options,
3827     .version    = LIBAVUTIL_VERSION_INT,
3828 };
3829
3830 AVCodec ff_h263p_encoder = {
3831     .name           = "h263p",
3832     .type           = AVMEDIA_TYPE_VIDEO,
3833     .id             = CODEC_ID_H263P,
3834     .priv_data_size = sizeof(MpegEncContext),
3835     .init           = MPV_encode_init,
3836     .encode         = MPV_encode_picture,
3837     .close          = MPV_encode_end,
3838     .capabilities = CODEC_CAP_SLICE_THREADS,
3839     .pix_fmts= (const enum PixelFormat[]){PIX_FMT_YUV420P, PIX_FMT_NONE},
3840     .long_name= NULL_IF_CONFIG_SMALL("H.263+ / H.263-1998 / H.263 version 2"),
3841     .priv_class     = &h263p_class,
3842 };
3843
3844 AVCodec ff_msmpeg4v2_encoder = {
3845     .name           = "msmpeg4v2",
3846     .type           = AVMEDIA_TYPE_VIDEO,
3847     .id             = CODEC_ID_MSMPEG4V2,
3848     .priv_data_size = sizeof(MpegEncContext),
3849     .init           = MPV_encode_init,
3850     .encode         = MPV_encode_picture,
3851     .close          = MPV_encode_end,
3852     .pix_fmts= (const enum PixelFormat[]){PIX_FMT_YUV420P, PIX_FMT_NONE},
3853     .long_name= NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 2"),
3854 };
3855
3856 AVCodec ff_msmpeg4v3_encoder = {
3857     .name           = "msmpeg4",
3858     .type           = AVMEDIA_TYPE_VIDEO,
3859     .id             = CODEC_ID_MSMPEG4V3,
3860     .priv_data_size = sizeof(MpegEncContext),
3861     .init           = MPV_encode_init,
3862     .encode         = MPV_encode_picture,
3863     .close          = MPV_encode_end,
3864     .pix_fmts= (const enum PixelFormat[]){PIX_FMT_YUV420P, PIX_FMT_NONE},
3865     .long_name= NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 3"),
3866 };
3867
3868 AVCodec ff_wmv1_encoder = {
3869     .name           = "wmv1",
3870     .type           = AVMEDIA_TYPE_VIDEO,
3871     .id             = CODEC_ID_WMV1,
3872     .priv_data_size = sizeof(MpegEncContext),
3873     .init           = MPV_encode_init,
3874     .encode         = MPV_encode_picture,
3875     .close          = MPV_encode_end,
3876     .pix_fmts= (const enum PixelFormat[]){PIX_FMT_YUV420P, PIX_FMT_NONE},
3877     .long_name= NULL_IF_CONFIG_SMALL("Windows Media Video 7"),
3878 };