1 /*
2  * The simplest mpeg encoder (well, it was the simplest!)
3  * Copyright (c) 2000,2001 Fabrice Bellard.
4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5  *
6  * This library is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2 of the License, or (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with this library; if not, write to the Free Software
18  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
19  *
20  * 4MV & hq & b-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
21  */
22  
23 /**
24  * @file mpegvideo.c
25  * The simplest mpeg encoder (well, it was the simplest!).
26  */ 
27  
28 #include "avcodec.h"
29 #include "dsputil.h"
30 #include "mpegvideo.h"
31 #include "faandct.h"
32 #include <limits.h>
33
34 #ifdef USE_FASTMEMCPY
35 #include "fastmemcpy.h"
36 #endif
37
38 //#undef NDEBUG
39 //#include <assert.h>
40
41 #ifdef CONFIG_ENCODERS
42 static void encode_picture(MpegEncContext *s, int picture_number);
43 #endif //CONFIG_ENCODERS
44 static void dct_unquantize_mpeg1_intra_c(MpegEncContext *s, 
45                                    DCTELEM *block, int n, int qscale);
46 static void dct_unquantize_mpeg1_inter_c(MpegEncContext *s, 
47                                    DCTELEM *block, int n, int qscale);
48 static void dct_unquantize_mpeg2_intra_c(MpegEncContext *s,
49                                    DCTELEM *block, int n, int qscale);
50 static void dct_unquantize_mpeg2_inter_c(MpegEncContext *s,
51                                    DCTELEM *block, int n, int qscale);
52 static void dct_unquantize_h263_intra_c(MpegEncContext *s, 
53                                   DCTELEM *block, int n, int qscale);
54 static void dct_unquantize_h263_inter_c(MpegEncContext *s, 
55                                   DCTELEM *block, int n, int qscale);
56 static void draw_edges_c(uint8_t *buf, int wrap, int width, int height, int w);
57 #ifdef CONFIG_ENCODERS
58 static int dct_quantize_c(MpegEncContext *s, DCTELEM *block, int n, int qscale, int *overflow);
59 static int dct_quantize_trellis_c(MpegEncContext *s, DCTELEM *block, int n, int qscale, int *overflow);
60 static int dct_quantize_refine(MpegEncContext *s, DCTELEM *block, int16_t *weight, DCTELEM *orig, int n, int qscale);
61 static int sse_mb(MpegEncContext *s);
62 static void  denoise_dct_c(MpegEncContext *s, DCTELEM *block);
63 #endif //CONFIG_ENCODERS
64
65 #ifdef HAVE_XVMC
66 extern int  XVMC_field_start(MpegEncContext*s, AVCodecContext *avctx);
67 extern void XVMC_field_end(MpegEncContext *s);
68 extern void XVMC_decode_mb(MpegEncContext *s);
69 #endif
70
71 void (*draw_edges)(uint8_t *buf, int wrap, int width, int height, int w)= draw_edges_c;
72
73
74 /* enable all paranoid tests for rounding, overflows, etc... */
75 //#define PARANOID
76
77 //#define DEBUG
78
79
80 /* for jpeg fast DCT */
81 #define CONST_BITS 14
82
83 static const uint16_t aanscales[64] = {
84     /* precomputed values scaled up by 14 bits */
85     16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
86     22725, 31521, 29692, 26722, 22725, 17855, 12299,  6270,
87     21407, 29692, 27969, 25172, 21407, 16819, 11585,  5906,
88     19266, 26722, 25172, 22654, 19266, 15137, 10426,  5315,
89     16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
90     12873, 17855, 16819, 15137, 12873, 10114,  6967,  3552,
91      8867, 12299, 11585, 10426,  8867,  6967,  4799,  2446,
92      4520,  6270,  5906,  5315,  4520,  3552,  2446,  1247
93 };
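/* These appear to be the AAN DCT post-scale factors scaled by 2^14:
   aanscales[8*u+v] ~= 16384 * A(u) * A(v) with A(0)=1, A(k)=sqrt(2)*cos(k*PI/16)
   (e.g. 16384*1.3870 ~= 22725). fdct_ifast leaves this scaling in its output,
   which is why convert_matrix() below divides it out of the quantization
   multipliers for that DCT. */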
94
95 static const uint8_t h263_chroma_roundtab[16] = {
96 //  0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15
97     0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2,
98 };
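/* Rounding table presumably used when a chroma motion vector is derived from the
   luma vectors (e.g. 4MV): the low four bits of the summed luma vector index the
   table and give the half-pel part of the chroma vector; the mapping is biased
   towards 1, i.e. towards half-pel chroma interpolation. */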
99
100 static const uint8_t ff_default_chroma_qscale_table[32]={
101 //  0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
102     0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
103 };
104
105 #ifdef CONFIG_ENCODERS
106 static uint8_t (*default_mv_penalty)[MAX_MV*2+1]=NULL;
107 static uint8_t default_fcode_tab[MAX_MV*2+1];
108
109 enum PixelFormat ff_yuv420p_list[2]= {PIX_FMT_YUV420P, -1};
110
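/**
 * builds the per-qscale quantization multiplier tables (qmat, qmat16) from a quant
 * matrix so that quantization can be done with a multiply and shift instead of a
 * divide; roughly, level = (coeff * qmat[qscale][i]) >> QMAT_SHIFT approximates
 * coeff / (qscale * quant_matrix[i]). For fdct_ifast (and non-postscaled faandct)
 * the AAN scale factors are divided out as well, and qmat16[..][1] carries the
 * rounding bias for the 16 bit (MMX) quantizer.
 */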
111 static void convert_matrix(DSPContext *dsp, int (*qmat)[64], uint16_t (*qmat16)[2][64],
112                            const uint16_t *quant_matrix, int bias, int qmin, int qmax, int intra)
113 {
114     int qscale;
115     int shift=0;
116
117     for(qscale=qmin; qscale<=qmax; qscale++){
118         int i;
119         if (dsp->fdct == ff_jpeg_fdct_islow 
120 #ifdef FAAN_POSTSCALE
121             || dsp->fdct == ff_faandct
122 #endif
123             ) {
124             for(i=0;i<64;i++) {
125                 const int j= dsp->idct_permutation[i];
126                 /* 16 <= qscale * quant_matrix[i] <= 7905 */
127                 /* 19952         <= aanscales[i] * qscale * quant_matrix[i]           <= 249205026 */
128                 /* (1<<36)/19952 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= (1<<36)/249205026 */
129                 /* 3444240       >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= 275 */
130                 
131                 qmat[qscale][i] = (int)((uint64_t_C(1) << QMAT_SHIFT) / 
132                                 (qscale * quant_matrix[j]));
133             }
134         } else if (dsp->fdct == fdct_ifast
135 #ifndef FAAN_POSTSCALE
136                    || dsp->fdct == ff_faandct
137 #endif
138                    ) {
139             for(i=0;i<64;i++) {
140                 const int j= dsp->idct_permutation[i];
141                 /* 16 <= qscale * quant_matrix[i] <= 7905 */
142                 /* 19952         <= aanscales[i] * qscale * quant_matrix[i]           <= 249205026 */
143                 /* (1<<36)/19952 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= (1<<36)/249205026 */
144                 /* 3444240       >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= 275 */
145                 
146                 qmat[qscale][i] = (int)((uint64_t_C(1) << (QMAT_SHIFT + 14)) / 
147                                 (aanscales[i] * qscale * quant_matrix[j]));
148             }
149         } else {
150             for(i=0;i<64;i++) {
151                 const int j= dsp->idct_permutation[i];
152                 /* We can safely suppose that 16 <= quant_matrix[i] <= 255
153                    So 16           <= qscale * quant_matrix[i]             <= 7905
154                    so (1<<19) / 16 >= (1<<19) / (qscale * quant_matrix[i]) >= (1<<19) / 7905
155                    so 32768        >= (1<<19) / (qscale * quant_matrix[i]) >= 67
156                 */
157                 qmat[qscale][i] = (int)((uint64_t_C(1) << QMAT_SHIFT) / (qscale * quant_matrix[j]));
158 //                qmat  [qscale][i] = (1 << QMAT_SHIFT_MMX) / (qscale * quant_matrix[i]);
159                 qmat16[qscale][0][i] = (1 << QMAT_SHIFT_MMX) / (qscale * quant_matrix[j]);
160
161                 if(qmat16[qscale][0][i]==0 || qmat16[qscale][0][i]==128*256) qmat16[qscale][0][i]=128*256-1;
162                 qmat16[qscale][1][i]= ROUNDED_DIV(bias<<(16-QUANT_BIAS_SHIFT), qmat16[qscale][0][i]);
163             }
164         }
165         
166         for(i=intra; i<64; i++){
167             int64_t max= 8191;
168             if (dsp->fdct == fdct_ifast
169 #ifndef FAAN_POSTSCALE
170                    || dsp->fdct == ff_faandct
171 #endif
172                    ) {
173                 max= (8191LL*aanscales[i]) >> 14;
174             }
175             while(((max * qmat[qscale][i]) >> shift) > INT_MAX){ 
176                 shift++;
177             }
178         }
179     }
180     if(shift){
181         av_log(NULL, AV_LOG_INFO, "Warning, QMAT_SHIFT is larger than %d, overflows possible\n", QMAT_SHIFT - shift);
182     }
183 }
184
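/**
 * derives the quantizer from the current rate distortion lambda:
 * 139/2^14 is roughly 1/FF_QP2LAMBDA, so this is approximately
 * qscale = lambda/FF_QP2LAMBDA clipped to [qmin, qmax], and lambda2 is kept in
 * sync as lambda^2 / FF_LAMBDA_SCALE.
 */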
185 static inline void update_qscale(MpegEncContext *s){
186     s->qscale= (s->lambda*139 + FF_LAMBDA_SCALE*64) >> (FF_LAMBDA_SHIFT + 7);
187     s->qscale= clip(s->qscale, s->avctx->qmin, s->avctx->qmax);
188     
189     s->lambda2= (s->lambda*s->lambda + FF_LAMBDA_SCALE/2) >> FF_LAMBDA_SHIFT;
190 }
191 #endif //CONFIG_ENCODERS
192
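/**
 * initializes a ScanTable: permutated[] is the scan order remapped through the
 * IDCT permutation, and raster_end[i] is the largest permuted index seen up to
 * scan position i (used to find where a block ends in raster order). On PowerPC
 * the inverse permutation is stored as well.
 */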
193 void ff_init_scantable(uint8_t *permutation, ScanTable *st, const uint8_t *src_scantable){
194     int i;
195     int end;
196     
197     st->scantable= src_scantable;
198
199     for(i=0; i<64; i++){
200         int j;
201         j = src_scantable[i];
202         st->permutated[i] = permutation[j];
203 #ifdef ARCH_POWERPC
204         st->inverse[j] = i;
205 #endif
206     }
207     
208     end=-1;
209     for(i=0; i<64; i++){
210         int j;
211         j = st->permutated[i];
212         if(j>end) end=j;
213         st->raster_end[i]= end;
214     }
215 }
216
217 #ifdef CONFIG_ENCODERS
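/**
 * writes a custom quant matrix using the MPEG style load-matrix syntax:
 * a 1 bit followed by the 64 entries in zigzag order, or a single 0 bit if
 * no matrix is given.
 */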
218 void ff_write_quant_matrix(PutBitContext *pb, int16_t *matrix){
219     int i;
220
221     if(matrix){
222         put_bits(pb, 1, 1);
223         for(i=0;i<64;i++) {
224             put_bits(pb, 8, matrix[ ff_zigzag_direct[i] ]);
225         }
226     }else
227         put_bits(pb, 1, 0);
228 }
229 #endif //CONFIG_ENCODERS
230
231 /* init common dct for both encoder and decoder */
232 int DCT_common_init(MpegEncContext *s)
233 {
234     s->dct_unquantize_h263_intra = dct_unquantize_h263_intra_c;
235     s->dct_unquantize_h263_inter = dct_unquantize_h263_inter_c;
236     s->dct_unquantize_mpeg1_intra = dct_unquantize_mpeg1_intra_c;
237     s->dct_unquantize_mpeg1_inter = dct_unquantize_mpeg1_inter_c;
238     s->dct_unquantize_mpeg2_intra = dct_unquantize_mpeg2_intra_c;
239     s->dct_unquantize_mpeg2_inter = dct_unquantize_mpeg2_inter_c;
240
241 #ifdef CONFIG_ENCODERS
242     s->dct_quantize= dct_quantize_c;
243     s->denoise_dct= denoise_dct_c;
244 #endif
245         
246 #ifdef HAVE_MMX
247     MPV_common_init_mmx(s);
248 #endif
249 #ifdef ARCH_ALPHA
250     MPV_common_init_axp(s);
251 #endif
252 #ifdef HAVE_MLIB
253     MPV_common_init_mlib(s);
254 #endif
255 #ifdef HAVE_MMI
256     MPV_common_init_mmi(s);
257 #endif
258 #ifdef ARCH_ARMV4L
259     MPV_common_init_armv4l(s);
260 #endif
261 #ifdef ARCH_POWERPC
262     MPV_common_init_ppc(s);
263 #endif
264
265 #ifdef CONFIG_ENCODERS
266     s->fast_dct_quantize= s->dct_quantize;
267
268     if(s->flags&CODEC_FLAG_TRELLIS_QUANT){
269         s->dct_quantize= dct_quantize_trellis_c; //move before MPV_common_init_*
270     }
271
272 #endif //CONFIG_ENCODERS
273
274     /* load & permute scantables
275        note: only wmv uses different ones 
276     */
277     if(s->alternate_scan){
278         ff_init_scantable(s->dsp.idct_permutation, &s->inter_scantable  , ff_alternate_vertical_scan);
279         ff_init_scantable(s->dsp.idct_permutation, &s->intra_scantable  , ff_alternate_vertical_scan);
280     }else{
281         ff_init_scantable(s->dsp.idct_permutation, &s->inter_scantable  , ff_zigzag_direct);
282         ff_init_scantable(s->dsp.idct_permutation, &s->intra_scantable  , ff_zigzag_direct);
283     }
284     ff_init_scantable(s->dsp.idct_permutation, &s->intra_h_scantable, ff_alternate_horizontal_scan);
285     ff_init_scantable(s->dsp.idct_permutation, &s->intra_v_scantable, ff_alternate_vertical_scan);
286
287     return 0;
288 }
289
290 static void copy_picture(Picture *dst, Picture *src){
291     *dst = *src;
292     dst->type= FF_BUFFER_TYPE_COPY;
293 }
294
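/**
 * copies the display level attributes (pict_type, quality, pts, interlacing flags,
 * ...) from one AVFrame to another; if me_threshold is set (first pass motion
 * information is going to be reused) the macroblock types, motion vectors and
 * reference indices are copied as well.
 */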
295 static void copy_picture_attributes(MpegEncContext *s, AVFrame *dst, AVFrame *src){
296     int i;
297
298     dst->pict_type              = src->pict_type;
299     dst->quality                = src->quality;
300     dst->coded_picture_number   = src->coded_picture_number;
301     dst->display_picture_number = src->display_picture_number;
302 //    dst->reference              = src->reference;
303     dst->pts                    = src->pts;
304     dst->interlaced_frame       = src->interlaced_frame;
305     dst->top_field_first        = src->top_field_first;
306
307     if(s->avctx->me_threshold){
308         if(!src->motion_val[0])
309             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.motion_val not set!\n");
310         if(!src->mb_type)
311             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.mb_type not set!\n");
312         if(!src->ref_index[0])
313             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.ref_index not set!\n");
314         if(src->motion_subsample_log2 != dst->motion_subsample_log2)
315             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.motion_subsample_log2 doesn't match! (%d!=%d)\n",
316             src->motion_subsample_log2, dst->motion_subsample_log2);
317
318         memcpy(dst->mb_type, src->mb_type, s->mb_stride * s->mb_height * sizeof(dst->mb_type[0]));
319         
320         for(i=0; i<2; i++){
321             int stride= ((16*s->mb_width )>>src->motion_subsample_log2) + 1;
322             int height= ((16*s->mb_height)>>src->motion_subsample_log2);
323
324             if(src->motion_val[i] && src->motion_val[i] != dst->motion_val[i]){
325                 memcpy(dst->motion_val[i], src->motion_val[i], 2*stride*height*sizeof(int16_t));
326             }
327             if(src->ref_index[i] && src->ref_index[i] != dst->ref_index[i]){
328                 memcpy(dst->ref_index[i], src->ref_index[i], s->b8_stride*2*s->mb_height*sizeof(int8_t));
329             }
330         }
331     }
332 }
333
334 /**
335  * allocates a Picture
336  * The pixels are allocated/set by calling get_buffer() if shared=0
337  */
338 static int alloc_picture(MpegEncContext *s, Picture *pic, int shared){
339     const int big_mb_num= s->mb_stride*(s->mb_height+1) + 1; //the +1 is needed so memset(,,stride*height) doesn't sig11
340     const int mb_array_size= s->mb_stride*s->mb_height;
341     const int b8_array_size= s->b8_stride*s->mb_height*2;
342     const int b4_array_size= s->b4_stride*s->mb_height*4;
343     int i;
344     
345     if(shared){
346         assert(pic->data[0]);
347         assert(pic->type == 0 || pic->type == FF_BUFFER_TYPE_SHARED);
348         pic->type= FF_BUFFER_TYPE_SHARED;
349     }else{
350         int r;
351         
352         assert(!pic->data[0]);
353         
354         r= s->avctx->get_buffer(s->avctx, (AVFrame*)pic);
355         
356         if(r<0 || !pic->age || !pic->type || !pic->data[0]){
357             av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed (%d %d %d %p)\n", r, pic->age, pic->type, pic->data[0]);
358             return -1;
359         }
360
361         if(s->linesize && (s->linesize != pic->linesize[0] || s->uvlinesize != pic->linesize[1])){
362             av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed (stride changed)\n");
363             return -1;
364         }
365
366         if(pic->linesize[1] != pic->linesize[2]){
367             av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed (uv stride mismatch)\n");
368             return -1;
369         }
370
371         s->linesize  = pic->linesize[0];
372         s->uvlinesize= pic->linesize[1];
373     }
374     
375     if(pic->qscale_table==NULL){
376         if (s->encoding) {        
377             CHECKED_ALLOCZ(pic->mb_var   , mb_array_size * sizeof(int16_t))
378             CHECKED_ALLOCZ(pic->mc_mb_var, mb_array_size * sizeof(int16_t))
379             CHECKED_ALLOCZ(pic->mb_mean  , mb_array_size * sizeof(int8_t))
380         }
381
382         CHECKED_ALLOCZ(pic->mbskip_table , mb_array_size * sizeof(uint8_t)+2) //the +2 is for the slice end check
383         CHECKED_ALLOCZ(pic->qscale_table , mb_array_size * sizeof(uint8_t))
384         CHECKED_ALLOCZ(pic->mb_type_base , big_mb_num    * sizeof(uint32_t))
385         pic->mb_type= pic->mb_type_base + s->mb_stride+1;
386         if(s->out_format == FMT_H264){
387             for(i=0; i<2; i++){
388                 CHECKED_ALLOCZ(pic->motion_val_base[i], 2 * (b4_array_size+2)  * sizeof(int16_t))
389                 pic->motion_val[i]= pic->motion_val_base[i]+2;
390                 CHECKED_ALLOCZ(pic->ref_index[i], b8_array_size * sizeof(uint8_t))
391             }
392             pic->motion_subsample_log2= 2;
393         }else if(s->out_format == FMT_H263 || s->encoding || (s->avctx->debug&FF_DEBUG_MV) || (s->avctx->debug_mv)){
394             for(i=0; i<2; i++){
395                 CHECKED_ALLOCZ(pic->motion_val_base[i], 2 * (b8_array_size+2) * sizeof(int16_t))
396                 pic->motion_val[i]= pic->motion_val_base[i]+2;
397                 CHECKED_ALLOCZ(pic->ref_index[i], b8_array_size * sizeof(uint8_t))
398             }
399             pic->motion_subsample_log2= 3;
400         }
401         if(s->avctx->debug&FF_DEBUG_DCT_COEFF) {
402             CHECKED_ALLOCZ(pic->dct_coeff, 64 * mb_array_size * sizeof(DCTELEM)*6)
403         }
404         pic->qstride= s->mb_stride;
405         CHECKED_ALLOCZ(pic->pan_scan , 1 * sizeof(AVPanScan))
406     }
407
408     //it might be nicer if the application would keep track of these but it would require an API change
409     memmove(s->prev_pict_types+1, s->prev_pict_types, PREV_PICT_TYPES_BUFFER_SIZE-1);
410     s->prev_pict_types[0]= s->pict_type;
411     if(pic->age < PREV_PICT_TYPES_BUFFER_SIZE && s->prev_pict_types[pic->age] == B_TYPE)
412         pic->age= INT_MAX; // skipped MBs in B-frames are quite rare in MPEG-1/2 and it's a bit tricky to skip them anyway
413     
414     return 0;
415 fail: //for the CHECKED_ALLOCZ macro
416     return -1;
417 }
418
419 /**
420  * deallocates a picture
421  */
422 static void free_picture(MpegEncContext *s, Picture *pic){
423     int i;
424
425     if(pic->data[0] && pic->type!=FF_BUFFER_TYPE_SHARED){
426         s->avctx->release_buffer(s->avctx, (AVFrame*)pic);
427     }
428
429     av_freep(&pic->mb_var);
430     av_freep(&pic->mc_mb_var);
431     av_freep(&pic->mb_mean);
432     av_freep(&pic->mbskip_table);
433     av_freep(&pic->qscale_table);
434     av_freep(&pic->mb_type_base);
435     av_freep(&pic->dct_coeff);
436     av_freep(&pic->pan_scan);
437     pic->mb_type= NULL;
438     for(i=0; i<2; i++){
439         av_freep(&pic->motion_val_base[i]);
440         av_freep(&pic->ref_index[i]);
441     }
442     
443     if(pic->type == FF_BUFFER_TYPE_SHARED){
444         for(i=0; i<4; i++){
445             pic->base[i]=
446             pic->data[i]= NULL;
447         }
448         pic->type= 0;        
449     }
450 }
451
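/**
 * allocates the per-thread scratch buffers: the edge emulation buffer, the motion
 * estimation scratchpads and maps, and the DCT blocks. The sizes are conservative,
 * based on s->width, because the real linesize is not known before get_buffer()
 * (see the FIXME below).
 */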
452 static int init_duplicate_context(MpegEncContext *s, MpegEncContext *base){
453     int i;
454
455     // edge emu needs blocksize + filter length - 1 (=17x17 for halfpel / 21x21 for h264) 
456     CHECKED_ALLOCZ(s->allocated_edge_emu_buffer, (s->width+64)*2*17*2); //(width + edge + align)*interlaced*MBsize*tolerance
457     s->edge_emu_buffer= s->allocated_edge_emu_buffer + (s->width+64)*2*17;
458
459      //FIXME should be linesize instead of s->width*2 but that isn't known before get_buffer()
460     CHECKED_ALLOCZ(s->me.scratchpad,  (s->width+64)*4*16*2*sizeof(uint8_t)) 
461     s->rd_scratchpad=   s->me.scratchpad;
462     s->b_scratchpad=    s->me.scratchpad;
463     s->obmc_scratchpad= s->me.scratchpad + 16;
464     if (s->encoding) {
465         CHECKED_ALLOCZ(s->me.map      , ME_MAP_SIZE*sizeof(uint32_t))
466         CHECKED_ALLOCZ(s->me.score_map, ME_MAP_SIZE*sizeof(uint32_t))
467         if(s->avctx->noise_reduction){
468             CHECKED_ALLOCZ(s->dct_error_sum, 2 * 64 * sizeof(int))
469         }
470     }   
471     CHECKED_ALLOCZ(s->blocks, 64*12*2 * sizeof(DCTELEM))
472     s->block= s->blocks[0];
473
474     for(i=0;i<12;i++){
475         s->pblocks[i] = (short *)(&s->block[i]);
476     }
477     return 0;
478 fail:
479     return -1; //free() through MPV_common_end()
480 }
481
482 static void free_duplicate_context(MpegEncContext *s){
483     if(s==NULL) return;
484
485     av_freep(&s->allocated_edge_emu_buffer); s->edge_emu_buffer= NULL;
486     av_freep(&s->me.scratchpad);
487     s->rd_scratchpad=   
488     s->b_scratchpad=    
489     s->obmc_scratchpad= NULL;
490     
491     av_freep(&s->dct_error_sum);
492     av_freep(&s->me.map);
493     av_freep(&s->me.score_map);
494     av_freep(&s->blocks);
495     s->block= NULL;
496 }
497
498 static void backup_duplicate_context(MpegEncContext *bak, MpegEncContext *src){
499 #define COPY(a) bak->a= src->a
500     COPY(allocated_edge_emu_buffer);
501     COPY(edge_emu_buffer);
502     COPY(me.scratchpad);
503     COPY(rd_scratchpad);
504     COPY(b_scratchpad);
505     COPY(obmc_scratchpad);
506     COPY(me.map);
507     COPY(me.score_map);
508     COPY(blocks);
509     COPY(block);
510     COPY(start_mb_y);
511     COPY(end_mb_y);
512     COPY(me.map_generation);
513     COPY(pb);
514     COPY(dct_error_sum);
515     COPY(dct_count[0]);
516     COPY(dct_count[1]);
517 #undef COPY
518 }
519
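/**
 * copies the shared state from src into dst (a slice-thread context) while
 * preserving dst's own buffers and per-thread fields, which are saved and restored
 * via backup_duplicate_context(), and then rebuilds the pblocks[] pointers.
 */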
520 void ff_update_duplicate_context(MpegEncContext *dst, MpegEncContext *src){
521     MpegEncContext bak;
522     int i;
523     //FIXME copy only needed parts
524 //START_TIMER
525     backup_duplicate_context(&bak, dst);
526     memcpy(dst, src, sizeof(MpegEncContext));
527     backup_duplicate_context(dst, &bak);
528     for(i=0;i<12;i++){
529         dst->pblocks[i] = (short *)(&dst->block[i]);
530     }
531 //STOP_TIMER("update_duplicate_context") //about 10k cycles / 0.01 sec for 1000frames on 1ghz with 2 threads
532 }
533
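/**
 * copies the picture level decisions (picture type, f/b codes, qscale, lambda, ...)
 * between contexts; as the name suggests this is presumably used to update the
 * slice-thread contexts from the main one after motion estimation.
 */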
534 static void update_duplicate_context_after_me(MpegEncContext *dst, MpegEncContext *src){
535 #define COPY(a) dst->a= src->a
536     COPY(pict_type);
537     COPY(current_picture);
538     COPY(f_code);
539     COPY(b_code);
540     COPY(qscale);
541     COPY(lambda);
542     COPY(lambda2);
543     COPY(picture_in_gop_number);
544     COPY(gop_picture_number);
545     COPY(frame_pred_frame_dct); //FIXME don't set in encode_header
546     COPY(progressive_frame); //FIXME don't set in encode_header
547     COPY(partitioned_frame); //FIXME don't set in encode_header
548 #undef COPY
549 }
550
551 /**
552  * sets the given MpegEncContext to common defaults (same for encoding and decoding).
553  * the changed fields will not depend upon the prior state of the MpegEncContext.
554  */
555 static void MPV_common_defaults(MpegEncContext *s){
556     s->y_dc_scale_table=
557     s->c_dc_scale_table= ff_mpeg1_dc_scale_table;
558     s->chroma_qscale_table= ff_default_chroma_qscale_table;
559     s->progressive_frame= 1;
560     s->progressive_sequence= 1;
561     s->picture_structure= PICT_FRAME;
562
563     s->coded_picture_number = 0;
564     s->picture_number = 0;
565     s->input_picture_number = 0;
566
567     s->picture_in_gop_number = 0;
568
569     s->f_code = 1;
570     s->b_code = 1;
571 }
572
573 /**
574  * sets the given MpegEncContext to defaults for decoding.
575  * the changed fields will not depend upon the prior state of the MpegEncContext.
576  */
577 void MPV_decode_defaults(MpegEncContext *s){
578     MPV_common_defaults(s);
579 }
580
581 /**
582  * sets the given MpegEncContext to defaults for encoding.
583  * the changed fields will not depend upon the prior state of the MpegEncContext.
584  */
585
586 #ifdef CONFIG_ENCODERS
587 static void MPV_encode_defaults(MpegEncContext *s){
588     static int done=0;
589     
590     MPV_common_defaults(s);
591     
592     if(!done){
593         int i;
594         done=1;
595
596         default_mv_penalty= av_mallocz( sizeof(uint8_t)*(MAX_FCODE+1)*(2*MAX_MV+1) );
597         memset(default_fcode_tab , 0, sizeof(uint8_t)*(2*MAX_MV+1));
598
599         for(i=-16; i<16; i++){
600             default_fcode_tab[i + MAX_MV]= 1;
601         }
602     }
603     s->me.mv_penalty= default_mv_penalty;
604     s->fcode_tab= default_fcode_tab;
605 }
606 #endif //CONFIG_ENCODERS
607
608 /** 
609  * init common structure for both encoder and decoder.
610  * this assumes that some variables like width/height are already set
611  */
612 int MPV_common_init(MpegEncContext *s)
613 {
614     int y_size, c_size, yc_size, i, mb_array_size, mv_table_size, x, y;
615
616     if(s->avctx->thread_count > MAX_THREADS || (16*s->avctx->thread_count > s->height && s->height)){
617         av_log(s->avctx, AV_LOG_ERROR, "too many threads\n");
618         return -1;
619     }
620
621     if((s->width || s->height) && avcodec_check_dimensions(s->avctx, s->width, s->height))
622         return -1;
623
624     dsputil_init(&s->dsp, s->avctx);
625     DCT_common_init(s);
626
627     s->flags= s->avctx->flags;
628     s->flags2= s->avctx->flags2;
629
630     s->mb_width  = (s->width  + 15) / 16;
631     s->mb_height = (s->height + 15) / 16;
632     s->mb_stride = s->mb_width + 1;
633     s->b8_stride = s->mb_width*2 + 1;
634     s->b4_stride = s->mb_width*4 + 1;
635     mb_array_size= s->mb_height * s->mb_stride;
636     mv_table_size= (s->mb_height+2) * s->mb_stride + 1;
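    /* e.g. for a 352x288 (CIF) frame: mb_width=22, mb_height=18, mb_stride=23,
       b8_stride=45, b4_stride=89, mb_array_size=414, mv_table_size=461 */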
637
638     /* set chroma shifts */
639     avcodec_get_chroma_sub_sample(s->avctx->pix_fmt,&(s->chroma_x_shift),
640                                                     &(s->chroma_y_shift) );
641
642     /* set default edge pos, will be overridden in decode_header if needed */
643     s->h_edge_pos= s->mb_width*16;
644     s->v_edge_pos= s->mb_height*16;
645
646     s->mb_num = s->mb_width * s->mb_height;
647     
648     s->block_wrap[0]=
649     s->block_wrap[1]=
650     s->block_wrap[2]=
651     s->block_wrap[3]= s->b8_stride;
652     s->block_wrap[4]=
653     s->block_wrap[5]= s->mb_stride;
654  
655     y_size = s->b8_stride * (2 * s->mb_height + 1);
656     c_size = s->mb_stride * (s->mb_height + 1);
657     yc_size = y_size + 2 * c_size;
658     
659     /* convert fourcc to upper case */
660     s->avctx->codec_tag=   toupper( s->avctx->codec_tag     &0xFF)          
661                         + (toupper((s->avctx->codec_tag>>8 )&0xFF)<<8 )
662                         + (toupper((s->avctx->codec_tag>>16)&0xFF)<<16) 
663                         + (toupper((s->avctx->codec_tag>>24)&0xFF)<<24);
664
665     s->avctx->stream_codec_tag=   toupper( s->avctx->stream_codec_tag     &0xFF)          
666                                + (toupper((s->avctx->stream_codec_tag>>8 )&0xFF)<<8 )
667                                + (toupper((s->avctx->stream_codec_tag>>16)&0xFF)<<16) 
668                                + (toupper((s->avctx->stream_codec_tag>>24)&0xFF)<<24);
669
670     s->avctx->coded_frame= (AVFrame*)&s->current_picture;
671
672     CHECKED_ALLOCZ(s->mb_index2xy, (s->mb_num+1)*sizeof(int)) //error resilience code looks cleaner with this
673     for(y=0; y<s->mb_height; y++){
674         for(x=0; x<s->mb_width; x++){
675             s->mb_index2xy[ x + y*s->mb_width ] = x + y*s->mb_stride;
676         }
677     }
678     s->mb_index2xy[ s->mb_height*s->mb_width ] = (s->mb_height-1)*s->mb_stride + s->mb_width; //FIXME really needed?
679     
680     if (s->encoding) {
681         /* Allocate MV tables */
682         CHECKED_ALLOCZ(s->p_mv_table_base            , mv_table_size * 2 * sizeof(int16_t))
683         CHECKED_ALLOCZ(s->b_forw_mv_table_base       , mv_table_size * 2 * sizeof(int16_t))
684         CHECKED_ALLOCZ(s->b_back_mv_table_base       , mv_table_size * 2 * sizeof(int16_t))
685         CHECKED_ALLOCZ(s->b_bidir_forw_mv_table_base , mv_table_size * 2 * sizeof(int16_t))
686         CHECKED_ALLOCZ(s->b_bidir_back_mv_table_base , mv_table_size * 2 * sizeof(int16_t))
687         CHECKED_ALLOCZ(s->b_direct_mv_table_base     , mv_table_size * 2 * sizeof(int16_t))
688         s->p_mv_table           = s->p_mv_table_base            + s->mb_stride + 1;
689         s->b_forw_mv_table      = s->b_forw_mv_table_base       + s->mb_stride + 1;
690         s->b_back_mv_table      = s->b_back_mv_table_base       + s->mb_stride + 1;
691         s->b_bidir_forw_mv_table= s->b_bidir_forw_mv_table_base + s->mb_stride + 1;
692         s->b_bidir_back_mv_table= s->b_bidir_back_mv_table_base + s->mb_stride + 1;
693         s->b_direct_mv_table    = s->b_direct_mv_table_base     + s->mb_stride + 1;
694
695         if(s->msmpeg4_version){
696             CHECKED_ALLOCZ(s->ac_stats, 2*2*(MAX_LEVEL+1)*(MAX_RUN+1)*2*sizeof(int));
697         }
698         CHECKED_ALLOCZ(s->avctx->stats_out, 256);
699
700         /* Allocate MB type table */
701         CHECKED_ALLOCZ(s->mb_type  , mb_array_size * sizeof(uint16_t)) //needed for encoding
702         
703         CHECKED_ALLOCZ(s->lambda_table, mb_array_size * sizeof(int))
704         
705         CHECKED_ALLOCZ(s->q_intra_matrix, 64*32 * sizeof(int))
706         CHECKED_ALLOCZ(s->q_inter_matrix, 64*32 * sizeof(int))
707         CHECKED_ALLOCZ(s->q_intra_matrix16, 64*32*2 * sizeof(uint16_t))
708         CHECKED_ALLOCZ(s->q_inter_matrix16, 64*32*2 * sizeof(uint16_t))
709         CHECKED_ALLOCZ(s->input_picture, MAX_PICTURE_COUNT * sizeof(Picture*))
710         CHECKED_ALLOCZ(s->reordered_input_picture, MAX_PICTURE_COUNT * sizeof(Picture*))
711         
712         if(s->avctx->noise_reduction){
713             CHECKED_ALLOCZ(s->dct_offset, 2 * 64 * sizeof(uint16_t))
714         }
715     }
716     CHECKED_ALLOCZ(s->picture, MAX_PICTURE_COUNT * sizeof(Picture))
717
718     CHECKED_ALLOCZ(s->error_status_table, mb_array_size*sizeof(uint8_t))
719     
720     if(s->codec_id==CODEC_ID_MPEG4 || (s->flags & CODEC_FLAG_INTERLACED_ME)){
721         /* interlaced direct mode decoding tables */
722             for(i=0; i<2; i++){
723                 int j, k;
724                 for(j=0; j<2; j++){
725                     for(k=0; k<2; k++){
726                         CHECKED_ALLOCZ(s->b_field_mv_table_base[i][j][k]     , mv_table_size * 2 * sizeof(int16_t))
727                         s->b_field_mv_table[i][j][k]    = s->b_field_mv_table_base[i][j][k]     + s->mb_stride + 1;
728                     }
729                     CHECKED_ALLOCZ(s->b_field_select_table[i][j]     , mb_array_size * 2 * sizeof(uint8_t))
730                     CHECKED_ALLOCZ(s->p_field_mv_table_base[i][j]     , mv_table_size * 2 * sizeof(int16_t))
731                     s->p_field_mv_table[i][j]    = s->p_field_mv_table_base[i][j]     + s->mb_stride + 1;
732                 }
733                 CHECKED_ALLOCZ(s->p_field_select_table[i]      , mb_array_size * 2 * sizeof(uint8_t))
734             }
735     }
736     if (s->out_format == FMT_H263) {
737         /* ac values */
738         CHECKED_ALLOCZ(s->ac_val_base, yc_size * sizeof(int16_t) * 16);
739         s->ac_val[0] = s->ac_val_base + s->b8_stride + 1;
740         s->ac_val[1] = s->ac_val_base + y_size + s->mb_stride + 1;
741         s->ac_val[2] = s->ac_val[1] + c_size;
742         
743         /* cbp values */
744         CHECKED_ALLOCZ(s->coded_block_base, y_size);
745         s->coded_block= s->coded_block_base + s->b8_stride + 1;
746         
747         /* cbp, ac_pred, pred_dir */
748         CHECKED_ALLOCZ(s->cbp_table  , mb_array_size * sizeof(uint8_t))
749         CHECKED_ALLOCZ(s->pred_dir_table, mb_array_size * sizeof(uint8_t))
750     }
751     
752     if (s->h263_pred || s->h263_plus || !s->encoding) {
753         /* dc values */
754         //MN: we need these for error resilience of intra-frames
755         CHECKED_ALLOCZ(s->dc_val_base, yc_size * sizeof(int16_t));
756         s->dc_val[0] = s->dc_val_base + s->b8_stride + 1;
757         s->dc_val[1] = s->dc_val_base + y_size + s->mb_stride + 1;
758         s->dc_val[2] = s->dc_val[1] + c_size;
759         for(i=0;i<yc_size;i++)
760             s->dc_val_base[i] = 1024;
761     }
762
763     /* which mb is an intra block */
764     CHECKED_ALLOCZ(s->mbintra_table, mb_array_size);
765     memset(s->mbintra_table, 1, mb_array_size);
766     
767     /* init macroblock skip table */
768     CHECKED_ALLOCZ(s->mbskip_table, mb_array_size+2);
769     //Note the +1 is for a quicker mpeg4 slice_end detection
770     CHECKED_ALLOCZ(s->prev_pict_types, PREV_PICT_TYPES_BUFFER_SIZE);
771     
772     s->parse_context.state= -1;
773     if((s->avctx->debug&(FF_DEBUG_VIS_QP|FF_DEBUG_VIS_MB_TYPE)) || (s->avctx->debug_mv)){
774        s->visualization_buffer[0] = av_malloc((s->mb_width*16 + 2*EDGE_WIDTH) * s->mb_height*16 + 2*EDGE_WIDTH);
775        s->visualization_buffer[1] = av_malloc((s->mb_width*8 + EDGE_WIDTH) * s->mb_height*8 + EDGE_WIDTH);
776        s->visualization_buffer[2] = av_malloc((s->mb_width*8 + EDGE_WIDTH) * s->mb_height*8 + EDGE_WIDTH);
777     }
778
779     s->context_initialized = 1;
780
781     s->thread_context[0]= s;
782     for(i=1; i<s->avctx->thread_count; i++){
783         s->thread_context[i]= av_malloc(sizeof(MpegEncContext));
784         memcpy(s->thread_context[i], s, sizeof(MpegEncContext));
785     }
786
787     for(i=0; i<s->avctx->thread_count; i++){
788         if(init_duplicate_context(s->thread_context[i], s) < 0)
789            goto fail;
790         s->thread_context[i]->start_mb_y= (s->mb_height*(i  ) + s->avctx->thread_count/2) / s->avctx->thread_count;
791         s->thread_context[i]->end_mb_y  = (s->mb_height*(i+1) + s->avctx->thread_count/2) / s->avctx->thread_count;
792     }
793
794     return 0;
795  fail:
796     MPV_common_end(s);
797     return -1;
798 }
799
800 /* free the common structure of both encoder and decoder */
801 void MPV_common_end(MpegEncContext *s)
802 {
803     int i, j, k;
804
805     for(i=0; i<s->avctx->thread_count; i++){
806         free_duplicate_context(s->thread_context[i]);
807     }
808     for(i=1; i<s->avctx->thread_count; i++){
809         av_freep(&s->thread_context[i]);
810     }
811
812     av_freep(&s->parse_context.buffer);
813     s->parse_context.buffer_size=0;
814
815     av_freep(&s->mb_type);
816     av_freep(&s->p_mv_table_base);
817     av_freep(&s->b_forw_mv_table_base);
818     av_freep(&s->b_back_mv_table_base);
819     av_freep(&s->b_bidir_forw_mv_table_base);
820     av_freep(&s->b_bidir_back_mv_table_base);
821     av_freep(&s->b_direct_mv_table_base);
822     s->p_mv_table= NULL;
823     s->b_forw_mv_table= NULL;
824     s->b_back_mv_table= NULL;
825     s->b_bidir_forw_mv_table= NULL;
826     s->b_bidir_back_mv_table= NULL;
827     s->b_direct_mv_table= NULL;
828     for(i=0; i<2; i++){
829         for(j=0; j<2; j++){
830             for(k=0; k<2; k++){
831                 av_freep(&s->b_field_mv_table_base[i][j][k]);
832                 s->b_field_mv_table[i][j][k]=NULL;
833             }
834             av_freep(&s->b_field_select_table[i][j]);
835             av_freep(&s->p_field_mv_table_base[i][j]);
836             s->p_field_mv_table[i][j]=NULL;
837         }
838         av_freep(&s->p_field_select_table[i]);
839     }
840     
841     av_freep(&s->dc_val_base);
842     av_freep(&s->ac_val_base);
843     av_freep(&s->coded_block_base);
844     av_freep(&s->mbintra_table);
845     av_freep(&s->cbp_table);
846     av_freep(&s->pred_dir_table);
847     
848     av_freep(&s->mbskip_table);
849     av_freep(&s->prev_pict_types);
850     av_freep(&s->bitstream_buffer);
851     s->allocated_bitstream_buffer_size=0;
852
853     av_freep(&s->avctx->stats_out);
854     av_freep(&s->ac_stats);
855     av_freep(&s->error_status_table);
856     av_freep(&s->mb_index2xy);
857     av_freep(&s->lambda_table);
858     av_freep(&s->q_intra_matrix);
859     av_freep(&s->q_inter_matrix);
860     av_freep(&s->q_intra_matrix16);
861     av_freep(&s->q_inter_matrix16);
862     av_freep(&s->input_picture);
863     av_freep(&s->reordered_input_picture);
864     av_freep(&s->dct_offset);
865
866     if(s->picture){
867         for(i=0; i<MAX_PICTURE_COUNT; i++){
868             free_picture(s, &s->picture[i]);
869         }
870     }
871     av_freep(&s->picture);
872     s->context_initialized = 0;
873     s->last_picture_ptr=
874     s->next_picture_ptr=
875     s->current_picture_ptr= NULL;
876     s->linesize= s->uvlinesize= 0;
877
878     for(i=0; i<3; i++)
879         av_freep(&s->visualization_buffer[i]);
880
881     avcodec_default_free_buffers(s->avctx);
882 }
883
884 #ifdef CONFIG_ENCODERS
885
886 /* init video encoder */
887 int MPV_encode_init(AVCodecContext *avctx)
888 {
889     MpegEncContext *s = avctx->priv_data;
890     int i, dummy;
891     int chroma_h_shift, chroma_v_shift;
892     
893     MPV_encode_defaults(s);
894
895     if(avctx->pix_fmt != PIX_FMT_YUVJ420P && avctx->pix_fmt != PIX_FMT_YUV420P){
896         av_log(avctx, AV_LOG_ERROR, "only YUV420 is supported\n");
897         return -1;
898     }
899
900     if(avctx->codec_id == CODEC_ID_MJPEG || avctx->codec_id == CODEC_ID_LJPEG){
901         if(avctx->strict_std_compliance>=0 && avctx->pix_fmt != PIX_FMT_YUVJ420P){
902             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in jpeg\n");
903             return -1;
904         }
905     }else{
906         if(avctx->strict_std_compliance>=0 && avctx->pix_fmt != PIX_FMT_YUV420P){
907             av_log(avctx, AV_LOG_ERROR, "colorspace not supported\n");
908             return -1;
909         }
910     }
911
912     s->bit_rate = avctx->bit_rate;
913     s->width = avctx->width;
914     s->height = avctx->height;
915     if(avctx->gop_size > 600){
916         av_log(avctx, AV_LOG_ERROR, "Warning, keyframe interval too large! Reducing it ...\n");
917         avctx->gop_size=600;
918     }
919     s->gop_size = avctx->gop_size;
920     s->avctx = avctx;
921     s->flags= avctx->flags;
922     s->flags2= avctx->flags2;
923     s->max_b_frames= avctx->max_b_frames;
924     s->codec_id= avctx->codec->id;
925     s->luma_elim_threshold  = avctx->luma_elim_threshold;
926     s->chroma_elim_threshold= avctx->chroma_elim_threshold;
927     s->strict_std_compliance= avctx->strict_std_compliance;
928     s->data_partitioning= avctx->flags & CODEC_FLAG_PART;
929     s->quarter_sample= (avctx->flags & CODEC_FLAG_QPEL)!=0;
930     s->mpeg_quant= avctx->mpeg_quant;
931     s->rtp_mode= !!avctx->rtp_payload_size;
932     s->intra_dc_precision= avctx->intra_dc_precision;
933     s->user_specified_pts = AV_NOPTS_VALUE;
934
935     if (s->gop_size <= 1) {
936         s->intra_only = 1;
937         s->gop_size = 12;
938     } else {
939         s->intra_only = 0;
940     }
941
942     s->me_method = avctx->me_method;
943
944     /* Fixed QSCALE */
945     s->fixed_qscale = !!(avctx->flags & CODEC_FLAG_QSCALE);
946     
947     s->adaptive_quant= (   s->avctx->lumi_masking
948                         || s->avctx->dark_masking
949                         || s->avctx->temporal_cplx_masking 
950                         || s->avctx->spatial_cplx_masking
951                         || s->avctx->p_masking
952                         || s->avctx->border_masking
953                         || (s->flags&CODEC_FLAG_QP_RD))
954                        && !s->fixed_qscale;
955     
956     s->obmc= !!(s->flags & CODEC_FLAG_OBMC);
957     s->loop_filter= !!(s->flags & CODEC_FLAG_LOOP_FILTER);
958     s->alternate_scan= !!(s->flags & CODEC_FLAG_ALT_SCAN);
959
960     if(avctx->rc_max_rate && !avctx->rc_buffer_size){
961         av_log(avctx, AV_LOG_ERROR, "a vbv buffer size is needed for encoding with a maximum bitrate\n");
962         return -1;
963     }    
964
965     if(avctx->rc_min_rate && avctx->rc_max_rate != avctx->rc_min_rate){
966         av_log(avctx, AV_LOG_INFO, "Warning, min_rate > 0 but min_rate != max_rate isn't recommended!\n");
967     }
968     
969     if(avctx->rc_min_rate && avctx->rc_min_rate > avctx->bit_rate){
970         av_log(avctx, AV_LOG_INFO, "bitrate below min bitrate\n");
971         return -1;
972     }
973     
974     if(avctx->rc_max_rate && avctx->rc_max_rate < avctx->bit_rate){
975         av_log(avctx, AV_LOG_INFO, "bitrate above max bitrate\n");
976         return -1;
977     }
978         
979     if(   s->avctx->rc_max_rate && s->avctx->rc_min_rate == s->avctx->rc_max_rate 
980        && (s->codec_id == CODEC_ID_MPEG1VIDEO || s->codec_id == CODEC_ID_MPEG2VIDEO)
981        && 90000LL * (avctx->rc_buffer_size-1) > s->avctx->rc_max_rate*0xFFFFLL){
982         
983         av_log(avctx, AV_LOG_INFO, "Warning, vbv_delay will be set to 0xFFFF (=VBR) as the specified vbv buffer is too large for the given bitrate!\n");
984     }
985        
986     if((s->flags & CODEC_FLAG_4MV) && s->codec_id != CODEC_ID_MPEG4 
987        && s->codec_id != CODEC_ID_H263 && s->codec_id != CODEC_ID_H263P && s->codec_id != CODEC_ID_FLV1){
988         av_log(avctx, AV_LOG_ERROR, "4MV not supported by codec\n");
989         return -1;
990     }
991         
992     if(s->obmc && s->avctx->mb_decision != FF_MB_DECISION_SIMPLE){
993         av_log(avctx, AV_LOG_ERROR, "OBMC is only supported with simple mb decision\n");
994         return -1;
995     }
996     
997     if(s->obmc && s->codec_id != CODEC_ID_H263 && s->codec_id != CODEC_ID_H263P){
998         av_log(avctx, AV_LOG_ERROR, "OBMC is only supported with H263(+)\n");
999         return -1;
1000     }
1001     
1002     if(s->quarter_sample && s->codec_id != CODEC_ID_MPEG4){
1003         av_log(avctx, AV_LOG_ERROR, "qpel not supported by codec\n");
1004         return -1;
1005     }
1006
1007     if(s->data_partitioning && s->codec_id != CODEC_ID_MPEG4){
1008         av_log(avctx, AV_LOG_ERROR, "data partitioning not supported by codec\n");
1009         return -1;
1010     }
1011     
1012     if(s->max_b_frames && s->codec_id != CODEC_ID_MPEG4 && s->codec_id != CODEC_ID_MPEG1VIDEO && s->codec_id != CODEC_ID_MPEG2VIDEO){
1013         av_log(avctx, AV_LOG_ERROR, "b frames not supported by codec\n");
1014         return -1;
1015     }
1016
1017     if((s->flags & (CODEC_FLAG_INTERLACED_DCT|CODEC_FLAG_INTERLACED_ME|CODEC_FLAG_ALT_SCAN)) 
1018        && s->codec_id != CODEC_ID_MPEG4 && s->codec_id != CODEC_ID_MPEG2VIDEO){
1019         av_log(avctx, AV_LOG_ERROR, "interlacing not supported by codec\n");
1020         return -1;
1021     }
1022         
1023     if(s->mpeg_quant && s->codec_id != CODEC_ID_MPEG4){ //FIXME mpeg2 uses that too
1024         av_log(avctx, AV_LOG_ERROR, "mpeg2 style quantization not supported by codec\n");
1025         return -1;
1026     }
1027         
1028     if((s->flags & CODEC_FLAG_CBP_RD) && !(s->flags & CODEC_FLAG_TRELLIS_QUANT)){
1029         av_log(avctx, AV_LOG_ERROR, "CBP RD needs trellis quant\n");
1030         return -1;
1031     }
1032
1033     if((s->flags & CODEC_FLAG_QP_RD) && s->avctx->mb_decision != FF_MB_DECISION_RD){
1034         av_log(avctx, AV_LOG_ERROR, "QP RD needs mbd=2\n");
1035         return -1;
1036     }
1037     
1038     if(s->avctx->scenechange_threshold < 1000000000 && (s->flags & CODEC_FLAG_CLOSED_GOP)){
1039         av_log(avctx, AV_LOG_ERROR, "closed gop with scene change detection isn't supported yet\n");
1040         return -1;
1041     }
1042     
1043     if(s->avctx->thread_count > 1 && s->codec_id != CODEC_ID_MPEG4 
1044        && s->codec_id != CODEC_ID_MPEG1VIDEO && s->codec_id != CODEC_ID_MPEG2VIDEO 
1045        && (s->codec_id != CODEC_ID_H263P || !(s->flags & CODEC_FLAG_H263P_SLICE_STRUCT))){
1046         av_log(avctx, AV_LOG_ERROR, "multi threaded encoding not supported by codec\n");
1047         return -1;
1048     }
1049     
1050     if(s->avctx->thread_count > 1)
1051         s->rtp_mode= 1;
1052
1053     if(!avctx->frame_rate || !avctx->frame_rate_base){
1054         av_log(avctx, AV_LOG_ERROR, "framerate not set\n");
1055         return -1;
1056     }
1057         
1058     i= ff_gcd(avctx->frame_rate, avctx->frame_rate_base);
1059     if(i > 1){
1060         av_log(avctx, AV_LOG_INFO, "removing common factors from framerate\n");
1061         avctx->frame_rate /= i;
1062         avctx->frame_rate_base /= i;
1063 //        return -1;
1064     }
1065     
1066     if(s->codec_id==CODEC_ID_MJPEG){
1067         s->intra_quant_bias= 1<<(QUANT_BIAS_SHIFT-1); //(a + x/2)/x
1068         s->inter_quant_bias= 0;
1069     }else if(s->mpeg_quant || s->codec_id==CODEC_ID_MPEG1VIDEO || s->codec_id==CODEC_ID_MPEG2VIDEO){
1070         s->intra_quant_bias= 3<<(QUANT_BIAS_SHIFT-3); //(a + x*3/8)/x
1071         s->inter_quant_bias= 0;
1072     }else{
1073         s->intra_quant_bias=0;
1074         s->inter_quant_bias=-(1<<(QUANT_BIAS_SHIFT-2)); //(a - x/4)/x
1075     }
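    /* the biases above are in QUANT_BIAS_SHIFT fixed point (1.0 == 1<<QUANT_BIAS_SHIFT):
       +1/2 rounds to nearest for (L)JPEG, +3/8 for MPEG style intra quantization and
       -1/4 gives the usual dead zone for everything else */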
1076     
1077     if(avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
1078         s->intra_quant_bias= avctx->intra_quant_bias;
1079     if(avctx->inter_quant_bias != FF_DEFAULT_QUANT_BIAS)
1080         s->inter_quant_bias= avctx->inter_quant_bias;
1081         
1082     avcodec_get_chroma_sub_sample(avctx->pix_fmt, &chroma_h_shift, &chroma_v_shift);
1083
1084     av_reduce(&s->time_increment_resolution, &dummy, s->avctx->frame_rate, s->avctx->frame_rate_base, (1<<16)-1);
1085     s->time_increment_bits = av_log2(s->time_increment_resolution - 1) + 1;
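    /* the frame rate fraction is reduced so that the resolution fits in 16 bits,
       presumably because MPEG-4 codes vop_time_increment_resolution in a 16 bit
       field; time_increment_bits is the number of bits needed for resolution-1 */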
1086
1087     switch(avctx->codec->id) {
1088     case CODEC_ID_MPEG1VIDEO:
1089         s->out_format = FMT_MPEG1;
1090         s->low_delay= 0; //s->max_b_frames ? 0 : 1;
1091         avctx->delay= s->low_delay ? 0 : (s->max_b_frames + 1);
1092         break;
1093     case CODEC_ID_MPEG2VIDEO:
1094         s->out_format = FMT_MPEG1;
1095         s->low_delay= 0; //s->max_b_frames ? 0 : 1;
1096         avctx->delay= s->low_delay ? 0 : (s->max_b_frames + 1);
1097         s->rtp_mode= 1;
1098         break;
1099     case CODEC_ID_LJPEG:
1100     case CODEC_ID_MJPEG:
1101         s->out_format = FMT_MJPEG;
1102         s->intra_only = 1; /* force intra only for jpeg */
1103         s->mjpeg_write_tables = 1; /* write all tables */
1104         s->mjpeg_data_only_frames = 0; /* write all the needed headers */
1105         s->mjpeg_vsample[0] = 1<<chroma_v_shift;
1106         s->mjpeg_vsample[1] = 1;
1107         s->mjpeg_vsample[2] = 1; 
1108         s->mjpeg_hsample[0] = 1<<chroma_h_shift;
1109         s->mjpeg_hsample[1] = 1; 
1110         s->mjpeg_hsample[2] = 1; 
1111         if (mjpeg_init(s) < 0)
1112             return -1;
1113         avctx->delay=0;
1114         s->low_delay=1;
1115         break;
1116     case CODEC_ID_H261:
1117         s->out_format = FMT_H261;
1118         avctx->delay=0;
1119         s->low_delay=1;
1120         break;
1121     case CODEC_ID_H263:
1122         if (h263_get_picture_format(s->width, s->height) == 7) {
1123             av_log(avctx, AV_LOG_INFO, "Input picture size isn't suitable for h263 codec! try h263+\n");
1124             return -1;
1125         }
1126         s->out_format = FMT_H263;
1127         s->obmc= (avctx->flags & CODEC_FLAG_OBMC) ? 1:0;
1128         avctx->delay=0;
1129         s->low_delay=1;
1130         break;
1131     case CODEC_ID_H263P:
1132         s->out_format = FMT_H263;
1133         s->h263_plus = 1;
1134         /* Fx */
1135         s->umvplus = (avctx->flags & CODEC_FLAG_H263P_UMV) ? 1:0;
1136         s->h263_aic= (avctx->flags & CODEC_FLAG_H263P_AIC) ? 1:0;
1137         s->modified_quant= s->h263_aic;
1138         s->alt_inter_vlc= (avctx->flags & CODEC_FLAG_H263P_AIV) ? 1:0;
1139         s->obmc= (avctx->flags & CODEC_FLAG_OBMC) ? 1:0;
1140         s->loop_filter= (avctx->flags & CODEC_FLAG_LOOP_FILTER) ? 1:0;
1141         s->unrestricted_mv= s->obmc || s->loop_filter || s->umvplus;
1142         s->h263_slice_structured= (s->flags & CODEC_FLAG_H263P_SLICE_STRUCT) ? 1:0;
1143
1144         /* /Fx */
1145         /* These are just to be sure */
1146         avctx->delay=0;
1147         s->low_delay=1;
1148         break;
1149     case CODEC_ID_FLV1:
1150         s->out_format = FMT_H263;
1151         s->h263_flv = 2; /* format = 1; 11-bit codes */
1152         s->unrestricted_mv = 1;
1153         s->rtp_mode=0; /* don't allow GOB */
1154         avctx->delay=0;
1155         s->low_delay=1;
1156         break;
1157     case CODEC_ID_RV10:
1158         s->out_format = FMT_H263;
1159         avctx->delay=0;
1160         s->low_delay=1;
1161         break;
1162     case CODEC_ID_RV20:
1163         s->out_format = FMT_H263;
1164         avctx->delay=0;
1165         s->low_delay=1;
1166         s->modified_quant=1;
1167         s->h263_aic=1;
1168         s->h263_plus=1;
1169         s->loop_filter=1;
1170         s->unrestricted_mv= s->obmc || s->loop_filter || s->umvplus;
1171         break;
1172     case CODEC_ID_MPEG4:
1173         s->out_format = FMT_H263;
1174         s->h263_pred = 1;
1175         s->unrestricted_mv = 1;
1176         s->low_delay= s->max_b_frames ? 0 : 1;
1177         avctx->delay= s->low_delay ? 0 : (s->max_b_frames + 1);
1178         break;
1179     case CODEC_ID_MSMPEG4V1:
1180         s->out_format = FMT_H263;
1181         s->h263_msmpeg4 = 1;
1182         s->h263_pred = 1;
1183         s->unrestricted_mv = 1;
1184         s->msmpeg4_version= 1;
1185         avctx->delay=0;
1186         s->low_delay=1;
1187         break;
1188     case CODEC_ID_MSMPEG4V2:
1189         s->out_format = FMT_H263;
1190         s->h263_msmpeg4 = 1;
1191         s->h263_pred = 1;
1192         s->unrestricted_mv = 1;
1193         s->msmpeg4_version= 2;
1194         avctx->delay=0;
1195         s->low_delay=1;
1196         break;
1197     case CODEC_ID_MSMPEG4V3:
1198         s->out_format = FMT_H263;
1199         s->h263_msmpeg4 = 1;
1200         s->h263_pred = 1;
1201         s->unrestricted_mv = 1;
1202         s->msmpeg4_version= 3;
1203         s->flipflop_rounding=1;
1204         avctx->delay=0;
1205         s->low_delay=1;
1206         break;
1207     case CODEC_ID_WMV1:
1208         s->out_format = FMT_H263;
1209         s->h263_msmpeg4 = 1;
1210         s->h263_pred = 1;
1211         s->unrestricted_mv = 1;
1212         s->msmpeg4_version= 4;
1213         s->flipflop_rounding=1;
1214         avctx->delay=0;
1215         s->low_delay=1;
1216         break;
1217     case CODEC_ID_WMV2:
1218         s->out_format = FMT_H263;
1219         s->h263_msmpeg4 = 1;
1220         s->h263_pred = 1;
1221         s->unrestricted_mv = 1;
1222         s->msmpeg4_version= 5;
1223         s->flipflop_rounding=1;
1224         avctx->delay=0;
1225         s->low_delay=1;
1226         break;
1227     default:
1228         return -1;
1229     }
1230     
1231     avctx->has_b_frames= !s->low_delay;
1232
1233     s->encoding = 1;
1234
1235     /* init */
1236     if (MPV_common_init(s) < 0)
1237         return -1;
1238
1239     if(s->modified_quant)
1240         s->chroma_qscale_table= ff_h263_chroma_qscale_table;
1241     s->progressive_frame= 
1242     s->progressive_sequence= !(avctx->flags & (CODEC_FLAG_INTERLACED_DCT|CODEC_FLAG_INTERLACED_ME));
1243     s->quant_precision=5;
1244     
1245     ff_set_cmp(&s->dsp, s->dsp.ildct_cmp, s->avctx->ildct_cmp);
1246     ff_set_cmp(&s->dsp, s->dsp.frame_skip_cmp, s->avctx->frame_skip_cmp);
1247     
1248 #ifdef CONFIG_ENCODERS
1249     if (s->out_format == FMT_H261)
1250         ff_h261_encode_init(s);
1251     if (s->out_format == FMT_H263)
1252         h263_encode_init(s);
1253     if(s->msmpeg4_version)
1254         ff_msmpeg4_encode_init(s);
1255     if (s->out_format == FMT_MPEG1)
1256         ff_mpeg1_encode_init(s);
1257 #endif
1258
1259     /* init q matrix */
1260     for(i=0;i<64;i++) {
1261         int j= s->dsp.idct_permutation[i];
1262         if(s->codec_id==CODEC_ID_MPEG4 && s->mpeg_quant){
1263             s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
1264             s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
1265         }else if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
1266             s->intra_matrix[j] =
1267             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
1268         }else
1269         { /* mpeg1/2 */
1270             s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
1271             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
1272         }
1273         if(s->avctx->intra_matrix)
1274             s->intra_matrix[j] = s->avctx->intra_matrix[i];
1275         if(s->avctx->inter_matrix)
1276             s->inter_matrix[j] = s->avctx->inter_matrix[i];
1277     }
1278
1279     /* precompute matrix */
1280     /* for mjpeg, we do include qscale in the matrix */
1281     if (s->out_format != FMT_MJPEG) {
1282         convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16, 
1283                        s->intra_matrix, s->intra_quant_bias, avctx->qmin, 31, 1);
1284         convert_matrix(&s->dsp, s->q_inter_matrix, s->q_inter_matrix16, 
1285                        s->inter_matrix, s->inter_quant_bias, avctx->qmin, 31, 0);
1286     }
1287
1288     if(ff_rate_control_init(s) < 0)
1289         return -1;
1290     
1291     return 0;
1292 }
1293
1294 int MPV_encode_end(AVCodecContext *avctx)
1295 {
1296     MpegEncContext *s = avctx->priv_data;
1297
1298 #ifdef STATS
1299     print_stats();
1300 #endif
1301
1302     ff_rate_control_uninit(s);
1303
1304     MPV_common_end(s);
1305     if (s->out_format == FMT_MJPEG)
1306         mjpeg_close(s);
1307
1308     av_freep(&avctx->extradata);
1309       
1310     return 0;
1311 }
1312
1313 #endif //CONFIG_ENCODERS
1314
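/**
 * builds the max_level[], max_run[] and index_run[] lookup tables of an RLTable
 * from its run/level arrays, separately for the "not last" and "last" coefficient
 * groups; with use_static the tables come from the static allocator so that
 * shared static tables are only initialized once.
 */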
1315 void init_rl(RLTable *rl, int use_static)
1316 {
1317     int8_t max_level[MAX_RUN+1], max_run[MAX_LEVEL+1];
1318     uint8_t index_run[MAX_RUN+1];
1319     int last, run, level, start, end, i;
1320
1321     /* If table is static, we can quit if rl->max_level[0] is not NULL */
1322     if(use_static && rl->max_level[0])
1323         return;
1324
1325     /* compute max_level[], max_run[] and index_run[] */
1326     for(last=0;last<2;last++) {
1327         if (last == 0) {
1328             start = 0;
1329             end = rl->last;
1330         } else {
1331             start = rl->last;
1332             end = rl->n;
1333         }
1334
1335         memset(max_level, 0, MAX_RUN + 1);
1336         memset(max_run, 0, MAX_LEVEL + 1);
1337         memset(index_run, rl->n, MAX_RUN + 1);
1338         for(i=start;i<end;i++) {
1339             run = rl->table_run[i];
1340             level = rl->table_level[i];
1341             if (index_run[run] == rl->n)
1342                 index_run[run] = i;
1343             if (level > max_level[run])
1344                 max_level[run] = level;
1345             if (run > max_run[level])
1346                 max_run[level] = run;
1347         }
1348         if(use_static)
1349             rl->max_level[last] = av_mallocz_static(MAX_RUN + 1);
1350         else
1351             rl->max_level[last] = av_malloc(MAX_RUN + 1);
1352         memcpy(rl->max_level[last], max_level, MAX_RUN + 1);
1353         if(use_static)
1354             rl->max_run[last] = av_mallocz_static(MAX_LEVEL + 1);
1355         else
1356             rl->max_run[last] = av_malloc(MAX_LEVEL + 1);
1357         memcpy(rl->max_run[last], max_run, MAX_LEVEL + 1);
1358         if(use_static)
1359             rl->index_run[last] = av_mallocz_static(MAX_RUN + 1);
1360         else
1361             rl->index_run[last] = av_malloc(MAX_RUN + 1);
1362         memcpy(rl->index_run[last], index_run, MAX_RUN + 1);
1363     }
1364 }
1365
1366 /* draw the edges of width 'w' of an image of size width, height */
1367 //FIXME check that this is ok for mpeg4 interlaced
1368 static void draw_edges_c(uint8_t *buf, int wrap, int width, int height, int w)
1369 {
1370     uint8_t *ptr, *last_line;
1371     int i;
1372
1373     last_line = buf + (height - 1) * wrap;
1374     for(i=0;i<w;i++) {
1375         /* top and bottom */
1376         memcpy(buf - (i + 1) * wrap, buf, width);
1377         memcpy(last_line + (i + 1) * wrap, last_line, width);
1378     }
1379     /* left and right */
1380     ptr = buf;
1381     for(i=0;i<height;i++) {
1382         memset(ptr - w, ptr[0], w);
1383         memset(ptr + width, ptr[width-1], w);
1384         ptr += wrap;
1385     }
1386     /* corners */
1387     for(i=0;i<w;i++) {
1388         memset(buf - (i + 1) * wrap - w, buf[0], w); /* top left */
1389         memset(buf - (i + 1) * wrap + width, buf[width-1], w); /* top right */
1390         memset(last_line + (i + 1) * wrap - w, last_line[0], w); /* bottom left */
1391         memset(last_line + (i + 1) * wrap + width, last_line[width-1], w); /* bottom right */
1392     }
1393 }
1394
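/**
 * returns the index of a free slot in s->picture[]: for shared buffers a slot with
 * no data and no type is required, otherwise empty slots that were used before
 * (and still have a type) are preferred, falling back to any empty slot.
 */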
1395 int ff_find_unused_picture(MpegEncContext *s, int shared){
1396     int i;
1397     
1398     if(shared){
1399         for(i=0; i<MAX_PICTURE_COUNT; i++){
1400             if(s->picture[i].data[0]==NULL && s->picture[i].type==0) return i;
1401         }
1402     }else{
1403         for(i=0; i<MAX_PICTURE_COUNT; i++){
1404             if(s->picture[i].data[0]==NULL && s->picture[i].type!=0) return i; //FIXME
1405         }
1406         for(i=0; i<MAX_PICTURE_COUNT; i++){
1407             if(s->picture[i].data[0]==NULL) return i;
1408         }
1409     }
1410
1411     assert(0);
1412     return -1;
1413 }
1414
1415 static void update_noise_reduction(MpegEncContext *s){
1416     int intra, i;
1417
1418     for(intra=0; intra<2; intra++){
1419         if(s->dct_count[intra] > (1<<16)){
1420             for(i=0; i<64; i++){
1421                 s->dct_error_sum[intra][i] >>=1;
1422             }
1423             s->dct_count[intra] >>= 1;
1424         }
1425         
1426         for(i=0; i<64; i++){
1427             s->dct_offset[intra][i]= (s->avctx->noise_reduction * s->dct_count[intra] + s->dct_error_sum[intra][i]/2) / (s->dct_error_sum[intra][i]+1);
1428         }
1429     }
1430 }
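/* Worked example (illustrative only, hypothetical numbers): */
#if 0
static int noise_offset_example(void)
{
    int noise_reduction = 256;    /* hypothetical avctx->noise_reduction   */
    int dct_count       = 1000;   /* hypothetical number of blocks counted */
    int dct_error_sum   = 64000;  /* hypothetical accumulated DCT error    */

    /* same integer formula as update_noise_reduction() above */
    return (noise_reduction * dct_count + dct_error_sum/2) / (dct_error_sum + 1);
    /* = 288000 / 64001 = 4 */
}
#endif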
1431
1432 /**
1433  * generic function for encode/decode, called after coding/decoding the header and before a frame is coded/decoded
1434  */
1435 int MPV_frame_start(MpegEncContext *s, AVCodecContext *avctx)
1436 {
1437     int i;
1438     AVFrame *pic;
1439     s->mb_skiped = 0;
1440
1441     assert(s->last_picture_ptr==NULL || s->out_format != FMT_H264 || s->codec_id == CODEC_ID_SVQ3);
1442
1443     /* mark&release old frames */
1444     if (s->pict_type != B_TYPE && s->last_picture_ptr && s->last_picture_ptr != s->next_picture_ptr && s->last_picture_ptr->data[0]) {
1445         avctx->release_buffer(avctx, (AVFrame*)s->last_picture_ptr);
1446
1447         /* release forgotten pictures */
1448         /* if(mpeg124/h263) */
1449         if(!s->encoding){
1450             for(i=0; i<MAX_PICTURE_COUNT; i++){
1451                 if(s->picture[i].data[0] && &s->picture[i] != s->next_picture_ptr && s->picture[i].reference){
1452                     av_log(avctx, AV_LOG_ERROR, "releasing zombie picture\n");
1453                     avctx->release_buffer(avctx, (AVFrame*)&s->picture[i]);                
1454                 }
1455             }
1456         }
1457     }
1458 alloc:
1459     if(!s->encoding){
1460         /* release non-reference frames */
1461         for(i=0; i<MAX_PICTURE_COUNT; i++){
1462             if(s->picture[i].data[0] && !s->picture[i].reference /*&& s->picture[i].type!=FF_BUFFER_TYPE_SHARED*/){
1463                 s->avctx->release_buffer(s->avctx, (AVFrame*)&s->picture[i]);
1464             }
1465         }
1466
1467         if(s->current_picture_ptr && s->current_picture_ptr->data[0]==NULL)
1468             pic= (AVFrame*)s->current_picture_ptr; //we already have an unused image (maybe it was set before reading the header)
1469         else{
1470             i= ff_find_unused_picture(s, 0);
1471             pic= (AVFrame*)&s->picture[i];
1472         }
1473
1474         pic->reference= s->pict_type != B_TYPE && !s->dropable ? 3 : 0;
1475
1476         pic->coded_picture_number= s->coded_picture_number++;
1477         
1478         if( alloc_picture(s, (Picture*)pic, 0) < 0)
1479             return -1;
1480
1481         s->current_picture_ptr= (Picture*)pic;
1482         s->current_picture_ptr->top_field_first= s->top_field_first; //FIXME use only the vars from current_pic
1483         s->current_picture_ptr->interlaced_frame= !s->progressive_frame && !s->progressive_sequence;
1484     }
1485
1486     s->current_picture_ptr->pict_type= s->pict_type;
1487 //    if(s->flags && CODEC_FLAG_QSCALE) 
1488   //      s->current_picture_ptr->quality= s->new_picture_ptr->quality;
1489     s->current_picture_ptr->key_frame= s->pict_type == I_TYPE;
1490
1491     copy_picture(&s->current_picture, s->current_picture_ptr);
1492   
1493   if(s->out_format != FMT_H264 || s->codec_id == CODEC_ID_SVQ3){
1494     if (s->pict_type != B_TYPE) {
1495         s->last_picture_ptr= s->next_picture_ptr;
1496         if(!s->dropable)
1497             s->next_picture_ptr= s->current_picture_ptr;
1498     }
1499 /*    av_log(s->avctx, AV_LOG_DEBUG, "L%p N%p C%p L%p N%p C%p type:%d drop:%d\n", s->last_picture_ptr, s->next_picture_ptr,s->current_picture_ptr,
1500         s->last_picture_ptr    ? s->last_picture_ptr->data[0] : NULL, 
1501         s->next_picture_ptr    ? s->next_picture_ptr->data[0] : NULL, 
1502         s->current_picture_ptr ? s->current_picture_ptr->data[0] : NULL,
1503         s->pict_type, s->dropable);*/
1504     
1505     if(s->last_picture_ptr) copy_picture(&s->last_picture, s->last_picture_ptr);
1506     if(s->next_picture_ptr) copy_picture(&s->next_picture, s->next_picture_ptr);
1507     
1508     if(s->pict_type != I_TYPE && (s->last_picture_ptr==NULL || s->last_picture_ptr->data[0]==NULL)){
1509         av_log(avctx, AV_LOG_ERROR, "warning: first frame is no keyframe\n");
1510         assert(s->pict_type != B_TYPE); //these should have been dropped if we dont have a reference
1511         goto alloc;
1512     }
1513
1514     assert(s->pict_type == I_TYPE || (s->last_picture_ptr && s->last_picture_ptr->data[0]));
1515
1516     if(s->picture_structure!=PICT_FRAME){
1517         int i;
1518         for(i=0; i<4; i++){
1519             if(s->picture_structure == PICT_BOTTOM_FIELD){
1520                  s->current_picture.data[i] += s->current_picture.linesize[i];
1521             } 
1522             s->current_picture.linesize[i] *= 2;
1523             s->last_picture.linesize[i] *=2;
1524             s->next_picture.linesize[i] *=2;
1525         }
1526     }
1527   }
1528    
1529     s->hurry_up= s->avctx->hurry_up;
1530     s->error_resilience= avctx->error_resilience;
1531
1532     /* set the dequantizer; we can't do it during init as it might change for mpeg4,
1533        and we can't do it in the header decode as init isn't called for mpeg4 there yet */
1534     if(s->mpeg_quant || s->codec_id == CODEC_ID_MPEG2VIDEO){
1535         s->dct_unquantize_intra = s->dct_unquantize_mpeg2_intra;
1536         s->dct_unquantize_inter = s->dct_unquantize_mpeg2_inter;
1537     }else if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
1538         s->dct_unquantize_intra = s->dct_unquantize_h263_intra;
1539         s->dct_unquantize_inter = s->dct_unquantize_h263_inter;
1540     }else{
1541         s->dct_unquantize_intra = s->dct_unquantize_mpeg1_intra;
1542         s->dct_unquantize_inter = s->dct_unquantize_mpeg1_inter;
1543     }
1544
1545     if(s->dct_error_sum){
1546         assert(s->avctx->noise_reduction && s->encoding);
1547
1548         update_noise_reduction(s);
1549     }
1550         
1551 #ifdef HAVE_XVMC
1552     if(s->avctx->xvmc_acceleration)
1553         return XVMC_field_start(s, avctx);
1554 #endif
1555     return 0;
1556 }
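/* Illustrative call sequence (sketch only, error handling and the actual
   macroblock loop omitted): codecs built on MpegEncContext bracket each
   frame with MPV_frame_start()/MPV_frame_end(). */
#if 0
static int decode_one_frame_sketch(MpegEncContext *s, AVCodecContext *avctx)
{
    if (MPV_frame_start(s, avctx) < 0)  /* select/allocate current_picture */
        return -1;

    /* ... per-macroblock decoding or encoding runs here ... */

    MPV_frame_end(s);                   /* draw edges, update coded_frame */
    return 0;
}
#endif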
1557
1558 /* generic function for encode/decode, called after a frame has been coded/decoded */
1559 void MPV_frame_end(MpegEncContext *s)
1560 {
1561     int i;
1562     /* draw edge for correct motion prediction if outside */
1563 #ifdef HAVE_XVMC
1564 //just to make sure that all data is rendered.
1565     if(s->avctx->xvmc_acceleration){
1566         XVMC_field_end(s);
1567     }else
1568 #endif
1569     if(s->unrestricted_mv && s->pict_type != B_TYPE && !s->intra_only && !(s->flags&CODEC_FLAG_EMU_EDGE)) {
1570             draw_edges(s->current_picture.data[0], s->linesize  , s->h_edge_pos   , s->v_edge_pos   , EDGE_WIDTH  );
1571             draw_edges(s->current_picture.data[1], s->uvlinesize, s->h_edge_pos>>1, s->v_edge_pos>>1, EDGE_WIDTH/2);
1572             draw_edges(s->current_picture.data[2], s->uvlinesize, s->h_edge_pos>>1, s->v_edge_pos>>1, EDGE_WIDTH/2);
1573     }
1574     emms_c();
1575     
1576     s->last_pict_type    = s->pict_type;
1577     if(s->pict_type!=B_TYPE){
1578         s->last_non_b_pict_type= s->pict_type;
1579     }
1580 #if 0
1581         /* copy back current_picture variables */
1582     for(i=0; i<MAX_PICTURE_COUNT; i++){
1583         if(s->picture[i].data[0] == s->current_picture.data[0]){
1584             s->picture[i]= s->current_picture;
1585             break;
1586         }    
1587     }
1588     assert(i<MAX_PICTURE_COUNT);
1589 #endif    
1590
1591     if(s->encoding){
1592         /* release non-reference frames */
1593         for(i=0; i<MAX_PICTURE_COUNT; i++){
1594             if(s->picture[i].data[0] && !s->picture[i].reference /*&& s->picture[i].type!=FF_BUFFER_TYPE_SHARED*/){
1595                 s->avctx->release_buffer(s->avctx, (AVFrame*)&s->picture[i]);
1596             }
1597         }
1598     }
1599     // clear copies, to avoid confusion
1600 #if 0
1601     memset(&s->last_picture, 0, sizeof(Picture));
1602     memset(&s->next_picture, 0, sizeof(Picture));
1603     memset(&s->current_picture, 0, sizeof(Picture));
1604 #endif
1605     s->avctx->coded_frame= (AVFrame*)s->current_picture_ptr;
1606 }
1607
1608 /**
1609  * draws a line from (ex, ey) -> (sx, sy).
1610  * @param w width of the image
1611  * @param h height of the image
1612  * @param stride stride/linesize of the image
1613  * @param color color of the line
1614  */
1615 static void draw_line(uint8_t *buf, int sx, int sy, int ex, int ey, int w, int h, int stride, int color){
1616     int t, x, y, fr, f;
1617     
1618     sx= clip(sx, 0, w-1);
1619     sy= clip(sy, 0, h-1);
1620     ex= clip(ex, 0, w-1);
1621     ey= clip(ey, 0, h-1);
1622     
1623     buf[sy*stride + sx]+= color;
1624     
1625     if(ABS(ex - sx) > ABS(ey - sy)){
1626         if(sx > ex){
1627             t=sx; sx=ex; ex=t;
1628             t=sy; sy=ey; ey=t;
1629         }
1630         buf+= sx + sy*stride;
1631         ex-= sx;
1632         f= ((ey-sy)<<16)/ex;
1633         for(x= 0; x <= ex; x++){
1634             y = (x*f)>>16;
1635             fr= (x*f)&0xFFFF;
1636             buf[ y   *stride + x]+= (color*(0x10000-fr))>>16;
1637             buf[(y+1)*stride + x]+= (color*         fr )>>16;
1638         }
1639     }else{
1640         if(sy > ey){
1641             t=sx; sx=ex; ex=t;
1642             t=sy; sy=ey; ey=t;
1643         }
1644         buf+= sx + sy*stride;
1645         ey-= sy;
1646         if(ey) f= ((ex-sx)<<16)/ey;
1647         else   f= 0;
1648         for(y= 0; y <= ey; y++){
1649             x = (y*f)>>16;
1650             fr= (y*f)&0xFFFF;
1651             buf[y*stride + x  ]+= (color*(0x10000-fr))>>16;
1652             buf[y*stride + x+1]+= (color*         fr )>>16;
1653         }
1654     }
1655 }
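/* Note (illustrative): the line is stepped in 16.16 fixed point; 'f' is the
   minor-axis increment per major-axis step and 'fr' its fractional part,
   which splits 'color' between the two adjacent rows (or columns) for a
   crude anti-aliasing. */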
1656
1657 /**
1658  * draws an arrow from (ex, ey) -> (sx, sy).
1659  * @param w width of the image
1660  * @param h height of the image
1661  * @param stride stride/linesize of the image
1662  * @param color color of the arrow
1663  */
1664 static void draw_arrow(uint8_t *buf, int sx, int sy, int ex, int ey, int w, int h, int stride, int color){ 
1665     int dx,dy;
1666
1667     sx= clip(sx, -100, w+100);
1668     sy= clip(sy, -100, h+100);
1669     ex= clip(ex, -100, w+100);
1670     ey= clip(ey, -100, h+100);
1671     
1672     dx= ex - sx;
1673     dy= ey - sy;
1674     
1675     if(dx*dx + dy*dy > 3*3){
1676         int rx=  dx + dy;
1677         int ry= -dx + dy;
1678         int length= ff_sqrt((rx*rx + ry*ry)<<8);
1679         
1680         //FIXME subpixel accuracy
1681         rx= ROUNDED_DIV(rx*3<<4, length);
1682         ry= ROUNDED_DIV(ry*3<<4, length);
1683         
1684         draw_line(buf, sx, sy, sx + rx, sy + ry, w, h, stride, color);
1685         draw_line(buf, sx, sy, sx - ry, sy + rx, w, h, stride, color);
1686     }
1687     draw_line(buf, sx, sy, ex, ey, w, h, stride, color);
1688 }
1689
1690 /**
1691  * prints debugging info for the given picture.
1692  */
1693 void ff_print_debug_info(MpegEncContext *s, AVFrame *pict){
1694
1695     if(!pict || !pict->mb_type) return;
1696
1697     if(s->avctx->debug&(FF_DEBUG_SKIP | FF_DEBUG_QP | FF_DEBUG_MB_TYPE)){
1698         int x,y;
1699         
1700         av_log(s->avctx,AV_LOG_DEBUG,"New frame, type: ");
1701         switch (pict->pict_type) {
1702             case FF_I_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"I\n"); break;
1703             case FF_P_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"P\n"); break;
1704             case FF_B_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"B\n"); break;
1705             case FF_S_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"S\n"); break;
1706             case FF_SI_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"SI\n"); break;
1707             case FF_SP_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"SP\n"); break;            
1708         }
1709         for(y=0; y<s->mb_height; y++){
1710             for(x=0; x<s->mb_width; x++){
1711                 if(s->avctx->debug&FF_DEBUG_SKIP){
1712                     int count= s->mbskip_table[x + y*s->mb_stride];
1713                     if(count>9) count=9;
1714                     av_log(s->avctx, AV_LOG_DEBUG, "%1d", count);
1715                 }
1716                 if(s->avctx->debug&FF_DEBUG_QP){
1717                     av_log(s->avctx, AV_LOG_DEBUG, "%2d", pict->qscale_table[x + y*s->mb_stride]);
1718                 }
1719                 if(s->avctx->debug&FF_DEBUG_MB_TYPE){
1720                     int mb_type= pict->mb_type[x + y*s->mb_stride];
1721                     //Type & MV direction
1722                     if(IS_PCM(mb_type))
1723                         av_log(s->avctx, AV_LOG_DEBUG, "P");
1724                     else if(IS_INTRA(mb_type) && IS_ACPRED(mb_type))
1725                         av_log(s->avctx, AV_LOG_DEBUG, "A");
1726                     else if(IS_INTRA4x4(mb_type))
1727                         av_log(s->avctx, AV_LOG_DEBUG, "i");
1728                     else if(IS_INTRA16x16(mb_type))
1729                         av_log(s->avctx, AV_LOG_DEBUG, "I");
1730                     else if(IS_DIRECT(mb_type) && IS_SKIP(mb_type))
1731                         av_log(s->avctx, AV_LOG_DEBUG, "d");
1732                     else if(IS_DIRECT(mb_type))
1733                         av_log(s->avctx, AV_LOG_DEBUG, "D");
1734                     else if(IS_GMC(mb_type) && IS_SKIP(mb_type))
1735                         av_log(s->avctx, AV_LOG_DEBUG, "g");
1736                     else if(IS_GMC(mb_type))
1737                         av_log(s->avctx, AV_LOG_DEBUG, "G");
1738                     else if(IS_SKIP(mb_type))
1739                         av_log(s->avctx, AV_LOG_DEBUG, "S");
1740                     else if(!USES_LIST(mb_type, 1))
1741                         av_log(s->avctx, AV_LOG_DEBUG, ">");
1742                     else if(!USES_LIST(mb_type, 0))
1743                         av_log(s->avctx, AV_LOG_DEBUG, "<");
1744                     else{
1745                         assert(USES_LIST(mb_type, 0) && USES_LIST(mb_type, 1));
1746                         av_log(s->avctx, AV_LOG_DEBUG, "X");
1747                     }
1748                     
1749                     //segmentation
1750                     if(IS_8X8(mb_type))
1751                         av_log(s->avctx, AV_LOG_DEBUG, "+");
1752                     else if(IS_16X8(mb_type))
1753                         av_log(s->avctx, AV_LOG_DEBUG, "-");
1754                     else if(IS_8X16(mb_type))
1755                         av_log(s->avctx, AV_LOG_DEBUG, "¦");
1756                     else if(IS_INTRA(mb_type) || IS_16X16(mb_type))
1757                         av_log(s->avctx, AV_LOG_DEBUG, " ");
1758                     else
1759                         av_log(s->avctx, AV_LOG_DEBUG, "?");
1760                     
1761                         
1762                     if(IS_INTERLACED(mb_type) && s->codec_id == CODEC_ID_H264)
1763                         av_log(s->avctx, AV_LOG_DEBUG, "=");
1764                     else
1765                         av_log(s->avctx, AV_LOG_DEBUG, " ");
1766                 }
1767 //                av_log(s->avctx, AV_LOG_DEBUG, " ");
1768             }
1769             av_log(s->avctx, AV_LOG_DEBUG, "\n");
1770         }
1771     }
1772
1773     if((s->avctx->debug&(FF_DEBUG_VIS_QP|FF_DEBUG_VIS_MB_TYPE)) || (s->avctx->debug_mv)){
1774         const int shift= 1 + s->quarter_sample;
1775         int mb_y;
1776         uint8_t *ptr;
1777         int i;
1778         int h_chroma_shift, v_chroma_shift;
1779         const int width = s->avctx->width;
1780         const int height= s->avctx->height;
1781         const int mv_sample_log2= 4 - pict->motion_subsample_log2;
1782         const int mv_stride= (s->mb_width << mv_sample_log2) + 1;
1783         s->low_delay=0; //needed to see the vectors without trashing the buffers
1784
1785         avcodec_get_chroma_sub_sample(s->avctx->pix_fmt, &h_chroma_shift, &v_chroma_shift);
1786         for(i=0; i<3; i++){
1787             memcpy(s->visualization_buffer[i], pict->data[i], (i==0) ? pict->linesize[i]*height:pict->linesize[i]*height >> v_chroma_shift);
1788             pict->data[i]= s->visualization_buffer[i];
1789         }
1790         pict->type= FF_BUFFER_TYPE_COPY;
1791         ptr= pict->data[0];
1792
1793         for(mb_y=0; mb_y<s->mb_height; mb_y++){
1794             int mb_x;
1795             for(mb_x=0; mb_x<s->mb_width; mb_x++){
1796                 const int mb_index= mb_x + mb_y*s->mb_stride;
1797                 if((s->avctx->debug_mv) && pict->motion_val){
1798                   int type;
1799                   for(type=0; type<3; type++){
1800                     int direction = 0;
1801                     switch (type) {
1802                       case 0: if ((!(s->avctx->debug_mv&FF_DEBUG_VIS_MV_P_FOR)) || (pict->pict_type!=FF_P_TYPE))
1803                                 continue;
1804                               direction = 0;
1805                               break;
1806                       case 1: if ((!(s->avctx->debug_mv&FF_DEBUG_VIS_MV_B_FOR)) || (pict->pict_type!=FF_B_TYPE))
1807                                 continue;
1808                               direction = 0;
1809                               break;
1810                       case 2: if ((!(s->avctx->debug_mv&FF_DEBUG_VIS_MV_B_BACK)) || (pict->pict_type!=FF_B_TYPE))
1811                                 continue;
1812                               direction = 1;
1813                               break;
1814                     }
1815                     if(!USES_LIST(pict->mb_type[mb_index], direction))
1816                         continue;
1817
1818                     if(IS_8X8(pict->mb_type[mb_index])){
1819                       int i;
1820                       for(i=0; i<4; i++){
1821                         int sx= mb_x*16 + 4 + 8*(i&1);
1822                         int sy= mb_y*16 + 4 + 8*(i>>1);
1823                         int xy= (mb_x*2 + (i&1) + (mb_y*2 + (i>>1))*mv_stride) << (mv_sample_log2-1);
1824                         int mx= (pict->motion_val[direction][xy][0]>>shift) + sx;
1825                         int my= (pict->motion_val[direction][xy][1]>>shift) + sy;
1826                         draw_arrow(ptr, sx, sy, mx, my, width, height, s->linesize, 100);
1827                       }
1828                     }else if(IS_16X8(pict->mb_type[mb_index])){
1829                       int i;
1830                       for(i=0; i<2; i++){
1831                         int sx=mb_x*16 + 8;
1832                         int sy=mb_y*16 + 4 + 8*i;
1833                         int xy= (mb_x*2 + (mb_y*2 + i)*mv_stride) << (mv_sample_log2-1);
1834                         int mx=(pict->motion_val[direction][xy][0]>>shift);
1835                         int my=(pict->motion_val[direction][xy][1]>>shift);
1836                         
1837                         if(IS_INTERLACED(pict->mb_type[mb_index]))
1838                             my*=2;
1839                         
1840                         draw_arrow(ptr, sx, sy, mx+sx, my+sy, width, height, s->linesize, 100);
1841                       }
1842                     }else if(IS_8X16(pict->mb_type[mb_index])){
1843                       int i;
1844                       for(i=0; i<2; i++){
1845                         int sx=mb_x*16 + 4 + 8*i;
1846                         int sy=mb_y*16 + 8;
1847                         int xy= (mb_x*2 + i + mb_y*2*mv_stride) << (mv_sample_log2-1);
1848                         int mx=(pict->motion_val[direction][xy][0]>>shift);
1849                         int my=(pict->motion_val[direction][xy][1]>>shift);
1850                         
1851                         if(IS_INTERLACED(pict->mb_type[mb_index]))
1852                             my*=2;
1853                         
1854                         draw_arrow(ptr, sx, sy, mx+sx, my+sy, width, height, s->linesize, 100);
1855                       }
1856                     }else{
1857                       int sx= mb_x*16 + 8;
1858                       int sy= mb_y*16 + 8;
1859                       int xy= (mb_x + mb_y*mv_stride) << mv_sample_log2;
1860                       int mx= (pict->motion_val[direction][xy][0]>>shift) + sx;
1861                       int my= (pict->motion_val[direction][xy][1]>>shift) + sy;
1862                       draw_arrow(ptr, sx, sy, mx, my, width, height, s->linesize, 100);
1863                     }
1864                   }                  
1865                 }
1866                 if((s->avctx->debug&FF_DEBUG_VIS_QP) && pict->motion_val){
1867                     uint64_t c= (pict->qscale_table[mb_index]*128/31) * 0x0101010101010101ULL;
1868                     int y;
1869                     for(y=0; y<8; y++){
1870                         *(uint64_t*)(pict->data[1] + 8*mb_x + (8*mb_y + y)*pict->linesize[1])= c;
1871                         *(uint64_t*)(pict->data[2] + 8*mb_x + (8*mb_y + y)*pict->linesize[2])= c;
1872                     }
1873                 }
1874                 if((s->avctx->debug&FF_DEBUG_VIS_MB_TYPE) && pict->motion_val){
1875                     int mb_type= pict->mb_type[mb_index];
1876                     uint64_t u,v;
1877                     int y;
1878 #define COLOR(theta, r)\
1879 u= (int)(128 + r*cos(theta*3.141592/180));\
1880 v= (int)(128 + r*sin(theta*3.141592/180));
1881
1882                     
1883                     u=v=128;
1884                     if(IS_PCM(mb_type)){
1885                         COLOR(120,48)
1886                     }else if((IS_INTRA(mb_type) && IS_ACPRED(mb_type)) || IS_INTRA16x16(mb_type)){
1887                         COLOR(30,48)
1888                     }else if(IS_INTRA4x4(mb_type)){
1889                         COLOR(90,48)
1890                     }else if(IS_DIRECT(mb_type) && IS_SKIP(mb_type)){
1891 //                        COLOR(120,48)
1892                     }else if(IS_DIRECT(mb_type)){
1893                         COLOR(150,48)
1894                     }else if(IS_GMC(mb_type) && IS_SKIP(mb_type)){
1895                         COLOR(170,48)
1896                     }else if(IS_GMC(mb_type)){
1897                         COLOR(190,48)
1898                     }else if(IS_SKIP(mb_type)){
1899 //                        COLOR(180,48)
1900                     }else if(!USES_LIST(mb_type, 1)){
1901                         COLOR(240,48)
1902                     }else if(!USES_LIST(mb_type, 0)){
1903                         COLOR(0,48)
1904                     }else{
1905                         assert(USES_LIST(mb_type, 0) && USES_LIST(mb_type, 1));
1906                         COLOR(300,48)
1907                     }
1908
1909                     u*= 0x0101010101010101ULL;
1910                     v*= 0x0101010101010101ULL;
1911                     for(y=0; y<8; y++){
1912                         *(uint64_t*)(pict->data[1] + 8*mb_x + (8*mb_y + y)*pict->linesize[1])= u;
1913                         *(uint64_t*)(pict->data[2] + 8*mb_x + (8*mb_y + y)*pict->linesize[2])= v;
1914                     }
1915
1916                     //segmentation
1917                     if(IS_8X8(mb_type) || IS_16X8(mb_type)){
1918                         *(uint64_t*)(pict->data[0] + 16*mb_x + 0 + (16*mb_y + 8)*pict->linesize[0])^= 0x8080808080808080ULL;
1919                         *(uint64_t*)(pict->data[0] + 16*mb_x + 8 + (16*mb_y + 8)*pict->linesize[0])^= 0x8080808080808080ULL;
1920                     }
1921                     if(IS_8X8(mb_type) || IS_8X16(mb_type)){
1922                         for(y=0; y<16; y++)
1923                             pict->data[0][16*mb_x + 8 + (16*mb_y + y)*pict->linesize[0]]^= 0x80;
1924                     }
1925                         
1926                     if(IS_INTERLACED(mb_type) && s->codec_id == CODEC_ID_H264){
1927                         // hmm
1928                     }
1929                 }
1930                 s->mbskip_table[mb_index]=0;
1931             }
1932         }
1933     }
1934 }
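/* Usage sketch (illustrative): the visualizations above are driven purely by
   flags on the AVCodecContext; a hypothetical caller could enable the textual
   per-macroblock dump and forward P-frame motion vectors like this. */
#if 0
static void enable_debug_visualization(AVCodecContext *avctx)
{
    avctx->debug    |= FF_DEBUG_MB_TYPE | FF_DEBUG_QP;  /* per-MB type/QP trace   */
    avctx->debug_mv |= FF_DEBUG_VIS_MV_P_FOR;           /* draw forward P vectors */
}
#endif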
1935
1936 #ifdef CONFIG_ENCODERS
1937
1938 static int get_sae(uint8_t *src, int ref, int stride){
1939     int x,y;
1940     int acc=0;
1941     
1942     for(y=0; y<16; y++){
1943         for(x=0; x<16; x++){
1944             acc+= ABS(src[x+y*stride] - ref);
1945         }
1946     }
1947     
1948     return acc;
1949 }
1950
1951 static int get_intra_count(MpegEncContext *s, uint8_t *src, uint8_t *ref, int stride){
1952     int x, y, w, h;
1953     int acc=0;
1954     
1955     w= s->width &~15;
1956     h= s->height&~15;
1957     
1958     for(y=0; y<h; y+=16){
1959         for(x=0; x<w; x+=16){
1960             int offset= x + y*stride;
1961             int sad = s->dsp.sad[0](NULL, src + offset, ref + offset, stride, 16);
1962             int mean= (s->dsp.pix_sum(src + offset, stride) + 128)>>8;
1963             int sae = get_sae(src + offset, mean, stride);
1964             
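            /* count the block as intra-looking when predicting it from its own
               mean (SAE) beats prediction from the reference (SAD) by a margin */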
1965             acc+= sae + 500 < sad;
1966         }
1967     }
1968     return acc;
1969 }
1970
1971
1972 static int load_input_picture(MpegEncContext *s, AVFrame *pic_arg){
1973     AVFrame *pic=NULL;
1974     int64_t pts;
1975     int i;
1976     const int encoding_delay= s->max_b_frames;
1977     int direct=1;
1978     
1979     if(pic_arg){
1980         pts= pic_arg->pts;
1981         pic_arg->display_picture_number= s->input_picture_number++;
1982
1983         if(pts != AV_NOPTS_VALUE){ 
1984             if(s->user_specified_pts != AV_NOPTS_VALUE){
1985                 int64_t time= av_rescale(pts, s->avctx->frame_rate, s->avctx->frame_rate_base*(int64_t)AV_TIME_BASE);
1986                 int64_t last= av_rescale(s->user_specified_pts, s->avctx->frame_rate, s->avctx->frame_rate_base*(int64_t)AV_TIME_BASE);
1987             
1988                 if(time <= last){            
1989                     av_log(s->avctx, AV_LOG_ERROR, "Error, Invalid timestamp=%Ld, last=%Ld\n", pts, s->user_specified_pts);
1990                     return -1;
1991                 }
1992             }
1993             s->user_specified_pts= pts;
1994         }else{
1995             if(s->user_specified_pts != AV_NOPTS_VALUE){
1996                 s->user_specified_pts= 
1997                 pts= s->user_specified_pts + AV_TIME_BASE*(int64_t)s->avctx->frame_rate_base / s->avctx->frame_rate;
1998                 av_log(s->avctx, AV_LOG_INFO, "Warning: AVFrame.pts=? trying to guess (%Ld)\n", pts);
1999             }else{
2000                 pts= av_rescale(pic_arg->display_picture_number*(int64_t)s->avctx->frame_rate_base, AV_TIME_BASE, s->avctx->frame_rate);
2001             }
2002         }
2003     }
2004
2005   if(pic_arg){
2006     if(encoding_delay && !(s->flags&CODEC_FLAG_INPUT_PRESERVED)) direct=0;
2007     if(pic_arg->linesize[0] != s->linesize) direct=0;
2008     if(pic_arg->linesize[1] != s->uvlinesize) direct=0;
2009     if(pic_arg->linesize[2] != s->uvlinesize) direct=0;
2010   
2011 //    av_log(AV_LOG_DEBUG, "%d %d %d %d\n",pic_arg->linesize[0], pic_arg->linesize[1], s->linesize, s->uvlinesize);
2012     
2013     if(direct){
2014         i= ff_find_unused_picture(s, 1);
2015
2016         pic= (AVFrame*)&s->picture[i];
2017         pic->reference= 3;
2018     
2019         for(i=0; i<4; i++){
2020             pic->data[i]= pic_arg->data[i];
2021             pic->linesize[i]= pic_arg->linesize[i];
2022         }
2023         alloc_picture(s, (Picture*)pic, 1);
2024     }else{
2025         int offset= 16;
2026         i= ff_find_unused_picture(s, 0);
2027
2028         pic= (AVFrame*)&s->picture[i];
2029         pic->reference= 3;
2030
2031         alloc_picture(s, (Picture*)pic, 0);
2032
2033         if(   pic->data[0] + offset == pic_arg->data[0] 
2034            && pic->data[1] + offset == pic_arg->data[1]
2035            && pic->data[2] + offset == pic_arg->data[2]){
2036        // empty
2037         }else{
2038             int h_chroma_shift, v_chroma_shift;
2039             avcodec_get_chroma_sub_sample(s->avctx->pix_fmt, &h_chroma_shift, &v_chroma_shift);
2040         
2041             for(i=0; i<3; i++){
2042                 int src_stride= pic_arg->linesize[i];
2043                 int dst_stride= i ? s->uvlinesize : s->linesize;
2044                 int h_shift= i ? h_chroma_shift : 0;
2045                 int v_shift= i ? v_chroma_shift : 0;
2046                 int w= s->width >>h_shift;
2047                 int h= s->height>>v_shift;
2048                 uint8_t *src= pic_arg->data[i];
2049                 uint8_t *dst= pic->data[i] + offset;
2050             
2051                 if(src_stride==dst_stride)
2052                     memcpy(dst, src, src_stride*h);
2053                 else{
2054                     while(h--){
2055                         memcpy(dst, src, w);
2056                         dst += dst_stride;
2057                         src += src_stride;
2058                     }
2059                 }
2060             }
2061         }
2062     }
2063     copy_picture_attributes(s, pic, pic_arg);
2064     pic->pts= pts; //we set this here to avoid modifying pic_arg
2065   }
2066   
2067     /* shift buffer entries */
2068     for(i=1; i<MAX_PICTURE_COUNT /*s->encoding_delay+1*/; i++)
2069         s->input_picture[i-1]= s->input_picture[i];
2070         
2071     s->input_picture[encoding_delay]= (Picture*)pic;
2072
2073     return 0;
2074 }
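/* Worked example (illustrative, hypothetical numbers): with frame_rate=25,
   frame_rate_base=1 and no pts supplied by the caller, picture number 3 gets
       pts = av_rescale(3*1, AV_TIME_BASE, 25) = 3*1000000/25 = 120000
   i.e. missing timestamps are synthesized on a fixed 1/25 s grid. */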
2075
2076 static int skip_check(MpegEncContext *s, Picture *p, Picture *ref){
2077     int x, y, plane;
2078     int score=0;
2079     int64_t score64=0;
2080
2081     for(plane=0; plane<3; plane++){
2082         const int stride= p->linesize[plane];
2083         const int bw= plane ? 1 : 2;
2084         for(y=0; y<s->mb_height*bw; y++){
2085             for(x=0; x<s->mb_width*bw; x++){
2086                 int v= s->dsp.frame_skip_cmp[1](s, p->data[plane] + 8*(x + y*stride), ref->data[plane] + 8*(x + y*stride), stride, 8);
2087                 
2088                 switch(s->avctx->frame_skip_exp){
2089                     case 0: score= FFMAX(score, v); break;
2090                     case 1: score+= ABS(v);break;
2091                     case 2: score+= v*v;break;
2092                     case 3: score64+= ABS(v*v*(int64_t)v);break;
2093                     case 4: score64+= v*v*(int64_t)(v*v);break;
2094                 }
2095             }
2096         }
2097     }
2098     
2099     if(score) score64= score;
2100
2101     if(score64 < s->avctx->frame_skip_threshold)
2102         return 1;
2103     if(score64 < ((s->avctx->frame_skip_factor * (int64_t)s->lambda)>>8))
2104         return 1;
2105     return 0;
2106 }
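/* Note (illustrative summary): frame_skip_exp selects the norm used above:
   0 = maximum per-block difference, 1 = sum of absolute differences,
   2 = sum of squares, 3/4 = higher powers accumulated in 64 bits. The frame
   is skipped when the score stays below frame_skip_threshold or below the
   lambda-scaled frame_skip_factor. */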
2107
2108 static void select_input_picture(MpegEncContext *s){
2109     int i;
2110
2111     for(i=1; i<MAX_PICTURE_COUNT; i++)
2112         s->reordered_input_picture[i-1]= s->reordered_input_picture[i];
2113     s->reordered_input_picture[MAX_PICTURE_COUNT-1]= NULL;
2114
2115     /* set next picture types & ordering */
2116     if(s->reordered_input_picture[0]==NULL && s->input_picture[0]){
2117         if(/*s->picture_in_gop_number >= s->gop_size ||*/ s->next_picture_ptr==NULL || s->intra_only){
2118             s->reordered_input_picture[0]= s->input_picture[0];
2119             s->reordered_input_picture[0]->pict_type= I_TYPE;
2120             s->reordered_input_picture[0]->coded_picture_number= s->coded_picture_number++;
2121         }else{
2122             int b_frames;
2123
2124             if(s->avctx->frame_skip_threshold || s->avctx->frame_skip_factor){
2125                 if(skip_check(s, s->input_picture[0], s->next_picture_ptr)){
2126 //av_log(NULL, AV_LOG_DEBUG, "skip %p %Ld\n", s->input_picture[0]->data[0], s->input_picture[0]->pts);
2127                 
2128                     if(s->input_picture[0]->type == FF_BUFFER_TYPE_SHARED){
2129                         for(i=0; i<4; i++)
2130                             s->input_picture[0]->data[i]= NULL;
2131                         s->input_picture[0]->type= 0;            
2132                     }else{
2133                         assert(   s->input_picture[0]->type==FF_BUFFER_TYPE_USER 
2134                                || s->input_picture[0]->type==FF_BUFFER_TYPE_INTERNAL);
2135             
2136                         s->avctx->release_buffer(s->avctx, (AVFrame*)s->input_picture[0]);
2137                     }
2138
2139                     goto no_output_pic;
2140                 }
2141             }
2142
2143             if(s->flags&CODEC_FLAG_PASS2){
2144                 for(i=0; i<s->max_b_frames+1; i++){
2145                     int pict_num= s->input_picture[0]->display_picture_number + i;
2146
2147                     if(pict_num >= s->rc_context.num_entries) 
2148                         break;
2149                     if(!s->input_picture[i]){
2150                         s->rc_context.entry[pict_num-1].new_pict_type = P_TYPE;
2151                         break;
2152                     }
2153
2154                     s->input_picture[i]->pict_type= 
2155                         s->rc_context.entry[pict_num].new_pict_type;
2156                 }
2157             }
2158
2159             if(s->avctx->b_frame_strategy==0){
2160                 b_frames= s->max_b_frames;
2161                 while(b_frames && !s->input_picture[b_frames]) b_frames--;
2162             }else if(s->avctx->b_frame_strategy==1){
2163                 for(i=1; i<s->max_b_frames+1; i++){
2164                     if(s->input_picture[i] && s->input_picture[i]->b_frame_score==0){
2165                         s->input_picture[i]->b_frame_score= 
2166                             get_intra_count(s, s->input_picture[i  ]->data[0], 
2167                                                s->input_picture[i-1]->data[0], s->linesize) + 1;
2168                     }
2169                 }
2170                 for(i=0; i<s->max_b_frames; i++){
2171                     if(s->input_picture[i]==NULL || s->input_picture[i]->b_frame_score - 1 > s->mb_num/40) break;
2172                 }
2173                                 
2174                 b_frames= FFMAX(0, i-1);
2175                 
2176                 /* reset scores */
2177                 for(i=0; i<b_frames+1; i++){
2178                     s->input_picture[i]->b_frame_score=0;
2179                 }
2180             }else{
2181                 av_log(s->avctx, AV_LOG_ERROR, "illegal b frame strategy\n");
2182                 b_frames=0;
2183             }
2184
2185             emms_c();
2186 //static int b_count=0;
2187 //b_count+= b_frames;
2188 //av_log(s->avctx, AV_LOG_DEBUG, "b_frames: %d\n", b_count);
2189
2190             for(i= b_frames - 1; i>=0; i--){
2191                 int type= s->input_picture[i]->pict_type;
2192                 if(type && type != B_TYPE)
2193                     b_frames= i;
2194             }
2195             if(s->input_picture[b_frames]->pict_type == B_TYPE && b_frames == s->max_b_frames){
2196                 av_log(s->avctx, AV_LOG_ERROR, "warning, too many bframes in a row\n");
2197             }
2198
2199             if(s->picture_in_gop_number + b_frames >= s->gop_size){
2200               if((s->flags2 & CODEC_FLAG2_STRICT_GOP) && s->gop_size > s->picture_in_gop_number){
2201                     b_frames= s->gop_size - s->picture_in_gop_number - 1;
2202               }else{
2203                 if(s->flags & CODEC_FLAG_CLOSED_GOP)
2204                     b_frames=0;
2205                 s->input_picture[b_frames]->pict_type= I_TYPE;
2206               }
2207             }
2208             
2209             if(   (s->flags & CODEC_FLAG_CLOSED_GOP)
2210                && b_frames
2211                && s->input_picture[b_frames]->pict_type== I_TYPE)
2212                 b_frames--;
2213
2214             s->reordered_input_picture[0]= s->input_picture[b_frames];
2215             if(s->reordered_input_picture[0]->pict_type != I_TYPE)
2216                 s->reordered_input_picture[0]->pict_type= P_TYPE;
2217             s->reordered_input_picture[0]->coded_picture_number= s->coded_picture_number++;
2218             for(i=0; i<b_frames; i++){
2219                 s->reordered_input_picture[i+1]= s->input_picture[i];
2220                 s->reordered_input_picture[i+1]->pict_type= B_TYPE;
2221                 s->reordered_input_picture[i+1]->coded_picture_number= s->coded_picture_number++;
2222             }
2223         }
2224     }
2225 no_output_pic:
2226     if(s->reordered_input_picture[0]){
2227         s->reordered_input_picture[0]->reference= s->reordered_input_picture[0]->pict_type!=B_TYPE ? 3 : 0;
2228
2229         copy_picture(&s->new_picture, s->reordered_input_picture[0]);
2230
2231         if(s->reordered_input_picture[0]->type == FF_BUFFER_TYPE_SHARED){
2232             // input is a shared pix, so we can't modify it -> alloc a new one & ensure that the shared one is reusable
2233         
2234             int i= ff_find_unused_picture(s, 0);
2235             Picture *pic= &s->picture[i];
2236
2237             /* mark us unused / free shared pic */
2238             for(i=0; i<4; i++)
2239                 s->reordered_input_picture[0]->data[i]= NULL;
2240             s->reordered_input_picture[0]->type= 0;
2241             
2242             pic->reference              = s->reordered_input_picture[0]->reference;
2243             
2244             alloc_picture(s, pic, 0);
2245
2246             copy_picture_attributes(s, (AVFrame*)pic, (AVFrame*)s->reordered_input_picture[0]);
2247
2248             s->current_picture_ptr= pic;
2249         }else{
2250             // input is not a shared pix -> reuse buffer for current_pix
2251
2252             assert(   s->reordered_input_picture[0]->type==FF_BUFFER_TYPE_USER 
2253                    || s->reordered_input_picture[0]->type==FF_BUFFER_TYPE_INTERNAL);
2254             
2255             s->current_picture_ptr= s->reordered_input_picture[0];
2256             for(i=0; i<4; i++){
2257                 s->new_picture.data[i]+=16;
2258             }
2259         }
2260         copy_picture(&s->current_picture, s->current_picture_ptr);
2261     
2262         s->picture_number= s->new_picture.display_picture_number;
2263 //printf("dpn:%d\n", s->picture_number);
2264     }else{
2265        memset(&s->new_picture, 0, sizeof(Picture));
2266     }
2267 }
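/* Worked example (illustrative): with max_b_frames=2, three buffered input
   pictures F1 F2 F3 (display order) and b_frame_strategy 0, b_frames becomes
   2, so the coded order is  F3(P) F1(B) F2(B)  and coded_picture_number is
   assigned in that coded order. */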
2268
2269 int MPV_encode_picture(AVCodecContext *avctx,
2270                        unsigned char *buf, int buf_size, void *data)
2271 {
2272     MpegEncContext *s = avctx->priv_data;
2273     AVFrame *pic_arg = data;
2274     int i, stuffing_count;
2275
2276     if(avctx->pix_fmt != PIX_FMT_YUV420P && avctx->pix_fmt != PIX_FMT_YUVJ420P){
2277         av_log(avctx, AV_LOG_ERROR, "this codec supports only YUV420P\n");
2278         return -1;
2279     }
2280     
2281     for(i=0; i<avctx->thread_count; i++){
2282         int start_y= s->thread_context[i]->start_mb_y;
2283         int   end_y= s->thread_context[i]->  end_mb_y;
2284         int h= s->mb_height;
2285         uint8_t *start= buf + buf_size*start_y/h;
2286         uint8_t *end  = buf + buf_size*  end_y/h;
2287
2288         init_put_bits(&s->thread_context[i]->pb, start, end - start);
2289     }
2290
2291     s->picture_in_gop_number++;
2292
2293     if(load_input_picture(s, pic_arg) < 0)
2294         return -1;
2295     
2296     select_input_picture(s);
2297     
2298     /* output? */
2299     if(s->new_picture.data[0]){
2300         s->pict_type= s->new_picture.pict_type;
2301 //emms_c();
2302 //printf("qs:%f %f %d\n", s->new_picture.quality, s->current_picture.quality, s->qscale);
2303         MPV_frame_start(s, avctx);
2304
2305         encode_picture(s, s->picture_number);
2306         
2307         avctx->real_pict_num  = s->picture_number;
2308         avctx->header_bits = s->header_bits;
2309         avctx->mv_bits     = s->mv_bits;
2310         avctx->misc_bits   = s->misc_bits;
2311         avctx->i_tex_bits  = s->i_tex_bits;
2312         avctx->p_tex_bits  = s->p_tex_bits;
2313         avctx->i_count     = s->i_count;
2314         avctx->p_count     = s->mb_num - s->i_count - s->skip_count; //FIXME f/b_count in avctx
2315         avctx->skip_count  = s->skip_count;
2316
2317         MPV_frame_end(s);
2318
2319         if (s->out_format == FMT_MJPEG)
2320             mjpeg_picture_trailer(s);
2321         
2322         if(s->flags&CODEC_FLAG_PASS1)
2323             ff_write_pass1_stats(s);
2324
2325         for(i=0; i<4; i++){
2326             avctx->error[i] += s->current_picture_ptr->error[i];
2327         }
2328
2329         if(s->flags&CODEC_FLAG_PASS1)
2330             assert(avctx->header_bits + avctx->mv_bits + avctx->misc_bits + avctx->i_tex_bits + avctx->p_tex_bits == put_bits_count(&s->pb));
2331         flush_put_bits(&s->pb);
2332         s->frame_bits  = put_bits_count(&s->pb);
2333
2334         stuffing_count= ff_vbv_update(s, s->frame_bits);
2335         if(stuffing_count){
2336             if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < stuffing_count + 50){
2337                 av_log(s->avctx, AV_LOG_ERROR, "stuffing too large\n");
2338                 return -1;
2339             }
2340
2341             switch(s->codec_id){
2342             case CODEC_ID_MPEG1VIDEO:
2343             case CODEC_ID_MPEG2VIDEO:
2344                 while(stuffing_count--){
2345                     put_bits(&s->pb, 8, 0);
2346                 }
2347             break;
2348             case CODEC_ID_MPEG4:
2349                 put_bits(&s->pb, 16, 0);
2350                 put_bits(&s->pb, 16, 0x1C3);
2351                 stuffing_count -= 4;
2352                 while(stuffing_count--){
2353                     put_bits(&s->pb, 8, 0xFF);
2354                 }
2355             break;
2356             default:
2357                 av_log(s->avctx, AV_LOG_ERROR, "vbv buffer overflow\n");
2358             }
2359             flush_put_bits(&s->pb);
2360             s->frame_bits  = put_bits_count(&s->pb);
2361         }
2362
2363         /* update mpeg1/2 vbv_delay for CBR */    
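        /* the 16-bit vbv_delay is spliced back into the already written
           picture header: 3 bits into the first byte, 8 into the second and
           5 into the third, hence the >>13 / >>5 / <<3 shifts and the
           0xF8 / 0x07 masks that preserve the neighbouring header fields */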
2364         if(s->avctx->rc_max_rate && s->avctx->rc_min_rate == s->avctx->rc_max_rate && s->out_format == FMT_MPEG1
2365            && 90000LL * (avctx->rc_buffer_size-1) <= s->avctx->rc_max_rate*0xFFFFLL){
2366             int vbv_delay;
2367
2368             assert(s->repeat_first_field==0);
2369             
2370             vbv_delay= lrintf(90000 * s->rc_context.buffer_index / s->avctx->rc_max_rate);
2371             assert(vbv_delay < 0xFFFF);
2372
2373             s->vbv_delay_ptr[0] &= 0xF8;
2374             s->vbv_delay_ptr[0] |= vbv_delay>>13;
2375             s->vbv_delay_ptr[1]  = vbv_delay>>5;
2376             s->vbv_delay_ptr[2] &= 0x07;
2377             s->vbv_delay_ptr[2] |= vbv_delay<<3;
2378         }
2379         s->total_bits += s->frame_bits;
2380         avctx->frame_bits  = s->frame_bits;
2381     }else{
2382         assert((pbBufPtr(&s->pb) == s->pb.buf));
2383         s->frame_bits=0;
2384     }
2385     assert((s->frame_bits&7)==0);
2386     
2387     return s->frame_bits/8;
2388 }
2389
2390 #endif //CONFIG_ENCODERS
2391
2392 static inline void gmc1_motion(MpegEncContext *s,
2393                                uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2394                                uint8_t **ref_picture)
2395 {
2396     uint8_t *ptr;
2397     int offset, src_x, src_y, linesize, uvlinesize;
2398     int motion_x, motion_y;
2399     int emu=0;
2400
2401     motion_x= s->sprite_offset[0][0];
2402     motion_y= s->sprite_offset[0][1];
2403     src_x = s->mb_x * 16 + (motion_x >> (s->sprite_warping_accuracy+1));
2404     src_y = s->mb_y * 16 + (motion_y >> (s->sprite_warping_accuracy+1));
2405     motion_x<<=(3-s->sprite_warping_accuracy);
2406     motion_y<<=(3-s->sprite_warping_accuracy);
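    /* sprite offsets are in 1/2^(accuracy+1)-pel units: the >>(accuracy+1)
       above extracts the integer pel position, while <<(3-accuracy)
       renormalizes the remainder to 1/16 pel so that (motion&15) below
       selects the bilinear fraction passed to dsp.gmc1 */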
2407     src_x = clip(src_x, -16, s->width);
2408     if (src_x == s->width)
2409         motion_x =0;
2410     src_y = clip(src_y, -16, s->height);
2411     if (src_y == s->height)
2412         motion_y =0;
2413
2414     linesize = s->linesize;
2415     uvlinesize = s->uvlinesize;
2416     
2417     ptr = ref_picture[0] + (src_y * linesize) + src_x;
2418
2419     if(s->flags&CODEC_FLAG_EMU_EDGE){
2420         if(   (unsigned)src_x >= s->h_edge_pos - 17
2421            || (unsigned)src_y >= s->v_edge_pos - 17){
2422             ff_emulated_edge_mc(s->edge_emu_buffer, ptr, linesize, 17, 17, src_x, src_y, s->h_edge_pos, s->v_edge_pos);
2423             ptr= s->edge_emu_buffer;
2424         }
2425     }
2426     
2427     if((motion_x|motion_y)&7){
2428         s->dsp.gmc1(dest_y  , ptr  , linesize, 16, motion_x&15, motion_y&15, 128 - s->no_rounding);
2429         s->dsp.gmc1(dest_y+8, ptr+8, linesize, 16, motion_x&15, motion_y&15, 128 - s->no_rounding);
2430     }else{
2431         int dxy;
2432         
2433         dxy= ((motion_x>>3)&1) | ((motion_y>>2)&2);
2434         if (s->no_rounding){
2435             s->dsp.put_no_rnd_pixels_tab[0][dxy](dest_y, ptr, linesize, 16);
2436         }else{
2437             s->dsp.put_pixels_tab       [0][dxy](dest_y, ptr, linesize, 16);
2438         }
2439     }
2440     
2441     if(s->flags&CODEC_FLAG_GRAY) return;
2442
2443     motion_x= s->sprite_offset[1][0];
2444     motion_y= s->sprite_offset[1][1];
2445     src_x = s->mb_x * 8 + (motion_x >> (s->sprite_warping_accuracy+1));
2446     src_y = s->mb_y * 8 + (motion_y >> (s->sprite_warping_accuracy+1));
2447     motion_x<<=(3-s->sprite_warping_accuracy);
2448     motion_y<<=(3-s->sprite_warping_accuracy);
2449     src_x = clip(src_x, -8, s->width>>1);
2450     if (src_x == s->width>>1)
2451         motion_x =0;
2452     src_y = clip(src_y, -8, s->height>>1);
2453     if (src_y == s->height>>1)
2454         motion_y =0;
2455
2456     offset = (src_y * uvlinesize) + src_x;
2457     ptr = ref_picture[1] + offset;
2458     if(s->flags&CODEC_FLAG_EMU_EDGE){
2459         if(   (unsigned)src_x >= (s->h_edge_pos>>1) - 9
2460            || (unsigned)src_y >= (s->v_edge_pos>>1) - 9){
2461             ff_emulated_edge_mc(s->edge_emu_buffer, ptr, uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
2462             ptr= s->edge_emu_buffer;
2463             emu=1;
2464         }
2465     }
2466     s->dsp.gmc1(dest_cb, ptr, uvlinesize, 8, motion_x&15, motion_y&15, 128 - s->no_rounding);
2467     
2468     ptr = ref_picture[2] + offset;
2469     if(emu){
2470         ff_emulated_edge_mc(s->edge_emu_buffer, ptr, uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
2471         ptr= s->edge_emu_buffer;
2472     }
2473     s->dsp.gmc1(dest_cr, ptr, uvlinesize, 8, motion_x&15, motion_y&15, 128 - s->no_rounding);
2474     
2475     return;
2476 }
2477
2478 static inline void gmc_motion(MpegEncContext *s,
2479                                uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2480                                uint8_t **ref_picture)
2481 {
2482     uint8_t *ptr;
2483     int linesize, uvlinesize;
2484     const int a= s->sprite_warping_accuracy;
2485     int ox, oy;
2486
2487     linesize = s->linesize;
2488     uvlinesize = s->uvlinesize;
2489
2490     ptr = ref_picture[0];
2491
2492     ox= s->sprite_offset[0][0] + s->sprite_delta[0][0]*s->mb_x*16 + s->sprite_delta[0][1]*s->mb_y*16;
2493     oy= s->sprite_offset[0][1] + s->sprite_delta[1][0]*s->mb_x*16 + s->sprite_delta[1][1]*s->mb_y*16;
2494
2495     s->dsp.gmc(dest_y, ptr, linesize, 16,
2496            ox, 
2497            oy, 
2498            s->sprite_delta[0][0], s->sprite_delta[0][1],
2499            s->sprite_delta[1][0], s->sprite_delta[1][1], 
2500            a+1, (1<<(2*a+1)) - s->no_rounding,
2501            s->h_edge_pos, s->v_edge_pos);
2502     s->dsp.gmc(dest_y+8, ptr, linesize, 16,
2503            ox + s->sprite_delta[0][0]*8, 
2504            oy + s->sprite_delta[1][0]*8, 
2505            s->sprite_delta[0][0], s->sprite_delta[0][1],
2506            s->sprite_delta[1][0], s->sprite_delta[1][1], 
2507            a+1, (1<<(2*a+1)) - s->no_rounding,
2508            s->h_edge_pos, s->v_edge_pos);
2509
2510     if(s->flags&CODEC_FLAG_GRAY) return;
2511
2512     ox= s->sprite_offset[1][0] + s->sprite_delta[0][0]*s->mb_x*8 + s->sprite_delta[0][1]*s->mb_y*8;
2513     oy= s->sprite_offset[1][1] + s->sprite_delta[1][0]*s->mb_x*8 + s->sprite_delta[1][1]*s->mb_y*8;
2514
2515     ptr = ref_picture[1];
2516     s->dsp.gmc(dest_cb, ptr, uvlinesize, 8,
2517            ox, 
2518            oy, 
2519            s->sprite_delta[0][0], s->sprite_delta[0][1],
2520            s->sprite_delta[1][0], s->sprite_delta[1][1], 
2521            a+1, (1<<(2*a+1)) - s->no_rounding,
2522            s->h_edge_pos>>1, s->v_edge_pos>>1);
2523     
2524     ptr = ref_picture[2];
2525     s->dsp.gmc(dest_cr, ptr, uvlinesize, 8,
2526            ox, 
2527            oy, 
2528            s->sprite_delta[0][0], s->sprite_delta[0][1],
2529            s->sprite_delta[1][0], s->sprite_delta[1][1], 
2530            a+1, (1<<(2*a+1)) - s->no_rounding,
2531            s->h_edge_pos>>1, s->v_edge_pos>>1);
2532 }
2533
2534 /**
2535  * Copies a rectangular area of samples to a temporary buffer and replicates the border samples.
2536  * @param buf destination buffer
2537  * @param src source buffer
2538  * @param linesize number of bytes between 2 vertically adjacent samples in both the source and destination buffers
2539  * @param block_w width of block
2540  * @param block_h height of block
2541  * @param src_x x coordinate of the top left sample of the block in the source buffer
2542  * @param src_y y coordinate of the top left sample of the block in the source buffer
2543  * @param w width of the source buffer
2544  * @param h height of the source buffer
2545  */
2546 void ff_emulated_edge_mc(uint8_t *buf, uint8_t *src, int linesize, int block_w, int block_h, 
2547                                     int src_x, int src_y, int w, int h){
2548     int x, y;
2549     int start_y, start_x, end_y, end_x;
2550
2551     if(src_y>= h){
2552         src+= (h-1-src_y)*linesize;
2553         src_y=h-1;
2554     }else if(src_y<=-block_h){
2555         src+= (1-block_h-src_y)*linesize;
2556         src_y=1-block_h;
2557     }
2558     if(src_x>= w){
2559         src+= (w-1-src_x);
2560         src_x=w-1;
2561     }else if(src_x<=-block_w){
2562         src+= (1-block_w-src_x);
2563         src_x=1-block_w;
2564     }
2565
2566     start_y= FFMAX(0, -src_y);
2567     start_x= FFMAX(0, -src_x);
2568     end_y= FFMIN(block_h, h-src_y);
2569     end_x= FFMIN(block_w, w-src_x);
2570
2571     // copy existing part
2572     for(y=start_y; y<end_y; y++){
2573         for(x=start_x; x<end_x; x++){
2574             buf[x + y*linesize]= src[x + y*linesize];
2575         }
2576     }
2577
2578     //top
2579     for(y=0; y<start_y; y++){
2580         for(x=start_x; x<end_x; x++){
2581             buf[x + y*linesize]= buf[x + start_y*linesize];
2582         }
2583     }
2584
2585     //bottom
2586     for(y=end_y; y<block_h; y++){
2587         for(x=start_x; x<end_x; x++){
2588             buf[x + y*linesize]= buf[x + (end_y-1)*linesize];
2589         }
2590     }
2591                                     
2592     for(y=0; y<block_h; y++){
2593        //left
2594         for(x=0; x<start_x; x++){
2595             buf[x + y*linesize]= buf[start_x + y*linesize];
2596         }
2597        
2598        //right
2599         for(x=end_x; x<block_w; x++){
2600             buf[x + y*linesize]= buf[end_x - 1 + y*linesize];
2601         }
2602     }
2603 }
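/* Usage sketch (illustrative, hypothetical 16x16 luma read): callers detect
   that a motion-compensated read would touch samples outside the decoded
   area and fetch the block through edge_emu_buffer instead. */
#if 0
static void edge_emulation_example(MpegEncContext *s, uint8_t *ref, int src_x, int src_y)
{
    uint8_t *ptr = ref + src_y * s->linesize + src_x;

    if(   (unsigned)src_x > s->h_edge_pos - 16
       || (unsigned)src_y > s->v_edge_pos - 16){
        /* copy the reachable samples and replicate the border */
        ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->linesize, 16, 16,
                            src_x, src_y, s->h_edge_pos, s->v_edge_pos);
        ptr= s->edge_emu_buffer;   /* read the block from the padded copy */
    }
    /* ... motion compensation reads 16x16 samples from ptr ... */
    (void)ptr;
}
#endif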
2604
2605 static inline int hpel_motion(MpegEncContext *s, 
2606                                   uint8_t *dest, uint8_t *src,
2607                                   int field_based, int field_select,
2608                                   int src_x, int src_y,
2609                                   int width, int height, int stride,
2610                                   int h_edge_pos, int v_edge_pos,
2611                                   int w, int h, op_pixels_func *pix_op,
2612                                   int motion_x, int motion_y)
2613 {
2614     int dxy;
2615     int emu=0;
2616
2617     dxy = ((motion_y & 1) << 1) | (motion_x & 1);
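    /* dxy in 0..3 selects the half-pel interpolation function:
       bit 0 = horizontal half sample, bit 1 = vertical half sample */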
2618     src_x += motion_x >> 1;
2619     src_y += motion_y >> 1;
2620                 
2621     /* WARNING: do not forget half pels */
2622     src_x = clip(src_x, -16, width); //FIXME unneeded for emu?
2623     if (src_x == width)
2624         dxy &= ~1;
2625     src_y = clip(src_y, -16, height);
2626     if (src_y == height)
2627         dxy &= ~2;
2628     src += src_y * stride + src_x;
2629
2630     if(s->unrestricted_mv && (s->flags&CODEC_FLAG_EMU_EDGE)){
2631         if(   (unsigned)src_x > h_edge_pos - (motion_x&1) - w
2632            || (unsigned)src_y > v_edge_pos - (motion_y&1) - h){
2633             ff_emulated_edge_mc(s->edge_emu_buffer, src, s->linesize, w+1, (h+1)<<field_based,
2634                              src_x, src_y<<field_based, h_edge_pos, s->v_edge_pos);
2635             src= s->edge_emu_buffer;
2636             emu=1;
2637         }
2638     }
2639     if(field_select)
2640         src += s->linesize;
2641     pix_op[dxy](dest, src, stride, h);
2642     return emu;
2643 }
2644
2645 static inline int hpel_motion_lowres(MpegEncContext *s, 
2646                                   uint8_t *dest, uint8_t *src,
2647                                   int field_based, int field_select,
2648                                   int src_x, int src_y,
2649                                   int width, int height, int stride,
2650                                   int h_edge_pos, int v_edge_pos,
2651                                   int w, int h, h264_chroma_mc_func *pix_op,
2652                                   int motion_x, int motion_y)
2653 {
2654     const int lowres= s->avctx->lowres;
2655     const int s_mask= (2<<lowres)-1;
2656     int emu=0;
2657     int sx, sy;
2658
2659     if(s->quarter_sample){
2660         motion_x/=2;
2661         motion_y/=2;
2662     }
2663
2664     sx= motion_x & s_mask;
2665     sy= motion_y & s_mask;
2666     src_x += motion_x >> (lowres+1);
2667     src_y += motion_y >> (lowres+1);
2668                 
2669     src += src_y * stride + src_x;
2670
2671     if(   (unsigned)src_x > h_edge_pos                 - (!!sx) - w
2672        || (unsigned)src_y >(v_edge_pos >> field_based) - (!!sy) - h){
2673         ff_emulated_edge_mc(s->edge_emu_buffer, src, s->linesize, w+1, (h+1)<<field_based,
2674                             src_x, src_y<<field_based, h_edge_pos, v_edge_pos);
2675         src= s->edge_emu_buffer;
2676         emu=1;
2677     }
2678
2679     sx <<= 2 - lowres;
2680     sy <<= 2 - lowres;
2681     if(field_select)
2682         src += s->linesize;
2683     pix_op[lowres](dest, src, stride, h, sx, sy);
2684     return emu;
2685 }
2686
2687 /* apply one mpeg motion vector to the three components */
2688 static always_inline void mpeg_motion(MpegEncContext *s,
2689                                uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2690                                int field_based, int bottom_field, int field_select,
2691                                uint8_t **ref_picture, op_pixels_func (*pix_op)[4],
2692                                int motion_x, int motion_y, int h)
2693 {
2694     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
2695     int dxy, uvdxy, mx, my, src_x, src_y, uvsrc_x, uvsrc_y, v_edge_pos, uvlinesize, linesize;
2696     
2697 #if 0    
2698 if(s->quarter_sample)
2699 {
2700     motion_x>>=1;
2701     motion_y>>=1;
2702 }
2703 #endif
2704
2705     v_edge_pos = s->v_edge_pos >> field_based;
2706     linesize   = s->current_picture.linesize[0] << field_based;
2707     uvlinesize = s->current_picture.linesize[1] << field_based;
2708
2709     dxy = ((motion_y & 1) << 1) | (motion_x & 1);
2710     src_x = s->mb_x* 16               + (motion_x >> 1);
2711     src_y =(s->mb_y<<(4-field_based)) + (motion_y >> 1);
2712
2713     if (s->out_format == FMT_H263) {
2714         if((s->workaround_bugs & FF_BUG_HPEL_CHROMA) && field_based){
2715             mx = (motion_x>>1)|(motion_x&1);
2716             my = motion_y >>1;
2717             uvdxy = ((my & 1) << 1) | (mx & 1);
2718             uvsrc_x = s->mb_x* 8               + (mx >> 1);
2719             uvsrc_y = (s->mb_y<<(3-field_based)) + (my >> 1);
2720         }else{
2721             uvdxy = dxy | (motion_y & 2) | ((motion_x & 2) >> 1);
2722             uvsrc_x = src_x>>1;
2723             uvsrc_y = src_y>>1;
2724         }
2725     }else if(s->out_format == FMT_H261){//even chroma mv's are full pel in H261
2726         mx = motion_x / 4;
2727         my = motion_y / 4;
2728         uvdxy = 0;
2729         uvsrc_x = s->mb_x*8 + mx;
2730         uvsrc_y = s->mb_y*8 + my;
2731     } else {
2732         if(s->chroma_y_shift){
2733             mx = motion_x / 2;
2734             my = motion_y / 2;
2735             uvdxy = ((my & 1) << 1) | (mx & 1);
2736             uvsrc_x = s->mb_x* 8               + (mx >> 1);
2737             uvsrc_y = (s->mb_y<<(3-field_based)) + (my >> 1);
2738         } else {
2739             if(s->chroma_x_shift){
2740             //Chroma422
2741                 mx = motion_x / 2;
2742                 uvdxy = ((motion_y & 1) << 1) | (mx & 1);
2743                 uvsrc_x = s->mb_x* 8           + (mx >> 1);
2744                 uvsrc_y = src_y;
2745             } else {
2746             //Chroma444
2747                 uvdxy = dxy;
2748                 uvsrc_x = src_x;
2749                 uvsrc_y = src_y;
2750             }
2751         }
2752     }
2753
2754     ptr_y  = ref_picture[0] + src_y * linesize + src_x;
2755     ptr_cb = ref_picture[1] + uvsrc_y * uvlinesize + uvsrc_x;
2756     ptr_cr = ref_picture[2] + uvsrc_y * uvlinesize + uvsrc_x;
2757
2758     if(   (unsigned)src_x > s->h_edge_pos - (motion_x&1) - 16
2759        || (unsigned)src_y >    v_edge_pos - (motion_y&1) - h){
2760             if(s->codec_id == CODEC_ID_MPEG2VIDEO ||
2761                s->codec_id == CODEC_ID_MPEG1VIDEO){
2762                 av_log(s->avctx,AV_LOG_DEBUG,"MPEG motion vector out of boundary\n");
2763                 return ;
2764             }
2765             ff_emulated_edge_mc(s->edge_emu_buffer, ptr_y, s->linesize, 17, 17+field_based,
2766                              src_x, src_y<<field_based, s->h_edge_pos, s->v_edge_pos);
2767             ptr_y = s->edge_emu_buffer;
2768             if(!(s->flags&CODEC_FLAG_GRAY)){
2769                 uint8_t *uvbuf= s->edge_emu_buffer+18*s->linesize;
2770                 ff_emulated_edge_mc(uvbuf  , ptr_cb, s->uvlinesize, 9, 9+field_based, 
2771                                  uvsrc_x, uvsrc_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
2772                 ff_emulated_edge_mc(uvbuf+16, ptr_cr, s->uvlinesize, 9, 9+field_based, 
2773                                  uvsrc_x, uvsrc_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
2774                 ptr_cb= uvbuf;
2775                 ptr_cr= uvbuf+16;
2776             }
2777     }
2778
2779     if(bottom_field){ //FIXME use this for field pix too instead of the obnoxious hack which changes picture.data
2780         dest_y += s->linesize;
2781         dest_cb+= s->uvlinesize;
2782         dest_cr+= s->uvlinesize;
2783     }
2784
2785     if(field_select){
2786         ptr_y += s->linesize;
2787         ptr_cb+= s->uvlinesize;
2788         ptr_cr+= s->uvlinesize;
2789     }
2790
2791     pix_op[0][dxy](dest_y, ptr_y, linesize, h);
2792     
2793     if(!(s->flags&CODEC_FLAG_GRAY)){
2794         pix_op[s->chroma_x_shift][uvdxy](dest_cb, ptr_cb, uvlinesize, h >> s->chroma_y_shift);
2795         pix_op[s->chroma_x_shift][uvdxy](dest_cr, ptr_cr, uvlinesize, h >> s->chroma_y_shift);
2796     }
2797     if(s->out_format == FMT_H261){
2798         ff_h261_loop_filter(s);
2799     }
2800 }
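
/* Worked example (illustrative only): for a frame macroblock in the 4:2:0
 * MPEG-1/2 branch above, motion_x == motion_y == 3 (+1.5 luma pels) gives
 * dxy == 3 (half-pel in both directions) and a +1 pel integer luma offset.
 * Chroma truncates to mx == my == 1 half chroma pel, so uvdxy == 3 with a 0
 * integer offset: the chroma prediction is fetched at +0.5 chroma pel. */
#if 0
static void mpeg_motion_chroma_example(void){
    int motion_x = 3, motion_y = 3;
    int dxy   = ((motion_y & 1) << 1) | (motion_x & 1);   /* == 3              */
    int mx    = motion_x / 2, my = motion_y / 2;          /* == 1, 1           */
    int uvdxy = ((my & 1) << 1) | (mx & 1);               /* == 3              */
    int uvoff = mx >> 1;                                  /* == 0 integer pels */
    (void)dxy; (void)uvdxy; (void)uvoff;
}
#endif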
2801
2802 /* apply one mpeg motion vector to the three components */
2803 static always_inline void mpeg_motion_lowres(MpegEncContext *s,
2804                                uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2805                                int field_based, int bottom_field, int field_select,
2806                                uint8_t **ref_picture, h264_chroma_mc_func *pix_op,
2807                                int motion_x, int motion_y, int h)
2808 {
2809     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
2810     int mx, my, src_x, src_y, uvsrc_x, uvsrc_y, uvlinesize, linesize, sx, sy, uvsx, uvsy;
2811     const int lowres= s->avctx->lowres;
2812     const int block_s= 8>>lowres;
2813     const int s_mask= (2<<lowres)-1;
2814     const int h_edge_pos = s->h_edge_pos >> lowres;
2815     const int v_edge_pos = s->v_edge_pos >> lowres;
2816     linesize   = s->current_picture.linesize[0] << field_based;
2817     uvlinesize = s->current_picture.linesize[1] << field_based;
2818
2819     if(s->quarter_sample){ //FIXME obviously not perfect but qpel won't work in lowres anyway
2820         motion_x/=2;
2821         motion_y/=2;
2822     }
2823     
2824     if(field_based){
2825         motion_y += (bottom_field - field_select)*((1<<lowres)-1);
2826     }
2827
2828     sx= motion_x & s_mask;
2829     sy= motion_y & s_mask;
2830     src_x = s->mb_x*2*block_s               + (motion_x >> (lowres+1));
2831     src_y =(s->mb_y*2*block_s>>field_based) + (motion_y >> (lowres+1));
2832     
2833     if (s->out_format == FMT_H263) {
2834         uvsx = ((motion_x>>1) & s_mask) | (sx&1);
2835         uvsy = ((motion_y>>1) & s_mask) | (sy&1);
2836         uvsrc_x = src_x>>1;
2837         uvsrc_y = src_y>>1;
2838     }else if(s->out_format == FMT_H261){//even chroma mv's are full pel in H261
2839         mx = motion_x / 4;
2840         my = motion_y / 4;
2841         uvsx = (2*mx) & s_mask;
2842         uvsy = (2*my) & s_mask;
2843         uvsrc_x = s->mb_x*block_s               + (mx >> lowres);
2844         uvsrc_y = s->mb_y*block_s               + (my >> lowres);
2845     } else {
2846         mx = motion_x / 2;
2847         my = motion_y / 2;
2848         uvsx = mx & s_mask;
2849         uvsy = my & s_mask;
2850         uvsrc_x = s->mb_x*block_s               + (mx >> (lowres+1));
2851         uvsrc_y =(s->mb_y*block_s>>field_based) + (my >> (lowres+1));
2852     }
2853
2854     ptr_y  = ref_picture[0] + src_y * linesize + src_x;
2855     ptr_cb = ref_picture[1] + uvsrc_y * uvlinesize + uvsrc_x;
2856     ptr_cr = ref_picture[2] + uvsrc_y * uvlinesize + uvsrc_x;
2857
2858     if(   (unsigned)src_x > h_edge_pos                 - (!!sx) - 2*block_s
2859        || (unsigned)src_y >(v_edge_pos >> field_based) - (!!sy) - h){
2860             ff_emulated_edge_mc(s->edge_emu_buffer, ptr_y, s->linesize, 17, 17+field_based,
2861                              src_x, src_y<<field_based, h_edge_pos, v_edge_pos);
2862             ptr_y = s->edge_emu_buffer;
2863             if(!(s->flags&CODEC_FLAG_GRAY)){
2864                 uint8_t *uvbuf= s->edge_emu_buffer+18*s->linesize;
2865                 ff_emulated_edge_mc(uvbuf  , ptr_cb, s->uvlinesize, 9, 9+field_based, 
2866                                  uvsrc_x, uvsrc_y<<field_based, h_edge_pos>>1, v_edge_pos>>1);
2867                 ff_emulated_edge_mc(uvbuf+16, ptr_cr, s->uvlinesize, 9, 9+field_based, 
2868                                  uvsrc_x, uvsrc_y<<field_based, h_edge_pos>>1, v_edge_pos>>1);
2869                 ptr_cb= uvbuf;
2870                 ptr_cr= uvbuf+16;
2871             }
2872     }
2873
2874     if(bottom_field){ //FIXME use this for field pix too instead of the obnoxious hack which changes picture.data
2875         dest_y += s->linesize;
2876         dest_cb+= s->uvlinesize;
2877         dest_cr+= s->uvlinesize;
2878     }
2879
2880     if(field_select){
2881         ptr_y += s->linesize;
2882         ptr_cb+= s->uvlinesize;
2883         ptr_cr+= s->uvlinesize;
2884     }
2885
2886     sx <<= 2 - lowres;
2887     sy <<= 2 - lowres;
2888     pix_op[lowres-1](dest_y, ptr_y, linesize, h, sx, sy);
2889     
2890     if(!(s->flags&CODEC_FLAG_GRAY)){
2891         uvsx <<= 2 - lowres;
2892         uvsy <<= 2 - lowres;
2893         pix_op[lowres](dest_cb, ptr_cb, uvlinesize, h >> s->chroma_y_shift, uvsx, uvsy);
2894         pix_op[lowres](dest_cr, ptr_cr, uvlinesize, h >> s->chroma_y_shift, uvsx, uvsy);
2895     }
2896     //FIXME h261 lowres loop filter
2897 }
2898
2899 //FIXME move to dsputil, avg variant, 16x16 version
2900 static inline void put_obmc(uint8_t *dst, uint8_t *src[5], int stride){
2901     int x;
2902     uint8_t * const top   = src[1];
2903     uint8_t * const left  = src[2];
2904     uint8_t * const mid   = src[0];
2905     uint8_t * const right = src[3];
2906     uint8_t * const bottom= src[4];
2907 #define OBMC_FILTER(x, t, l, m, r, b)\
2908     dst[x]= (t*top[x] + l*left[x] + m*mid[x] + r*right[x] + b*bottom[x] + 4)>>3
2909 #define OBMC_FILTER4(x, t, l, m, r, b)\
2910     OBMC_FILTER(x         , t, l, m, r, b);\
2911     OBMC_FILTER(x+1       , t, l, m, r, b);\
2912     OBMC_FILTER(x  +stride, t, l, m, r, b);\
2913     OBMC_FILTER(x+1+stride, t, l, m, r, b);
2914     
2915     x=0;
2916     OBMC_FILTER (x  , 2, 2, 4, 0, 0);
2917     OBMC_FILTER (x+1, 2, 1, 5, 0, 0);
2918     OBMC_FILTER4(x+2, 2, 1, 5, 0, 0);
2919     OBMC_FILTER4(x+4, 2, 0, 5, 1, 0);
2920     OBMC_FILTER (x+6, 2, 0, 5, 1, 0);
2921     OBMC_FILTER (x+7, 2, 0, 4, 2, 0);
2922     x+= stride;
2923     OBMC_FILTER (x  , 1, 2, 5, 0, 0);
2924     OBMC_FILTER (x+1, 1, 2, 5, 0, 0);
2925     OBMC_FILTER (x+6, 1, 0, 5, 2, 0);
2926     OBMC_FILTER (x+7, 1, 0, 5, 2, 0);
2927     x+= stride;
2928     OBMC_FILTER4(x  , 1, 2, 5, 0, 0);
2929     OBMC_FILTER4(x+2, 1, 1, 6, 0, 0);
2930     OBMC_FILTER4(x+4, 1, 0, 6, 1, 0);
2931     OBMC_FILTER4(x+6, 1, 0, 5, 2, 0);
2932     x+= 2*stride;
2933     OBMC_FILTER4(x  , 0, 2, 5, 0, 1);
2934     OBMC_FILTER4(x+2, 0, 1, 6, 0, 1);
2935     OBMC_FILTER4(x+4, 0, 0, 6, 1, 1);
2936     OBMC_FILTER4(x+6, 0, 0, 5, 2, 1);
2937     x+= 2*stride;
2938     OBMC_FILTER (x  , 0, 2, 5, 0, 1);
2939     OBMC_FILTER (x+1, 0, 2, 5, 0, 1);
2940     OBMC_FILTER4(x+2, 0, 1, 5, 0, 2);
2941     OBMC_FILTER4(x+4, 0, 0, 5, 1, 2);
2942     OBMC_FILTER (x+6, 0, 0, 5, 2, 1);
2943     OBMC_FILTER (x+7, 0, 0, 5, 2, 1);
2944     x+= stride;
2945     OBMC_FILTER (x  , 0, 2, 4, 0, 2);
2946     OBMC_FILTER (x+1, 0, 1, 5, 0, 2);
2947     OBMC_FILTER (x+6, 0, 0, 5, 1, 2);
2948     OBMC_FILTER (x+7, 0, 0, 4, 2, 2);
2949 }
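
/* Note (illustrative): every OBMC_FILTER weight set above sums to 8, so each
 * output pixel is a rounded weighted average of the mid prediction and its
 * neighbours.  The top-left pixel, for instance, uses 2/8 top + 2/8 left +
 * 4/8 mid, which is exactly the arithmetic below. */
#if 0
static inline uint8_t obmc_corner_example(uint8_t top, uint8_t left, uint8_t mid){
    return (2*top + 2*left + 4*mid + 4) >> 3;  /* OBMC_FILTER(0, 2,2,4,0,0) */
}
#endif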
2950
2951 /* obmc for 1 8x8 luma block */
2952 static inline void obmc_motion(MpegEncContext *s,
2953                                uint8_t *dest, uint8_t *src,
2954                                int src_x, int src_y,
2955                                op_pixels_func *pix_op,
2956                                int16_t mv[5][2]/* mid top left right bottom*/)
2957 #define MID    0
2958 {
2959     int i;
2960     uint8_t *ptr[5];
2961     
2962     assert(s->quarter_sample==0);
2963     
2964     for(i=0; i<5; i++){
2965         if(i && mv[i][0]==mv[MID][0] && mv[i][1]==mv[MID][1]){
2966             ptr[i]= ptr[MID];
2967         }else{
2968             ptr[i]= s->obmc_scratchpad + 8*(i&1) + s->linesize*8*(i>>1);
2969             hpel_motion(s, ptr[i], src, 0, 0,
2970                         src_x, src_y,
2971                         s->width, s->height, s->linesize,
2972                         s->h_edge_pos, s->v_edge_pos,
2973                         8, 8, pix_op,
2974                         mv[i][0], mv[i][1]);
2975         }
2976     }
2977
2978     put_obmc(dest, ptr, s->linesize);                
2979 }
2980
2981 static inline void qpel_motion(MpegEncContext *s,
2982                                uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2983                                int field_based, int bottom_field, int field_select,
2984                                uint8_t **ref_picture, op_pixels_func (*pix_op)[4],
2985                                qpel_mc_func (*qpix_op)[16],
2986                                int motion_x, int motion_y, int h)
2987 {
2988     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
2989     int dxy, uvdxy, mx, my, src_x, src_y, uvsrc_x, uvsrc_y, v_edge_pos, linesize, uvlinesize;
2990
2991     dxy = ((motion_y & 3) << 2) | (motion_x & 3);
2992     src_x = s->mb_x *  16                 + (motion_x >> 2);
2993     src_y = s->mb_y * (16 >> field_based) + (motion_y >> 2);
2994
2995     v_edge_pos = s->v_edge_pos >> field_based;
2996     linesize = s->linesize << field_based;
2997     uvlinesize = s->uvlinesize << field_based;
2998     
2999     if(field_based){
3000         mx= motion_x/2;
3001         my= motion_y>>1;
3002     }else if(s->workaround_bugs&FF_BUG_QPEL_CHROMA2){
3003         static const int rtab[8]= {0,0,1,1,0,0,0,1};
3004         mx= (motion_x>>1) + rtab[motion_x&7];
3005         my= (motion_y>>1) + rtab[motion_y&7];
3006     }else if(s->workaround_bugs&FF_BUG_QPEL_CHROMA){
3007         mx= (motion_x>>1)|(motion_x&1);
3008         my= (motion_y>>1)|(motion_y&1);
3009     }else{
3010         mx= motion_x/2;
3011         my= motion_y/2;
3012     }
3013     mx= (mx>>1)|(mx&1);
3014     my= (my>>1)|(my&1);
3015
3016     uvdxy= (mx&1) | ((my&1)<<1);
3017     mx>>=1;
3018     my>>=1;
3019
3020     uvsrc_x = s->mb_x *  8                 + mx;
3021     uvsrc_y = s->mb_y * (8 >> field_based) + my;
3022
3023     ptr_y  = ref_picture[0] +   src_y *   linesize +   src_x;
3024     ptr_cb = ref_picture[1] + uvsrc_y * uvlinesize + uvsrc_x;
3025     ptr_cr = ref_picture[2] + uvsrc_y * uvlinesize + uvsrc_x;
3026
3027     if(   (unsigned)src_x > s->h_edge_pos - (motion_x&3) - 16 
3028        || (unsigned)src_y >    v_edge_pos - (motion_y&3) - h  ){
3029         ff_emulated_edge_mc(s->edge_emu_buffer, ptr_y, s->linesize, 17, 17+field_based, 
3030                          src_x, src_y<<field_based, s->h_edge_pos, s->v_edge_pos);
3031         ptr_y= s->edge_emu_buffer;
3032         if(!(s->flags&CODEC_FLAG_GRAY)){
3033             uint8_t *uvbuf= s->edge_emu_buffer + 18*s->linesize;
3034             ff_emulated_edge_mc(uvbuf, ptr_cb, s->uvlinesize, 9, 9 + field_based, 
3035                              uvsrc_x, uvsrc_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
3036             ff_emulated_edge_mc(uvbuf + 16, ptr_cr, s->uvlinesize, 9, 9 + field_based, 
3037                              uvsrc_x, uvsrc_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
3038             ptr_cb= uvbuf;
3039             ptr_cr= uvbuf + 16;
3040         }
3041     }
3042
3043     if(!field_based)
3044         qpix_op[0][dxy](dest_y, ptr_y, linesize);
3045     else{
3046         if(bottom_field){
3047             dest_y += s->linesize;
3048             dest_cb+= s->uvlinesize;
3049             dest_cr+= s->uvlinesize;
3050         }
3051
3052         if(field_select){
3053             ptr_y  += s->linesize;
3054             ptr_cb += s->uvlinesize;
3055             ptr_cr += s->uvlinesize;
3056         }
3057         //damn interlaced mode
3058         //FIXME boundary mirroring is not exactly correct here
3059         qpix_op[1][dxy](dest_y  , ptr_y  , linesize);
3060         qpix_op[1][dxy](dest_y+8, ptr_y+8, linesize);
3061     }
3062     if(!(s->flags&CODEC_FLAG_GRAY)){
3063         pix_op[1][uvdxy](dest_cr, ptr_cr, uvlinesize, h >> 1);
3064         pix_op[1][uvdxy](dest_cb, ptr_cb, uvlinesize, h >> 1);
3065     }
3066 }
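
/* Worked example (illustrative only): with no bug workarounds and a frame
 * macroblock, a quarter-pel luma vector of +5 (+1.25 pels) is reduced for
 * chroma as traced below; the result is a 0 integer chroma offset with
 * half-pel interpolation, i.e. +0.5 chroma pel. */
#if 0
static void qpel_chroma_rounding_example(void){
    int motion_x = 5;             /* quarter-pel luma vector: +1.25 pels          */
    int mx, uvdxy_x;
    mx = motion_x / 2;            /* == 2, chroma vector in quarter-pel units     */
    mx = (mx >> 1) | (mx & 1);    /* == 1, reduce to half-pel units, odd==halfpel */
    uvdxy_x = mx & 1;             /* == 1, select the half-pel interpolator       */
    mx >>= 1;                     /* == 0, integer chroma offset                  */
    (void)uvdxy_x; (void)mx;
}
#endif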
3067
3068 inline int ff_h263_round_chroma(int x){
3069     if (x >= 0)
3070         return  (h263_chroma_roundtab[x & 0xf] + ((x >> 3) & ~1));
3071     else {
3072         x = -x;
3073         return -(h263_chroma_roundtab[x & 0xf] + ((x >> 3) & ~1));
3074     }
3075 }
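
/* Illustrative example: chroma_4mv_motion() below passes this function the sum
 * of the four half-pel luma vectors of a 4MV macroblock, so the result is
 * roughly that sum divided by 8 (average of four, then halved for 4:2:0), with
 * h263_chroma_roundtab biasing the remainder towards half-pel positions. */
#if 0
static void chroma_round_example(void){
    int sum = 16;                              /* four 8x8 vectors of +2 luma pels each */
    int chroma_mv = ff_h263_round_chroma(sum); /* == 2 half-pel units == 1 chroma pel   */
    (void)chroma_mv;
}
#endif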
3076
3077 /**
3078  * h263 chroma 4mv motion compensation.
3079  */
3080 static inline void chroma_4mv_motion(MpegEncContext *s,
3081                                      uint8_t *dest_cb, uint8_t *dest_cr,
3082                                      uint8_t **ref_picture,
3083                                      op_pixels_func *pix_op,
3084                                      int mx, int my){
3085     int dxy, emu=0, src_x, src_y, offset;
3086     uint8_t *ptr;
3087     
3088     /* In case of 8X8, we construct a single chroma motion vector
3089        with a special rounding */
3090     mx= ff_h263_round_chroma(mx);
3091     my= ff_h263_round_chroma(my);
3092     
3093     dxy = ((my & 1) << 1) | (mx & 1);
3094     mx >>= 1;
3095     my >>= 1;
3096
3097     src_x = s->mb_x * 8 + mx;
3098     src_y = s->mb_y * 8 + my;
3099     src_x = clip(src_x, -8, s->width/2);
3100     if (src_x == s->width/2)
3101         dxy &= ~1;
3102     src_y = clip(src_y, -8, s->height/2);
3103     if (src_y == s->height/2)
3104         dxy &= ~2;
3105     
3106     offset = (src_y * (s->uvlinesize)) + src_x;
3107     ptr = ref_picture[1] + offset;
3108     if(s->flags&CODEC_FLAG_EMU_EDGE){
3109         if(   (unsigned)src_x > (s->h_edge_pos>>1) - (dxy &1) - 8
3110            || (unsigned)src_y > (s->v_edge_pos>>1) - (dxy>>1) - 8){
3111             ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
3112             ptr= s->edge_emu_buffer;
3113             emu=1;
3114         }
3115     }
3116     pix_op[dxy](dest_cb, ptr, s->uvlinesize, 8);
3117
3118     ptr = ref_picture[2] + offset;
3119     if(emu){
3120         ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
3121         ptr= s->edge_emu_buffer;
3122     }
3123     pix_op[dxy](dest_cr, ptr, s->uvlinesize, 8);
3124 }
3125
3126 static inline void chroma_4mv_motion_lowres(MpegEncContext *s,
3127                                      uint8_t *dest_cb, uint8_t *dest_cr,
3128                                      uint8_t **ref_picture,
3129                                      h264_chroma_mc_func *pix_op,
3130                                      int mx, int my){
3131     const int lowres= s->avctx->lowres;
3132     const int block_s= 8>>lowres;
3133     const int s_mask= (2<<lowres)-1;
3134     const int h_edge_pos = s->h_edge_pos >> (lowres+1);
3135     const int v_edge_pos = s->v_edge_pos >> (lowres+1);
3136     int emu=0, src_x, src_y, offset, sx, sy;
3137     uint8_t *ptr;
3138     
3139     if(s->quarter_sample){
3140         mx/=2;
3141         my/=2;
3142     }
3143
3144     /* In case of 8X8, we construct a single chroma motion vector
3145        with a special rounding */
3146     mx= ff_h263_round_chroma(mx);
3147     my= ff_h263_round_chroma(my);
3148     
3149     sx= mx & s_mask;
3150     sy= my & s_mask;
3151     src_x = s->mb_x*block_s + (mx >> (lowres+1));
3152     src_y = s->mb_y*block_s + (my >> (lowres+1));
3153     
3154     offset = src_y * s->uvlinesize + src_x;
3155     ptr = ref_picture[1] + offset;
3156     if(s->flags&CODEC_FLAG_EMU_EDGE){
3157         if(   (unsigned)src_x > h_edge_pos - (!!sx) - block_s
3158            || (unsigned)src_y > v_edge_pos - (!!sy) - block_s){
3159             ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, src_x, src_y, h_edge_pos, v_edge_pos);
3160             ptr= s->edge_emu_buffer;
3161             emu=1;
3162         }
3163     }     
3164     sx <<= 2 - lowres;
3165     sy <<= 2 - lowres;
3166     pix_op[lowres](dest_cb, ptr, s->uvlinesize, block_s, sx, sy);
3167           
3168     ptr = ref_picture[2] + offset;
3169     if(emu){
3170         ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, src_x, src_y, h_edge_pos, v_edge_pos);
3171         ptr= s->edge_emu_buffer;
3172     }
3173     pix_op[lowres](dest_cr, ptr, s->uvlinesize, block_s, sx, sy);
3174 }
3175
3176 /**
3177  * motion compensation of a single macroblock
3178  * @param s context
3179  * @param dest_y luma destination pointer
3180  * @param dest_cb chroma cb/u destination pointer
3181  * @param dest_cr chroma cr/v destination pointer
3182  * @param dir direction (0->forward, 1->backward)
3183  * @param ref_picture array[3] of pointers to the 3 planes of the reference picture
3184  * @param pix_op halfpel motion compensation function (average or put normally)
3185  * @param qpix_op qpel motion compensation function (average or put normally)
3186  * the motion vectors are taken from s->mv and the MV type from s->mv_type
3187  */
3188 static inline void MPV_motion(MpegEncContext *s, 
3189                               uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
3190                               int dir, uint8_t **ref_picture, 
3191                               op_pixels_func (*pix_op)[4], qpel_mc_func (*qpix_op)[16])
3192 {
3193     int dxy, mx, my, src_x, src_y, motion_x, motion_y;
3194     int mb_x, mb_y, i;
3195     uint8_t *ptr, *dest;
3196
3197     mb_x = s->mb_x;
3198     mb_y = s->mb_y;
3199
3200     if(s->obmc && s->pict_type != B_TYPE){
3201         int16_t mv_cache[4][4][2];
3202         const int xy= s->mb_x + s->mb_y*s->mb_stride;
3203         const int mot_stride= s->b8_stride;
3204         const int mot_xy= mb_x*2 + mb_y*2*mot_stride;
3205
3206         assert(!s->mb_skiped);
3207                 
3208         memcpy(mv_cache[1][1], s->current_picture.motion_val[0][mot_xy           ], sizeof(int16_t)*4);
3209         memcpy(mv_cache[2][1], s->current_picture.motion_val[0][mot_xy+mot_stride], sizeof(int16_t)*4);
3210         memcpy(mv_cache[3][1], s->current_picture.motion_val[0][mot_xy+mot_stride], sizeof(int16_t)*4);
3211
3212         if(mb_y==0 || IS_INTRA(s->current_picture.mb_type[xy-s->mb_stride])){
3213             memcpy(mv_cache[0][1], mv_cache[1][1], sizeof(int16_t)*4);
3214         }else{
3215             memcpy(mv_cache[0][1], s->current_picture.motion_val[0][mot_xy-mot_stride], sizeof(int16_t)*4);
3216         }
3217
3218         if(mb_x==0 || IS_INTRA(s->current_picture.mb_type[xy-1])){
3219             *(int32_t*)mv_cache[1][0]= *(int32_t*)mv_cache[1][1];
3220             *(int32_t*)mv_cache[2][0]= *(int32_t*)mv_cache[2][1];
3221         }else{
3222             *(int32_t*)mv_cache[1][0]= *(int32_t*)s->current_picture.motion_val[0][mot_xy-1];
3223             *(int32_t*)mv_cache[2][0]= *(int32_t*)s->current_picture.motion_val[0][mot_xy-1+mot_stride];
3224         }
3225
3226         if(mb_x+1>=s->mb_width || IS_INTRA(s->current_picture.mb_type[xy+1])){
3227             *(int32_t*)mv_cache[1][3]= *(int32_t*)mv_cache[1][2];
3228             *(int32_t*)mv_cache[2][3]= *(int32_t*)mv_cache[2][2];
3229         }else{
3230             *(int32_t*)mv_cache[1][3]= *(int32_t*)s->current_picture.motion_val[0][mot_xy+2];
3231             *(int32_t*)mv_cache[2][3]= *(int32_t*)s->current_picture.motion_val[0][mot_xy+2+mot_stride];
3232         }
3233         
3234         mx = 0;
3235         my = 0;
3236         for(i=0;i<4;i++) {
3237             const int x= (i&1)+1;
3238             const int y= (i>>1)+1;
3239             int16_t mv[5][2]= {
3240                 {mv_cache[y][x  ][0], mv_cache[y][x  ][1]},
3241                 {mv_cache[y-1][x][0], mv_cache[y-1][x][1]},
3242                 {mv_cache[y][x-1][0], mv_cache[y][x-1][1]},
3243                 {mv_cache[y][x+1][0], mv_cache[y][x+1][1]},
3244                 {mv_cache[y+1][x][0], mv_cache[y+1][x][1]}};
3245             //FIXME cleanup
3246             obmc_motion(s, dest_y + ((i & 1) * 8) + (i >> 1) * 8 * s->linesize,
3247                         ref_picture[0],
3248                         mb_x * 16 + (i & 1) * 8, mb_y * 16 + (i >>1) * 8,
3249                         pix_op[1],
3250                         mv);
3251
3252             mx += mv[0][0];
3253             my += mv[0][1];
3254         }
3255         if(!(s->flags&CODEC_FLAG_GRAY))
3256             chroma_4mv_motion(s, dest_cb, dest_cr, ref_picture, pix_op[1], mx, my);
3257
3258         return;
3259     }
3260    
3261     switch(s->mv_type) {
3262     case MV_TYPE_16X16:
3263         if(s->mcsel){
3264             if(s->real_sprite_warping_points==1){
3265                 gmc1_motion(s, dest_y, dest_cb, dest_cr,
3266                             ref_picture);
3267             }else{
3268                 gmc_motion(s, dest_y, dest_cb, dest_cr,
3269                             ref_picture);
3270             }
3271         }else if(s->quarter_sample){
3272             qpel_motion(s, dest_y, dest_cb, dest_cr, 
3273                         0, 0, 0,
3274                         ref_picture, pix_op, qpix_op,
3275                         s->mv[dir][0][0], s->mv[dir][0][1], 16);
3276         }else if(s->mspel){
3277             ff_mspel_motion(s, dest_y, dest_cb, dest_cr,
3278                         ref_picture, pix_op,
3279                         s->mv[dir][0][0], s->mv[dir][0][1], 16);
3280         }else
3281         {
3282             mpeg_motion(s, dest_y, dest_cb, dest_cr, 
3283                         0, 0, 0,
3284                         ref_picture, pix_op,
3285                         s->mv[dir][0][0], s->mv[dir][0][1], 16);
3286         }           
3287         break;
3288     case MV_TYPE_8X8:
3289         mx = 0;
3290         my = 0;
3291         if(s->quarter_sample){
3292             for(i=0;i<4;i++) {
3293                 motion_x = s->mv[dir][i][0];
3294                 motion_y = s->mv[dir][i][1];
3295
3296                 dxy = ((motion_y & 3) << 2) | (motion_x & 3);
3297                 src_x = mb_x * 16 + (motion_x >> 2) + (i & 1) * 8;
3298                 src_y = mb_y * 16 + (motion_y >> 2) + (i >>1) * 8;
3299                     
3300                 /* WARNING: do not forget half pels */
3301                 src_x = clip(src_x, -16, s->width);
3302                 if (src_x == s->width)
3303                     dxy &= ~3;
3304                 src_y = clip(src_y, -16, s->height);
3305                 if (src_y == s->height)
3306                     dxy &= ~12;
3307                     
3308                 ptr = ref_picture[0] + (src_y * s->linesize) + (src_x);
3309                 if(s->flags&CODEC_FLAG_EMU_EDGE){
3310                     if(   (unsigned)src_x > s->h_edge_pos - (motion_x&3) - 8 
3311                        || (unsigned)src_y > s->v_edge_pos - (motion_y&3) - 8 ){
3312                         ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->linesize, 9, 9, src_x, src_y, s->h_edge_pos, s->v_edge_pos);
3313                         ptr= s->edge_emu_buffer;
3314                     }
3315                 }
3316                 dest = dest_y + ((i & 1) * 8) + (i >> 1) * 8 * s->linesize;
3317                 qpix_op[1][dxy](dest, ptr, s->linesize);
3318
3319                 mx += s->mv[dir][i][0]/2;
3320                 my += s->mv[dir][i][1]/2;
3321             }
3322         }else{
3323             for(i=0;i<4;i++) {
3324                 hpel_motion(s, dest_y + ((i & 1) * 8) + (i >> 1) * 8 * s->linesize,
3325                             ref_picture[0], 0, 0,
3326                             mb_x * 16 + (i & 1) * 8, mb_y * 16 + (i >>1) * 8,
3327                             s->width, s->height, s->linesize,
3328                             s->h_edge_pos, s->v_edge_pos,
3329                             8, 8, pix_op[1],
3330                             s->mv[dir][i][0], s->mv[dir][i][1]);
3331
3332                 mx += s->mv[dir][i][0];
3333                 my += s->mv[dir][i][1];
3334             }
3335         }
3336
3337         if(!(s->flags&CODEC_FLAG_GRAY))
3338             chroma_4mv_motion(s, dest_cb, dest_cr, ref_picture, pix_op[1], mx, my);
3339         break;
3340     case MV_TYPE_FIELD:
3341         if (s->picture_structure == PICT_FRAME) {
3342             if(s->quarter_sample){
3343                 for(i=0; i<2; i++){
3344                     qpel_motion(s, dest_y, dest_cb, dest_cr,
3345                                 1, i, s->field_select[dir][i],
3346                                 ref_picture, pix_op, qpix_op,
3347                                 s->mv[dir][i][0], s->mv[dir][i][1], 8);
3348                 }
3349             }else{
3350                 /* top field */       
3351                 mpeg_motion(s, dest_y, dest_cb, dest_cr,
3352                             1, 0, s->field_select[dir][0],
3353                             ref_picture, pix_op,
3354                             s->mv[dir][0][0], s->mv[dir][0][1], 8);
3355                 /* bottom field */
3356                 mpeg_motion(s, dest_y, dest_cb, dest_cr,
3357                             1, 1, s->field_select[dir][1],
3358                             ref_picture, pix_op,
3359                             s->mv[dir][1][0], s->mv[dir][1][1], 8);
3360             }
3361         } else {
3362             if(s->picture_structure != s->field_select[dir][0] + 1 && s->pict_type != B_TYPE && !s->first_field){
3363                 ref_picture= s->current_picture_ptr->data;
3364             } 
3365
3366             mpeg_motion(s, dest_y, dest_cb, dest_cr,
3367                         0, 0, s->field_select[dir][0],
3368                         ref_picture, pix_op,
3369                         s->mv[dir][0][0], s->mv[dir][0][1], 16);
3370         }
3371         break;
3372     case MV_TYPE_16X8:
3373         for(i=0; i<2; i++){
3374             uint8_t ** ref2picture;
3375
3376             if(s->picture_structure == s->field_select[dir][i] + 1 || s->pict_type == B_TYPE || s->first_field){
3377                 ref2picture= ref_picture;
3378             }else{
3379                 ref2picture= s->current_picture_ptr->data;
3380             } 
3381
3382             mpeg_motion(s, dest_y, dest_cb, dest_cr, 
3383                         0, 0, s->field_select[dir][i],
3384                         ref2picture, pix_op,
3385                         s->mv[dir][i][0], s->mv[dir][i][1] + 16*i, 8);
3386                 
3387             dest_y += 16*s->linesize;
3388             dest_cb+= (16>>s->chroma_y_shift)*s->uvlinesize;
3389             dest_cr+= (16>>s->chroma_y_shift)*s->uvlinesize;
3390         }        
3391         break;
3392     case MV_TYPE_DMV:
3393         if(s->picture_structure == PICT_FRAME){
3394             for(i=0; i<2; i++){
3395                 int j;
3396                 for(j=0; j<2; j++){
3397                     mpeg_motion(s, dest_y, dest_cb, dest_cr,
3398                                 1, j, j^i,
3399                                 ref_picture, pix_op,
3400                                 s->mv[dir][2*i + j][0], s->mv[dir][2*i + j][1], 8);
3401                 }
3402                 pix_op = s->dsp.avg_pixels_tab; 
3403             }
3404         }else{
3405             for(i=0; i<2; i++){
3406                 mpeg_motion(s, dest_y, dest_cb, dest_cr, 
3407                             0, 0, s->picture_structure != i+1,
3408                             ref_picture, pix_op,
3409                             s->mv[dir][2*i][0],s->mv[dir][2*i][1],16);
3410
3411                 // after put we make avg of the same block
3412                 pix_op=s->dsp.avg_pixels_tab; 
3413
3414                 //opposite parity is always in the same frame if this is second field
3415                 if(!s->first_field){
3416                     ref_picture = s->current_picture_ptr->data;    
3417                 }
3418             }
3419         }
3420     break;
3421     default: assert(0);
3422     }
3423 }
3424
3425 /**
3426  * motion compensation of a single macroblock
3427  * @param s context
3428  * @param dest_y luma destination pointer
3429  * @param dest_cb chroma cb/u destination pointer
3430  * @param dest_cr chroma cr/v destination pointer
3431  * @param dir direction (0->forward, 1->backward)
3432  * @param ref_picture array[3] of pointers to the 3 planes of the reference picture
3433  * @param pix_op halfpel motion compensation function (average or put normally)
3434  * the motion vectors are taken from s->mv and the MV type from s->mv_type
3435  */
3436 static inline void MPV_motion_lowres(MpegEncContext *s, 
3437                               uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
3438                               int dir, uint8_t **ref_picture, 
3439                               h264_chroma_mc_func *pix_op)
3440 {
3441     int mx, my;
3442     int mb_x, mb_y, i;
3443     const int lowres= s->avctx->lowres;
3444     const int block_s= 8>>lowres;    
3445
3446     mb_x = s->mb_x;
3447     mb_y = s->mb_y;
3448
3449     switch(s->mv_type) {
3450     case MV_TYPE_16X16:
3451         mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr, 
3452                     0, 0, 0,
3453                     ref_picture, pix_op,
3454                     s->mv[dir][0][0], s->mv[dir][0][1], 2*block_s);
3455         break;
3456     case MV_TYPE_8X8:
3457         mx = 0;
3458         my = 0;
3459             for(i=0;i<4;i++) {
3460                 hpel_motion_lowres(s, dest_y + ((i & 1) + (i >> 1) * s->linesize)*block_s,
3461                             ref_picture[0], 0, 0,
3462                             (2*mb_x + (i & 1))*block_s, (2*mb_y + (i >>1))*block_s,
3463                             s->width, s->height, s->linesize,
3464                             s->h_edge_pos >> lowres, s->v_edge_pos >> lowres,
3465                             block_s, block_s, pix_op,
3466                             s->mv[dir][i][0], s->mv[dir][i][1]);
3467
3468                 mx += s->mv[dir][i][0];
3469                 my += s->mv[dir][i][1];
3470             }
3471
3472         if(!(s->flags&CODEC_FLAG_GRAY))
3473             chroma_4mv_motion_lowres(s, dest_cb, dest_cr, ref_picture, pix_op, mx, my);
3474         break;
3475     case MV_TYPE_FIELD:
3476         if (s->picture_structure == PICT_FRAME) {
3477             /* top field */       
3478             mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
3479                         1, 0, s->field_select[dir][0],
3480                         ref_picture, pix_op,
3481                         s->mv[dir][0][0], s->mv[dir][0][1], block_s);
3482             /* bottom field */
3483             mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
3484                         1, 1, s->field_select[dir][1],
3485                         ref_picture, pix_op,
3486                         s->mv[dir][1][0], s->mv[dir][1][1], block_s);
3487         } else {
3488             if(s->picture_structure != s->field_select[dir][0] + 1 && s->pict_type != B_TYPE && !s->first_field){
3489                 ref_picture= s->current_picture_ptr->data;
3490             } 
3491
3492             mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
3493                         0, 0, s->field_select[dir][0],
3494                         ref_picture, pix_op,
3495                         s->mv[dir][0][0], s->mv[dir][0][1], 2*block_s);
3496         }
3497         break;
3498     case MV_TYPE_16X8:
3499         for(i=0; i<2; i++){
3500             uint8_t ** ref2picture;
3501
3502             if(s->picture_structure == s->field_select[dir][i] + 1 || s->pict_type == B_TYPE || s->first_field){
3503                 ref2picture= ref_picture;
3504             }else{
3505                 ref2picture= s->current_picture_ptr->data;
3506             } 
3507
3508             mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr, 
3509                         0, 0, s->field_select[dir][i],
3510                         ref2picture, pix_op,
3511                         s->mv[dir][i][0], s->mv[dir][i][1] + 2*block_s*i, block_s);
3512                 
3513             dest_y += 2*block_s*s->linesize;
3514             dest_cb+= (2*block_s>>s->chroma_y_shift)*s->uvlinesize;
3515             dest_cr+= (2*block_s>>s->chroma_y_shift)*s->uvlinesize;
3516         }        
3517         break;
3518     case MV_TYPE_DMV:
3519         if(s->picture_structure == PICT_FRAME){
3520             for(i=0; i<2; i++){
3521                 int j;
3522                 for(j=0; j<2; j++){
3523                     mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
3524                                 1, j, j^i,
3525                                 ref_picture, pix_op,
3526                                 s->mv[dir][2*i + j][0], s->mv[dir][2*i + j][1], block_s);
3527                 }
3528                 pix_op = s->dsp.avg_h264_chroma_pixels_tab;
3529             }
3530         }else{
3531             for(i=0; i<2; i++){
3532                 mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr, 
3533                             0, 0, s->picture_structure != i+1,
3534                             ref_picture, pix_op,
3535                             s->mv[dir][2*i][0],s->mv[dir][2*i][1],2*block_s);
3536
3537                 // after put we make avg of the same block
3538                 pix_op = s->dsp.avg_h264_chroma_pixels_tab;
3539
3540                 //opposite parity is always in the same frame if this is second field
3541                 if(!s->first_field){
3542                     ref_picture = s->current_picture_ptr->data;    
3543                 }
3544             }
3545         }
3546     break;
3547     default: assert(0);
3548     }
3549 }
3550
3551 /* put block[] to dest[] */
3552 static inline void put_dct(MpegEncContext *s, 
3553                            DCTELEM *block, int i, uint8_t *dest, int line_size, int qscale)
3554 {
3555     s->dct_unquantize_intra(s, block, i, qscale);
3556     s->dsp.idct_put (dest, line_size, block);
3557 }
3558
3559 /* add block[] to dest[] */
3560 static inline void add_dct(MpegEncContext *s, 
3561                            DCTELEM *block, int i, uint8_t *dest, int line_size)
3562 {
3563     if (s->block_last_index[i] >= 0) {
3564         s->dsp.idct_add (dest, line_size, block);
3565     }
3566 }
3567
3568 static inline void add_dequant_dct(MpegEncContext *s, 
3569                            DCTELEM *block, int i, uint8_t *dest, int line_size, int qscale)
3570 {
3571     if (s->block_last_index[i] >= 0) {
3572         s->dct_unquantize_inter(s, block, i, qscale);
3573
3574         s->dsp.idct_add (dest, line_size, block);
3575     }
3576 }
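
/* Usage note (illustrative): intra blocks overwrite the destination via
 * put_dct(), inter blocks add the residue on top of the motion-compensated
 * prediction; add_dequant_dct() is the variant for codecs whose coefficients
 * are still quantized at this point (see MPV_decode_mb_internal() below). */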
3577
3578 /**
3579  * cleans dc, ac and coded_block for the current non-intra MB
3580  */
3581 void ff_clean_intra_table_entries(MpegEncContext *s)
3582 {
3583     int wrap = s->b8_stride;
3584     int xy = s->block_index[0];
3585     
3586     s->dc_val[0][xy           ] = 
3587     s->dc_val[0][xy + 1       ] = 
3588     s->dc_val[0][xy     + wrap] =
3589     s->dc_val[0][xy + 1 + wrap] = 1024;
3590     /* ac pred */
3591     memset(s->ac_val[0][xy       ], 0, 32 * sizeof(int16_t));
3592     memset(s->ac_val[0][xy + wrap], 0, 32 * sizeof(int16_t));
3593     if (s->msmpeg4_version>=3) {
3594         s->coded_block[xy           ] =
3595         s->coded_block[xy + 1       ] =
3596         s->coded_block[xy     + wrap] =
3597         s->coded_block[xy + 1 + wrap] = 0;
3598     }
3599     /* chroma */
3600     wrap = s->mb_stride;
3601     xy = s->mb_x + s->mb_y * wrap;
3602     s->dc_val[1][xy] =
3603     s->dc_val[2][xy] = 1024;
3604     /* ac pred */
3605     memset(s->ac_val[1][xy], 0, 16 * sizeof(int16_t));
3606     memset(s->ac_val[2][xy], 0, 16 * sizeof(int16_t));
3607     
3608     s->mbintra_table[xy]= 0;
3609 }
3610
3611 /* generic function called after a macroblock has been parsed by the
3612    decoder or after it has been encoded by the encoder.
3613
3614    Important variables used:
3615    s->mb_intra : true if intra macroblock
3616    s->mv_dir   : motion vector direction
3617    s->mv_type  : motion vector type
3618    s->mv       : motion vector
3619    s->interlaced_dct : true if interlaced dct used (mpeg2)
3620  */
3621 static always_inline void MPV_decode_mb_internal(MpegEncContext *s, DCTELEM block[12][64], int lowres_flag)
3622 {
3623     int mb_x, mb_y;
3624     const int mb_xy = s->mb_y * s->mb_stride + s->mb_x;
3625 #ifdef HAVE_XVMC
3626     if(s->avctx->xvmc_acceleration){
3627         XVMC_decode_mb(s);//xvmc uses pblocks
3628         return;
3629     }
3630 #endif
3631
3632     mb_x = s->mb_x;
3633     mb_y = s->mb_y;
3634
3635     if(s->avctx->debug&FF_DEBUG_DCT_COEFF) {
3636        /* save DCT coefficients */
3637        int i,j;
3638        DCTELEM *dct = &s->current_picture.dct_coeff[mb_xy*64*6];
3639        for(i=0; i<6; i++)
3640            for(j=0; j<64; j++)
3641                *dct++ = block[i][s->dsp.idct_permutation[j]];
3642     }
3643
3644     s->current_picture.qscale_table[mb_xy]= s->qscale;
3645
3646     /* update DC predictors for P macroblocks */
3647     if (!s->mb_intra) {
3648         if (s->h263_pred || s->h263_aic) {
3649             if(s->mbintra_table[mb_xy])
3650                 ff_clean_intra_table_entries(s);
3651         } else {
3652             s->last_dc[0] =
3653             s->last_dc[1] =
3654             s->last_dc[2] = 128 << s->intra_dc_precision;
3655         }
3656     }
3657     else if (s->h263_pred || s->h263_aic)
3658         s->mbintra_table[mb_xy]=1;
3659
3660     if ((s->flags&CODEC_FLAG_PSNR) || !(s->encoding && (s->intra_only || s->pict_type==B_TYPE))) { //FIXME precalc
3661         uint8_t *dest_y, *dest_cb, *dest_cr;
3662         int dct_linesize, dct_offset;
3663         op_pixels_func (*op_pix)[4];
3664         qpel_mc_func (*op_qpix)[16];
3665         const int linesize= s->current_picture.linesize[0]; //not s->linesize as this would be wrong for field pics
3666         const int uvlinesize= s->current_picture.linesize[1];
3667         const int readable= s->pict_type != B_TYPE || s->encoding || s->avctx->draw_horiz_band || lowres_flag;
3668         const int block_size= lowres_flag ? 8>>s->avctx->lowres : 8;
3669
3670         /* avoid copy if macroblock skipped in last frame too */
3671         /* skip only during decoding as we might trash the buffers during encoding a bit */
3672         if(!s->encoding){
3673             uint8_t *mbskip_ptr = &s->mbskip_table[mb_xy];
3674             const int age= s->current_picture.age;
3675
3676             assert(age);
3677
3678             if (s->mb_skiped) {
3679                 s->mb_skiped= 0;
3680                 assert(s->pict_type!=I_TYPE);
3681  
3682                 (*mbskip_ptr) ++; /* indicate that this time we skipped it */
3683                 if(*mbskip_ptr >99) *mbskip_ptr= 99;
3684
3685                 /* if previous was skipped too, then nothing to do !  */
3686                 if (*mbskip_ptr >= age && s->current_picture.reference){
3687                     return;
3688                 }
3689             } else if(!s->current_picture.reference){
3690                 (*mbskip_ptr) ++; /* increase counter so the age can be compared cleanly */
3691                 if(*mbskip_ptr >99) *mbskip_ptr= 99;
3692             } else{
3693                 *mbskip_ptr = 0; /* not skipped */
3694             }
3695         }
3696         
3697         dct_linesize = linesize << s->interlaced_dct;
3698         dct_offset =(s->interlaced_dct)? linesize : linesize*block_size;
3699         
3700         if(readable){
3701             dest_y=  s->dest[0];
3702             dest_cb= s->dest[1];
3703             dest_cr= s->dest[2];
3704         }else{
3705             dest_y = s->b_scratchpad;
3706             dest_cb= s->b_scratchpad+16*linesize;
3707             dest_cr= s->b_scratchpad+32*linesize;
3708         }
3709
3710         if (!s->mb_intra) {
3711             /* motion handling */
3712             /* decoding or more than one mb_type (MC was already done otherwise) */
3713             if(!s->encoding){
3714                 if(lowres_flag){
3715                     h264_chroma_mc_func *op_pix = s->dsp.put_h264_chroma_pixels_tab;
3716
3717                     if (s->mv_dir & MV_DIR_FORWARD) {
3718                         MPV_motion_lowres(s, dest_y, dest_cb, dest_cr, 0, s->last_picture.data, op_pix);
3719                         op_pix = s->dsp.avg_h264_chroma_pixels_tab;
3720                     }
3721                     if (s->mv_dir & MV_DIR_BACKWARD) {
3722                         MPV_motion_lowres(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.data, op_pix);
3723                     }
3724                 }else{
3725                     if ((!s->no_rounding) || s->pict_type==B_TYPE){                
3726                         op_pix = s->dsp.put_pixels_tab;
3727                         op_qpix= s->dsp.put_qpel_pixels_tab;
3728                     }else{
3729                         op_pix = s->dsp.put_no_rnd_pixels_tab;
3730                         op_qpix= s->dsp.put_no_rnd_qpel_pixels_tab;
3731                     }
3732                     if (s->mv_dir & MV_DIR_FORWARD) {
3733                         MPV_motion(s, dest_y, dest_cb, dest_cr, 0, s->last_picture.data, op_pix, op_qpix);
3734                         op_pix = s->dsp.avg_pixels_tab;
3735                         op_qpix= s->dsp.avg_qpel_pixels_tab;
3736                     }
3737                     if (s->mv_dir & MV_DIR_BACKWARD) {
3738                         MPV_motion(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.data, op_pix, op_qpix);
3739                     }
3740                 }
3741             }
3742
3743             /* skip dequant / idct if we are really late ;) */
3744             if(s->hurry_up>1) return;
3745
3746             /* add dct residue */
3747             if(s->encoding || !(   s->h263_msmpeg4 || s->codec_id==CODEC_ID_MPEG1VIDEO || s->codec_id==CODEC_ID_MPEG2VIDEO
3748                                 || (s->codec_id==CODEC_ID_MPEG4 && !s->mpeg_quant))){
3749                 add_dequant_dct(s, block[0], 0, dest_y                          , dct_linesize, s->qscale);
3750                 add_dequant_dct(s, block[1], 1, dest_y              + block_size, dct_linesize, s->qscale);
3751                 add_dequant_dct(s, block[2], 2, dest_y + dct_offset             , dct_linesize, s->qscale);
3752                 add_dequant_dct(s, block[3], 3, dest_y + dct_offset + block_size, dct_linesize, s->qscale);
3753
3754                 if(!(s->flags&CODEC_FLAG_GRAY)){
3755                     add_dequant_dct(s, block[4], 4, dest_cb, uvlinesize, s->chroma_qscale);
3756                     add_dequant_dct(s, block[5], 5, dest_cr, uvlinesize, s->chroma_qscale);
3757                 }
3758             } else if(s->codec_id != CODEC_ID_WMV2){
3759                 add_dct(s, block[0], 0, dest_y                          , dct_linesize);
3760                 add_dct(s, block[1], 1, dest_y              + block_size, dct_linesize);
3761                 add_dct(s, block[2], 2, dest_y + dct_offset             , dct_linesize);
3762                 add_dct(s, block[3], 3, dest_y + dct_offset + block_size, dct_linesize);
3763
3764                 if(!(s->flags&CODEC_FLAG_GRAY)){
3765                     if(s->chroma_y_shift){//Chroma420
3766                         add_dct(s, block[4], 4, dest_cb, uvlinesize);
3767                         add_dct(s, block[5], 5, dest_cr, uvlinesize);
3768                     }else{
3769                         //chroma422
3770                         dct_linesize = uvlinesize << s->interlaced_dct;
3771                         dct_offset =(s->interlaced_dct)? uvlinesize : uvlinesize*8;
3772
3773                         add_dct(s, block[4], 4, dest_cb, dct_linesize);
3774                         add_dct(s, block[5], 5, dest_cr, dct_linesize);
3775                         add_dct(s, block[6], 6, dest_cb+dct_offset, dct_linesize);
3776                         add_dct(s, block[7], 7, dest_cr+dct_offset, dct_linesize);
3777                         if(!s->chroma_x_shift){//Chroma444
3778                             add_dct(s, block[8], 8, dest_cb+8, dct_linesize);
3779                             add_dct(s, block[9], 9, dest_cr+8, dct_linesize);
3780                             add_dct(s, block[10], 10, dest_cb+8+dct_offset, dct_linesize);
3781                             add_dct(s, block[11], 11, dest_cr+8+dct_offset, dct_linesize);
3782                         }
3783                     }
3784                 }//fi gray
3785             }
3786             else{
3787                 ff_wmv2_add_mb(s, block, dest_y, dest_cb, dest_cr);
3788             }
3789         } else {
3790             /* dct only in intra block */
3791             if(s->encoding || !(s->codec_id==CODEC_ID_MPEG1VIDEO || s->codec_id==CODEC_ID_MPEG2VIDEO)){
3792                 put_dct(s, block[0], 0, dest_y                          , dct_linesize, s->qscale);
3793                 put_dct(s, block[1], 1, dest_y              + block_size, dct_linesize, s->qscale);
3794                 put_dct(s, block[2], 2, dest_y + dct_offset             , dct_linesize, s->qscale);
3795                 put_dct(s, block[3], 3, dest_y + dct_offset + block_size, dct_linesize, s->qscale);
3796
3797                 if(!(s->flags&CODEC_FLAG_GRAY)){
3798                     put_dct(s, block[4], 4, dest_cb, uvlinesize, s->chroma_qscale);
3799                     put_dct(s, block[5], 5, dest_cr, uvlinesize, s->chroma_qscale);
3800                 }
3801             }else{
3802                 s->dsp.idct_put(dest_y                          , dct_linesize, block[0]);
3803                 s->dsp.idct_put(dest_y              + block_size, dct_linesize, block[1]);
3804                 s->dsp.idct_put(dest_y + dct_offset             , dct_linesize, block[2]);
3805                 s->dsp.idct_put(dest_y + dct_offset + block_size, dct_linesize, block[3]);
3806
3807                 if(!(s->flags&CODEC_FLAG_GRAY)){
3808                     if(s->chroma_y_shift){
3809                         s->dsp.idct_put(dest_cb, uvlinesize, block[4]);
3810                         s->dsp.idct_put(dest_cr, uvlinesize, block[5]);
3811                     }else{
3812
3813                         dct_linesize = uvlinesize << s->interlaced_dct;
3814                         dct_offset =(s->interlaced_dct)? uvlinesize : uvlinesize*8;
3815
3816                         s->dsp.idct_put(dest_cb,              dct_linesize, block[4]);
3817                         s->dsp.idct_put(dest_cr,              dct_linesize, block[5]);
3818                         s->dsp.idct_put(dest_cb + dct_offset, dct_linesize, block[6]);
3819                         s->dsp.idct_put(dest_cr + dct_offset, dct_linesize, block[7]);
3820                         if(!s->chroma_x_shift){//Chroma444
3821                             s->dsp.idct_put(dest_cb + 8,              dct_linesize, block[8]);
3822                             s->dsp.idct_put(dest_cr + 8,              dct_linesize, block[9]);
3823                             s->dsp.idct_put(dest_cb + 8 + dct_offset, dct_linesize, block[10]);
3824                             s->dsp.idct_put(dest_cr + 8 + dct_offset, dct_linesize, block[11]);
3825                         }
3826                     }
3827                 }//gray
3828             }
3829         }
3830         if(!readable){
3831             s->dsp.put_pixels_tab[0][0](s->dest[0], dest_y ,   linesize,16);
3832             s->dsp.put_pixels_tab[s->chroma_x_shift][0](s->dest[1], dest_cb, uvlinesize,16 >> s->chroma_y_shift);
3833             s->dsp.put_pixels_tab[s->chroma_x_shift][0](s->dest[2], dest_cr, uvlinesize,16 >> s->chroma_y_shift);
3834         }
3835     }
3836 }
3837
3838 void MPV_decode_mb(MpegEncContext *s, DCTELEM block[12][64]){
3839     if(s->avctx->lowres) MPV_decode_mb_internal(s, block, 1);
3840     else                  MPV_decode_mb_internal(s, block, 0);
3841 }
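
/* Rough sketch (illustrative, not the call sequence of any particular
 * decoder): a per-macroblock decoder fills the fields documented above
 * MPV_decode_mb_internal() and then hands the block array to MPV_decode_mb(),
 * which dispatches to the normal or lowres path. */
#if 0
static void decode_mb_sketch(MpegEncContext *s, DCTELEM block[12][64], int mx, int my){
    s->mb_intra    = 0;
    s->mv_dir      = MV_DIR_FORWARD;
    s->mv_type     = MV_TYPE_16X16;
    s->mv[0][0][0] = mx;           /* half-pel (quarter-pel if s->quarter_sample) */
    s->mv[0][0][1] = my;
    MPV_decode_mb(s, block);       /* motion compensation + residue              */
}
#endif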
3842
3843 #ifdef CONFIG_ENCODERS
3844
3845 static inline void dct_single_coeff_elimination(MpegEncContext *s, int n, int threshold)
3846 {
3847     static const char tab[64]=
3848         {3,2,2,1,1,1,1,1,
3849          1,1,1,1,1,1,1,1,
3850          1,1,1,1,1,1,1,1,
3851          0,0,0,0,0,0,0,0,
3852          0,0,0,0,0,0,0,0,
3853          0,0,0,0,0,0,0,0,
3854          0,0,0,0,0,0,0,0,
3855          0,0,0,0,0,0,0,0};
3856     int score=0;
3857     int run=0;
3858     int i;
3859     DCTELEM *block= s->block[n];
3860     const int last_index= s->block_last_index[n];
3861     int skip_dc;
3862
3863     if(threshold<0){
3864         skip_dc=0;
3865         threshold= -threshold;
3866     }else
3867         skip_dc=1;
3868
3869     /* are all coefficients which we could set to zero already zero? */
3870     if(last_index<=skip_dc - 1) return;
3871
3872     for(i=0; i<=last_index; i++){
3873         const int j = s->intra_scantable.permutated[i];
3874         const int level = ABS(block[j]);
3875         if(level==1){
3876             if(skip_dc && i==0) continue;
3877             score+= tab[run];
3878             run=0;
3879         }else if(level>1){
3880             return;
3881         }else{
3882             run++;
3883         }
3884     }
3885     if(score >= threshold) return;
3886     for(i=skip_dc; i<=last_index; i++){
3887         const int j = s->intra_scantable.permutated[i];
3888         block[j]=0;
3889     }
3890     if(block[0]) s->block_last_index[n]= 0;
3891     else         s->block_last_index[n]= -1;
3892 }
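
/* Worked example (illustrative): with a positive threshold of 1 the DC
 * coefficient is skipped, and a block whose only nonzero AC coefficient is a
 * lone +-1 beyond scan position 23 scores tab[run] == 0 < 1, so the block is
 * zeroed and block_last_index reset; coding it would cost more than it helps. */
#if 0
/* hypothetical call: try to drop block n if it only carries negligible +-1
 * coefficients (the threshold value here is made up for the example)         */
dct_single_coeff_elimination(s, n, 1);
#endif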
3893
3894 static inline void clip_coeffs(MpegEncContext *s, DCTELEM *block, int last_index)
3895 {
3896     int i;
3897     const int maxlevel= s->max_qcoeff;
3898     const int minlevel= s->min_qcoeff;
3899     int overflow=0;
3900     
3901     if(s->mb_intra){
3902         i=1; //skip clipping of intra dc
3903     }else
3904         i=0;
3905     
3906     for(;i<=last_index; i++){
3907         const int j= s->intra_scantable.permutated[i];
3908         int level = block[j];
3909        
3910         if     (level>maxlevel){
3911             level=maxlevel;
3912             overflow++;
3913         }else if(level<minlevel){
3914             level=minlevel;
3915             overflow++;
3916         }
3917         
3918         block[j]= level;
3919     }
3920     
3921     if(overflow && s->avctx->mb_decision == FF_MB_DECISION_SIMPLE)
3922         av_log(s->avctx, AV_LOG_INFO, "warning, clipping %d dct coefficients to %d..%d\n", overflow, minlevel, maxlevel);
3923 }
3924
3925 #endif //CONFIG_ENCODERS
3926
3927 /**
3928  *
3929  * @param h the normal height; it will be reduced automatically if needed for the last row
3930  */
3931 void ff_draw_horiz_band(MpegEncContext *s, int y, int h){
3932     if (s->avctx->draw_horiz_band) {
3933         AVFrame *src;
3934         int offset[4];
3935         
3936         if(s->picture_structure != PICT_FRAME){
3937             h <<= 1;
3938             y <<= 1;
3939             if(s->first_field  && !(s->avctx->slice_flags&SLICE_FLAG_ALLOW_FIELD)) return;
3940         }
3941
3942         h= FFMIN(h, s->avctx->height - y);
3943
3944         if(s->pict_type==B_TYPE || s->low_delay || (s->avctx->slice_flags&SLICE_FLAG_CODED_ORDER)) 
3945             src= (AVFrame*)s->current_picture_ptr;
3946         else if(s->last_picture_ptr)
3947             src= (AVFrame*)s->last_picture_ptr;
3948         else
3949             return;
3950             
3951         if(s->pict_type==B_TYPE && s->picture_structure == PICT_FRAME && s->out_format != FMT_H264){
3952             offset[0]=
3953             offset[1]=
3954             offset[2]=
3955             offset[3]= 0;
3956         }else{
3957             offset[0]= y * s->linesize;
3958             offset[1]= 
3959             offset[2]= (y >> s->chroma_y_shift) * s->uvlinesize;
3960             offset[3]= 0;
3961         }
3962
3963         emms_c();
3964
3965         s->avctx->draw_horiz_band(s->avctx, src, offset,
3966                                   y, s->picture_structure, h);
3967     }
3968 }
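
/* Minimal sketch of a user draw_horiz_band callback as invoked above; the
 * authoritative prototype lives in avcodec.h, and process_line() stands in
 * for whatever the application does with the freshly decoded band. */
#if 0
static void my_draw_horiz_band(AVCodecContext *avctx, const AVFrame *src,
                               int offset[4], int y, int type, int height){
    const uint8_t *band = src->data[0] + offset[0];  /* first luma line of the band */
    int i;
    for(i=0; i<height; i++)
        process_line(band + i*src->linesize[0], avctx->width, y + i);
}
#endif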
3969
3970 void ff_init_block_index(MpegEncContext *s){ //FIXME maybe rename
3971     const int linesize= s->current_picture.linesize[0]; //not s->linesize as this would be wrong for field pics
3972     const int uvlinesize= s->current_picture.linesize[1];
3973     const int mb_size= 4 - s->avctx->lowres;
3974         
3975     s->block_index[0]= s->b8_stride*(s->mb_y*2    ) - 2 + s->mb_x*2;
3976     s->block_index[1]= s->b8_stride*(s->mb_y*2    ) - 1 + s->mb_x*2;
3977     s->block_index[2]= s->b8_stride*(s->mb_y*2 + 1) - 2 + s->mb_x*2;
3978     s->block_index[3]= s->b8_stride*(s->mb_y*2 + 1) - 1 + s->mb_x*2;
3979     s->block_index[4]= s->mb_stride*(s->mb_y + 1)                + s->b8_stride*s->mb_height*2 + s->mb_x - 1;
3980     s->block_index[5]= s->mb_stride*(s->mb_y + s->mb_height + 2) + s->b8_stride*s->mb_height*2 + s->mb_x - 1;
3981     //block_index is not used by mpeg2, so it is not affected by chroma_format
3982
3983     s->dest[0] = s->current_picture.data[0] + ((s->mb_x - 1) << mb_size);
3984     s->dest[1] = s->current_picture.data[1] + ((s->mb_x - 1) << (mb_size - s->chroma_x_shift));
3985     s->dest[2] = s->current_picture.data[2] + ((s->mb_x - 1) << (mb_size - s->chroma_x_shift));
3986
3987     if(!(s->pict_type==B_TYPE && s->avctx->draw_horiz_band && s->picture_structure==PICT_FRAME))
3988     {
3989         s->dest[0] += s->mb_y *   linesize << mb_size;
3990         s->dest[1] += s->mb_y * uvlinesize << (mb_size - s->chroma_y_shift);
3991         s->dest[2] += s->mb_y * uvlinesize << (mb_size - s->chroma_y_shift);
3992     }
3993 }
3994
3995 #ifdef CONFIG_ENCODERS
3996
3997 static void get_vissual_weight(int16_t *weight, uint8_t *ptr, int stride){
3998     int x, y;
3999 //FIXME optimize
4000     for(y=0; y<8; y++){
4001         for(x=0; x<8; x++){
4002             int x2, y2;
4003             int sum=0;
4004             int sqr=0;
4005             int count=0;
4006
4007             for(y2= FFMAX(y-1, 0); y2 < FFMIN(8, y+2); y2++){
4008                 for(x2= FFMAX(x-1, 0); x2 < FFMIN(8, x+2); x2++){
4009                     int v= ptr[x2 + y2*stride];
4010                     sum += v;
4011                     sqr += v*v;
4012                     count++;
4013                 }
4014             }
4015             weight[x + 8*y]= (36*ff_sqrt(count*sqr - sum*sum)) / count;
4016         }
4017     }
4018 }
4019
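/* Encode one macroblock: fetch the source pixels (and the motion compensated
 * prediction for inter MBs), pick frame vs. field DCT, transform, quantize
 * and entropy-code the six blocks with the codec specific MB coder. */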
4020 static void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
4021 {
4022     int16_t weight[6][64];
4023     DCTELEM orig[6][64];
4024     const int mb_x= s->mb_x;
4025     const int mb_y= s->mb_y;
4026     int i;
4027     int skip_dct[6];
4028     int dct_offset   = s->linesize*8; //default for progressive frames
4029     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
4030     int wrap_y, wrap_c;
4031     
4032     for(i=0; i<6; i++) skip_dct[i]=0;
4033     
4034     if(s->adaptive_quant){
4035         const int last_qp= s->qscale;
4036         const int mb_xy= mb_x + mb_y*s->mb_stride;
4037
4038         s->lambda= s->lambda_table[mb_xy];
4039         update_qscale(s);
4040     
4041         if(!(s->flags&CODEC_FLAG_QP_RD)){
4042             s->dquant= s->qscale - last_qp;
4043
4044             if(s->out_format==FMT_H263){
4045                 s->dquant= clip(s->dquant, -2, 2); //FIXME RD
4046             
4047                 if(s->codec_id==CODEC_ID_MPEG4){        
4048                     if(!s->mb_intra){
4049                         if(s->pict_type == B_TYPE){
4050                             if(s->dquant&1) 
4051                                 s->dquant= (s->dquant/2)*2;
4052                             if(s->mv_dir&MV_DIRECT)
4053                                 s->dquant= 0;
4054                         }
4055                         if(s->mv_type==MV_TYPE_8X8)
4056                             s->dquant=0;
4057                     }
4058                 }
4059             }
4060         }
4061         ff_set_qscale(s, last_qp + s->dquant);
4062     }else if(s->flags&CODEC_FLAG_QP_RD)
4063         ff_set_qscale(s, s->qscale + s->dquant);
4064
4065     wrap_y = s->linesize;
4066     wrap_c = s->uvlinesize;
4067     ptr_y = s->new_picture.data[0] + (mb_y * 16 * wrap_y) + mb_x * 16;
4068     ptr_cb = s->new_picture.data[1] + (mb_y * 8 * wrap_c) + mb_x * 8;
4069     ptr_cr = s->new_picture.data[2] + (mb_y * 8 * wrap_c) + mb_x * 8;
4070
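    /* If the MB sticks out over the picture border, copy it into the edge
     * emulation buffer with replicated border pixels and encode from there. */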
4071     if(mb_x*16+16 > s->width || mb_y*16+16 > s->height){
4072         uint8_t *ebuf= s->edge_emu_buffer + 32;
4073         ff_emulated_edge_mc(ebuf            , ptr_y , wrap_y,16,16,mb_x*16,mb_y*16, s->width   , s->height);
4074         ptr_y= ebuf;
4075         ff_emulated_edge_mc(ebuf+18*wrap_y  , ptr_cb, wrap_c, 8, 8, mb_x*8, mb_y*8, s->width>>1, s->height>>1);
4076         ptr_cb= ebuf+18*wrap_y;
4077         ff_emulated_edge_mc(ebuf+18*wrap_y+8, ptr_cr, wrap_c, 8, 8, mb_x*8, mb_y*8, s->width>>1, s->height>>1);
4078         ptr_cr= ebuf+18*wrap_y+8;
4079     }
4080
4081     if (s->mb_intra) {
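        /* Interlaced DCT decision: estimate the cost of frame vs. field block
         * ordering with ildct_cmp (the -400 biases the choice toward frame
         * DCT); if field order wins, dct_offset/wrap_y are switched so the
         * luma blocks are taken from single fields. */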
4082         if(s->flags&CODEC_FLAG_INTERLACED_DCT){
4083             int progressive_score, interlaced_score;
4084
4085             s->interlaced_dct=0;
4086             progressive_score= s->dsp.ildct_cmp[4](s, ptr_y           , NULL, wrap_y, 8) 
4087                               +s->dsp.ildct_cmp[4](s, ptr_y + wrap_y*8, NULL, wrap_y, 8) - 400;
4088
4089             if(progressive_score > 0){
4090                 interlaced_score = s->dsp.ildct_cmp[4](s, ptr_y           , NULL, wrap_y*2, 8) 
4091                                   +s->dsp.ildct_cmp[4](s, ptr_y + wrap_y  , NULL, wrap_y*2, 8);
4092                 if(progressive_score > interlaced_score){
4093                     s->interlaced_dct=1;
4094             
4095                     dct_offset= wrap_y;
4096                     wrap_y<<=1;
4097                 }
4098             }
4099         }
4100         
4101         s->dsp.get_pixels(s->block[0], ptr_y                 , wrap_y);
4102         s->dsp.get_pixels(s->block[1], ptr_y              + 8, wrap_y);
4103         s->dsp.get_pixels(s->block[2], ptr_y + dct_offset    , wrap_y);
4104         s->dsp.get_pixels(s->block[3], ptr_y + dct_offset + 8, wrap_y);
4105
4106         if(s->flags&CODEC_FLAG_GRAY){
4107             skip_dct[4]= 1;
4108             skip_dct[5]= 1;
4109         }else{
4110             s->dsp.get_pixels(s->block[4], ptr_cb, wrap_c);
4111             s->dsp.get_pixels(s->block[5], ptr_cr, wrap_c);
4112         }
4113     }else{
4114         op_pixels_func (*op_pix)[4];
4115         qpel_mc_func (*op_qpix)[16];
4116         uint8_t *dest_y, *dest_cb, *dest_cr;
4117
4118         dest_y  = s->dest[0];
4119         dest_cb = s->dest[1];
4120         dest_cr = s->dest[2];
4121
4122         if ((!s->no_rounding) || s->pict_type==B_TYPE){
4123             op_pix = s->dsp.put_pixels_tab;
4124             op_qpix= s->dsp.put_qpel_pixels_tab;
4125         }else{
4126             op_pix = s->dsp.put_no_rnd_pixels_tab;
4127             op_qpix= s->dsp.put_no_rnd_qpel_pixels_tab;
4128         }
4129
4130         if (s->mv_dir & MV_DIR_FORWARD) {
4131             MPV_motion(s, dest_y, dest_cb, dest_cr, 0, s->last_picture.data, op_pix, op_qpix);
4132             op_pix = s->dsp.avg_pixels_tab;
4133             op_qpix= s->dsp.avg_qpel_pixels_tab;
4134         }
4135         if (s->mv_dir & MV_DIR_BACKWARD) {
4136             MPV_motion(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.data, op_pix, op_qpix);
4137         }
4138
4139         if(s->flags&CODEC_FLAG_INTERLACED_DCT){
4140             int progressive_score, interlaced_score;
4141
4142             s->interlaced_dct=0;
4143             progressive_score= s->dsp.ildct_cmp[0](s, dest_y           , ptr_y           , wrap_y, 8) 
4144                               +s->dsp.ildct_cmp[0](s, dest_y + wrap_y*8, ptr_y + wrap_y*8, wrap_y, 8) - 400;
4145             
4146             if(s->avctx->ildct_cmp == FF_CMP_VSSE) progressive_score -= 400;
4147
4148             if(progressive_score>0){
4149                 interlaced_score = s->dsp.ildct_cmp[0](s, dest_y           , ptr_y           , wrap_y*2, 8) 
4150                                   +s->dsp.ildct_cmp[0](s, dest_y + wrap_y  , ptr_y + wrap_y  , wrap_y*2, 8);
4151             
4152                 if(progressive_score > interlaced_score){
4153                     s->interlaced_dct=1;
4154             
4155                     dct_offset= wrap_y;
4156                     wrap_y<<=1;
4157                 }
4158             }
4159         }
4160         
4161         s->dsp.diff_pixels(s->block[0], ptr_y                 , dest_y                 , wrap_y);
4162         s->dsp.diff_pixels(s->block[1], ptr_y              + 8, dest_y              + 8, wrap_y);
4163         s->dsp.diff_pixels(s->block[2], ptr_y + dct_offset    , dest_y + dct_offset    , wrap_y);
4164         s->dsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8, dest_y + dct_offset + 8, wrap_y);
4165         
4166         if(s->flags&CODEC_FLAG_GRAY){
4167             skip_dct[4]= 1;
4168             skip_dct[5]= 1;
4169         }else{
4170             s->dsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
4171             s->dsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
4172         }
4173         /* pre quantization */         
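        /* Skip the DCT for blocks whose prediction error is already tiny: if
         * the MB's motion compensated variance is small and a block's SAD
         * against the prediction is below 20*qscale, the block is not coded. */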
4174         if(s->current_picture.mc_mb_var[s->mb_stride*mb_y+ mb_x]<2*s->qscale*s->qscale){
4175             //FIXME optimize
4176             if(s->dsp.sad[1](NULL, ptr_y               , dest_y               , wrap_y, 8) < 20*s->qscale) skip_dct[0]= 1;
4177             if(s->dsp.sad[1](NULL, ptr_y            + 8, dest_y            + 8, wrap_y, 8) < 20*s->qscale) skip_dct[1]= 1;
4178             if(s->dsp.sad[1](NULL, ptr_y +dct_offset   , dest_y +dct_offset   , wrap_y, 8) < 20*s->qscale) skip_dct[2]= 1;
4179             if(s->dsp.sad[1](NULL, ptr_y +dct_offset+ 8, dest_y +dct_offset+ 8, wrap_y, 8) < 20*s->qscale) skip_dct[3]= 1;
4180             if(s->dsp.sad[1](NULL, ptr_cb              , dest_cb              , wrap_c, 8) < 20*s->qscale) skip_dct[4]= 1;
4181             if(s->dsp.sad[1](NULL, ptr_cr              , dest_cr              , wrap_c, 8) < 20*s->qscale) skip_dct[5]= 1;
4182         }
4183     }
4184
4185     if(s->avctx->quantizer_noise_shaping){
4186         if(!skip_dct[0]) get_vissual_weight(weight[0], ptr_y                 , wrap_y);
4187         if(!skip_dct[1]) get_vissual_weight(weight[1], ptr_y              + 8, wrap_y);
4188         if(!skip_dct[2]) get_vissual_weight(weight[2], ptr_y + dct_offset    , wrap_y);
4189         if(!skip_dct[3]) get_vissual_weight(weight[3], ptr_y + dct_offset + 8, wrap_y);
4190         if(!skip_dct[4]) get_vissual_weight(weight[4], ptr_cb                , wrap_c);
4191         if(!skip_dct[5]) get_vissual_weight(weight[5], ptr_cr                , wrap_c);
4192         memcpy(orig[0], s->block[0], sizeof(DCTELEM)*64*6);
4193     }
4194             
4195     /* DCT & quantize */
4196     assert(s->out_format!=FMT_MJPEG || s->qscale==8);
4197     {
4198         for(i=0;i<6;i++) {
4199             if(!skip_dct[i]){
4200                 int overflow;
4201                 s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow);
4202             // FIXME we could decide to change the quantizer instead of clipping
4203             // JS: I don't think that would be a good idea, it could lower quality instead
4204             //     of improving it; only INTRADC clipping would justify changing the quantizer
4205                 if (overflow) clip_coeffs(s, s->block[i], s->block_last_index[i]);
4206             }else
4207                 s->block_last_index[i]= -1;
4208         }
4209         if(s->avctx->quantizer_noise_shaping){
4210             for(i=0;i<6;i++) {
4211                 if(!skip_dct[i]){
4212                     s->block_last_index[i] = dct_quantize_refine(s, s->block[i], weight[i], orig[i], i, s->qscale);
4213                 }
4214             }
4215         }
4216         
4217         if(s->luma_elim_threshold && !s->mb_intra)
4218             for(i=0; i<4; i++)
4219                 dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
4220         if(s->chroma_elim_threshold && !s->mb_intra)
4221             for(i=4; i<6; i++)
4222                 dct_single_coeff_elimination(s, i, s->chroma_elim_threshold);
4223
4224         if(s->flags & CODEC_FLAG_CBP_RD){
4225             for(i=0;i<6;i++) {
4226                 if(s->block_last_index[i] == -1)
4227                     s->coded_score[i]= INT_MAX/256;
4228             }
4229         }
4230     }
4231
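    /* In grayscale mode the chroma blocks of intra MBs are forced to a flat
     * neutral value: only a DC term that dequantizes back to ~1024
     * (i.e. pixel level 128) is coded. */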
4232     if((s->flags&CODEC_FLAG_GRAY) && s->mb_intra){
4233         s->block_last_index[4]=
4234         s->block_last_index[5]= 0;
4235         s->block[4][0]=
4236         s->block[5][0]= (1024 + s->c_dc_scale/2)/ s->c_dc_scale;
4237     }
4238
4239     //FIXME the non-C quantize code returns an incorrect block_last_index
4240     if(s->alternate_scan && s->dct_quantize != dct_quantize_c){
4241         for(i=0; i<6; i++){
4242             int j;
4243             if(s->block_last_index[i]>0){
4244                 for(j=63; j>0; j--){
4245                     if(s->block[i][ s->intra_scantable.permutated[j] ]) break;
4246                 }
4247                 s->block_last_index[i]= j;
4248             }
4249         }
4250     }
4251
4252     /* huffman encode */
4253     switch(s->codec_id){ //FIXME funct ptr could be slightly faster
4254     case CODEC_ID_MPEG1VIDEO:
4255     case CODEC_ID_MPEG2VIDEO:
4256         mpeg1_encode_mb(s, s->block, motion_x, motion_y); break;
4257     case CODEC_ID_MPEG4:
4258         mpeg4_encode_mb(s, s->block, motion_x, motion_y); break;
4259     case CODEC_ID_MSMPEG4V2:
4260     case CODEC_ID_MSMPEG4V3:
4261     case CODEC_ID_WMV1:
4262         msmpeg4_encode_mb(s, s->block, motion_x, motion_y); break;
4263     case CODEC_ID_WMV2:
4264          ff_wmv2_encode_mb(s, s->block, motion_x, motion_y); break;
4265     case CODEC_ID_H261:
4266         ff_h261_encode_mb(s, s->block, motion_x, motion_y); break;
4267     case CODEC_ID_H263:
4268     case CODEC_ID_H263P:
4269     case CODEC_ID_FLV1:
4270     case CODEC_ID_RV10:
4271     case CODEC_ID_RV20:
4272         h263_encode_mb(s, s->block, motion_x, motion_y); break;
4273     case CODEC_ID_MJPEG:
4274         mjpeg_encode_mb(s, s->block); break;
4275     default:
4276         assert(0);
4277     }
4278 }
4279
4280 #endif //CONFIG_ENCODERS
4281
4282 void ff_mpeg_flush(AVCodecContext *avctx){
4283     int i;
4284     MpegEncContext *s = avctx->priv_data;
4285     
4286     if(s==NULL || s->picture==NULL) 
4287         return;
4288     
4289     for(i=0; i<MAX_PICTURE_COUNT; i++){
4290        if(s->picture[i].data[0] && (   s->picture[i].type == FF_BUFFER_TYPE_INTERNAL
4291                                     || s->picture[i].type == FF_BUFFER_TYPE_USER))
4292         avctx->release_buffer(avctx, (AVFrame*)&s->picture[i]);
4293     }
4294     s->current_picture_ptr = s->last_picture_ptr = s->next_picture_ptr = NULL;
4295     
4296     s->mb_x= s->mb_y= 0;
4297     
4298     s->parse_context.state= -1;
4299     s->parse_context.frame_start_found= 0;
4300     s->parse_context.overread= 0;
4301     s->parse_context.overread_index= 0;
4302     s->parse_context.index= 0;
4303     s->parse_context.last_index= 0;
4304     s->bitstream_buffer_size=0;
4305 }
4306
4307 #ifdef CONFIG_ENCODERS
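/* Append 'length' bits from src to pb: short or unaligned runs are written 16
 * bits at a time, long byte-aligned runs are memcpy'd into the buffer, and the
 * remaining 0..15 bits are appended at the end. */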
4308 void ff_copy_bits(PutBitContext *pb, uint8_t *src, int length)
4309 {
4310     const uint16_t *srcw= (uint16_t*)src;
4311     int words= length>>4;
4312     int bits= length&15;
4313     int i;
4314
4315     if(length==0) return;
4316     
4317     if(words < 16){
4318         for(i=0; i<words; i++) put_bits(pb, 16, be2me_16(srcw[i]));
4319     }else if(put_bits_count(pb)&7){
4320         for(i=0; i<words; i++) put_bits(pb, 16, be2me_16(srcw[i]));
4321     }else{
4322         for(i=0; put_bits_count(pb)&31; i++)
4323             put_bits(pb, 8, src[i]);
4324         flush_put_bits(pb);
4325         memcpy(pbBufPtr(pb), src+i, 2*words-i);
4326         skip_put_bytes(pb, 2*words-i);
4327     }
4328         
4329     put_bits(pb, bits, be2me_16(srcw[words])>>(16-bits));
4330 }
4331
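/* Save the parts of the encoder state that encode_mb() modifies, so several
 * candidate codings of the same MB can be tried from identical state. */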
4332 static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int type){
4333     int i;
4334
4335     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
4336
4337     /* mpeg1 */
4338     d->mb_skip_run= s->mb_skip_run;
4339     for(i=0; i<3; i++)
4340         d->last_dc[i]= s->last_dc[i];
4341     
4342     /* statistics */
4343     d->mv_bits= s->mv_bits;
4344     d->i_tex_bits= s->i_tex_bits;
4345     d->p_tex_bits= s->p_tex_bits;
4346     d->i_count= s->i_count;
4347     d->f_count= s->f_count;
4348     d->b_count= s->b_count;
4349     d->skip_count= s->skip_count;
4350     d->misc_bits= s->misc_bits;
4351     d->last_bits= 0;
4352
4353     d->mb_skiped= 0;
4354     d->qscale= s->qscale;
4355     d->dquant= s->dquant;
4356 }
4357
4358 static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){
4359     int i;
4360
4361     memcpy(d->mv, s->mv, 2*4*2*sizeof(int)); 
4362     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
4363     
4364     /* mpeg1 */
4365     d->mb_skip_run= s->mb_skip_run;
4366     for(i=0; i<3; i++)
4367         d->last_dc[i]= s->last_dc[i];
4368     
4369     /* statistics */
4370     d->mv_bits= s->mv_bits;
4371     d->i_tex_bits= s->i_tex_bits;
4372     d->p_tex_bits= s->p_tex_bits;
4373     d->i_count= s->i_count;
4374     d->f_count= s->f_count;
4375     d->b_count= s->b_count;
4376     d->skip_count= s->skip_count;
4377     d->misc_bits= s->misc_bits;
4378
4379     d->mb_intra= s->mb_intra;
4380     d->mb_skiped= s->mb_skiped;
4381     d->mv_type= s->mv_type;
4382     d->mv_dir= s->mv_dir;
4383     d->pb= s->pb;
4384     if(s->data_partitioning){
4385         d->pb2= s->pb2;
4386         d->tex_pb= s->tex_pb;
4387     }
4388     d->block= s->block;
4389     for(i=0; i<6; i++)
4390         d->block_last_index[i]= s->block_last_index[i];
4391     d->interlaced_dct= s->interlaced_dct;
4392     d->qscale= s->qscale;
4393 }
4394
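/* Trial-encode one MB candidate for the RD macroblock decision: encode into
 * one of two scratch bitstreams/blocks, score it (bit count, or for
 * FF_MB_DECISION_RD bits*lambda2 + (SSE << FF_LAMBDA_SHIFT)) and keep the
 * context of the cheapest candidate in *best. */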
4395 static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type, 
4396                            PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2],
4397                            int *dmin, int *next_block, int motion_x, int motion_y)
4398 {
4399     int score;
4400     uint8_t *dest_backup[3];
4401     
4402     copy_context_before_encode(s, backup, type);
4403
4404     s->block= s->blocks[*next_block];
4405     s->pb= pb[*next_block];
4406     if(s->data_partitioning){
4407         s->pb2   = pb2   [*next_block];
4408         s->tex_pb= tex_pb[*next_block];
4409     }
4410     
4411     if(*next_block){
4412         memcpy(dest_backup, s->dest, sizeof(s->dest));
4413         s->dest[0] = s->rd_scratchpad;
4414         s->dest[1] = s->rd_scratchpad + 16*s->linesize;
4415         s->dest[2] = s->rd_scratchpad + 16*s->linesize + 8;
4416         assert(s->linesize >= 32); //FIXME
4417     }
4418
4419     encode_mb(s, motion_x, motion_y);
4420     
4421     score= put_bits_count(&s->pb);
4422     if(s->data_partitioning){
4423         score+= put_bits_count(&s->pb2);
4424         score+= put_bits_count(&s->tex_pb);
4425     }
4426    
4427     if(s->avctx->mb_decision == FF_MB_DECISION_RD){
4428         MPV_decode_mb(s, s->block);
4429
4430         score *= s->lambda2;
4431         score += sse_mb(s) << FF_LAMBDA_SHIFT;
4432     }
4433     
4434     if(*next_block){
4435         memcpy(s->dest, dest_backup, sizeof(s->dest));
4436     }
4437
4438     if(score<*dmin){
4439         *dmin= score;
4440         *next_block^=1;
4441
4442         copy_context_after_encode(best, s, type);
4443     }
4444 }
4445                 
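/* Sum of squared differences between two w x h blocks; uses the dsputil
 * 16x16/8x8 SSE routines when possible, otherwise a plain C loop. */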
4446 static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride){
4447     uint32_t *sq = squareTbl + 256;
4448     int acc=0;
4449     int x,y;
4450     
4451     if(w==16 && h==16) 
4452         return s->dsp.sse[0](NULL, src1, src2, stride, 16);
4453     else if(w==8 && h==8)
4454         return s->dsp.sse[1](NULL, src1, src2, stride, 8);
4455     
4456     for(y=0; y<h; y++){
4457         for(x=0; x<w; x++){
4458             acc+= sq[src1[x + y*stride] - src2[x + y*stride]];
4459         } 
4460     }
4461     
4462     assert(acc>=0);
4463     
4464     return acc;
4465 }
4466
4467 static int sse_mb(MpegEncContext *s){
4468     int w= 16;
4469     int h= 16;
4470
4471     if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
4472     if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
4473
4474     if(w==16 && h==16)
4475       if(s->avctx->mb_cmp == FF_CMP_NSSE){
4476         return  s->dsp.nsse[0](s, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
4477                +s->dsp.nsse[1](s, s->new_picture.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
4478                +s->dsp.nsse[1](s, s->new_picture.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
4479       }else{
4480         return  s->dsp.sse[0](NULL, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
4481                +s->dsp.sse[1](NULL, s->new_picture.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
4482                +s->dsp.sse[1](NULL, s->new_picture.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
4483       }
4484     else
4485         return  sse(s, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], w, h, s->linesize)
4486                +sse(s, s->new_picture.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize)
4487                +sse(s, s->new_picture.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize);
4488 }
4489
4490 static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){
4491     MpegEncContext *s= arg;
4492
4493     
4494     s->me.pre_pass=1;
4495     s->me.dia_size= s->avctx->pre_dia_size;
4496     s->first_slice_line=1;
4497     for(s->mb_y= s->end_mb_y-1; s->mb_y >= s->start_mb_y; s->mb_y--) {
4498         for(s->mb_x=s->mb_width-1; s->mb_x >=0 ;s->mb_x--) {
4499             ff_pre_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
4500         }
4501         s->first_slice_line=0;
4502     }
4503     
4504     s->me.pre_pass=0;
4505     
4506     return 0;
4507 }
4508
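/* Motion estimation worker: runs P- or B-frame motion estimation for the MB
 * rows assigned to this slice context. */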
4509 static int estimate_motion_thread(AVCodecContext *c, void *arg){
4510     MpegEncContext *s= arg;
4511
4512     s->me.dia_size= s->avctx->dia_size;
4513     s->first_slice_line=1;
4514     for(s->mb_y= s->start_mb_y; s->mb_y < s->end_mb_y; s->mb_y++) {
4515         s->mb_x=0; //for block init below
4516         ff_init_block_index(s);
4517         for(s->mb_x=0; s->mb_x < s->mb_width; s->mb_x++) {
4518             s->block_index[0]+=2;
4519             s->block_index[1]+=2;
4520             s->block_index[2]+=2;
4521             s->block_index[3]+=2;
4522             
4523             /* compute motion vector & mb_type and store in context */
4524             if(s->pict_type==B_TYPE)
4525                 ff_estimate_b_frame_motion(s, s->mb_x, s->mb_y);
4526             else
4527                 ff_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
4528         }
4529         s->first_slice_line=0;
4530     }
4531     return 0;
4532 }
4533
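/* Spatial complexity pass: pix_sum/pix_norm1 give the mean and a scaled
 * variance of each 16x16 luma block, stored in mb_mean/mb_var and accumulated
 * in me.mb_var_sum_temp for rate control. */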
4534 static int mb_var_thread(AVCodecContext *c, void *arg){
4535     MpegEncContext *s= arg;
4536     int mb_x, mb_y;
4537
4538     for(mb_y=s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
4539         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
4540             int xx = mb_x * 16;
4541             int yy = mb_y * 16;
4542             uint8_t *pix = s->new_picture.data[0] + (yy * s->linesize) + xx;
4543             int varc;
4544             int sum = s->dsp.pix_sum(pix, s->linesize);
4545     
4546             varc = (s->dsp.pix_norm1(pix, s->linesize) - (((unsigned)(sum*sum))>>8) + 500 + 128)>>8;
4547
4548             s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc;
4549             s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
4550             s->me.mb_var_sum_temp    += varc;
4551         }
4552     }
4553     return 0;
4554 }
4555
4556 static void write_slice_end(MpegEncContext *s){
4557     if(s->codec_id==CODEC_ID_MPEG4){
4558         if(s->partitioned_frame){
4559             ff_mpeg4_merge_partitions(s);
4560         }
4561     
4562         ff_mpeg4_stuffing(&s->pb);
4563     }else if(s->out_format == FMT_MJPEG){
4564         ff_mjpeg_stuffing(&s->pb);
4565     }
4566
4567     align_put_bits(&s->pb);
4568     flush_put_bits(&s->pb);
4569     
4570     if((s->flags&CODEC_FLAG_PASS1) && !s->partitioned_frame)
4571         s->misc_bits+= get_bits_diff(s);
4572 }
4573
4574 static int encode_thread(AVCodecContext *c, void *arg){
4575     MpegEncContext *s= arg;
4576     int mb_x, mb_y, pdif = 0;
4577     int i, j;
4578     MpegEncContext best_s, backup_s;
4579     uint8_t bit_buf[2][MAX_MB_BYTES];
4580     uint8_t bit_buf2[2][MAX_MB_BYTES];
4581     uint8_t bit_buf_tex[2][MAX_MB_BYTES];
4582     PutBitContext pb[2], pb2[2], tex_pb[2];
4583 //printf("%d->%d\n", s->resync_mb_y, s->end_mb_y);
4584
4585     for(i=0; i<2; i++){
4586         init_put_bits(&pb    [i], bit_buf    [i], MAX_MB_BYTES);
4587         init_put_bits(&pb2   [i], bit_buf2   [i], MAX_MB_BYTES);
4588         init_put_bits(&tex_pb[i], bit_buf_tex[i], MAX_MB_BYTES);
4589     }
4590
4591     s->last_bits= put_bits_count(&s->pb);
4592     s->mv_bits=0;
4593     s->misc_bits=0;
4594     s->i_tex_bits=0;
4595     s->p_tex_bits=0;
4596     s->i_count=0;
4597     s->f_count=0;
4598     s->b_count=0;
4599     s->skip_count=0;
4600
4601     for(i=0; i<3; i++){
4602         /* init last dc values */
4603         /* note: quant matrix value (8) is implied here */
4604         s->last_dc[i] = 128 << s->intra_dc_precision;
4605         
4606         s->current_picture_ptr->error[i] = 0;
4607     }
4608     s->mb_skip_run = 0;
4609     memset(s->last_mv, 0, sizeof(s->last_mv));
4610      
4611     s->last_mv_dir = 0;
4612
4613     switch(s->codec_id){
4614     case CODEC_ID_H263:
4615     case CODEC_ID_H263P:
4616     case CODEC_ID_FLV1:
4617         s->gob_index = ff_h263_get_gob_height(s);
4618         break;
4619     case CODEC_ID_MPEG4:
4620         if(s->partitioned_frame)
4621             ff_mpeg4_init_partitions(s);
4622         break;
4623     }
4624
4625     s->resync_mb_x=0;
4626     s->resync_mb_y=0; 
4627     s->first_slice_line = 1;
4628     s->ptr_lastgob = s->pb.buf;
4629     for(mb_y= s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
4630 //    printf("row %d at %X\n", s->mb_y, (int)s);
4631         s->mb_x=0;
4632         s->mb_y= mb_y;
4633
4634         ff_set_qscale(s, s->qscale);
4635         ff_init_block_index(s);
4636         
4637         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
4638             int xy= mb_y*s->mb_stride + mb_x; // removed const, H261 needs to adjust this
4639             int mb_type= s->mb_type[xy];
4640 //            int d;
4641             int dmin= INT_MAX;
4642             int dir;
4643
4644             if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < MAX_MB_BYTES){
4645                 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
4646                 return -1;
4647             }
4648             if(s->data_partitioning){
4649                 if(   s->pb2   .buf_end - s->pb2   .buf - (put_bits_count(&s->    pb2)>>3) < MAX_MB_BYTES
4650                    || s->tex_pb.buf_end - s->tex_pb.buf - (put_bits_count(&s->tex_pb )>>3) < MAX_MB_BYTES){
4651                     av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
4652                     return -1;
4653                 }
4654             }
4655
4656             s->mb_x = mb_x;
4657             s->mb_y = mb_y;  // moved into loop, can get changed by H.261
4658             ff_update_block_index(s);
4659
4660             if(s->codec_id == CODEC_ID_H261){
4661                 ff_h261_reorder_mb_index(s);
4662                 xy= s->mb_y*s->mb_stride + s->mb_x;
4663                 mb_type= s->mb_type[xy];
4664             }
4665
4666             /* write gob / video packet header  */
4667             if(s->rtp_mode){
4668                 int current_packet_size, is_gob_start;
4669                 
4670                 current_packet_size= ((put_bits_count(&s->pb)+7)>>3) - (s->ptr_lastgob - s->pb.buf);
4671                 
4672                 is_gob_start= s->avctx->rtp_payload_size && current_packet_size >= s->avctx->rtp_payload_size && mb_y + mb_x>0; 
4673                 
4674                 if(s->start_mb_y == mb_y && mb_y > 0 && mb_x==0) is_gob_start=1;
4675                 
4676                 switch(s->codec_id){
4677                 case CODEC_ID_H263:
4678                 case CODEC_ID_H263P:
4679                     if(!s->h263_slice_structured)
4680                         if(s->mb_x || s->mb_y%s->gob_index) is_gob_start=0;
4681                     break;
4682                 case CODEC_ID_MPEG2VIDEO:
4683                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
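                    /* fall through: the mb_skip_run check below applies to MPEG-2 as well */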
4684                 case CODEC_ID_MPEG1VIDEO:
4685                     if(s->mb_skip_run) is_gob_start=0;
4686                     break;
4687                 }
4688
4689                 if(is_gob_start){
4690                     if(s->start_mb_y != mb_y || mb_x!=0){
4691                         write_slice_end(s);
4692
4693                         if(s->codec_id==CODEC_ID_MPEG4 && s->partitioned_frame){
4694                             ff_mpeg4_init_partitions(s);
4695                         }
4696                     }
4697                 
4698                     assert((put_bits_count(&s->pb)&7) == 0);
4699                     current_packet_size= pbBufPtr(&s->pb) - s->ptr_lastgob;
4700                     
4701                     if(s->avctx->error_rate && s->resync_mb_x + s->resync_mb_y > 0){
4702                         int r= put_bits_count(&s->pb)/8 + s->picture_number + 16 + s->mb_x + s->mb_y;
4703                         int d= 100 / s->avctx->error_rate;
4704                         if(r % d == 0){
4705                             current_packet_size=0;
4706 #ifndef ALT_BITSTREAM_WRITER
4707                             s->pb.buf_ptr= s->ptr_lastgob;
4708 #endif
4709                             assert(pbBufPtr(&s->pb) == s->ptr_lastgob);
4710                         }
4711                     }
4712
4713                     if (s->avctx->rtp_callback){
4714                         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width + mb_x - s->resync_mb_x;
4715                         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, current_packet_size, number_mb);
4716                     }
4717                     
4718                     switch(s->codec_id){
4719                     case CODEC_ID_MPEG4:
4720                         ff_mpeg4_encode_video_packet_header(s);
4721                         ff_mpeg4_clean_buffers(s);
4722                     break;
4723                     case CODEC_ID_MPEG1VIDEO:
4724                     case CODEC_ID_MPEG2VIDEO:
4725                         ff_mpeg1_encode_slice_header(s);
4726                         ff_mpeg1_clean_buffers(s);
4727                     break;
4728                     case CODEC_ID_H263:
4729                     case CODEC_ID_H263P:
4730                         h263_encode_gob_header(s, mb_y);                       
4731                     break;
4732                     }
4733
4734                     if(s->flags&CODEC_FLAG_PASS1){
4735                         int bits= put_bits_count(&s->pb);
4736                         s->misc_bits+= bits - s->last_bits;
4737                         s->last_bits= bits;
4738                     }
4739     
4740                     s->ptr_lastgob += current_packet_size;
4741                     s->first_slice_line=1;
4742                     s->resync_mb_x=mb_x;
4743                     s->resync_mb_y=mb_y;
4744                 }
4745             }
4746
4747             if(  (s->resync_mb_x   == s->mb_x)
4748                && s->resync_mb_y+1 == s->mb_y){
4749                 s->first_slice_line=0; 
4750             }
4751
4752             s->mb_skiped=0;
4753             s->dquant=0; //only for QP_RD
4754
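            /* More than one candidate MB type (or QP RD): try each candidate
             * with encode_mb_hq(), which ping-pongs between two bit buffers
             * via next_block, then copy the winning context back into s. */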
4755             if(mb_type & (mb_type-1) || (s->flags & CODEC_FLAG_QP_RD)){ // more than 1 MB type possible or CODEC_FLAG_QP_RD
4756                 int next_block=0;
4757                 int pb_bits_count, pb2_bits_count, tex_pb_bits_count;
4758
4759                 copy_context_before_encode(&backup_s, s, -1);
4760                 backup_s.pb= s->pb;
4761                 best_s.data_partitioning= s->data_partitioning;
4762                 best_s.partitioned_frame= s->partitioned_frame;
4763                 if(s->data_partitioning){
4764                     backup_s.pb2= s->pb2;
4765                     backup_s.tex_pb= s->tex_pb;
4766                 }
4767
4768                 if(mb_type&CANDIDATE_MB_TYPE_INTER){
4769                     s->mv_dir = MV_DIR_FORWARD;
4770                     s->mv_type = MV_TYPE_16X16;
4771                     s->mb_intra= 0;
4772                     s->mv[0][0][0] = s->p_mv_table[xy][0];
4773                     s->mv[0][0][1] = s->p_mv_table[xy][1];
4774                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER, pb, pb2, tex_pb, 
4775                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
4776                 }
4777                 if(mb_type&CANDIDATE_MB_TYPE_INTER_I){ 
4778                     s->mv_dir = MV_DIR_FORWARD;
4779                     s->mv_type = MV_TYPE_FIELD;
4780                     s->mb_intra= 0;
4781                     for(i=0; i<2; i++){
4782                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
4783                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
4784                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
4785                     }
4786                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER_I, pb, pb2, tex_pb, 
4787                                  &dmin, &next_block, 0, 0);
4788                 }
4789                 if(mb_type&CANDIDATE_MB_TYPE_SKIPED){
4790                     s->mv_dir = MV_DIR_FORWARD;
4791                     s->mv_type = MV_TYPE_16X16;
4792                     s->mb_intra= 0;
4793                     s->mv[0][0][0] = 0;
4794                     s->mv[0][0][1] = 0;
4795                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_SKIPED, pb, pb2, tex_pb, 
4796                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
4797                 }
4798                 if(mb_type&CANDIDATE_MB_TYPE_INTER4V){                 
4799                     s->mv_dir = MV_DIR_FORWARD;
4800                     s->mv_type = MV_TYPE_8X8;
4801                     s->mb_intra= 0;
4802                     for(i=0; i<4; i++){
4803                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
4804                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
4805                     }
4806                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER4V, pb, pb2, tex_pb, 
4807                                  &dmin, &next_block, 0, 0);
4808                 }
4809                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD){
4810                     s->mv_dir = MV_DIR_FORWARD;
4811                     s->mv_type = MV_TYPE_16X16;
4812                     s->mb_intra= 0;
4813                     s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
4814                     s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
4815                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD, pb, pb2, tex_pb, 
4816                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
4817                 }
4818                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD){
4819                     s->mv_dir = MV_DIR_BACKWARD;
4820                     s->mv_type = MV_TYPE_16X16;
4821                     s->mb_intra= 0;
4822                     s->mv[1][0][0] = s->b_back_mv_table[xy][0];
4823                     s->mv[1][0][1] = s->b_back_mv_table[xy][1];
4824                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD, pb, pb2, tex_pb, 
4825                                  &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]);
4826                 }
4827                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR){
4828                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
4829                     s->mv_type = MV_TYPE_16X16;
4830                     s->mb_intra= 0;
4831                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
4832                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
4833                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
4834                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
4835                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR, pb, pb2, tex_pb, 
4836                                  &dmin, &next_block, 0, 0);
4837                 }
4838                 if(mb_type&CANDIDATE_MB_TYPE_DIRECT){
4839                     int mx= s->b_direct_mv_table[xy][0];
4840                     int my= s->b_direct_mv_table[xy][1];
4841                     
4842                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
4843                     s->mb_intra= 0;
4844                     ff_mpeg4_set_direct_mv(s, mx, my);
4845                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb, 
4846                                  &dmin, &next_block, mx, my);
4847                 }
4848                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD_I){ 
4849                     s->mv_dir = MV_DIR_FORWARD;
4850                     s->mv_type = MV_TYPE_FIELD;
4851                     s->mb_intra= 0;
4852                     for(i=0; i<2; i++){
4853                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
4854                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
4855                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
4856                     }
4857                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD_I, pb, pb2, tex_pb, 
4858                                  &dmin, &next_block, 0, 0);
4859                 }
4860                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD_I){ 
4861                     s->mv_dir = MV_DIR_BACKWARD;
4862                     s->mv_type = MV_TYPE_FIELD;
4863                     s->mb_intra= 0;
4864                     for(i=0; i<2; i++){
4865                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
4866                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
4867                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
4868                     }
4869                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD_I, pb, pb2, tex_pb, 
4870                                  &dmin, &next_block, 0, 0);
4871                 }
4872                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR_I){ 
4873                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
4874                     s->mv_type = MV_TYPE_FIELD;
4875                     s->mb_intra= 0;
4876                     for(dir=0; dir<2; dir++){
4877                         for(i=0; i<2; i++){
4878                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
4879                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
4880                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
4881                         }
4882                     }
4883                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR_I, pb, pb2, tex_pb, 
4884                                  &dmin, &next_block, 0, 0);
4885                 }
4886                 if(mb_type&CANDIDATE_MB_TYPE_INTRA){
4887                     s->mv_dir = 0;
4888                     s->mv_type = MV_TYPE_16X16;
4889                     s->mb_intra= 1;
4890                     s->mv[0][0][0] = 0;
4891                     s->mv[0][0][1] = 0;
4892                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTRA, pb, pb2, tex_pb, 
4893                                  &dmin, &next_block, 0, 0);
4894                     if(s->h263_pred || s->h263_aic){
4895                         if(best_s.mb_intra)
4896                             s->mbintra_table[mb_x + mb_y*s->mb_stride]=1;
4897                         else
4898                             ff_clean_intra_table_entries(s); //old mode?
4899                     }
4900                 }
4901
4902                 if(s->flags & CODEC_FLAG_QP_RD){
4903                     if(best_s.mv_type==MV_TYPE_16X16 && !(best_s.mv_dir&MV_DIRECT)){
4904                         const int last_qp= backup_s.qscale;
4905                         int dquant, dir, qp, dc[6];
4906                         DCTELEM ac[6][16];
4907                         const int mvdir= (best_s.mv_dir&MV_DIR_BACKWARD) ? 1 : 0;
4908                         
4909                         assert(backup_s.dquant == 0);
4910
4911                         //FIXME intra
4912                         s->mv_dir= best_s.mv_dir;
4913                         s->mv_type = MV_TYPE_16X16;
4914                         s->mb_intra= best_s.mb_intra;
4915                         s->mv[0][0][0] = best_s.mv[0][0][0];
4916                         s->mv[0][0][1] = best_s.mv[0][0][1];
4917                         s->mv[1][0][0] = best_s.mv[1][0][0];
4918                         s->mv[1][0][1] = best_s.mv[1][0][1];
4919                         
4920                         dir= s->pict_type == B_TYPE ? 2 : 1;
4921                         if(last_qp + dir > s->avctx->qmax) dir= -dir;
4922                         for(dquant= dir; dquant<=2 && dquant>=-2; dquant += dir){
4923                             qp= last_qp + dquant;
4924                             if(qp < s->avctx->qmin || qp > s->avctx->qmax)
4925                                 break;
4926                             backup_s.dquant= dquant;
4927                             if(s->mb_intra && s->dc_val[0]){
4928                                 for(i=0; i<6; i++){
4929                                     dc[i]= s->dc_val[0][ s->block_index[i] ];
4930                                     memcpy(ac[i], s->ac_val[0][s->block_index[i]], sizeof(DCTELEM)*16);
4931                                 }
4932                             }
4933
4934                             encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb, 
4935                                          &dmin, &next_block, s->mv[mvdir][0][0], s->mv[mvdir][0][1]);
4936                             if(best_s.qscale != qp){
4937                                 if(s->mb_intra && s->dc_val[0]){
4938                                     for(i=0; i<6; i++){
4939                                         s->dc_val[0][ s->block_index[i] ]= dc[i];
4940                                         memcpy(s->ac_val[0][s->block_index[i]], ac[i], sizeof(DCTELEM)*16);
4941                                     }
4942                                 }
4943                                 if(dir > 0 && dquant==dir){
4944                                     dquant= 0;
4945                                     dir= -dir;
4946                                 }else
4947                                     break;
4948                             }
4949                         }
4950                         qp= best_s.qscale;
4951                         s->current_picture.qscale_table[xy]= qp;
4952                     }
4953                 }
4954
4955                 copy_context_after_encode(s, &best_s, -1);
4956                 
4957                 pb_bits_count= put_bits_count(&s->pb);
4958                 flush_put_bits(&s->pb);
4959                 ff_copy_bits(&backup_s.pb, bit_buf[next_block^1], pb_bits_count);
4960                 s->pb= backup_s.pb;
4961                 
4962                 if(s->data_partitioning){
4963                     pb2_bits_count= put_bits_count(&s->pb2);
4964                     flush_put_bits(&s->pb2);
4965                     ff_copy_bits(&backup_s.pb2, bit_buf2[next_block^1], pb2_bits_count);
4966                     s->pb2= backup_s.pb2;
4967                     
4968                     tex_pb_bits_count= put_bits_count(&s->tex_pb);
4969                     flush_put_bits(&s->tex_pb);
4970                     ff_copy_bits(&backup_s.tex_pb, bit_buf_tex[next_block^1], tex_pb_bits_count);
4971                     s->tex_pb= backup_s.tex_pb;
4972                 }
4973                 s->last_bits= put_bits_count(&s->pb);
4974                
4975                 if (s->out_format == FMT_H263 && s->pict_type!=B_TYPE)
4976                     ff_h263_update_motion_val(s);
4977         
4978                 if(next_block==0){ //FIXME 16 vs linesize16
4979                     s->dsp.put_pixels_tab[0][0](s->dest[0], s->rd_scratchpad                     , s->linesize  ,16);
4980                     s->dsp.put_pixels_tab[1][0](s->dest[1], s->rd_scratchpad + 16*s->linesize    , s->uvlinesize, 8);
4981                     s->dsp.put_pixels_tab[1][0](s->dest[2], s->rd_scratchpad + 16*s->linesize + 8, s->uvlinesize, 8);
4982                 }
4983
4984                 if(s->avctx->mb_decision == FF_MB_DECISION_BITS)
4985                     MPV_decode_mb(s, s->block);
4986             } else {
4987                 int motion_x, motion_y;
4988                 s->mv_type=MV_TYPE_16X16;
4989                 // only one MB-Type possible
4990                 
4991                 switch(mb_type){
4992                 case CANDIDATE_MB_TYPE_INTRA:
4993                     s->mv_dir = 0;
4994                     s->mb_intra= 1;
4995                     motion_x= s->mv[0][0][0] = 0;
4996                     motion_y= s->mv[0][0][1] = 0;
4997                     break;
4998                 case CANDIDATE_MB_TYPE_INTER:
4999                     s->mv_dir = MV_DIR_FORWARD;
5000                     s->mb_intra= 0;
5001                     motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
5002                     motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
5003                     break;
5004                 case CANDIDATE_MB_TYPE_INTER_I:
5005                     s->mv_dir = MV_DIR_FORWARD;
5006                     s->mv_type = MV_TYPE_FIELD;
5007                     s->mb_intra= 0;
5008                     for(i=0; i<2; i++){
5009                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
5010                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
5011                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
5012                     }
5013                     motion_x = motion_y = 0;
5014                     break;
5015                 case CANDIDATE_MB_TYPE_INTER4V:
5016                     s->mv_dir = MV_DIR_FORWARD;
5017                     s->mv_type = MV_TYPE_8X8;
5018                     s->mb_intra= 0;
5019                     for(i=0; i<4; i++){
5020                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
5021                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
5022                     }
5023                     motion_x= motion_y= 0;
5024                     break;
5025                 case CANDIDATE_MB_TYPE_DIRECT:
5026                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
5027                     s->mb_intra= 0;
5028                     motion_x=s->b_direct_mv_table[xy][0];
5029                     motion_y=s->b_direct_mv_table[xy][1];
5030                     ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
5031                     break;
5032                 case CANDIDATE_MB_TYPE_BIDIR:
5033                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
5034                     s->mb_intra= 0;
5035                     motion_x=0;
5036                     motion_y=0;
5037                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
5038                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
5039                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
5040                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
5041                     break;
5042                 case CANDIDATE_MB_TYPE_BACKWARD:
5043                     s->mv_dir = MV_DIR_BACKWARD;
5044                     s->mb_intra= 0;
5045                     motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
5046                     motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
5047                     break;
5048                 case CANDIDATE_MB_TYPE_FORWARD:
5049                     s->mv_dir = MV_DIR_FORWARD;
5050                     s->mb_intra= 0;
5051                     motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
5052                     motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
5053 //                    printf(" %d %d ", motion_x, motion_y);
5054                     break;
5055                 case CANDIDATE_MB_TYPE_FORWARD_I:
5056                     s->mv_dir = MV_DIR_FORWARD;
5057                     s->mv_type = MV_TYPE_FIELD;
5058                     s->mb_intra= 0;
5059                     for(i=0; i<2; i++){
5060                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
5061                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
5062                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
5063                     }
5064                     motion_x=motion_y=0;
5065                     break;
5066                 case CANDIDATE_MB_TYPE_BACKWARD_I:
5067                     s->mv_dir = MV_DIR_BACKWARD;
5068                     s->mv_type = MV_TYPE_FIELD;
5069                     s->mb_intra= 0;
5070                     for(i=0; i<2; i++){
5071                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
5072                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
5073                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
5074                     }
5075                     motion_x=motion_y=0;
5076                     break;
5077                 case CANDIDATE_MB_TYPE_BIDIR_I:
5078                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
5079                     s->mv_type = MV_TYPE_FIELD;
5080                     s->mb_intra= 0;
5081                     for(dir=0; dir<2; dir++){
5082                         for(i=0; i<2; i++){
5083                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
5084                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
5085                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
5086                         }
5087                     }
5088                     motion_x=motion_y=0;
5089                     break;
5090                 default:
5091                     motion_x=motion_y=0; //gcc warning fix
5092                     av_log(s->avctx, AV_LOG_ERROR, "illegal MB type\n");
5093                 }
5094
5095                 encode_mb(s, motion_x, motion_y);
5096
5097                 // RAL: Update the last macroblock type
5098                 s->last_mv_dir = s->mv_dir;
5099             
5100                 if (s->out_format == FMT_H263 && s->pict_type!=B_TYPE)
5101                     ff_h263_update_motion_val(s);
5102                 
5103                 MPV_decode_mb(s, s->block);
5104             }
5105
5106             /* clear the MV table entries of intra MBs in I/P/S frames; they are reused for direct mode in B-frames */
5107             if(s->mb_intra /* && I,P,S_TYPE */){
5108                 s->p_mv_table[xy][0]=0;
5109                 s->p_mv_table[xy][1]=0;
5110             }
5111             
5112             if(s->flags&CODEC_FLAG_PSNR){
5113                 int w= 16;
5114                 int h= 16;
5115
5116                 if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
5117                 if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
5118
5119                 s->current_picture_ptr->error[0] += sse(
5120                     s, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
5121                     s->dest[0], w, h, s->linesize);
5122                 s->current_picture_ptr->error[1] += sse(
5123                     s, s->new_picture.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,
5124                     s->dest[1], w>>1, h>>1, s->uvlinesize);
5125                 s->current_picture_ptr->error[2] += sse(
5126                     s, s->new_picture    .data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,
5127                     s->dest[2], w>>1, h>>1, s->uvlinesize);
5128             }
5129             if(s->loop_filter){
5130                 if(s->out_format == FMT_H263)
5131                     ff_h263_loop_filter(s);
5132             }
5133 //printf("MB %d %d bits\n", s->mb_x+s->mb_y*s->mb_stride, put_bits_count(&s->pb));
5134         }
5135     }
5136
5137     //not beautiful, but this must be written before flushing, so it has to be here
5138     if (s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type == I_TYPE)
5139         msmpeg4_encode_ext_header(s);
5140
5141     write_slice_end(s);
5142
5143     /* Send the last GOB if RTP */    
5144     if (s->avctx->rtp_callback) {
5145         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width - s->resync_mb_x;
5146         pdif = pbBufPtr(&s->pb) - s->ptr_lastgob;
5147         /* Call the RTP callback to send the last GOB */
5148         emms_c();
5149         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, pdif, number_mb);
5150     }
5151
5152     return 0;
5153 }
5154
5155 #define MERGE(field) dst->field += src->field; src->field=0
5156 static void merge_context_after_me(MpegEncContext *dst, MpegEncContext *src){
5157     MERGE(me.scene_change_score);
5158     MERGE(me.mc_mb_var_sum_temp);
5159     MERGE(me.mb_var_sum_temp);
5160 }
5161
5162 static void merge_context_after_encode(MpegEncContext *dst, MpegEncContext *src){
5163     int i;
5164
5165     MERGE(dct_count[0]); //note, the other dct vars are not part of the context
5166     MERGE(dct_count[1]);
5167     MERGE(mv_bits);
5168     MERGE(i_tex_bits);
5169     MERGE(p_tex_bits);
5170     MERGE(i_count);
5171     MERGE(f_count);
5172     MERGE(b_count);
5173     MERGE(skip_count);
5174     MERGE(misc_bits);
5175     MERGE(error_count);
5176     MERGE(padding_bug_score);
5177
5178     if(dst->avctx->noise_reduction){
5179         for(i=0; i<64; i++){
5180             MERGE(dct_error_sum[0][i]);
5181             MERGE(dct_error_sum[1][i]);
5182         }
5183     }
5184     
5185     assert(put_bits_count(&src->pb) % 8 ==0);
5186     assert(put_bits_count(&dst->pb) % 8 ==0);
5187     ff_copy_bits(&dst->pb, src->pb.buf, put_bits_count(&src->pb));
5188     flush_put_bits(&dst->pb);
5189 }
5190
5191 static void encode_picture(MpegEncContext *s, int picture_number)
5192 {
5193     int i;
5194     int bits;
5195
5196     s->picture_number = picture_number;
5197     
5198     /* Reset the average MB variance */
5199     s->me.mb_var_sum_temp    =
5200     s->me.mc_mb_var_sum_temp = 0;
5201
5202     /* we need to initialize some time vars before we can encode b-frames */
5203     // RAL: Condition added for MPEG1VIDEO
5204     if (s->codec_id == CODEC_ID_MPEG1VIDEO || s->codec_id == CODEC_ID_MPEG2VIDEO || (s->h263_pred && !s->h263_msmpeg4))
5205         ff_set_mpeg4_time(s, s->picture_number);  //FIXME rename and use has_b_frames or similar
5206         
5207     s->me.scene_change_score=0;
5208     
5209 //    s->lambda= s->current_picture_ptr->quality; //FIXME qscale / ... stuff for ME rate distortion
5210     
5211     if(s->pict_type==I_TYPE){
5212         if(s->msmpeg4_version >= 3) s->no_rounding=1;
5213         else                        s->no_rounding=0;
5214     }else if(s->pict_type!=B_TYPE){
5215         if(s->flipflop_rounding || s->codec_id == CODEC_ID_H263P || s->codec_id == CODEC_ID_MPEG4)
5216             s->no_rounding ^= 1;          
5217     }
5218     
5219     s->mb_intra=0; //for the rate distortion & bit compare functions
5220     for(i=1; i<s->avctx->thread_count; i++){
5221         ff_update_duplicate_context(s->thread_context[i], s);
5222     }
5223
5224     ff_init_me(s);
5225
5226     /* Estimate motion for every MB */
5227     if(s->pict_type != I_TYPE){
5228         if(s->pict_type != B_TYPE && s->avctx->me_threshold==0){
5229             if((s->avctx->pre_me && s->last_non_b_pict_type==I_TYPE) || s->avctx->pre_me==2){
5230                 s->avctx->execute(s->avctx, pre_estimate_motion_thread, (void**)&(s->thread_context[0]), NULL, s->avctx->thread_count);
5231             }
5232         }
5233
5234         s->avctx->execute(s->avctx, estimate_motion_thread, (void**)&(s->thread_context[0]), NULL, s->avctx->thread_count);
5235     }else /* if(s->pict_type == I_TYPE) */{
5236         /* I-Frame */
5237         for(i=0; i<s->mb_stride*s->mb_height; i++)
5238             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
5239         
5240         if(!s->fixed_qscale){
5241             /* find the spatial complexity for I-frame rate control */
5242             s->avctx->execute(s->avctx, mb_var_thread, (void**)&(s->thread_context[0]), NULL, s->avctx->thread_count);
5243         }
5244     }
5245     for(i=1; i<s->avctx->thread_count; i++){
5246         merge_context_after_me(s, s->thread_context[i]);
5247     }
5248     s->current_picture.mc_mb_var_sum= s->current_picture_ptr->mc_mb_var_sum= s->me.mc_mb_var_sum_temp;
5249     s->current_picture.   mb_var_sum= s->current_picture_ptr->   mb_var_sum= s->me.   mb_var_sum_temp;
5250     emms_c();
5251
5252     if(s->me.scene_change_score > s->avctx->scenechange_threshold && s->pict_type == P_TYPE){
5253         s->pict_type= I_TYPE;
5254         for(i=0; i<s->mb_stride*s->mb_height; i++)
5255             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
5256 //printf("Scene change detected, encoding as I Frame %d %d\n", s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
5257     }
5258
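    /* Pick f_code/b_code just large enough for the estimated motion vectors,
     * then let ff_fix_long_mvs() clean up the vectors that still do not fit. */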
5259     if(!s->umvplus){
5260         if(s->pict_type==P_TYPE || s->pict_type==S_TYPE) {
5261             s->f_code= ff_get_best_fcode(s, s->p_mv_table, CANDIDATE_MB_TYPE_INTER);
5262
5263             if(s->flags & CODEC_FLAG_INTERLACED_ME){
5264                 int a,b;
5265                 a= ff_get_best_fcode(s, s->p_field_mv_table[0][0], CANDIDATE_MB_TYPE_INTER_I); //FIXME field_select
5266                 b= ff_get_best_fcode(s, s->p_field_mv_table[1][1], CANDIDATE_MB_TYPE_INTER_I);
5267                 s->f_code= FFMAX(s->f_code, FFMAX(a,b));
5268             }
5269                     
5270             ff_fix_long_p_mvs(s);
5271             ff_fix_long_mvs(s, NULL, 0, s->p_mv_table, s->f_code, CANDIDATE_MB_TYPE_INTER, 0);
5272             if(s->flags & CODEC_FLAG_INTERLACED_ME){
5273                 int j;
5274                 for(i=0; i<2; i++){
5275                     for(j=0; j<2; j++)
5276                         ff_fix_long_mvs(s, s->p_field_select_table[i], j, 
5277                                         s->p_field_mv_table[i][j], s->f_code, CANDIDATE_MB_TYPE_INTER_I, 0);
5278                 }
5279             }
5280         }
5281
5282         if(s->pict_type==B_TYPE){
5283             int a, b;
5284
5285             a = ff_get_best_fcode(s, s->b_forw_mv_table, CANDIDATE_MB_TYPE_FORWARD);
5286             b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, CANDIDATE_MB_TYPE_BIDIR);
5287             s->f_code = FFMAX(a, b);
5288
5289             a = ff_get_best_fcode(s, s->b_back_mv_table, CANDIDATE_MB_TYPE_BACKWARD);
5290             b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, CANDIDATE_MB_TYPE_BIDIR);
5291             s->b_code = FFMAX(a, b);
5292
5293             ff_fix_long_mvs(s, NULL, 0, s->b_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_FORWARD, 1);
5294             ff_fix_long_mvs(s, NULL, 0, s->b_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BACKWARD, 1);
5295             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_BIDIR, 1);
5296             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BIDIR, 1);
5297             if(s->flags & CODEC_FLAG_INTERLACED_ME){
5298                 int dir, j;
5299                 for(dir=0; dir<2; dir++){
5300                     for(i=0; i<2; i++){
5301                         for(j=0; j<2; j++){
5302                             int type= dir ? (CANDIDATE_MB_TYPE_BACKWARD_I|CANDIDATE_MB_TYPE_BIDIR_I) 
5303                                           : (CANDIDATE_MB_TYPE_FORWARD_I |CANDIDATE_MB_TYPE_BIDIR_I);
5304                             ff_fix_long_mvs(s, s->b_field_select_table[dir][i], j, 
5305                                             s->b_field_mv_table[dir][i][j], dir ? s->b_code : s->f_code, type, 1);
5306                         }
5307                     }
5308                 }
5309             }
5310         }
5311     }
5312
5313     if (!s->fixed_qscale) 
5314         s->current_picture.quality = ff_rate_estimate_qscale(s); //FIXME pic_ptr
5315
5316     if(s->adaptive_quant){
5317         switch(s->codec_id){
5318         case CODEC_ID_MPEG4:
5319             ff_clean_mpeg4_qscales(s);
5320             break;
5321         case CODEC_ID_H263:
5322         case CODEC_ID_H263P:
5323         case CODEC_ID_FLV1:
5324             ff_clean_h263_qscales(s);
5325             break;
5326         }
5327
5328         s->lambda= s->lambda_table[0];
5329         //FIXME broken
5330     }else
5331         s->lambda= s->current_picture.quality;
5332 //printf("%d %d\n", s->avctx->global_quality, s->current_picture.quality);
5333     update_qscale(s);
5334     
5335     if(s->qscale < 3 && s->max_qcoeff<=128 && s->pict_type==I_TYPE && !(s->flags & CODEC_FLAG_QSCALE)) 
5336         s->qscale= 3; //reduce clipping problems
5337         
5338     if (s->out_format == FMT_MJPEG) {
5339         /* for mjpeg, we do include qscale in the matrix */
5340         s->intra_matrix[0] = ff_mpeg1_default_intra_matrix[0];
5341         for(i=1;i<64;i++){
5342             int j= s->dsp.idct_permutation[i];
5343
5344             s->intra_matrix[j] = CLAMP_TO_8BIT((ff_mpeg1_default_intra_matrix[i] * s->qscale) >> 3);
5345         }
5346         convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16, 
5347                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
5348         s->qscale= 8;
5349     }
5350     
5351     //FIXME var duplication
5352     s->current_picture_ptr->key_frame=
5353     s->current_picture.key_frame= s->pict_type == I_TYPE; //FIXME pic_ptr
5354     s->current_picture_ptr->pict_type=
5355     s->current_picture.pict_type= s->pict_type;
5356
5357     if(s->current_picture.key_frame)
5358         s->picture_in_gop_number=0;
5359
5360     s->last_bits= put_bits_count(&s->pb);
5361     switch(s->out_format) {
5362     case FMT_MJPEG:
5363         mjpeg_picture_header(s);
5364         break;
5365     case FMT_H261:
5366         ff_h261_encode_picture_header(s, picture_number);
5367         break;
5368     case FMT_H263:
5369         if (s->codec_id == CODEC_ID_WMV2) 
5370             ff_wmv2_encode_picture_header(s, picture_number);
5371         else if (s->h263_msmpeg4) 
5372             msmpeg4_encode_picture_header(s, picture_number);
5373         else if (s->h263_pred)
5374             mpeg4_encode_picture_header(s, picture_number);
5375         else if (s->codec_id == CODEC_ID_RV10) 
5376             rv10_encode_picture_header(s, picture_number);
5377         else if (s->codec_id == CODEC_ID_RV20) 
5378             rv20_encode_picture_header(s, picture_number);
5379         else if (s->codec_id == CODEC_ID_FLV1)
5380             ff_flv_encode_picture_header(s, picture_number);
5381         else
5382             h263_encode_picture_header(s, picture_number);
5383         break;
5384     case FMT_MPEG1:
5385         mpeg1_encode_picture_header(s, picture_number);
5386         break;
5387     case FMT_H264:
5388         break;
5389     default:
5390         assert(0);
5391     }
5392     bits= put_bits_count(&s->pb);
5393     s->header_bits= bits - s->last_bits;
5394         
5395     for(i=1; i<s->avctx->thread_count; i++){
5396         update_duplicate_context_after_me(s->thread_context[i], s);
5397     }
5398     s->avctx->execute(s->avctx, encode_thread, (void**)&(s->thread_context[0]), NULL, s->avctx->thread_count);
5399     for(i=1; i<s->avctx->thread_count; i++){
5400         merge_context_after_encode(s, s->thread_context[i]);
5401     }
5402     emms_c();
5403 }
5404
5405 #endif //CONFIG_ENCODERS
5406
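/**
 * DCT "denoising": for each coefficient position, accumulate the absolute
 * level in dct_error_sum and shrink the level towards zero by the running
 * dct_offset for that position, never flipping its sign. Called from the
 * quantizers whenever dct_error_sum is allocated, i.e. when noise reduction
 * is enabled.
 */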
5407 static void  denoise_dct_c(MpegEncContext *s, DCTELEM *block){
5408     const int intra= s->mb_intra;
5409     int i;
5410
5411     s->dct_count[intra]++;
5412
5413     for(i=0; i<64; i++){
5414         int level= block[i];
5415
5416         if(level){
5417             if(level>0){
5418                 s->dct_error_sum[intra][i] += level;
5419                 level -= s->dct_offset[intra][i];
5420                 if(level<0) level=0;
5421             }else{
5422                 s->dct_error_sum[intra][i] -= level;
5423                 level += s->dct_offset[intra][i];
5424                 if(level>0) level=0;
5425             }
5426             block[i]= level;
5427         }
5428     }
5429 }
5430
5431 #ifdef CONFIG_ENCODERS
5432
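/**
 * Rate-distortion optimal ("trellis") quantization. For every scan position
 * up to two candidate levels are kept (the rounded value and the one below);
 * a Viterbi-style search over run/level pairs then fills score_tab[] with the
 * best distortion + lambda*bits up to each position, survivor[] with the
 * positions still worth continuing from, and run_tab[]/level_tab[] with the
 * back-pointers used to rebuild the winning block at the end.
 */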
5433 static int dct_quantize_trellis_c(MpegEncContext *s, 
5434                         DCTELEM *block, int n,
5435                         int qscale, int *overflow){
5436     const int *qmat;
5437     const uint8_t *scantable= s->intra_scantable.scantable;
5438     const uint8_t *perm_scantable= s->intra_scantable.permutated;
5439     int max=0;
5440     unsigned int threshold1, threshold2;
5441     int bias=0;
5442     int run_tab[65];
5443     int level_tab[65];
5444     int score_tab[65];
5445     int survivor[65];
5446     int survivor_count;
5447     int last_run=0;
5448     int last_level=0;
5449     int last_score= 0;
5450     int last_i;
5451     int coeff[2][64];
5452     int coeff_count[64];
5453     int qmul, qadd, start_i, last_non_zero, i, dc;
5454     const int esc_length= s->ac_esc_length;
5455     uint8_t * length;
5456     uint8_t * last_length;
5457     const int lambda= s->lambda2 >> (FF_LAMBDA_SHIFT - 6);
5458         
5459     s->dsp.fdct (block);
5460     
5461     if(s->dct_error_sum)
5462         s->denoise_dct(s, block);
5463     qmul= qscale*16;
5464     qadd= ((qscale-1)|1)*8;
5465
5466     if (s->mb_intra) {
5467         int q;
5468         if (!s->h263_aic) {
5469             if (n < 4)
5470                 q = s->y_dc_scale;
5471             else
5472                 q = s->c_dc_scale;
5473             q = q << 3;
5474         } else{
5475             /* For AIC we skip quant/dequant of INTRADC */
5476             q = 1 << 3;
5477             qadd=0;
5478         }
5479             
5480         /* note: block[0] is assumed to be positive */
5481         block[0] = (block[0] + (q >> 1)) / q;
5482         start_i = 1;
5483         last_non_zero = 0;
5484         qmat = s->q_intra_matrix[qscale];
5485         if(s->mpeg_quant || s->out_format == FMT_MPEG1)
5486             bias= 1<<(QMAT_SHIFT-1);
5487         length     = s->intra_ac_vlc_length;
5488         last_length= s->intra_ac_vlc_last_length;
5489     } else {
5490         start_i = 0;
5491         last_non_zero = -1;
5492         qmat = s->q_inter_matrix[qscale];
5493         length     = s->inter_ac_vlc_length;
5494         last_length= s->inter_ac_vlc_last_length;
5495     }
5496     last_i= start_i;
5497
5498     threshold1= (1<<QMAT_SHIFT) - bias - 1;
5499     threshold2= (threshold1<<1);
5500
5501     for(i=63; i>=start_i; i--) {
5502         const int j = scantable[i];
5503         int level = block[j] * qmat[j];
5504
5505         if(((unsigned)(level+threshold1))>threshold2){
5506             last_non_zero = i;
5507             break;
5508         }
5509     }
5510
5511     for(i=start_i; i<=last_non_zero; i++) {
5512         const int j = scantable[i];
5513         int level = block[j] * qmat[j];
5514
5515 //        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
5516 //           || bias-level >= (1<<(QMAT_SHIFT - 3))){
5517         if(((unsigned)(level+threshold1))>threshold2){
5518             if(level>0){
5519                 level= (bias + level)>>QMAT_SHIFT;
5520                 coeff[0][i]= level;
5521                 coeff[1][i]= level-1;
5522 //                coeff[2][k]= level-2;
5523             }else{
5524                 level= (bias - level)>>QMAT_SHIFT;
5525                 coeff[0][i]= -level;
5526                 coeff[1][i]= -level+1;
5527 //                coeff[2][k]= -level+2;
5528             }
5529             coeff_count[i]= FFMIN(level, 2);
5530             assert(coeff_count[i]);
5531             max |=level;
5532         }else{
5533             coeff[0][i]= (level>>31)|1;
5534             coeff_count[i]= 1;
5535         }
5536     }
5537     
5538     *overflow= s->max_qcoeff < max; //overflow might have happened
5539     
5540     if(last_non_zero < start_i){
5541         memset(block + start_i, 0, (64-start_i)*sizeof(DCTELEM));
5542         return last_non_zero;
5543     }
5544
5545     score_tab[start_i]= 0;
5546     survivor[0]= start_i;
5547     survivor_count= 1;
5548     
5549     for(i=start_i; i<=last_non_zero; i++){
5550         int level_index, j;
5551         const int dct_coeff= ABS(block[ scantable[i] ]);
5552         const int zero_distortion= dct_coeff*dct_coeff;
5553         int best_score=256*256*256*120;
5554         for(level_index=0; level_index < coeff_count[i]; level_index++){
5555             int distortion;
5556             int level= coeff[level_index][i];
5557             const int alevel= ABS(level);
5558             int unquant_coeff;
5559             
5560             assert(level);
5561
5562             if(s->out_format == FMT_H263){
5563                 unquant_coeff= alevel*qmul + qadd;
5564             }else{ //MPEG1
5565                 j= s->dsp.idct_permutation[ scantable[i] ]; //FIXME optimize
5566                 if(s->mb_intra){
5567                         unquant_coeff = (int)(  alevel  * qscale * s->intra_matrix[j]) >> 3;
5568                         unquant_coeff =   (unquant_coeff - 1) | 1;
5569                 }else{
5570                         unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[j])) >> 4;
5571                         unquant_coeff =   (unquant_coeff - 1) | 1;
5572                 }
5573                 unquant_coeff<<= 3;
5574             }
5575
5576             distortion= (unquant_coeff - dct_coeff) * (unquant_coeff - dct_coeff) - zero_distortion;
5577             level+=64;
5578             if((level&(~127)) == 0){
5579                 for(j=survivor_count-1; j>=0; j--){
5580                     int run= i - survivor[j];
5581                     int score= distortion + length[UNI_AC_ENC_INDEX(run, level)]*lambda;
5582                     score += score_tab[i-run];
5583                     
5584                     if(score < best_score){
5585                         best_score= score;
5586                         run_tab[i+1]= run;
5587                         level_tab[i+1]= level-64;
5588                     }
5589                 }
5590
5591                 if(s->out_format == FMT_H263){
5592                     for(j=survivor_count-1; j>=0; j--){
5593                         int run= i - survivor[j];
5594                         int score= distortion + last_length[UNI_AC_ENC_INDEX(run, level)]*lambda;
5595                         score += score_tab[i-run];
5596                         if(score < last_score){
5597                             last_score= score;
5598                             last_run= run;
5599                             last_level= level-64;
5600                             last_i= i+1;
5601                         }
5602                     }
5603                 }
5604             }else{
5605                 distortion += esc_length*lambda;
5606                 for(j=survivor_count-1; j>=0; j--){
5607                     int run= i - survivor[j];
5608                     int score= distortion + score_tab[i-run];
5609                     
5610                     if(score < best_score){
5611                         best_score= score;
5612                         run_tab[i+1]= run;
5613                         level_tab[i+1]= level-64;
5614                     }
5615                 }
5616
5617                 if(s->out_format == FMT_H263){
5618                   for(j=survivor_count-1; j>=0; j--){
5619                         int run= i - survivor[j];
5620                         int score= distortion + score_tab[i-run];
5621                         if(score < last_score){
5622                             last_score= score;
5623                             last_run= run;
5624                             last_level= level-64;
5625                             last_i= i+1;
5626                         }
5627                     }
5628                 }
5629             }
5630         }
5631         
5632         score_tab[i+1]= best_score;
5633
5634         //Note: there is a vlc code in mpeg4 which is 1 bit shorter than another one with a shorter run and the same level
5635         if(last_non_zero <= 27){
5636             for(; survivor_count; survivor_count--){
5637                 if(score_tab[ survivor[survivor_count-1] ] <= best_score)
5638                     break;
5639             }
5640         }else{
5641             for(; survivor_count; survivor_count--){
5642                 if(score_tab[ survivor[survivor_count-1] ] <= best_score + lambda)
5643                     break;
5644             }
5645         }
5646
5647         survivor[ survivor_count++ ]= i+1;
5648     }
5649
5650     if(s->out_format != FMT_H263){
5651         last_score= 256*256*256*120;
5652         for(i= survivor[0]; i<=last_non_zero + 1; i++){
5653             int score= score_tab[i];
5654             if(i) score += lambda*2; //FIXME more exact?
5655
5656             if(score < last_score){
5657                 last_score= score;
5658                 last_i= i;
5659                 last_level= level_tab[i];
5660                 last_run= run_tab[i];
5661             }
5662         }
5663     }
5664
5665     s->coded_score[n] = last_score;
5666     
5667     dc= ABS(block[0]);
5668     last_non_zero= last_i - 1;
5669     memset(block + start_i, 0, (64-start_i)*sizeof(DCTELEM));
5670     
5671     if(last_non_zero < start_i)
5672         return last_non_zero;
5673
5674     if(last_non_zero == 0 && start_i == 0){
5675         int best_level= 0;
5676         int best_score= dc * dc;
5677         
5678         for(i=0; i<coeff_count[0]; i++){
5679             int level= coeff[i][0];
5680             int alevel= ABS(level);
5681             int unquant_coeff, score, distortion;
5682
5683             if(s->out_format == FMT_H263){
5684                     unquant_coeff= (alevel*qmul + qadd)>>3;
5685             }else{ //MPEG1
5686                     unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[0])) >> 4;
5687                     unquant_coeff =   (unquant_coeff - 1) | 1;
5688             }
5689             unquant_coeff = (unquant_coeff + 4) >> 3;
5690             unquant_coeff<<= 3 + 3;
5691
5692             distortion= (unquant_coeff - dc) * (unquant_coeff - dc);
5693             level+=64;
5694             if((level&(~127)) == 0) score= distortion + last_length[UNI_AC_ENC_INDEX(0, level)]*lambda;
5695             else                    score= distortion + esc_length*lambda;
5696
5697             if(score < best_score){
5698                 best_score= score;
5699                 best_level= level - 64;
5700             }
5701         }
5702         block[0]= best_level;
5703         s->coded_score[n] = best_score - dc*dc;
5704         if(best_level == 0) return -1;
5705         else                return last_non_zero;
5706     }
5707
5708     i= last_i;
5709     assert(last_level);
5710
5711     block[ perm_scantable[last_non_zero] ]= last_level;
5712     i -= last_run + 1;
5713     
5714     for(; i>start_i; i -= run_tab[i] + 1){
5715         block[ perm_scantable[i-1] ]= level_tab[i];
5716     }
5717
5718     return last_non_zero;
5719 }
5720
5721 //#define REFINE_STATS 1
5722 static int16_t basis[64][64];
5723
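/* fill basis[][] with the 8x8 DCT basis functions, scaled by BASIS_SHIFT and
   stored in idct_permutation order, for use by try_8x8basis()/add_8x8basis() */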
5724 static void build_basis(uint8_t *perm){
5725     int i, j, x, y;
5726     emms_c();
5727     for(i=0; i<8; i++){
5728         for(j=0; j<8; j++){
5729             for(y=0; y<8; y++){
5730                 for(x=0; x<8; x++){
5731                     double s= 0.25*(1<<BASIS_SHIFT);
5732                     int index= 8*i + j;
5733                     int perm_index= perm[index];
5734                     if(i==0) s*= sqrt(0.5);
5735                     if(j==0) s*= sqrt(0.5);
5736                     basis[perm_index][8*x + y]= lrintf(s * cos((M_PI/8.0)*i*(x+0.5)) * cos((M_PI/8.0)*j*(y+0.5)));
5737                 }
5738             }
5739         }
5740     }
5741 }
5742
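/**
 * Iteratively refine an already quantized block: rem[] holds the current
 * reconstruction error (reconstruction minus original, pixel domain, built by
 * adding the dequantized coefficients times their basis[] functions). The main
 * loop repeatedly tries changing a single coefficient by +-1, scores each
 * candidate as the weighted error change (try_8x8basis) plus lambda times the
 * VLC bit count change, and applies the best improvement until nothing helps.
 */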
5743 static int dct_quantize_refine(MpegEncContext *s, //FIXME breaks denoise?
5744                         DCTELEM *block, int16_t *weight, DCTELEM *orig,
5745                         int n, int qscale){
5746     int16_t rem[64];
5747     DCTELEM d1[64] __align16;
5748     const int *qmat;
5749     const uint8_t *scantable= s->intra_scantable.scantable;
5750     const uint8_t *perm_scantable= s->intra_scantable.permutated;
5751 //    unsigned int threshold1, threshold2;
5752 //    int bias=0;
5753     int run_tab[65];
5754     int prev_run=0;
5755     int prev_level=0;
5756     int qmul, qadd, start_i, last_non_zero, i, dc;
5757     uint8_t * length;
5758     uint8_t * last_length;
5759     int lambda;
5760     int rle_index, run, q, sum;
5761 #ifdef REFINE_STATS
5762 static int count=0;
5763 static int after_last=0;
5764 static int to_zero=0;
5765 static int from_zero=0;
5766 static int raise=0;
5767 static int lower=0;
5768 static int messed_sign=0;
5769 #endif
5770
5771     if(basis[0][0] == 0)
5772         build_basis(s->dsp.idct_permutation);
5773     
5774     qmul= qscale*2;
5775     qadd= (qscale-1)|1;
5776     if (s->mb_intra) {
5777         if (!s->h263_aic) {
5778             if (n < 4)
5779                 q = s->y_dc_scale;
5780             else
5781                 q = s->c_dc_scale;
5782         } else{
5783             /* For AIC we skip quant/dequant of INTRADC */
5784             q = 1;
5785             qadd=0;
5786         }
5787         q <<= RECON_SHIFT-3;
5788         /* note: block[0] is assumed to be positive */
5789         dc= block[0]*q;
5790 //        block[0] = (block[0] + (q >> 1)) / q;
5791         start_i = 1;
5792         qmat = s->q_intra_matrix[qscale];
5793 //        if(s->mpeg_quant || s->out_format == FMT_MPEG1)
5794 //            bias= 1<<(QMAT_SHIFT-1);
5795         length     = s->intra_ac_vlc_length;
5796         last_length= s->intra_ac_vlc_last_length;
5797     } else {
5798         dc= 0;
5799         start_i = 0;
5800         qmat = s->q_inter_matrix[qscale];
5801         length     = s->inter_ac_vlc_length;
5802         last_length= s->inter_ac_vlc_last_length;
5803     }
5804     last_non_zero = s->block_last_index[n];
5805
5806 #ifdef REFINE_STATS
5807 {START_TIMER
5808 #endif
5809     dc += (1<<(RECON_SHIFT-1));
5810     for(i=0; i<64; i++){
5811         rem[i]= dc - (orig[i]<<RECON_SHIFT); //FIXME use orig directly instead of copying to rem[]
5812     }
5813 #ifdef REFINE_STATS
5814 STOP_TIMER("memset rem[]")}
5815 #endif
5816     sum=0;
5817     for(i=0; i<64; i++){
5818         int one= 36;
5819         int qns=4;
5820         int w;
5821
5822         w= ABS(weight[i]) + qns*one;
5823         w= 15 + (48*qns*one + w/2)/w; // 16 .. 63
5824
5825         weight[i] = w;
5826 //        w=weight[i] = (63*qns + (w/2)) / w;
5827          
5828         assert(w>0);
5829         assert(w<(1<<6));
5830         sum += w*w;
5831     }
5832     lambda= sum*(uint64_t)s->lambda2 >> (FF_LAMBDA_SHIFT - 6 + 6 + 6 + 6);
5833 #ifdef REFINE_STATS
5834 {START_TIMER
5835 #endif
5836     run=0;
5837     rle_index=0;
5838     for(i=start_i; i<=last_non_zero; i++){
5839         int j= perm_scantable[i];
5840         const int level= block[j];
5841         int coeff;
5842         
5843         if(level){
5844             if(level<0) coeff= qmul*level - qadd;
5845             else        coeff= qmul*level + qadd;
5846             run_tab[rle_index++]=run;
5847             run=0;
5848
5849             s->dsp.add_8x8basis(rem, basis[j], coeff);
5850         }else{
5851             run++;
5852         }
5853     }
5854 #ifdef REFINE_STATS
5855 if(last_non_zero>0){
5856 STOP_TIMER("init rem[]")
5857 }
5858 }
5859
5860 {START_TIMER
5861 #endif
5862     for(;;){
5863         int best_score=s->dsp.try_8x8basis(rem, weight, basis[0], 0);
5864         int best_coeff=0;
5865         int best_change=0;
5866         int run2, best_unquant_change=0, analyze_gradient;
5867 #ifdef REFINE_STATS
5868 {START_TIMER
5869 #endif
5870         analyze_gradient = last_non_zero > 2 || s->avctx->quantizer_noise_shaping >= 3;
5871
5872         if(analyze_gradient){
5873 #ifdef REFINE_STATS
5874 {START_TIMER
5875 #endif
5876             for(i=0; i<64; i++){
5877                 int w= weight[i];
5878             
5879                 d1[i] = (rem[i]*w*w + (1<<(RECON_SHIFT+12-1)))>>(RECON_SHIFT+12);
5880             }
5881 #ifdef REFINE_STATS
5882 STOP_TIMER("rem*w*w")}
5883 {START_TIMER
5884 #endif
5885             s->dsp.fdct(d1);
5886 #ifdef REFINE_STATS
5887 STOP_TIMER("dct")}
5888 #endif
5889         }
5890
5891         if(start_i){
5892             const int level= block[0];
5893             int change, old_coeff;
5894
5895             assert(s->mb_intra);
5896             
5897             old_coeff= q*level;
5898             
5899             for(change=-1; change<=1; change+=2){
5900                 int new_level= level + change;
5901                 int score, new_coeff;
5902                 
5903                 new_coeff= q*new_level;
5904                 if(new_coeff >= 2048 || new_coeff < 0)
5905                     continue;
5906
5907                 score= s->dsp.try_8x8basis(rem, weight, basis[0], new_coeff - old_coeff);
5908                 if(score<best_score){
5909                     best_score= score;
5910                     best_coeff= 0;
5911                     best_change= change;
5912                     best_unquant_change= new_coeff - old_coeff;
5913                 }
5914             }
5915         }
5916         
5917         run=0;
5918         rle_index=0;
5919         run2= run_tab[rle_index++];
5920         prev_level=0;
5921         prev_run=0;
5922
5923         for(i=start_i; i<64; i++){
5924             int j= perm_scantable[i];
5925             const int level= block[j];
5926             int change, old_coeff;
5927
5928             if(s->avctx->quantizer_noise_shaping < 3 && i > last_non_zero + 1)
5929                 break;
5930
5931             if(level){
5932                 if(level<0) old_coeff= qmul*level - qadd;
5933                 else        old_coeff= qmul*level + qadd;
5934                 run2= run_tab[rle_index++]; //FIXME ! maybe after last
5935             }else{
5936                 old_coeff=0;
5937                 run2--;
5938                 assert(run2>=0 || i >= last_non_zero );
5939             }
5940             
5941             for(change=-1; change<=1; change+=2){
5942                 int new_level= level + change;
5943                 int score, new_coeff, unquant_change;
5944                 
5945                 score=0;
5946                 if(s->avctx->quantizer_noise_shaping < 2 && ABS(new_level) > ABS(level))
5947                    continue;
5948
5949                 if(new_level){
5950                     if(new_level<0) new_coeff= qmul*new_level - qadd;
5951                     else            new_coeff= qmul*new_level + qadd;
5952                     if(new_coeff >= 2048 || new_coeff <= -2048)
5953                         continue;
5954                     //FIXME check for overflow
5955                     
5956                     if(level){
5957                         if(level < 63 && level > -63){
5958                             if(i < last_non_zero)
5959                                 score +=   length[UNI_AC_ENC_INDEX(run, new_level+64)]
5960                                          - length[UNI_AC_ENC_INDEX(run, level+64)];
5961                             else
5962                                 score +=   last_length[UNI_AC_ENC_INDEX(run, new_level+64)]
5963                                          - last_length[UNI_AC_ENC_INDEX(run, level+64)];
5964                         }
5965                     }else{
5966                         assert(ABS(new_level)==1);
5967                         
5968                         if(analyze_gradient){
5969                             int g= d1[ scantable[i] ];
5970                             if(g && (g^new_level) >= 0)
5971                                 continue;
5972                         }
5973
5974                         if(i < last_non_zero){
5975                             int next_i= i + run2 + 1;
5976                             int next_level= block[ perm_scantable[next_i] ] + 64;
5977                             
5978                             if(next_level&(~127))
5979                                 next_level= 0;
5980
5981                             if(next_i < last_non_zero)
5982                                 score +=   length[UNI_AC_ENC_INDEX(run, 65)]
5983                                          + length[UNI_AC_ENC_INDEX(run2, next_level)]
5984                                          - length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
5985                             else
5986                                 score +=  length[UNI_AC_ENC_INDEX(run, 65)]
5987                                         + last_length[UNI_AC_ENC_INDEX(run2, next_level)]
5988                                         - last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
5989                         }else{
5990                             score += last_length[UNI_AC_ENC_INDEX(run, 65)];
5991                             if(prev_level){
5992                                 score +=  length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
5993                                         - last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
5994                             }
5995                         }
5996                     }
5997                 }else{
5998                     new_coeff=0;
5999                     assert(ABS(level)==1);
6000
6001                     if(i < last_non_zero){
6002                         int next_i= i + run2 + 1;
6003                         int next_level= block[ perm_scantable[next_i] ] + 64;
6004                             
6005                         if(next_level&(~127))
6006                             next_level= 0;
6007
6008                         if(next_i < last_non_zero)
6009                             score +=   length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
6010                                      - length[UNI_AC_ENC_INDEX(run2, next_level)]
6011                                      - length[UNI_AC_ENC_INDEX(run, 65)];
6012                         else
6013                             score +=   last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
6014                                      - last_length[UNI_AC_ENC_INDEX(run2, next_level)]
6015                                      - length[UNI_AC_ENC_INDEX(run, 65)];
6016                     }else{
6017                         score += -last_length[UNI_AC_ENC_INDEX(run, 65)];
6018                         if(prev_level){
6019                             score +=  last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
6020                                     - length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
6021                         }
6022                     }
6023                 }
6024                 
6025                 score *= lambda;
6026
6027                 unquant_change= new_coeff - old_coeff;
6028                 assert((score < 100*lambda && score > -100*lambda) || lambda==0);
6029                 
6030                 score+= s->dsp.try_8x8basis(rem, weight, basis[j], unquant_change);
6031                 if(score<best_score){
6032                     best_score= score;
6033                     best_coeff= i;
6034                     best_change= change;
6035                     best_unquant_change= unquant_change;
6036                 }
6037             }
6038             if(level){
6039                 prev_level= level + 64;
6040                 if(prev_level&(~127))
6041                     prev_level= 0;
6042                 prev_run= run;
6043                 run=0;
6044             }else{
6045                 run++;
6046             }
6047         }
6048 #ifdef REFINE_STATS
6049 STOP_TIMER("iterative step")}
6050 #endif
6051
6052         if(best_change){
6053             int j= perm_scantable[ best_coeff ];
6054             
6055             block[j] += best_change;
6056             
6057             if(best_coeff > last_non_zero){
6058                 last_non_zero= best_coeff;
6059                 assert(block[j]);
6060 #ifdef REFINE_STATS
6061 after_last++;
6062 #endif
6063             }else{
6064 #ifdef REFINE_STATS
6065 if(block[j]){
6066     if(block[j] - best_change){
6067         if(ABS(block[j]) > ABS(block[j] - best_change)){
6068             raise++;
6069         }else{
6070             lower++;
6071         }
6072     }else{
6073         from_zero++;
6074     }
6075 }else{
6076     to_zero++;
6077 }
6078 #endif
6079                 for(; last_non_zero>=start_i; last_non_zero--){
6080                     if(block[perm_scantable[last_non_zero]])
6081                         break;
6082                 }
6083             }
6084 #ifdef REFINE_STATS
6085 count++;
6086 if(256*256*256*64 % count == 0){
6087     printf("after_last:%d to_zero:%d from_zero:%d raise:%d lower:%d sign:%d xyp:%d/%d/%d\n", after_last, to_zero, from_zero, raise, lower, messed_sign, s->mb_x, s->mb_y, s->picture_number);
6088 }
6089 #endif
6090             run=0;
6091             rle_index=0;
6092             for(i=start_i; i<=last_non_zero; i++){
6093                 int j= perm_scantable[i];
6094                 const int level= block[j];
6095         
6096                  if(level){
6097                      run_tab[rle_index++]=run;
6098                      run=0;
6099                  }else{
6100                      run++;
6101                  }
6102             }
6103             
6104             s->dsp.add_8x8basis(rem, basis[j], best_unquant_change);
6105         }else{
6106             break;
6107         }
6108     }
6109 #ifdef REFINE_STATS
6110 if(last_non_zero>0){
6111 STOP_TIMER("iterative search")
6112 }
6113 }
6114 #endif
6115
6116     return last_non_zero;
6117 }
6118
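/**
 * Plain (non-trellis) quantizer: forward DCT, optional denoising, then
 * threshold + bias quantization of the coefficients in scan order; finally
 * the nonzero coefficients are permuted to match the IDCT's layout.
 */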
6119 static int dct_quantize_c(MpegEncContext *s, 
6120                         DCTELEM *block, int n,
6121                         int qscale, int *overflow)
6122 {
6123     int i, j, level, last_non_zero, q, start_i;
6124     const int *qmat;
6125     const uint8_t *scantable= s->intra_scantable.scantable;
6126     int bias;
6127     int max=0;
6128     unsigned int threshold1, threshold2;
6129
6130     s->dsp.fdct (block);
6131
6132     if(s->dct_error_sum)
6133         s->denoise_dct(s, block);
6134
6135     if (s->mb_intra) {
6136         if (!s->h263_aic) {
6137             if (n < 4)
6138                 q = s->y_dc_scale;
6139             else
6140                 q = s->c_dc_scale;
6141             q = q << 3;
6142         } else
6143             /* For AIC we skip quant/dequant of INTRADC */
6144             q = 1 << 3;
6145             
6146         /* note: block[0] is assumed to be positive */
6147         block[0] = (block[0] + (q >> 1)) / q;
6148         start_i = 1;
6149         last_non_zero = 0;
6150         qmat = s->q_intra_matrix[qscale];
6151         bias= s->intra_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
6152     } else {
6153         start_i = 0;
6154         last_non_zero = -1;
6155         qmat = s->q_inter_matrix[qscale];
6156         bias= s->inter_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
6157     }
6158     threshold1= (1<<QMAT_SHIFT) - bias - 1;
6159     threshold2= (threshold1<<1);
6160     for(i=63;i>=start_i;i--) {
6161         j = scantable[i];
6162         level = block[j] * qmat[j];
6163
6164         if(((unsigned)(level+threshold1))>threshold2){
6165             last_non_zero = i;
6166             break;
6167         }else{
6168             block[j]=0;
6169         }
6170     }
6171     for(i=start_i; i<=last_non_zero; i++) {
6172         j = scantable[i];
6173         level = block[j] * qmat[j];
6174
6175 //        if(   bias+level >= (1<<QMAT_SHIFT)
6176 //           || bias-level >= (1<<QMAT_SHIFT)){
6177         if(((unsigned)(level+threshold1))>threshold2){
6178             if(level>0){
6179                 level= (bias + level)>>QMAT_SHIFT;
6180                 block[j]= level;
6181             }else{
6182                 level= (bias - level)>>QMAT_SHIFT;
6183                 block[j]= -level;
6184             }
6185             max |=level;
6186         }else{
6187             block[j]=0;
6188         }
6189     }
6190     *overflow= s->max_qcoeff < max; //overflow might have happened
6191     
6192     /* we need this permutation so that the IDCT output is correct; only the nonzero elements are permuted */
6193     if (s->dsp.idct_permutation_type != FF_NO_IDCT_PERM)
6194         ff_block_permute(block, s->dsp.idct_permutation, scantable, last_non_zero);
6195
6196     return last_non_zero;
6197 }
6198
6199 #endif //CONFIG_ENCODERS
6200
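/**
 * MPEG-1 intra dequantization: DC is scaled by y/c_dc_scale, AC coefficients
 * by qscale*quant_matrix>>3, and every reconstructed level is forced odd via
 * (level-1)|1 (MPEG-1 oddification / mismatch control).
 */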
6201 static void dct_unquantize_mpeg1_intra_c(MpegEncContext *s, 
6202                                    DCTELEM *block, int n, int qscale)
6203 {
6204     int i, level, nCoeffs;
6205     const uint16_t *quant_matrix;
6206
6207     nCoeffs= s->block_last_index[n];
6208     
6209     if (n < 4) 
6210         block[0] = block[0] * s->y_dc_scale;
6211     else
6212         block[0] = block[0] * s->c_dc_scale;
6213     /* XXX: only mpeg1 */
6214     quant_matrix = s->intra_matrix;
6215     for(i=1;i<=nCoeffs;i++) {
6216         int j= s->intra_scantable.permutated[i];
6217         level = block[j];
6218         if (level) {
6219             if (level < 0) {
6220                 level = -level;
6221                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
6222                 level = (level - 1) | 1;
6223                 level = -level;
6224             } else {
6225                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
6226                 level = (level - 1) | 1;
6227             }
6228             block[j] = level;
6229         }
6230     }
6231 }
6232
6233 static void dct_unquantize_mpeg1_inter_c(MpegEncContext *s, 
6234                                    DCTELEM *block, int n, int qscale)
6235 {
6236     int i, level, nCoeffs;
6237     const uint16_t *quant_matrix;
6238
6239     nCoeffs= s->block_last_index[n];
6240     
6241     quant_matrix = s->inter_matrix;
6242     for(i=0; i<=nCoeffs; i++) {
6243         int j= s->intra_scantable.permutated[i];
6244         level = block[j];
6245         if (level) {
6246             if (level < 0) {
6247                 level = -level;
6248                 level = (((level << 1) + 1) * qscale *
6249                          ((int) (quant_matrix[j]))) >> 4;
6250                 level = (level - 1) | 1;
6251                 level = -level;
6252             } else {
6253                 level = (((level << 1) + 1) * qscale *
6254                          ((int) (quant_matrix[j]))) >> 4;
6255                 level = (level - 1) | 1;
6256             }
6257             block[j] = level;
6258         }
6259     }
6260 }
6261
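/* MPEG-2 intra dequantization: same qscale*quant_matrix>>3 scaling as the
   MPEG-1 version above, but without the (level-1)|1 oddification; compare
   dct_unquantize_mpeg2_inter_c() below, which performs MPEG-2's mismatch
   control on block[63]. */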
6262 static void dct_unquantize_mpeg2_intra_c(MpegEncContext *s, 
6263                                    DCTELEM *block, int n, int qscale)
6264 {
6265     int i, level, nCoeffs;
6266     const uint16_t *quant_matrix;
6267
6268     if(s->alternate_scan) nCoeffs= 63;
6269     else nCoeffs= s->block_last_index[n];
6270     
6271     if (n < 4) 
6272         block[0] = block[0] * s->y_dc_scale;
6273     else
6274         block[0] = block[0] * s->c_dc_scale;
6275     quant_matrix = s->intra_matrix;
6276     for(i=1;i<=nCoeffs;i++) {
6277         int j= s->intra_scantable.permutated[i];
6278         level = block[j];
6279         if (level) {
6280             if (level < 0) {
6281                 level = -level;
6282                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
6283                 level = -level;
6284             } else {
6285                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
6286             }
6287             block[j] = level;
6288         }
6289     }
6290 }
6291
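/* MPEG-2 inter dequantization: (2*level+1)*qscale*quant_matrix>>4 scaling,
   followed by MPEG-2 mismatch control: the LSB of block[63] is toggled so the
   sum of all output coefficients ends up odd. */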
6292 static void dct_unquantize_mpeg2_inter_c(MpegEncContext *s, 
6293                                    DCTELEM *block, int n, int qscale)
6294 {
6295     int i, level, nCoeffs;
6296     const uint16_t *quant_matrix;
6297     int sum=-1;
6298
6299     if(s->alternate_scan) nCoeffs= 63;
6300     else nCoeffs= s->block_last_index[n];
6301     
6302     quant_matrix = s->inter_matrix;
6303     for(i=0; i<=nCoeffs; i++) {
6304         int j= s->intra_scantable.permutated[i];
6305         level = block[j];
6306         if (level) {
6307             if (level < 0) {
6308                 level = -level;
6309                 level = (((level << 1) + 1) * qscale *
6310                          ((int) (quant_matrix[j]))) >> 4;
6311                 level = -level;
6312             } else {
6313                 level = (((level << 1) + 1) * qscale *
6314                          ((int) (quant_matrix[j]))) >> 4;
6315             }
6316             block[j] = level;
6317             sum+=level;
6318         }
6319     }
6320     block[63]^=sum&1;
6321 }
6322
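/**
 * H.263-style intra dequantization: level' = level*(2*qscale) +/- ((qscale-1)|1)
 * depending on the sign, with the DC coefficient scaled separately unless
 * advanced intra coding (h263_aic) is used. Example with qscale=4: qmul=8,
 * qadd=3, so level 2 -> 19 and level -2 -> -19.
 */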
6323 static void dct_unquantize_h263_intra_c(MpegEncContext *s, 
6324                                   DCTELEM *block, int n, int qscale)
6325 {
6326     int i, level, qmul, qadd;
6327     int nCoeffs;
6328     
6329     assert(s->block_last_index[n]>=0);
6330     
6331     qmul = qscale << 1;
6332     
6333     if (!s->h263_aic) {
6334         if (n < 4) 
6335             block[0] = block[0] * s->y_dc_scale;
6336         else
6337             block[0] = block[0] * s->c_dc_scale;
6338         qadd = (qscale - 1) | 1;
6339     }else{
6340         qadd = 0;
6341     }
6342     if(s->ac_pred)
6343         nCoeffs=63;
6344     else
6345         nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
6346
6347     for(i=1; i<=nCoeffs; i++) {
6348         level = block[i];
6349         if (level) {
6350             if (level < 0) {
6351                 level = level * qmul - qadd;
6352             } else {
6353                 level = level * qmul + qadd;
6354             }
6355             block[i] = level;
6356         }
6357     }
6358 }
6359
6360 static void dct_unquantize_h263_inter_c(MpegEncContext *s, 
6361                                   DCTELEM *block, int n, int qscale)
6362 {
6363     int i, level, qmul, qadd;
6364     int nCoeffs;
6365     
6366     assert(s->block_last_index[n]>=0);
6367     
6368     qadd = (qscale - 1) | 1;
6369     qmul = qscale << 1;
6370     
6371     nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
6372
6373     for(i=0; i<=nCoeffs; i++) {
6374         level = block[i];
6375         if (level) {
6376             if (level < 0) {
6377                 level = level * qmul - qadd;
6378             } else {
6379                 level = level * qmul + qadd;
6380             }
6381             block[i] = level;
6382         }
6383     }
6384 }
6385
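/* AVOption table shared by the encoders below; it maps user visible option
   names onto the corresponding AVCodecContext fields (bitrate, rate control,
   masking and motion estimation parameters, ...) */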
6386 static const AVOption mpeg4_options[] =
6387 {
6388     AVOPTION_CODEC_INT("bitrate", "desired video bitrate", bit_rate, 4, 240000000, 800000),
6389     AVOPTION_CODEC_INT("ratetol", "number of bits the bitstream is allowed to diverge from the reference; "
6390                        "the reference can be CBR (for CBR pass1) or VBR (for pass2)",
6391                        bit_rate_tolerance, 4, 240000000, 8000),
6392     AVOPTION_CODEC_INT("qmin", "minimum quantizer", qmin, 1, 31, 2),
6393     AVOPTION_CODEC_INT("qmax", "maximum quantizer", qmax, 1, 31, 31),
6394     AVOPTION_CODEC_STRING("rc_eq", "rate control equation",
6395                           rc_eq, "tex^qComp,option1,options2", 0),
6396     AVOPTION_CODEC_INT("rc_minrate", "rate control minimum bitrate",
6397                        rc_min_rate, 4, 24000000, 0),
6398     AVOPTION_CODEC_INT("rc_maxrate", "rate control maximum bitrate",
6399                        rc_max_rate, 4, 24000000, 0),
6400     AVOPTION_CODEC_DOUBLE("rc_buf_aggresivity", "rate control buffer aggressivity",
6401                           rc_buffer_aggressivity, 4, 24000000, 0),
6402     AVOPTION_CODEC_DOUBLE("rc_initial_cplx", "initial complexity for pass1 ratecontrol",
6403                           rc_initial_cplx, 0., 9999999., 0),
6404     AVOPTION_CODEC_DOUBLE("i_quant_factor", "qscale factor between p and i frames",
6405                           i_quant_factor, 0., 0., 0),
6406     AVOPTION_CODEC_DOUBLE("i_quant_offset", "qscale offset between p and i frames",
6407                           i_quant_offset, -999999., 999999., 0),
6408     AVOPTION_CODEC_INT("dct_algo", "dct algorithm",
6409                        dct_algo, 0, 5, 0), // fixme - "Auto,FastInt,Int,MMX,MLib,Altivec"
6410     AVOPTION_CODEC_DOUBLE("lumi_masking", "luminance masking",
6411                           lumi_masking, 0., 999999., 0),
6412     AVOPTION_CODEC_DOUBLE("temporal_cplx_masking", "temporal complexity masking",
6413                           temporal_cplx_masking, 0., 999999., 0),
6414     AVOPTION_CODEC_DOUBLE("spatial_cplx_masking", "spatial complexity masking",
6415                           spatial_cplx_masking, 0., 999999., 0),
6416     AVOPTION_CODEC_DOUBLE("p_masking", "p block masking",
6417                           p_masking, 0., 999999., 0),
6418     AVOPTION_CODEC_DOUBLE("dark_masking", "darkness masking",
6419                           dark_masking, 0., 999999., 0),
6420     AVOPTION_CODEC_INT("idct_algo", "idct algorithm",
6421                        idct_algo, 0, 8, 0), // fixme - "Auto,Int,Simple,SimpleMMX,LibMPEG2MMX,PS2,MLib,ARM,Altivec"
6422
6423     AVOPTION_CODEC_INT("mb_qmin", "minimum MB quantizer",
6424                        mb_qmin, 0, 8, 0),
6425     AVOPTION_CODEC_INT("mb_qmax", "maximum MB quantizer",
6426                        mb_qmax, 0, 8, 0),
6427
6428     AVOPTION_CODEC_INT("me_cmp", "ME compare function",
6429                        me_cmp, 0, 24000000, 0),
6430     AVOPTION_CODEC_INT("me_sub_cmp", "subpixel ME compare function",
6431                        me_sub_cmp, 0, 24000000, 0),
6432
6433
6434     AVOPTION_CODEC_INT("dia_size", "ME diamond size & shape",
6435                        dia_size, 0, 24000000, 0),
6436     AVOPTION_CODEC_INT("last_predictor_count", "amount of previous MV predictors",
6437                        last_predictor_count, 0, 24000000, 0),
6438
6439     AVOPTION_CODEC_INT("pre_me", "pre pass for ME",
6440                        pre_me, 0, 24000000, 0),
6441     AVOPTION_CODEC_INT("me_pre_cmp", "ME pre pass compare function",
6442                        me_pre_cmp, 0, 24000000, 0),
6443
6444     AVOPTION_CODEC_INT("me_range", "maximum ME search range",
6445                        me_range, 0, 24000000, 0),
6446     AVOPTION_CODEC_INT("pre_dia_size", "ME pre pass diamond size & shape",
6447                        pre_dia_size, 0, 24000000, 0),
6448     AVOPTION_CODEC_INT("me_subpel_quality", "subpel ME quality",
6449                        me_subpel_quality, 0, 24000000, 0),
6452     AVOPTION_CODEC_FLAG("psnr", "calculate PSNR of compressed frames",
6453                         flags, CODEC_FLAG_PSNR, 0),
6454     AVOPTION_CODEC_RCOVERRIDE("rc_override", "ratecontrol override (=startframe,endframe,qscale,quality_factor)",
6455                               rc_override),
6456     AVOPTION_SUB(avoptions_common),
6457     AVOPTION_END()
6458 };
6459
6460 #ifdef CONFIG_ENCODERS
6461 AVCodec h263_encoder = {
6462     "h263",
6463     CODEC_TYPE_VIDEO,
6464     CODEC_ID_H263,
6465     sizeof(MpegEncContext),
6466     MPV_encode_init,
6467     MPV_encode_picture,
6468     MPV_encode_end,
6469     .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
6470 };
6471
6472 AVCodec h263p_encoder = {
6473     "h263p",
6474     CODEC_TYPE_VIDEO,
6475     CODEC_ID_H263P,
6476     sizeof(MpegEncContext),
6477     MPV_encode_init,
6478     MPV_encode_picture,
6479     MPV_encode_end,
6480     .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
6481 };
6482
6483 AVCodec flv_encoder = {
6484     "flv",
6485     CODEC_TYPE_VIDEO,
6486     CODEC_ID_FLV1,
6487     sizeof(MpegEncContext),
6488     MPV_encode_init,
6489     MPV_encode_picture,
6490     MPV_encode_end,
6491     .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
6492 };
6493
6494 AVCodec rv10_encoder = {
6495     "rv10",
6496     CODEC_TYPE_VIDEO,
6497     CODEC_ID_RV10,
6498     sizeof(MpegEncContext),
6499     MPV_encode_init,
6500     MPV_encode_picture,
6501     MPV_encode_end,
6502     .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
6503 };
6504
6505 AVCodec rv20_encoder = {
6506     "rv20",
6507     CODEC_TYPE_VIDEO,
6508     CODEC_ID_RV20,
6509     sizeof(MpegEncContext),
6510     MPV_encode_init,
6511     MPV_encode_picture,
6512     MPV_encode_end,
6513     .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
6514 };
6515
6516 AVCodec mpeg4_encoder = {
6517     "mpeg4",
6518     CODEC_TYPE_VIDEO,
6519     CODEC_ID_MPEG4,
6520     sizeof(MpegEncContext),
6521     MPV_encode_init,
6522     MPV_encode_picture,
6523     MPV_encode_end,
6524     .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
6525     .options = mpeg4_options,
6526     .capabilities= CODEC_CAP_DELAY,
6527 };
6528
6529 AVCodec msmpeg4v1_encoder = {
6530     "msmpeg4v1",
6531     CODEC_TYPE_VIDEO,
6532     CODEC_ID_MSMPEG4V1,
6533     sizeof(MpegEncContext),
6534     MPV_encode_init,
6535     MPV_encode_picture,
6536     MPV_encode_end,
6537     .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
6538     .options = mpeg4_options,
6539 };
6540
6541 AVCodec msmpeg4v2_encoder = {
6542     "msmpeg4v2",
6543     CODEC_TYPE_VIDEO,
6544     CODEC_ID_MSMPEG4V2,
6545     sizeof(MpegEncContext),
6546     MPV_encode_init,
6547     MPV_encode_picture,
6548     MPV_encode_end,
6549     .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
6550     .options = mpeg4_options,
6551 };
6552
6553 AVCodec msmpeg4v3_encoder = {
6554     "msmpeg4",
6555     CODEC_TYPE_VIDEO,
6556     CODEC_ID_MSMPEG4V3,
6557     sizeof(MpegEncContext),
6558     MPV_encode_init,
6559     MPV_encode_picture,
6560     MPV_encode_end,
6561     .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
6562     .options = mpeg4_options,
6563 };
6564
6565 AVCodec wmv1_encoder = {
6566     "wmv1",
6567     CODEC_TYPE_VIDEO,
6568     CODEC_ID_WMV1,
6569     sizeof(MpegEncContext),
6570     MPV_encode_init,
6571     MPV_encode_picture,
6572     MPV_encode_end,
6573     .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
6574     .options = mpeg4_options,
6575 };
6576
6577 AVCodec mjpeg_encoder = {
6578     "mjpeg",
6579     CODEC_TYPE_VIDEO,
6580     CODEC_ID_MJPEG,
6581     sizeof(MpegEncContext),
6582     MPV_encode_init,
6583     MPV_encode_picture,
6584     MPV_encode_end,
6585     .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUVJ420P, -1},
6586 };
6587
6588 #endif //CONFIG_ENCODERS