]> git.sesse.net Git - ffmpeg/blob - libavcodec/mpegvideo.c
minor optimization
[ffmpeg] / libavcodec / mpegvideo.c
1 /*
2  * The simplest mpeg encoder (well, it was the simplest!)
3  * Copyright (c) 2000,2001 Fabrice Bellard.
4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5  *
6  * This library is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2 of the License, or (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with this library; if not, write to the Free Software
18  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
19  *
20  * 4MV & hq & b-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
21  */
22  
23 /**
24  * @file mpegvideo.c
25  * The simplest mpeg encoder (well, it was the simplest!).
26  */ 
27  
28 #include "avcodec.h"
29 #include "dsputil.h"
30 #include "mpegvideo.h"
31 #include "faandct.h"
32 #include <limits.h>
33
34 #ifdef USE_FASTMEMCPY
35 #include "fastmemcpy.h"
36 #endif
37
38 //#undef NDEBUG
39 //#include <assert.h>
40
41 #ifdef CONFIG_ENCODERS
42 static void encode_picture(MpegEncContext *s, int picture_number);
43 #endif //CONFIG_ENCODERS
44 static void dct_unquantize_mpeg1_intra_c(MpegEncContext *s, 
45                                    DCTELEM *block, int n, int qscale);
46 static void dct_unquantize_mpeg1_inter_c(MpegEncContext *s, 
47                                    DCTELEM *block, int n, int qscale);
48 static void dct_unquantize_mpeg2_intra_c(MpegEncContext *s,
49                                    DCTELEM *block, int n, int qscale);
50 static void dct_unquantize_mpeg2_inter_c(MpegEncContext *s,
51                                    DCTELEM *block, int n, int qscale);
52 static void dct_unquantize_h263_intra_c(MpegEncContext *s, 
53                                   DCTELEM *block, int n, int qscale);
54 static void dct_unquantize_h263_inter_c(MpegEncContext *s, 
55                                   DCTELEM *block, int n, int qscale);
56 static void dct_unquantize_h261_intra_c(MpegEncContext *s, 
57                                   DCTELEM *block, int n, int qscale);
58 static void dct_unquantize_h261_inter_c(MpegEncContext *s, 
59                                   DCTELEM *block, int n, int qscale);
60 static void draw_edges_c(uint8_t *buf, int wrap, int width, int height, int w);
61 #ifdef CONFIG_ENCODERS
62 static int dct_quantize_c(MpegEncContext *s, DCTELEM *block, int n, int qscale, int *overflow);
63 static int dct_quantize_trellis_c(MpegEncContext *s, DCTELEM *block, int n, int qscale, int *overflow);
64 static int dct_quantize_refine(MpegEncContext *s, DCTELEM *block, int16_t *weight, DCTELEM *orig, int n, int qscale);
65 static int sse_mb(MpegEncContext *s);
66 static void  denoise_dct_c(MpegEncContext *s, DCTELEM *block);
67 #endif //CONFIG_ENCODERS
68
69 #ifdef HAVE_XVMC
70 extern int  XVMC_field_start(MpegEncContext*s, AVCodecContext *avctx);
71 extern void XVMC_field_end(MpegEncContext *s);
72 extern void XVMC_decode_mb(MpegEncContext *s);
73 #endif
74
75 void (*draw_edges)(uint8_t *buf, int wrap, int width, int height, int w)= draw_edges_c;
76
77
78 /* enable all paranoid tests for rounding, overflows, etc... */
79 //#define PARANOID
80
81 //#define DEBUG
82
83
/* number of fractional bits in the aanscales[] entries (used by the jpeg fast DCT path) */
#define CONST_BITS 14

/* 8x8 table of per-coefficient scale factors, precomputed and scaled up by 14 bits */
static const uint16_t aanscales[64] = {
    16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
    22725, 31521, 29692, 26722, 22725, 17855, 12299,  6270,
    21407, 29692, 27969, 25172, 21407, 16819, 11585,  5906,
    19266, 26722, 25172, 22654, 19266, 15137, 10426,  5315,
    16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
    12873, 17855, 16819, 15137, 12873, 10114,  6967,  3552,
     8867, 12299, 11585, 10426,  8867,  6967,  4799,  2446,
     4520,  6270,  5906,  5315,  4520,  3552,  2446,  1247,
};

/* 16-entry rounding table: indices 0..2 -> 0, 3..13 -> 1, 14..15 -> 2 */
static const uint8_t h263_chroma_roundtab[16] = {
    0, 0, 0, 1,  1, 1, 1, 1,
    1, 1, 1, 1,  1, 1, 2, 2,
};

/* default chroma qscale mapping: the identity for all 32 qscale values */
static const uint8_t ff_default_chroma_qscale_table[32] = {
     0,  1,  2,  3,  4,  5,  6,  7,
     8,  9, 10, 11, 12, 13, 14, 15,
    16, 17, 18, 19, 20, 21, 22, 23,
    24, 25, 26, 27, 28, 29, 30, 31,
};
108
109 #ifdef CONFIG_ENCODERS
110 static uint8_t (*default_mv_penalty)[MAX_MV*2+1]=NULL;
111 static uint8_t default_fcode_tab[MAX_MV*2+1];
112
113 enum PixelFormat ff_yuv420p_list[2]= {PIX_FMT_YUV420P, -1};
114
115 static void convert_matrix(DSPContext *dsp, int (*qmat)[64], uint16_t (*qmat16)[2][64],
116                            const uint16_t *quant_matrix, int bias, int qmin, int qmax)
117 {
118     int qscale;
119
120     for(qscale=qmin; qscale<=qmax; qscale++){
121         int i;
122         if (dsp->fdct == ff_jpeg_fdct_islow 
123 #ifdef FAAN_POSTSCALE
124             || dsp->fdct == ff_faandct
125 #endif
126             ) {
127             for(i=0;i<64;i++) {
128                 const int j= dsp->idct_permutation[i];
129                 /* 16 <= qscale * quant_matrix[i] <= 7905 */
130                 /* 19952         <= aanscales[i] * qscale * quant_matrix[i]           <= 249205026 */
131                 /* (1<<36)/19952 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= (1<<36)/249205026 */
132                 /* 3444240       >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= 275 */
133                 
134                 qmat[qscale][i] = (int)((uint64_t_C(1) << QMAT_SHIFT) / 
135                                 (qscale * quant_matrix[j]));
136             }
137         } else if (dsp->fdct == fdct_ifast
138 #ifndef FAAN_POSTSCALE
139                    || dsp->fdct == ff_faandct
140 #endif
141                    ) {
142             for(i=0;i<64;i++) {
143                 const int j= dsp->idct_permutation[i];
144                 /* 16 <= qscale * quant_matrix[i] <= 7905 */
145                 /* 19952         <= aanscales[i] * qscale * quant_matrix[i]           <= 249205026 */
146                 /* (1<<36)/19952 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= (1<<36)/249205026 */
147                 /* 3444240       >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= 275 */
148                 
149                 qmat[qscale][i] = (int)((uint64_t_C(1) << (QMAT_SHIFT + 14)) / 
150                                 (aanscales[i] * qscale * quant_matrix[j]));
151             }
152         } else {
153             for(i=0;i<64;i++) {
154                 const int j= dsp->idct_permutation[i];
155                 /* We can safely suppose that 16 <= quant_matrix[i] <= 255
156                    So 16           <= qscale * quant_matrix[i]             <= 7905
157                    so (1<<19) / 16 >= (1<<19) / (qscale * quant_matrix[i]) >= (1<<19) / 7905
158                    so 32768        >= (1<<19) / (qscale * quant_matrix[i]) >= 67
159                 */
160                 qmat[qscale][i] = (int)((uint64_t_C(1) << QMAT_SHIFT) / (qscale * quant_matrix[j]));
161 //                qmat  [qscale][i] = (1 << QMAT_SHIFT_MMX) / (qscale * quant_matrix[i]);
162                 qmat16[qscale][0][i] = (1 << QMAT_SHIFT_MMX) / (qscale * quant_matrix[j]);
163
164                 if(qmat16[qscale][0][i]==0 || qmat16[qscale][0][i]==128*256) qmat16[qscale][0][i]=128*256-1;
165                 qmat16[qscale][1][i]= ROUNDED_DIV(bias<<(16-QUANT_BIAS_SHIFT), qmat16[qscale][0][i]);
166             }
167         }
168     }
169 }
170
171 static inline void update_qscale(MpegEncContext *s){
172     s->qscale= (s->lambda*139 + FF_LAMBDA_SCALE*64) >> (FF_LAMBDA_SHIFT + 7);
173     s->qscale= clip(s->qscale, s->avctx->qmin, s->avctx->qmax);
174     
175     s->lambda2= (s->lambda*s->lambda + FF_LAMBDA_SCALE/2) >> FF_LAMBDA_SHIFT;
176 }
177 #endif //CONFIG_ENCODERS
178
179 void ff_init_scantable(uint8_t *permutation, ScanTable *st, const uint8_t *src_scantable){
180     int i;
181     int end;
182     
183     st->scantable= src_scantable;
184
185     for(i=0; i<64; i++){
186         int j;
187         j = src_scantable[i];
188         st->permutated[i] = permutation[j];
189 #ifdef ARCH_POWERPC
190         st->inverse[j] = i;
191 #endif
192     }
193     
194     end=-1;
195     for(i=0; i<64; i++){
196         int j;
197         j = st->permutated[i];
198         if(j>end) end=j;
199         st->raster_end[i]= end;
200     }
201 }
202
#ifdef CONFIG_ENCODERS
/**
 * Writes an optional quantization matrix to the bitstream.
 * A NULL matrix is coded as a single 0 bit; otherwise a 1 bit is followed by
 * the 64 entries, 8 bits each, in ff_zigzag_direct order.
 */
void ff_write_quant_matrix(PutBitContext *pb, int16_t *matrix)
{
    int idx;

    if (!matrix) {
        put_bits(pb, 1, 0);
        return;
    }

    put_bits(pb, 1, 1);
    for (idx = 0; idx < 64; idx++)
        put_bits(pb, 8, matrix[ ff_zigzag_direct[idx] ]);
}
#endif //CONFIG_ENCODERS
216
217 /* init common dct for both encoder and decoder */
218 int DCT_common_init(MpegEncContext *s)
219 {
220     s->dct_unquantize_h263_intra = dct_unquantize_h263_intra_c;
221     s->dct_unquantize_h263_inter = dct_unquantize_h263_inter_c;
222     s->dct_unquantize_h261_intra = dct_unquantize_h261_intra_c;
223     s->dct_unquantize_h261_inter = dct_unquantize_h261_inter_c;
224     s->dct_unquantize_mpeg1_intra = dct_unquantize_mpeg1_intra_c;
225     s->dct_unquantize_mpeg1_inter = dct_unquantize_mpeg1_inter_c;
226     s->dct_unquantize_mpeg2_intra = dct_unquantize_mpeg2_intra_c;
227     s->dct_unquantize_mpeg2_inter = dct_unquantize_mpeg2_inter_c;
228
229 #ifdef CONFIG_ENCODERS
230     s->dct_quantize= dct_quantize_c;
231     s->denoise_dct= denoise_dct_c;
232 #endif
233         
234 #ifdef HAVE_MMX
235     MPV_common_init_mmx(s);
236 #endif
237 #ifdef ARCH_ALPHA
238     MPV_common_init_axp(s);
239 #endif
240 #ifdef HAVE_MLIB
241     MPV_common_init_mlib(s);
242 #endif
243 #ifdef HAVE_MMI
244     MPV_common_init_mmi(s);
245 #endif
246 #ifdef ARCH_ARMV4L
247     MPV_common_init_armv4l(s);
248 #endif
249 #ifdef ARCH_POWERPC
250     MPV_common_init_ppc(s);
251 #endif
252
253 #ifdef CONFIG_ENCODERS
254     s->fast_dct_quantize= s->dct_quantize;
255
256     if(s->flags&CODEC_FLAG_TRELLIS_QUANT){
257         s->dct_quantize= dct_quantize_trellis_c; //move before MPV_common_init_*
258     }
259
260 #endif //CONFIG_ENCODERS
261
262     /* load & permutate scantables
263        note: only wmv uses differnt ones 
264     */
265     if(s->alternate_scan){
266         ff_init_scantable(s->dsp.idct_permutation, &s->inter_scantable  , ff_alternate_vertical_scan);
267         ff_init_scantable(s->dsp.idct_permutation, &s->intra_scantable  , ff_alternate_vertical_scan);
268     }else{
269         ff_init_scantable(s->dsp.idct_permutation, &s->inter_scantable  , ff_zigzag_direct);
270         ff_init_scantable(s->dsp.idct_permutation, &s->intra_scantable  , ff_zigzag_direct);
271     }
272     ff_init_scantable(s->dsp.idct_permutation, &s->intra_h_scantable, ff_alternate_horizontal_scan);
273     ff_init_scantable(s->dsp.idct_permutation, &s->intra_v_scantable, ff_alternate_vertical_scan);
274
275     return 0;
276 }
277
278 static void copy_picture(Picture *dst, Picture *src){
279     *dst = *src;
280     dst->type= FF_BUFFER_TYPE_COPY;
281 }
282
283 static void copy_picture_attributes(MpegEncContext *s, AVFrame *dst, AVFrame *src){
284     int i;
285
286     dst->pict_type              = src->pict_type;
287     dst->quality                = src->quality;
288     dst->coded_picture_number   = src->coded_picture_number;
289     dst->display_picture_number = src->display_picture_number;
290 //    dst->reference              = src->reference;
291     dst->pts                    = src->pts;
292     dst->interlaced_frame       = src->interlaced_frame;
293     dst->top_field_first        = src->top_field_first;
294
295     if(s->avctx->me_threshold){
296         if(!src->motion_val[0])
297             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.motion_val not set!\n");
298         if(!src->mb_type)
299             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.mb_type not set!\n");
300         if(!src->ref_index[0])
301             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.ref_index not set!\n");
302         if(src->motion_subsample_log2 != dst->motion_subsample_log2)
303             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.motion_subsample_log2 doesnt match! (%d!=%d)\n",
304             src->motion_subsample_log2, dst->motion_subsample_log2);
305
306         memcpy(dst->mb_type, src->mb_type, s->mb_stride * s->mb_height * sizeof(dst->mb_type[0]));
307         
308         for(i=0; i<2; i++){
309             int stride= ((16*s->mb_width )>>src->motion_subsample_log2) + 1;
310             int height= ((16*s->mb_height)>>src->motion_subsample_log2);
311
312             if(src->motion_val[i] && src->motion_val[i] != dst->motion_val[i]){
313                 memcpy(dst->motion_val[i], src->motion_val[i], 2*stride*height*sizeof(int16_t));
314             }
315             if(src->ref_index[i] && src->ref_index[i] != dst->ref_index[i]){
316                 memcpy(dst->ref_index[i], src->ref_index[i], s->b8_stride*2*s->mb_height*sizeof(int8_t));
317             }
318         }
319     }
320 }
321
322 /**
323  * allocates a Picture
324  * The pixels are allocated/set by calling get_buffer() if shared=0
325  */
326 static int alloc_picture(MpegEncContext *s, Picture *pic, int shared){
327     const int big_mb_num= s->mb_stride*(s->mb_height+1) + 1; //the +1 is needed so memset(,,stride*height) doesnt sig11
328     const int mb_array_size= s->mb_stride*s->mb_height;
329     const int b8_array_size= s->b8_stride*s->mb_height*2;
330     const int b4_array_size= s->b4_stride*s->mb_height*4;
331     int i;
332     
333     if(shared){
334         assert(pic->data[0]);
335         assert(pic->type == 0 || pic->type == FF_BUFFER_TYPE_SHARED);
336         pic->type= FF_BUFFER_TYPE_SHARED;
337     }else{
338         int r;
339         
340         assert(!pic->data[0]);
341         
342         r= s->avctx->get_buffer(s->avctx, (AVFrame*)pic);
343         
344         if(r<0 || !pic->age || !pic->type || !pic->data[0]){
345             av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed (%d %d %d %p)\n", r, pic->age, pic->type, pic->data[0]);
346             return -1;
347         }
348
349         if(s->linesize && (s->linesize != pic->linesize[0] || s->uvlinesize != pic->linesize[1])){
350             av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed (stride changed)\n");
351             return -1;
352         }
353
354         if(pic->linesize[1] != pic->linesize[2]){
355             av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed (uv stride missmatch)\n");
356             return -1;
357         }
358
359         s->linesize  = pic->linesize[0];
360         s->uvlinesize= pic->linesize[1];
361     }
362     
363     if(pic->qscale_table==NULL){
364         if (s->encoding) {        
365             CHECKED_ALLOCZ(pic->mb_var   , mb_array_size * sizeof(int16_t))
366             CHECKED_ALLOCZ(pic->mc_mb_var, mb_array_size * sizeof(int16_t))
367             CHECKED_ALLOCZ(pic->mb_mean  , mb_array_size * sizeof(int8_t))
368         }
369
370         CHECKED_ALLOCZ(pic->mbskip_table , mb_array_size * sizeof(uint8_t)+2) //the +2 is for the slice end check
371         CHECKED_ALLOCZ(pic->qscale_table , mb_array_size * sizeof(uint8_t))
372         CHECKED_ALLOCZ(pic->mb_type_base , big_mb_num    * sizeof(uint32_t))
373         pic->mb_type= pic->mb_type_base + s->mb_stride+1;
374         if(s->out_format == FMT_H264){
375             for(i=0; i<2; i++){
376                 CHECKED_ALLOCZ(pic->motion_val_base[i], 2 * (b4_array_size+2)  * sizeof(int16_t))
377                 pic->motion_val[i]= pic->motion_val_base[i]+2;
378                 CHECKED_ALLOCZ(pic->ref_index[i], b8_array_size * sizeof(uint8_t))
379             }
380             pic->motion_subsample_log2= 2;
381         }else if(s->out_format == FMT_H263 || s->encoding || (s->avctx->debug&FF_DEBUG_MV) || (s->avctx->debug_mv)){
382             for(i=0; i<2; i++){
383                 CHECKED_ALLOCZ(pic->motion_val_base[i], 2 * (b8_array_size+2) * sizeof(int16_t))
384                 pic->motion_val[i]= pic->motion_val_base[i]+2;
385                 CHECKED_ALLOCZ(pic->ref_index[i], b8_array_size * sizeof(uint8_t))
386             }
387             pic->motion_subsample_log2= 3;
388         }
389         if(s->avctx->debug&FF_DEBUG_DCT_COEFF) {
390             CHECKED_ALLOCZ(pic->dct_coeff, 64 * mb_array_size * sizeof(DCTELEM)*6)
391         }
392         pic->qstride= s->mb_stride;
393         CHECKED_ALLOCZ(pic->pan_scan , 1 * sizeof(AVPanScan))
394     }
395
396     //it might be nicer if the application would keep track of these but it would require a API change
397     memmove(s->prev_pict_types+1, s->prev_pict_types, PREV_PICT_TYPES_BUFFER_SIZE-1);
398     s->prev_pict_types[0]= s->pict_type;
399     if(pic->age < PREV_PICT_TYPES_BUFFER_SIZE && s->prev_pict_types[pic->age] == B_TYPE)
400         pic->age= INT_MAX; // skiped MBs in b frames are quite rare in mpeg1/2 and its a bit tricky to skip them anyway
401     
402     return 0;
403 fail: //for the CHECKED_ALLOCZ macro
404     return -1;
405 }
406
407 /**
408  * deallocates a picture
409  */
410 static void free_picture(MpegEncContext *s, Picture *pic){
411     int i;
412
413     if(pic->data[0] && pic->type!=FF_BUFFER_TYPE_SHARED){
414         s->avctx->release_buffer(s->avctx, (AVFrame*)pic);
415     }
416
417     av_freep(&pic->mb_var);
418     av_freep(&pic->mc_mb_var);
419     av_freep(&pic->mb_mean);
420     av_freep(&pic->mbskip_table);
421     av_freep(&pic->qscale_table);
422     av_freep(&pic->mb_type_base);
423     av_freep(&pic->dct_coeff);
424     av_freep(&pic->pan_scan);
425     pic->mb_type= NULL;
426     for(i=0; i<2; i++){
427         av_freep(&pic->motion_val_base[i]);
428         av_freep(&pic->ref_index[i]);
429     }
430     
431     if(pic->type == FF_BUFFER_TYPE_SHARED){
432         for(i=0; i<4; i++){
433             pic->base[i]=
434             pic->data[i]= NULL;
435         }
436         pic->type= 0;        
437     }
438 }
439
440 static int init_duplicate_context(MpegEncContext *s, MpegEncContext *base){
441     int i;
442
443     // edge emu needs blocksize + filter length - 1 (=17x17 for halfpel / 21x21 for h264) 
444     CHECKED_ALLOCZ(s->allocated_edge_emu_buffer, (s->width+64)*2*17*2); //(width + edge + align)*interlaced*MBsize*tolerance
445     s->edge_emu_buffer= s->allocated_edge_emu_buffer + (s->width+64)*2*17;
446
447      //FIXME should be linesize instead of s->width*2 but that isnt known before get_buffer()
448     CHECKED_ALLOCZ(s->me.scratchpad,  (s->width+64)*4*16*2*sizeof(uint8_t)) 
449     s->rd_scratchpad=   s->me.scratchpad;
450     s->b_scratchpad=    s->me.scratchpad;
451     s->obmc_scratchpad= s->me.scratchpad + 16;
452     if (s->encoding) {
453         CHECKED_ALLOCZ(s->me.map      , ME_MAP_SIZE*sizeof(uint32_t))
454         CHECKED_ALLOCZ(s->me.score_map, ME_MAP_SIZE*sizeof(uint32_t))
455         if(s->avctx->noise_reduction){
456             CHECKED_ALLOCZ(s->dct_error_sum, 2 * 64 * sizeof(int))
457         }
458     }   
459     CHECKED_ALLOCZ(s->blocks, 64*12*2 * sizeof(DCTELEM))
460     s->block= s->blocks[0];
461
462     for(i=0;i<12;i++){
463         s->pblocks[i] = (short *)(&s->block[i]);
464     }
465     return 0;
466 fail:
467     return -1; //free() through MPV_common_end()
468 }
469
470 static void free_duplicate_context(MpegEncContext *s){
471     if(s==NULL) return;
472
473     av_freep(&s->allocated_edge_emu_buffer); s->edge_emu_buffer= NULL;
474     av_freep(&s->me.scratchpad);
475     s->rd_scratchpad=   
476     s->b_scratchpad=    
477     s->obmc_scratchpad= NULL;
478     
479     av_freep(&s->dct_error_sum);
480     av_freep(&s->me.map);
481     av_freep(&s->me.score_map);
482     av_freep(&s->blocks);
483     s->block= NULL;
484 }
485
486 static void backup_duplicate_context(MpegEncContext *bak, MpegEncContext *src){
487 #define COPY(a) bak->a= src->a
488     COPY(allocated_edge_emu_buffer);
489     COPY(edge_emu_buffer);
490     COPY(me.scratchpad);
491     COPY(rd_scratchpad);
492     COPY(b_scratchpad);
493     COPY(obmc_scratchpad);
494     COPY(me.map);
495     COPY(me.score_map);
496     COPY(blocks);
497     COPY(block);
498     COPY(start_mb_y);
499     COPY(end_mb_y);
500     COPY(me.map_generation);
501     COPY(pb);
502     COPY(dct_error_sum);
503     COPY(dct_count[0]);
504     COPY(dct_count[1]);
505 #undef COPY
506 }
507
508 void ff_update_duplicate_context(MpegEncContext *dst, MpegEncContext *src){
509     MpegEncContext bak;
510     int i;
511     //FIXME copy only needed parts
512 //START_TIMER
513     backup_duplicate_context(&bak, dst);
514     memcpy(dst, src, sizeof(MpegEncContext));
515     backup_duplicate_context(dst, &bak);
516     for(i=0;i<12;i++){
517         dst->pblocks[i] = (short *)(&dst->block[i]);
518     }
519 //STOP_TIMER("update_duplicate_context") //about 10k cycles / 0.01 sec for 1000frames on 1ghz with 2 threads
520 }
521
522 static void update_duplicate_context_after_me(MpegEncContext *dst, MpegEncContext *src){
523 #define COPY(a) dst->a= src->a
524     COPY(pict_type);
525     COPY(current_picture);
526     COPY(f_code);
527     COPY(b_code);
528     COPY(qscale);
529     COPY(lambda);
530     COPY(lambda2);
531     COPY(picture_in_gop_number);
532     COPY(gop_picture_number);
533     COPY(frame_pred_frame_dct); //FIXME dont set in encode_header
534     COPY(progressive_frame); //FIXME dont set in encode_header
535     COPY(partitioned_frame); //FIXME dont set in encode_header
536 #undef COPY
537 }
538
539 /**
540  * sets the given MpegEncContext to common defaults (same for encoding and decoding).
541  * the changed fields will not depend upon the prior state of the MpegEncContext.
542  */
543 static void MPV_common_defaults(MpegEncContext *s){
544     s->y_dc_scale_table=
545     s->c_dc_scale_table= ff_mpeg1_dc_scale_table;
546     s->chroma_qscale_table= ff_default_chroma_qscale_table;
547     s->progressive_frame= 1;
548     s->progressive_sequence= 1;
549     s->picture_structure= PICT_FRAME;
550
551     s->coded_picture_number = 0;
552     s->picture_number = 0;
553     s->input_picture_number = 0;
554
555     s->picture_in_gop_number = 0;
556
557     s->f_code = 1;
558     s->b_code = 1;
559 }
560
561 /**
562  * sets the given MpegEncContext to defaults for decoding.
563  * the changed fields will not depend upon the prior state of the MpegEncContext.
564  */
565 void MPV_decode_defaults(MpegEncContext *s){
566     MPV_common_defaults(s);
567 }
568
/**
 * Sets the given MpegEncContext to defaults for encoding.
 * The changed fields will not depend upon the prior state of the MpegEncContext.
 *
 * On the first call the tables shared by all encoder contexts are set up:
 * default_mv_penalty is allocated zero-filled, and default_fcode_tab is set
 * to 1 for the 32 entries at indices MAX_MV-16 .. MAX_MV+15 and 0 elsewhere.
 * If the allocation fails, me.mv_penalty is left NULL and the
 * setup is attempted again on the next call.
 */

#ifdef CONFIG_ENCODERS
static void MPV_encode_defaults(MpegEncContext *s){
    static int done=0;

    MPV_common_defaults(s);

    if(!done){
        int i;

        /* av_mallocz() already returns zeroed memory, so no memset() is
           needed (and none may be done on a failed allocation) */
        default_mv_penalty= av_mallocz( sizeof(uint8_t)*(MAX_FCODE+1)*(2*MAX_MV+1) );

        memset(default_fcode_tab, 0, sizeof(default_fcode_tab));
        for(i=-16; i<16; i++){
            default_fcode_tab[i + MAX_MV]= 1;
        }

        /* only latch the one-time init once the table really exists */
        if(default_mv_penalty)
            done=1;
    }
    s->me.mv_penalty= default_mv_penalty;
    s->fcode_tab= default_fcode_tab;
}
#endif //CONFIG_ENCODERS
596
597 /** 
598  * init common structure for both encoder and decoder.
599  * this assumes that some variables like width/height are already set
600  */
601 int MPV_common_init(MpegEncContext *s)
602 {
603     int y_size, c_size, yc_size, i, mb_array_size, mv_table_size, x, y;
604
605     if(s->avctx->thread_count > MAX_THREADS || (16*s->avctx->thread_count > s->height && s->height)){
606         av_log(s->avctx, AV_LOG_ERROR, "too many threads\n");
607         return -1;
608     }
609
610     dsputil_init(&s->dsp, s->avctx);
611     DCT_common_init(s);
612
613     s->flags= s->avctx->flags;
614     s->flags2= s->avctx->flags2;
615
616     s->mb_width  = (s->width  + 15) / 16;
617     s->mb_height = (s->height + 15) / 16;
618     s->mb_stride = s->mb_width + 1;
619     s->b8_stride = s->mb_width*2 + 1;
620     s->b4_stride = s->mb_width*4 + 1;
621     mb_array_size= s->mb_height * s->mb_stride;
622     mv_table_size= (s->mb_height+2) * s->mb_stride + 1;
623
624     /* set chroma shifts */
625     avcodec_get_chroma_sub_sample(s->avctx->pix_fmt,&(s->chroma_x_shift),
626                                                     &(s->chroma_y_shift) );
627
628     /* set default edge pos, will be overriden in decode_header if needed */
629     s->h_edge_pos= s->mb_width*16;
630     s->v_edge_pos= s->mb_height*16;
631
632     s->mb_num = s->mb_width * s->mb_height;
633     
634     s->block_wrap[0]=
635     s->block_wrap[1]=
636     s->block_wrap[2]=
637     s->block_wrap[3]= s->b8_stride;
638     s->block_wrap[4]=
639     s->block_wrap[5]= s->mb_stride;
640  
641     y_size = s->b8_stride * (2 * s->mb_height + 1);
642     c_size = s->mb_stride * (s->mb_height + 1);
643     yc_size = y_size + 2 * c_size;
644     
645     /* convert fourcc to upper case */
646     s->avctx->codec_tag=   toupper( s->avctx->codec_tag     &0xFF)          
647                         + (toupper((s->avctx->codec_tag>>8 )&0xFF)<<8 )
648                         + (toupper((s->avctx->codec_tag>>16)&0xFF)<<16) 
649                         + (toupper((s->avctx->codec_tag>>24)&0xFF)<<24);
650
651     s->avctx->stream_codec_tag=   toupper( s->avctx->stream_codec_tag     &0xFF)          
652                                + (toupper((s->avctx->stream_codec_tag>>8 )&0xFF)<<8 )
653                                + (toupper((s->avctx->stream_codec_tag>>16)&0xFF)<<16) 
654                                + (toupper((s->avctx->stream_codec_tag>>24)&0xFF)<<24);
655
656     s->avctx->coded_frame= (AVFrame*)&s->current_picture;
657
658     CHECKED_ALLOCZ(s->mb_index2xy, (s->mb_num+1)*sizeof(int)) //error ressilience code looks cleaner with this
659     for(y=0; y<s->mb_height; y++){
660         for(x=0; x<s->mb_width; x++){
661             s->mb_index2xy[ x + y*s->mb_width ] = x + y*s->mb_stride;
662         }
663     }
664     s->mb_index2xy[ s->mb_height*s->mb_width ] = (s->mb_height-1)*s->mb_stride + s->mb_width; //FIXME really needed?
665     
666     if (s->encoding) {
667         /* Allocate MV tables */
668         CHECKED_ALLOCZ(s->p_mv_table_base            , mv_table_size * 2 * sizeof(int16_t))
669         CHECKED_ALLOCZ(s->b_forw_mv_table_base       , mv_table_size * 2 * sizeof(int16_t))
670         CHECKED_ALLOCZ(s->b_back_mv_table_base       , mv_table_size * 2 * sizeof(int16_t))
671         CHECKED_ALLOCZ(s->b_bidir_forw_mv_table_base , mv_table_size * 2 * sizeof(int16_t))
672         CHECKED_ALLOCZ(s->b_bidir_back_mv_table_base , mv_table_size * 2 * sizeof(int16_t))
673         CHECKED_ALLOCZ(s->b_direct_mv_table_base     , mv_table_size * 2 * sizeof(int16_t))
674         s->p_mv_table           = s->p_mv_table_base            + s->mb_stride + 1;
675         s->b_forw_mv_table      = s->b_forw_mv_table_base       + s->mb_stride + 1;
676         s->b_back_mv_table      = s->b_back_mv_table_base       + s->mb_stride + 1;
677         s->b_bidir_forw_mv_table= s->b_bidir_forw_mv_table_base + s->mb_stride + 1;
678         s->b_bidir_back_mv_table= s->b_bidir_back_mv_table_base + s->mb_stride + 1;
679         s->b_direct_mv_table    = s->b_direct_mv_table_base     + s->mb_stride + 1;
680
681         if(s->msmpeg4_version){
682             CHECKED_ALLOCZ(s->ac_stats, 2*2*(MAX_LEVEL+1)*(MAX_RUN+1)*2*sizeof(int));
683         }
684         CHECKED_ALLOCZ(s->avctx->stats_out, 256);
685
686         /* Allocate MB type table */
687         CHECKED_ALLOCZ(s->mb_type  , mb_array_size * sizeof(uint16_t)) //needed for encoding
688         
689         CHECKED_ALLOCZ(s->lambda_table, mb_array_size * sizeof(int))
690         
691         CHECKED_ALLOCZ(s->q_intra_matrix, 64*32 * sizeof(int))
692         CHECKED_ALLOCZ(s->q_inter_matrix, 64*32 * sizeof(int))
693         CHECKED_ALLOCZ(s->q_intra_matrix16, 64*32*2 * sizeof(uint16_t))
694         CHECKED_ALLOCZ(s->q_inter_matrix16, 64*32*2 * sizeof(uint16_t))
695         CHECKED_ALLOCZ(s->input_picture, MAX_PICTURE_COUNT * sizeof(Picture*))
696         CHECKED_ALLOCZ(s->reordered_input_picture, MAX_PICTURE_COUNT * sizeof(Picture*))
697         
698         if(s->avctx->noise_reduction){
699             CHECKED_ALLOCZ(s->dct_offset, 2 * 64 * sizeof(uint16_t))
700         }
701     }
702     CHECKED_ALLOCZ(s->picture, MAX_PICTURE_COUNT * sizeof(Picture))
703
704     CHECKED_ALLOCZ(s->error_status_table, mb_array_size*sizeof(uint8_t))
705     
706     if(s->codec_id==CODEC_ID_MPEG4 || (s->flags & CODEC_FLAG_INTERLACED_ME)){
707         /* interlaced direct mode decoding tables */
708             for(i=0; i<2; i++){
709                 int j, k;
710                 for(j=0; j<2; j++){
711                     for(k=0; k<2; k++){
712                         CHECKED_ALLOCZ(s->b_field_mv_table_base[i][j][k]     , mv_table_size * 2 * sizeof(int16_t))
713                         s->b_field_mv_table[i][j][k]    = s->b_field_mv_table_base[i][j][k]     + s->mb_stride + 1;
714                     }
715                     CHECKED_ALLOCZ(s->b_field_select_table[i][j]     , mb_array_size * 2 * sizeof(uint8_t))
716                     CHECKED_ALLOCZ(s->p_field_mv_table_base[i][j]     , mv_table_size * 2 * sizeof(int16_t))
717                     s->p_field_mv_table[i][j]    = s->p_field_mv_table_base[i][j]     + s->mb_stride + 1;
718                 }
719                 CHECKED_ALLOCZ(s->p_field_select_table[i]      , mb_array_size * 2 * sizeof(uint8_t))
720             }
721     }
722     if (s->out_format == FMT_H263) {
723         /* ac values */
724         CHECKED_ALLOCZ(s->ac_val_base, yc_size * sizeof(int16_t) * 16);
725         s->ac_val[0] = s->ac_val_base + s->b8_stride + 1;
726         s->ac_val[1] = s->ac_val_base + y_size + s->mb_stride + 1;
727         s->ac_val[2] = s->ac_val[1] + c_size;
728         
729         /* cbp values */
730         CHECKED_ALLOCZ(s->coded_block_base, y_size);
731         s->coded_block= s->coded_block_base + s->b8_stride + 1;
732         
733         /* divx501 bitstream reorder buffer */
734         CHECKED_ALLOCZ(s->bitstream_buffer, BITSTREAM_BUFFER_SIZE);
735
736         /* cbp, ac_pred, pred_dir */
737         CHECKED_ALLOCZ(s->cbp_table  , mb_array_size * sizeof(uint8_t))
738         CHECKED_ALLOCZ(s->pred_dir_table, mb_array_size * sizeof(uint8_t))
739     }
740     
741     if (s->h263_pred || s->h263_plus || !s->encoding) {
742         /* dc values */
743         //MN: we need these for error resilience of intra-frames
744         CHECKED_ALLOCZ(s->dc_val_base, yc_size * sizeof(int16_t));
745         s->dc_val[0] = s->dc_val_base + s->b8_stride + 1;
746         s->dc_val[1] = s->dc_val_base + y_size + s->mb_stride + 1;
747         s->dc_val[2] = s->dc_val[1] + c_size;
748         for(i=0;i<yc_size;i++)
749             s->dc_val_base[i] = 1024;
750     }
751
752     /* which mb is a intra block */
753     CHECKED_ALLOCZ(s->mbintra_table, mb_array_size);
754     memset(s->mbintra_table, 1, mb_array_size);
755     
756     /* init macroblock skip table */
757     CHECKED_ALLOCZ(s->mbskip_table, mb_array_size+2);
758     //Note the +1 is for a quicker mpeg4 slice_end detection
759     CHECKED_ALLOCZ(s->prev_pict_types, PREV_PICT_TYPES_BUFFER_SIZE);
760     
761     s->parse_context.state= -1;
762     if((s->avctx->debug&(FF_DEBUG_VIS_QP|FF_DEBUG_VIS_MB_TYPE)) || (s->avctx->debug_mv)){
763        s->visualization_buffer[0] = av_malloc((s->mb_width*16 + 2*EDGE_WIDTH) * s->mb_height*16 + 2*EDGE_WIDTH);
764        s->visualization_buffer[1] = av_malloc((s->mb_width*8 + EDGE_WIDTH) * s->mb_height*8 + EDGE_WIDTH);
765        s->visualization_buffer[2] = av_malloc((s->mb_width*8 + EDGE_WIDTH) * s->mb_height*8 + EDGE_WIDTH);
766     }
767
768     s->context_initialized = 1;
769
770     s->thread_context[0]= s;
771     for(i=1; i<s->avctx->thread_count; i++){
772         s->thread_context[i]= av_malloc(sizeof(MpegEncContext));
773         memcpy(s->thread_context[i], s, sizeof(MpegEncContext));
774     }
775
776     for(i=0; i<s->avctx->thread_count; i++){
777         if(init_duplicate_context(s->thread_context[i], s) < 0)
778            goto fail;
779         s->thread_context[i]->start_mb_y= (s->mb_height*(i  ) + s->avctx->thread_count/2) / s->avctx->thread_count;
780         s->thread_context[i]->end_mb_y  = (s->mb_height*(i+1) + s->avctx->thread_count/2) / s->avctx->thread_count;
781     }
782
783     return 0;
784  fail:
785     MPV_common_end(s);
786     return -1;
787 }
788
789 /* init common structure for both encoder and decoder */
790 void MPV_common_end(MpegEncContext *s)
791 {
792     int i, j, k;
793
794     for(i=0; i<s->avctx->thread_count; i++){
795         free_duplicate_context(s->thread_context[i]);
796     }
797     for(i=1; i<s->avctx->thread_count; i++){
798         av_freep(&s->thread_context[i]);
799     }
800
801     av_freep(&s->parse_context.buffer);
802     s->parse_context.buffer_size=0;
803
804     av_freep(&s->mb_type);
805     av_freep(&s->p_mv_table_base);
806     av_freep(&s->b_forw_mv_table_base);
807     av_freep(&s->b_back_mv_table_base);
808     av_freep(&s->b_bidir_forw_mv_table_base);
809     av_freep(&s->b_bidir_back_mv_table_base);
810     av_freep(&s->b_direct_mv_table_base);
811     s->p_mv_table= NULL;
812     s->b_forw_mv_table= NULL;
813     s->b_back_mv_table= NULL;
814     s->b_bidir_forw_mv_table= NULL;
815     s->b_bidir_back_mv_table= NULL;
816     s->b_direct_mv_table= NULL;
817     for(i=0; i<2; i++){
818         for(j=0; j<2; j++){
819             for(k=0; k<2; k++){
820                 av_freep(&s->b_field_mv_table_base[i][j][k]);
821                 s->b_field_mv_table[i][j][k]=NULL;
822             }
823             av_freep(&s->b_field_select_table[i][j]);
824             av_freep(&s->p_field_mv_table_base[i][j]);
825             s->p_field_mv_table[i][j]=NULL;
826         }
827         av_freep(&s->p_field_select_table[i]);
828     }
829     
830     av_freep(&s->dc_val_base);
831     av_freep(&s->ac_val_base);
832     av_freep(&s->coded_block_base);
833     av_freep(&s->mbintra_table);
834     av_freep(&s->cbp_table);
835     av_freep(&s->pred_dir_table);
836     
837     av_freep(&s->mbskip_table);
838     av_freep(&s->prev_pict_types);
839     av_freep(&s->bitstream_buffer);
840     av_freep(&s->avctx->stats_out);
841     av_freep(&s->ac_stats);
842     av_freep(&s->error_status_table);
843     av_freep(&s->mb_index2xy);
844     av_freep(&s->lambda_table);
845     av_freep(&s->q_intra_matrix);
846     av_freep(&s->q_inter_matrix);
847     av_freep(&s->q_intra_matrix16);
848     av_freep(&s->q_inter_matrix16);
849     av_freep(&s->input_picture);
850     av_freep(&s->reordered_input_picture);
851     av_freep(&s->dct_offset);
852
853     if(s->picture){
854         for(i=0; i<MAX_PICTURE_COUNT; i++){
855             free_picture(s, &s->picture[i]);
856         }
857     }
858     av_freep(&s->picture);
859     s->context_initialized = 0;
860     s->last_picture_ptr=
861     s->next_picture_ptr=
862     s->current_picture_ptr= NULL;
863
864     for(i=0; i<3; i++)
865         av_freep(&s->visualization_buffer[i]);
866 }
867
868 #ifdef CONFIG_ENCODERS
869
870 /* init video encoder */
871 int MPV_encode_init(AVCodecContext *avctx)
872 {
873     MpegEncContext *s = avctx->priv_data;
874     int i, dummy;
875     int chroma_h_shift, chroma_v_shift;
876     
877     MPV_encode_defaults(s);
878
879     avctx->pix_fmt = PIX_FMT_YUV420P; // FIXME
880
881     s->bit_rate = avctx->bit_rate;
882     s->width = avctx->width;
883     s->height = avctx->height;
884     if(avctx->gop_size > 600){
885         av_log(avctx, AV_LOG_ERROR, "Warning keyframe interval too large! reducing it ...\n");
886         avctx->gop_size=600;
887     }
888     s->gop_size = avctx->gop_size;
889     s->avctx = avctx;
890     s->flags= avctx->flags;
891     s->flags2= avctx->flags2;
892     s->max_b_frames= avctx->max_b_frames;
893     s->codec_id= avctx->codec->id;
894     s->luma_elim_threshold  = avctx->luma_elim_threshold;
895     s->chroma_elim_threshold= avctx->chroma_elim_threshold;
896     s->strict_std_compliance= avctx->strict_std_compliance;
897     s->data_partitioning= avctx->flags & CODEC_FLAG_PART;
898     s->quarter_sample= (avctx->flags & CODEC_FLAG_QPEL)!=0;
899     s->mpeg_quant= avctx->mpeg_quant;
900     s->rtp_mode= !!avctx->rtp_payload_size;
901     s->intra_dc_precision= avctx->intra_dc_precision;
902
903     if (s->gop_size <= 1) {
904         s->intra_only = 1;
905         s->gop_size = 12;
906     } else {
907         s->intra_only = 0;
908     }
909
910     s->me_method = avctx->me_method;
911
912     /* Fixed QSCALE */
913     s->fixed_qscale = !!(avctx->flags & CODEC_FLAG_QSCALE);
914     
915     s->adaptive_quant= (   s->avctx->lumi_masking
916                         || s->avctx->dark_masking
917                         || s->avctx->temporal_cplx_masking 
918                         || s->avctx->spatial_cplx_masking
919                         || s->avctx->p_masking
920                         || (s->flags&CODEC_FLAG_QP_RD))
921                        && !s->fixed_qscale;
922     
923     s->obmc= !!(s->flags & CODEC_FLAG_OBMC);
924     s->loop_filter= !!(s->flags & CODEC_FLAG_LOOP_FILTER);
925     s->alternate_scan= !!(s->flags & CODEC_FLAG_ALT_SCAN);
926
927     if(avctx->rc_max_rate && !avctx->rc_buffer_size){
928         av_log(avctx, AV_LOG_ERROR, "a vbv buffer size is needed, for encoding with a maximum bitrate\n");
929         return -1;
930     }    
931
932     if(avctx->rc_min_rate && avctx->rc_max_rate != avctx->rc_min_rate){
933         av_log(avctx, AV_LOG_INFO, "Warning min_rate > 0 but min_rate != max_rate isnt recommanded!\n");
934     }
935     
936     if(avctx->rc_min_rate && avctx->rc_min_rate > avctx->bit_rate){
937         av_log(avctx, AV_LOG_INFO, "bitrate below min bitrate\n");
938         return -1;
939     }
940     
941     if(avctx->rc_max_rate && avctx->rc_max_rate < avctx->bit_rate){
942         av_log(avctx, AV_LOG_INFO, "bitrate above max bitrate\n");
943         return -1;
944     }
945         
946     if(   s->avctx->rc_max_rate && s->avctx->rc_min_rate == s->avctx->rc_max_rate 
947        && (s->codec_id == CODEC_ID_MPEG1VIDEO || s->codec_id == CODEC_ID_MPEG2VIDEO)
948        && 90000LL * (avctx->rc_buffer_size-1) > s->avctx->rc_max_rate*0xFFFFLL){
949         
950         av_log(avctx, AV_LOG_INFO, "Warning vbv_delay will be set to 0xFFFF (=VBR) as the specified vbv buffer is too large for the given bitrate!\n");
951     }
952        
953     if((s->flags & CODEC_FLAG_4MV) && s->codec_id != CODEC_ID_MPEG4 
954        && s->codec_id != CODEC_ID_H263 && s->codec_id != CODEC_ID_H263P && s->codec_id != CODEC_ID_FLV1){
955         av_log(avctx, AV_LOG_ERROR, "4MV not supported by codec\n");
956         return -1;
957     }
958         
959     if(s->obmc && s->avctx->mb_decision != FF_MB_DECISION_SIMPLE){
960         av_log(avctx, AV_LOG_ERROR, "OBMC is only supported with simple mb decission\n");
961         return -1;
962     }
963     
964     if(s->obmc && s->codec_id != CODEC_ID_H263 && s->codec_id != CODEC_ID_H263P){
965         av_log(avctx, AV_LOG_ERROR, "OBMC is only supported with H263(+)\n");
966         return -1;
967     }
968     
969     if(s->quarter_sample && s->codec_id != CODEC_ID_MPEG4){
970         av_log(avctx, AV_LOG_ERROR, "qpel not supported by codec\n");
971         return -1;
972     }
973
974     if(s->data_partitioning && s->codec_id != CODEC_ID_MPEG4){
975         av_log(avctx, AV_LOG_ERROR, "data partitioning not supported by codec\n");
976         return -1;
977     }
978     
979     if(s->max_b_frames && s->codec_id != CODEC_ID_MPEG4 && s->codec_id != CODEC_ID_MPEG1VIDEO && s->codec_id != CODEC_ID_MPEG2VIDEO){
980         av_log(avctx, AV_LOG_ERROR, "b frames not supported by codec\n");
981         return -1;
982     }
983
984     if((s->flags & (CODEC_FLAG_INTERLACED_DCT|CODEC_FLAG_INTERLACED_ME|CODEC_FLAG_ALT_SCAN)) 
985        && s->codec_id != CODEC_ID_MPEG4 && s->codec_id != CODEC_ID_MPEG2VIDEO){
986         av_log(avctx, AV_LOG_ERROR, "interlacing not supported by codec\n");
987         return -1;
988     }
989         
990     if(s->mpeg_quant && s->codec_id != CODEC_ID_MPEG4){ //FIXME mpeg2 uses that too
991         av_log(avctx, AV_LOG_ERROR, "mpeg2 style quantization not supporetd by codec\n");
992         return -1;
993     }
994         
995     if((s->flags & CODEC_FLAG_CBP_RD) && !(s->flags & CODEC_FLAG_TRELLIS_QUANT)){
996         av_log(avctx, AV_LOG_ERROR, "CBP RD needs trellis quant\n");
997         return -1;
998     }
999
1000     if((s->flags & CODEC_FLAG_QP_RD) && s->avctx->mb_decision != FF_MB_DECISION_RD){
1001         av_log(avctx, AV_LOG_ERROR, "QP RD needs mbd=2\n");
1002         return -1;
1003     }
1004     
1005     if(s->avctx->scenechange_threshold < 1000000000 && (s->flags & CODEC_FLAG_CLOSED_GOP)){
1006         av_log(avctx, AV_LOG_ERROR, "closed gop with scene change detection arent supported yet\n");
1007         return -1;
1008     }
1009     
1010     if(s->avctx->thread_count > 1 && s->codec_id != CODEC_ID_MPEG4 
1011        && s->codec_id != CODEC_ID_MPEG1VIDEO && s->codec_id != CODEC_ID_MPEG2VIDEO 
1012        && (s->codec_id != CODEC_ID_H263P || !(s->flags & CODEC_FLAG_H263P_SLICE_STRUCT))){
1013         av_log(avctx, AV_LOG_ERROR, "multi threaded encoding not supported by codec\n");
1014         return -1;
1015     }
1016     
1017     if(s->avctx->thread_count > 1)
1018         s->rtp_mode= 1;
1019
1020     i= ff_gcd(avctx->frame_rate, avctx->frame_rate_base);
1021     if(i > 1){
1022         av_log(avctx, AV_LOG_INFO, "removing common factors from framerate\n");
1023         avctx->frame_rate /= i;
1024         avctx->frame_rate_base /= i;
1025 //        return -1;
1026     }
1027     
1028     if(s->codec_id==CODEC_ID_MJPEG){
1029         s->intra_quant_bias= 1<<(QUANT_BIAS_SHIFT-1); //(a + x/2)/x
1030         s->inter_quant_bias= 0;
1031     }else if(s->mpeg_quant || s->codec_id==CODEC_ID_MPEG1VIDEO || s->codec_id==CODEC_ID_MPEG2VIDEO){
1032         s->intra_quant_bias= 3<<(QUANT_BIAS_SHIFT-3); //(a + x*3/8)/x
1033         s->inter_quant_bias= 0;
1034     }else{
1035         s->intra_quant_bias=0;
1036         s->inter_quant_bias=-(1<<(QUANT_BIAS_SHIFT-2)); //(a - x/4)/x
1037     }
1038     
1039     if(avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
1040         s->intra_quant_bias= avctx->intra_quant_bias;
1041     if(avctx->inter_quant_bias != FF_DEFAULT_QUANT_BIAS)
1042         s->inter_quant_bias= avctx->inter_quant_bias;
1043         
1044     avcodec_get_chroma_sub_sample(avctx->pix_fmt, &chroma_h_shift, &chroma_v_shift);
1045
1046     av_reduce(&s->time_increment_resolution, &dummy, s->avctx->frame_rate, s->avctx->frame_rate_base, (1<<16)-1);
1047     s->time_increment_bits = av_log2(s->time_increment_resolution - 1) + 1;
1048
1049     switch(avctx->codec->id) {
1050     case CODEC_ID_MPEG1VIDEO:
1051         s->out_format = FMT_MPEG1;
1052         s->low_delay= 0; //s->max_b_frames ? 0 : 1;
1053         avctx->delay= s->low_delay ? 0 : (s->max_b_frames + 1);
1054         break;
1055     case CODEC_ID_MPEG2VIDEO:
1056         s->out_format = FMT_MPEG1;
1057         s->low_delay= 0; //s->max_b_frames ? 0 : 1;
1058         avctx->delay= s->low_delay ? 0 : (s->max_b_frames + 1);
1059         s->rtp_mode= 1;
1060         break;
1061     case CODEC_ID_LJPEG:
1062     case CODEC_ID_MJPEG:
1063         s->out_format = FMT_MJPEG;
1064         s->intra_only = 1; /* force intra only for jpeg */
1065         s->mjpeg_write_tables = 1; /* write all tables */
1066         s->mjpeg_data_only_frames = 0; /* write all the needed headers */
1067         s->mjpeg_vsample[0] = 1<<chroma_v_shift;
1068         s->mjpeg_vsample[1] = 1;
1069         s->mjpeg_vsample[2] = 1; 
1070         s->mjpeg_hsample[0] = 1<<chroma_h_shift;
1071         s->mjpeg_hsample[1] = 1; 
1072         s->mjpeg_hsample[2] = 1; 
1073         if (mjpeg_init(s) < 0)
1074             return -1;
1075         avctx->delay=0;
1076         s->low_delay=1;
1077         break;
1078 #ifdef CONFIG_RISKY
1079     case CODEC_ID_H263:
1080         if (h263_get_picture_format(s->width, s->height) == 7) {
1081             av_log(avctx, AV_LOG_INFO, "Input picture size isn't suitable for h263 codec! try h263+\n");
1082             return -1;
1083         }
1084         s->out_format = FMT_H263;
1085         s->obmc= (avctx->flags & CODEC_FLAG_OBMC) ? 1:0;
1086         avctx->delay=0;
1087         s->low_delay=1;
1088         break;
1089     case CODEC_ID_H263P:
1090         s->out_format = FMT_H263;
1091         s->h263_plus = 1;
1092         /* Fx */
1093         s->umvplus = (avctx->flags & CODEC_FLAG_H263P_UMV) ? 1:0;
1094         s->h263_aic= (avctx->flags & CODEC_FLAG_H263P_AIC) ? 1:0;
1095         s->modified_quant= s->h263_aic;
1096         s->alt_inter_vlc= (avctx->flags & CODEC_FLAG_H263P_AIV) ? 1:0;
1097         s->obmc= (avctx->flags & CODEC_FLAG_OBMC) ? 1:0;
1098         s->loop_filter= (avctx->flags & CODEC_FLAG_LOOP_FILTER) ? 1:0;
1099         s->unrestricted_mv= s->obmc || s->loop_filter || s->umvplus;
1100         s->h263_slice_structured= (s->flags & CODEC_FLAG_H263P_SLICE_STRUCT) ? 1:0;
1101
1102         /* /Fx */
1103         /* These are just to be sure */
1104         avctx->delay=0;
1105         s->low_delay=1;
1106         break;
1107     case CODEC_ID_FLV1:
1108         s->out_format = FMT_H263;
1109         s->h263_flv = 2; /* format = 1; 11-bit codes */
1110         s->unrestricted_mv = 1;
1111         s->rtp_mode=0; /* don't allow GOB */
1112         avctx->delay=0;
1113         s->low_delay=1;
1114         break;
1115     case CODEC_ID_RV10:
1116         s->out_format = FMT_H263;
1117         avctx->delay=0;
1118         s->low_delay=1;
1119         break;
1120     case CODEC_ID_MPEG4:
1121         s->out_format = FMT_H263;
1122         s->h263_pred = 1;
1123         s->unrestricted_mv = 1;
1124         s->low_delay= s->max_b_frames ? 0 : 1;
1125         avctx->delay= s->low_delay ? 0 : (s->max_b_frames + 1);
1126         break;
1127     case CODEC_ID_MSMPEG4V1:
1128         s->out_format = FMT_H263;
1129         s->h263_msmpeg4 = 1;
1130         s->h263_pred = 1;
1131         s->unrestricted_mv = 1;
1132         s->msmpeg4_version= 1;
1133         avctx->delay=0;
1134         s->low_delay=1;
1135         break;
1136     case CODEC_ID_MSMPEG4V2:
1137         s->out_format = FMT_H263;
1138         s->h263_msmpeg4 = 1;
1139         s->h263_pred = 1;
1140         s->unrestricted_mv = 1;
1141         s->msmpeg4_version= 2;
1142         avctx->delay=0;
1143         s->low_delay=1;
1144         break;
1145     case CODEC_ID_MSMPEG4V3:
1146         s->out_format = FMT_H263;
1147         s->h263_msmpeg4 = 1;
1148         s->h263_pred = 1;
1149         s->unrestricted_mv = 1;
1150         s->msmpeg4_version= 3;
1151         s->flipflop_rounding=1;
1152         avctx->delay=0;
1153         s->low_delay=1;
1154         break;
1155     case CODEC_ID_WMV1:
1156         s->out_format = FMT_H263;
1157         s->h263_msmpeg4 = 1;
1158         s->h263_pred = 1;
1159         s->unrestricted_mv = 1;
1160         s->msmpeg4_version= 4;
1161         s->flipflop_rounding=1;
1162         avctx->delay=0;
1163         s->low_delay=1;
1164         break;
1165     case CODEC_ID_WMV2:
1166         s->out_format = FMT_H263;
1167         s->h263_msmpeg4 = 1;
1168         s->h263_pred = 1;
1169         s->unrestricted_mv = 1;
1170         s->msmpeg4_version= 5;
1171         s->flipflop_rounding=1;
1172         avctx->delay=0;
1173         s->low_delay=1;
1174         break;
1175 #endif
1176     default:
1177         return -1;
1178     }
1179     
1180     avctx->has_b_frames= !s->low_delay;
1181
1182     s->encoding = 1;
1183
1184     /* init */
1185     if (MPV_common_init(s) < 0)
1186         return -1;
1187
1188     if(s->modified_quant)
1189         s->chroma_qscale_table= ff_h263_chroma_qscale_table;
1190     s->progressive_frame= 
1191     s->progressive_sequence= !(avctx->flags & (CODEC_FLAG_INTERLACED_DCT|CODEC_FLAG_INTERLACED_ME));
1192     s->quant_precision=5;
1193     
1194     ff_set_cmp(&s->dsp, s->dsp.ildct_cmp, s->avctx->ildct_cmp);
1195     
1196 #ifdef CONFIG_ENCODERS
1197 #ifdef CONFIG_RISKY
1198     if (s->out_format == FMT_H263)
1199         h263_encode_init(s);
1200     if(s->msmpeg4_version)
1201         ff_msmpeg4_encode_init(s);
1202 #endif
1203     if (s->out_format == FMT_MPEG1)
1204         ff_mpeg1_encode_init(s);
1205 #endif
1206
1207     /* init q matrix */
1208     for(i=0;i<64;i++) {
1209         int j= s->dsp.idct_permutation[i];
1210 #ifdef CONFIG_RISKY
1211         if(s->codec_id==CODEC_ID_MPEG4 && s->mpeg_quant){
1212             s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
1213             s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
1214         }else if(s->out_format == FMT_H263){
1215             s->intra_matrix[j] =
1216             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
1217         }else
1218 #endif
1219         { /* mpeg1/2 */
1220             s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
1221             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
1222         }
1223         if(s->avctx->intra_matrix)
1224             s->intra_matrix[j] = s->avctx->intra_matrix[i];
1225         if(s->avctx->inter_matrix)
1226             s->inter_matrix[j] = s->avctx->inter_matrix[i];
1227     }
1228
1229     /* precompute matrix */
1230     /* for mjpeg, we do include qscale in the matrix */
1231     if (s->out_format != FMT_MJPEG) {
1232         convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16, 
1233                        s->intra_matrix, s->intra_quant_bias, 1, 31);
1234         convert_matrix(&s->dsp, s->q_inter_matrix, s->q_inter_matrix16, 
1235                        s->inter_matrix, s->inter_quant_bias, 1, 31);
1236     }
1237
1238     if(ff_rate_control_init(s) < 0)
1239         return -1;
1240     
1241     return 0;
1242 }
1243
1244 int MPV_encode_end(AVCodecContext *avctx)
1245 {
1246     MpegEncContext *s = avctx->priv_data;
1247
1248 #ifdef STATS
1249     print_stats();
1250 #endif
1251
1252     ff_rate_control_uninit(s);
1253
1254     MPV_common_end(s);
1255     if (s->out_format == FMT_MJPEG)
1256         mjpeg_close(s);
1257
1258     av_freep(&avctx->extradata);
1259       
1260     return 0;
1261 }
1262
1263 #endif //CONFIG_ENCODERS
1264
1265 void init_rl(RLTable *rl)
1266 {
1267     int8_t max_level[MAX_RUN+1], max_run[MAX_LEVEL+1];
1268     uint8_t index_run[MAX_RUN+1];
1269     int last, run, level, start, end, i;
1270
1271     /* compute max_level[], max_run[] and index_run[] */
1272     for(last=0;last<2;last++) {
1273         if (last == 0) {
1274             start = 0;
1275             end = rl->last;
1276         } else {
1277             start = rl->last;
1278             end = rl->n;
1279         }
1280
1281         memset(max_level, 0, MAX_RUN + 1);
1282         memset(max_run, 0, MAX_LEVEL + 1);
1283         memset(index_run, rl->n, MAX_RUN + 1);
1284         for(i=start;i<end;i++) {
1285             run = rl->table_run[i];
1286             level = rl->table_level[i];
1287             if (index_run[run] == rl->n)
1288                 index_run[run] = i;
1289             if (level > max_level[run])
1290                 max_level[run] = level;
1291             if (run > max_run[level])
1292                 max_run[level] = run;
1293         }
1294         rl->max_level[last] = av_malloc(MAX_RUN + 1);
1295         memcpy(rl->max_level[last], max_level, MAX_RUN + 1);
1296         rl->max_run[last] = av_malloc(MAX_LEVEL + 1);
1297         memcpy(rl->max_run[last], max_run, MAX_LEVEL + 1);
1298         rl->index_run[last] = av_malloc(MAX_RUN + 1);
1299         memcpy(rl->index_run[last], index_run, MAX_RUN + 1);
1300     }
1301 }
1302
/**
 * Pads an image with a border of w pixels on every side by replicating the
 * outermost pixels.
 * @param buf    pointer to the top left pixel of the image; the border is
 *               written outside of the width x height area around it
 * @param wrap   distance in bytes between two vertically adjacent pixels
 * @param width  width of the image
 * @param height height of the image
 * @param w      width of the border
 */
//FIXME check that this is ok for mpeg4 interlaced
static void draw_edges_c(uint8_t *buf, int wrap, int width, int height, int w)
{
    uint8_t * const bottom_row = buf + (height - 1) * wrap;
    uint8_t *row;
    int n;

    /* rows above and below: copies of the first and of the last line */
    for(n=1; n<=w; n++) {
        memcpy(buf        - n * wrap, buf,        width);
        memcpy(bottom_row + n * wrap, bottom_row, width);
    }

    /* columns left and right: repeat the first and last pixel of each line */
    row = buf;
    for(n=height; n>0; n--) {
        memset(row - w,     row[0],         w);
        memset(row + width, row[width - 1], w);
        row += wrap;
    }

    /* the four w x w corners take the value of the nearest image corner */
    for(n=1; n<=w; n++) {
        uint8_t * const above = buf        - n * wrap;
        uint8_t * const below = bottom_row + n * wrap;

        memset(above - w,     buf[0],                w); /* top left */
        memset(above + width, buf[width - 1],        w); /* top right */
        memset(below - w,     bottom_row[0],         w); /* bottom left */
        memset(below + width, bottom_row[width - 1], w); /* bottom right */
    }
}
1331
1332 int ff_find_unused_picture(MpegEncContext *s, int shared){
1333     int i;
1334     
1335     if(shared){
1336         for(i=0; i<MAX_PICTURE_COUNT; i++){
1337             if(s->picture[i].data[0]==NULL && s->picture[i].type==0) return i;
1338         }
1339     }else{
1340         for(i=0; i<MAX_PICTURE_COUNT; i++){
1341             if(s->picture[i].data[0]==NULL && s->picture[i].type!=0) return i; //FIXME
1342         }
1343         for(i=0; i<MAX_PICTURE_COUNT; i++){
1344             if(s->picture[i].data[0]==NULL) return i;
1345         }
1346     }
1347
1348     assert(0);
1349     return -1;
1350 }
1351
1352 static void update_noise_reduction(MpegEncContext *s){
1353     int intra, i;
1354
1355     for(intra=0; intra<2; intra++){
1356         if(s->dct_count[intra] > (1<<16)){
1357             for(i=0; i<64; i++){
1358                 s->dct_error_sum[intra][i] >>=1;
1359             }
1360             s->dct_count[intra] >>= 1;
1361         }
1362         
1363         for(i=0; i<64; i++){
1364             s->dct_offset[intra][i]= (s->avctx->noise_reduction * s->dct_count[intra] + s->dct_error_sum[intra][i]/2) / (s->dct_error_sum[intra][i]+1);
1365         }
1366     }
1367 }
1368
1369 /**
1370  * generic function for encode/decode called after coding/decoding the header and before a frame is coded/decoded
1371  */
1372 int MPV_frame_start(MpegEncContext *s, AVCodecContext *avctx)
1373 {
1374     int i;
1375     AVFrame *pic;
1376     s->mb_skiped = 0;
1377
1378     assert(s->last_picture_ptr==NULL || s->out_format != FMT_H264 || s->codec_id == CODEC_ID_SVQ3);
1379
1380     /* mark&release old frames */
1381     if (s->pict_type != B_TYPE && s->last_picture_ptr && s->last_picture_ptr != s->next_picture_ptr && s->last_picture_ptr->data[0]) {
1382         avctx->release_buffer(avctx, (AVFrame*)s->last_picture_ptr);
1383
1384         /* release forgotten pictures */
1385         /* if(mpeg124/h263) */
1386         if(!s->encoding){
1387             for(i=0; i<MAX_PICTURE_COUNT; i++){
1388                 if(s->picture[i].data[0] && &s->picture[i] != s->next_picture_ptr && s->picture[i].reference){
1389                     av_log(avctx, AV_LOG_ERROR, "releasing zombie picture\n");
1390                     avctx->release_buffer(avctx, (AVFrame*)&s->picture[i]);                
1391                 }
1392             }
1393         }
1394     }
1395 alloc:
1396     if(!s->encoding){
1397         /* release non refernce frames */
1398         for(i=0; i<MAX_PICTURE_COUNT; i++){
1399             if(s->picture[i].data[0] && !s->picture[i].reference /*&& s->picture[i].type!=FF_BUFFER_TYPE_SHARED*/){
1400                 s->avctx->release_buffer(s->avctx, (AVFrame*)&s->picture[i]);
1401             }
1402         }
1403
1404         if(s->current_picture_ptr && s->current_picture_ptr->data[0]==NULL)
1405             pic= (AVFrame*)s->current_picture_ptr; //we allready have a unused image (maybe it was set before reading the header)
1406         else{
1407             i= ff_find_unused_picture(s, 0);
1408             pic= (AVFrame*)&s->picture[i];
1409         }
1410
1411         pic->reference= s->pict_type != B_TYPE && !s->dropable ? 3 : 0;
1412
1413         pic->coded_picture_number= s->coded_picture_number++;
1414         
1415         if( alloc_picture(s, (Picture*)pic, 0) < 0)
1416             return -1;
1417
1418         s->current_picture_ptr= (Picture*)pic;
1419         s->current_picture_ptr->top_field_first= s->top_field_first; //FIXME use only the vars from current_pic
1420         s->current_picture_ptr->interlaced_frame= !s->progressive_frame && !s->progressive_sequence;
1421     }
1422
1423     s->current_picture_ptr->pict_type= s->pict_type;
1424 //    if(s->flags && CODEC_FLAG_QSCALE) 
1425   //      s->current_picture_ptr->quality= s->new_picture_ptr->quality;
1426     s->current_picture_ptr->key_frame= s->pict_type == I_TYPE;
1427
1428     copy_picture(&s->current_picture, s->current_picture_ptr);
1429   
1430   if(s->out_format != FMT_H264 || s->codec_id == CODEC_ID_SVQ3){
1431     if (s->pict_type != B_TYPE) {
1432         s->last_picture_ptr= s->next_picture_ptr;
1433         if(!s->dropable)
1434             s->next_picture_ptr= s->current_picture_ptr;
1435     }
1436 /*    av_log(s->avctx, AV_LOG_DEBUG, "L%p N%p C%p L%p N%p C%p type:%d drop:%d\n", s->last_picture_ptr, s->next_picture_ptr,s->current_picture_ptr,
1437         s->last_picture_ptr    ? s->last_picture_ptr->data[0] : NULL, 
1438         s->next_picture_ptr    ? s->next_picture_ptr->data[0] : NULL, 
1439         s->current_picture_ptr ? s->current_picture_ptr->data[0] : NULL,
1440         s->pict_type, s->dropable);*/
1441     
1442     if(s->last_picture_ptr) copy_picture(&s->last_picture, s->last_picture_ptr);
1443     if(s->next_picture_ptr) copy_picture(&s->next_picture, s->next_picture_ptr);
1444     
1445     if(s->pict_type != I_TYPE && (s->last_picture_ptr==NULL || s->last_picture_ptr->data[0]==NULL)){
1446         av_log(avctx, AV_LOG_ERROR, "warning: first frame is no keyframe\n");
1447         assert(s->pict_type != B_TYPE); //these should have been dropped if we dont have a reference
1448         goto alloc;
1449     }
1450
1451     assert(s->pict_type == I_TYPE || (s->last_picture_ptr && s->last_picture_ptr->data[0]));
1452
1453     if(s->picture_structure!=PICT_FRAME){
1454         int i;
1455         for(i=0; i<4; i++){
1456             if(s->picture_structure == PICT_BOTTOM_FIELD){
1457                  s->current_picture.data[i] += s->current_picture.linesize[i];
1458             } 
1459             s->current_picture.linesize[i] *= 2;
1460             s->last_picture.linesize[i] *=2;
1461             s->next_picture.linesize[i] *=2;
1462         }
1463     }
1464   }
1465    
1466     s->hurry_up= s->avctx->hurry_up;
1467     s->error_resilience= avctx->error_resilience;
1468
1469     /* set dequantizer, we cant do it during init as it might change for mpeg4
1470        and we cant do it in the header decode as init isnt called for mpeg4 there yet */
1471     if(s->mpeg_quant || s->codec_id == CODEC_ID_MPEG2VIDEO){
1472         s->dct_unquantize_intra = s->dct_unquantize_mpeg2_intra;
1473         s->dct_unquantize_inter = s->dct_unquantize_mpeg2_inter;
1474     }else if(s->out_format == FMT_H263){
1475         s->dct_unquantize_intra = s->dct_unquantize_h263_intra;
1476         s->dct_unquantize_inter = s->dct_unquantize_h263_inter;
1477     }else if(s->out_format == FMT_H261){
1478         s->dct_unquantize_intra = s->dct_unquantize_h261_intra;
1479         s->dct_unquantize_inter = s->dct_unquantize_h261_inter;
1480     }else{
1481         s->dct_unquantize_intra = s->dct_unquantize_mpeg1_intra;
1482         s->dct_unquantize_inter = s->dct_unquantize_mpeg1_inter;
1483     }
1484
1485     if(s->dct_error_sum){
1486         assert(s->avctx->noise_reduction && s->encoding);
1487
1488         update_noise_reduction(s);
1489     }
1490         
1491 #ifdef HAVE_XVMC
1492     if(s->avctx->xvmc_acceleration)
1493         return XVMC_field_start(s, avctx);
1494 #endif
1495     return 0;
1496 }
1497
1498 /* generic function for encode/decode called after a frame has been coded/decoded */
1499 void MPV_frame_end(MpegEncContext *s)
1500 {
1501     int i;
1502     /* draw edge for correct motion prediction if outside */
1503 #ifdef HAVE_XVMC
1504 //just to make sure that all data is rendered.
1505     if(s->avctx->xvmc_acceleration){
1506         XVMC_field_end(s);
1507     }else
1508 #endif
1509     if(s->unrestricted_mv && s->pict_type != B_TYPE && !s->intra_only && !(s->flags&CODEC_FLAG_EMU_EDGE)) {
1510             draw_edges(s->current_picture.data[0], s->linesize  , s->h_edge_pos   , s->v_edge_pos   , EDGE_WIDTH  );
1511             draw_edges(s->current_picture.data[1], s->uvlinesize, s->h_edge_pos>>1, s->v_edge_pos>>1, EDGE_WIDTH/2);
1512             draw_edges(s->current_picture.data[2], s->uvlinesize, s->h_edge_pos>>1, s->v_edge_pos>>1, EDGE_WIDTH/2);
1513     }
1514     emms_c();
1515     
1516     s->last_pict_type    = s->pict_type;
1517     if(s->pict_type!=B_TYPE){
1518         s->last_non_b_pict_type= s->pict_type;
1519     }
1520 #if 0
1521         /* copy back current_picture variables */
1522     for(i=0; i<MAX_PICTURE_COUNT; i++){
1523         if(s->picture[i].data[0] == s->current_picture.data[0]){
1524             s->picture[i]= s->current_picture;
1525             break;
1526         }    
1527     }
1528     assert(i<MAX_PICTURE_COUNT);
1529 #endif    
1530
1531     if(s->encoding){
1532         /* release non refernce frames */
1533         for(i=0; i<MAX_PICTURE_COUNT; i++){
1534             if(s->picture[i].data[0] && !s->picture[i].reference /*&& s->picture[i].type!=FF_BUFFER_TYPE_SHARED*/){
1535                 s->avctx->release_buffer(s->avctx, (AVFrame*)&s->picture[i]);
1536             }
1537         }
1538     }
1539     // clear copies, to avoid confusion
1540 #if 0
1541     memset(&s->last_picture, 0, sizeof(Picture));
1542     memset(&s->next_picture, 0, sizeof(Picture));
1543     memset(&s->current_picture, 0, sizeof(Picture));
1544 #endif
1545 }
1546
/**
 * draws a line from (ex, ey) -> (sx, sy).
 * Both end points are clipped to the image first. The color is added to
 * the pixels, not stored, and is split between the two pixels next to the
 * exact line position according to the 16 bit fractional part of that
 * position.
 * @param buf pointer to the top left pixel of the image
 * @param w width of the image
 * @param h height of the image
 * @param stride stride/linesize of the image
 * @param color color of the line
 */
static void draw_line(uint8_t *buf, int sx, int sy, int ex, int ey, int w, int h, int stride, int color){
    int t, x, y, fr, f;

    sx= clip(sx, 0, w-1);
    sy= clip(sy, 0, h-1);
    ex= clip(ex, 0, w-1);
    ey= clip(ey, 0, h-1);

    buf[sy*stride + sx]+= color;

    if(ABS(ex - sx) > ABS(ey - sy)){
        /* mostly horizontal: step along x, left to right */
        if(sx > ex){
            t=sx; sx=ex; ex=t;
            t=sy; sy=ey; ey=t;
        }
        buf+= sx + sy*stride;
        ex-= sx;
        /* ex != 0 here as ABS(ex - sx) was larger than ABS(ey - sy);
           a multiplication is used as ey-sy may be negative and left
           shifting a negative value is undefined */
        f= ((ey-sy)*(1<<16))/ex;
        for(x= 0; x <= ex; x++){
            y = (x*f)>>16;
            fr= (x*f)&0xFFFF;
            buf[ y   *stride + x]+= (color*(0x10000-fr))>>16;
            /* with fr == 0 nothing would be added, and the line below may
               already be outside of the image, so leave it alone */
            if(fr)
                buf[(y+1)*stride + x]+= (color*         fr )>>16;
        }
    }else{
        /* mostly vertical (or a single point): step along y, top to bottom */
        if(sy > ey){
            t=sx; sx=ex; ex=t;
            t=sy; sy=ey; ey=t;
        }
        buf+= sx + sy*stride;
        ey-= sy;
        if(ey) f= ((ex-sx)*(1<<16))/ey;
        else   f= 0;
        for(y= 0; y <= ey; y++){
            x = (y*f)>>16;
            fr= (y*f)&0xFFFF;
            buf[y*stride + x  ]+= (color*(0x10000-fr))>>16;
            /* same as above for the column to the right */
            if(fr)
                buf[y*stride + x+1]+= (color*         fr )>>16;
        }
    }
}
1595
/**
 * Draws an arrow whose shaft runs between (sx, sy) and (ex, ey) and whose
 * head (two short strokes) is attached to (sx, sy).
 * The head is only drawn when the shaft is longer than 3 samples.
 * @param buf pointer to the sample at position (0, 0) of the image
 * @param w width of the image
 * @param h height of the image
 * @param stride stride/linesize of the image
 * @param color color of the arrow
 */
static void draw_arrow(uint8_t *buf, int sx, int sy, int ex, int ey, int w, int h, int stride, int color){
    int delta_x, delta_y;

    /* keep the coordinates in a sane range; draw_line() does the exact clipping */
    sx= clip(sx, -100, w+100);
    sy= clip(sy, -100, h+100);
    ex= clip(ex, -100, w+100);
    ey= clip(ey, -100, h+100);

    delta_x= ex - sx;
    delta_y= ey - sy;

    if(delta_x*delta_x + delta_y*delta_y > 3*3){
        /* shaft direction rotated by 45 degrees, then scaled to a fixed length */
        int head_x=  delta_x + delta_y;
        int head_y= -delta_x + delta_y;
        int norm= ff_sqrt((head_x*head_x + head_y*head_y)<<8);

        //FIXME subpixel accuracy
        head_x= ROUNDED_DIV(head_x*3<<4, norm);
        head_y= ROUNDED_DIV(head_y*3<<4, norm);

        /* the two strokes of the head are perpendicular to each other */
        draw_line(buf, sx, sy, sx + head_x, sy + head_y, w, h, stride, color);
        draw_line(buf, sx, sy, sx - head_y, sy + head_x, w, h, stride, color);
    }

    /* shaft */
    draw_line(buf, sx, sy, ex, ey, w, h, stride, color);
}
1628
1629 /**
1630  * prints debuging info for the given picture.
1631  */
1632 void ff_print_debug_info(MpegEncContext *s, AVFrame *pict){
1633
1634     if(!pict || !pict->mb_type) return;
1635
1636     if(s->avctx->debug&(FF_DEBUG_SKIP | FF_DEBUG_QP | FF_DEBUG_MB_TYPE)){
1637         int x,y;
1638         
1639         av_log(s->avctx,AV_LOG_DEBUG,"New frame, type: ");
1640         switch (pict->pict_type) {
1641             case FF_I_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"I\n"); break;
1642             case FF_P_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"P\n"); break;
1643             case FF_B_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"B\n"); break;
1644             case FF_S_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"S\n"); break;
1645             case FF_SI_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"SI\n"); break;
1646             case FF_SP_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"SP\n"); break;            
1647         }
1648         for(y=0; y<s->mb_height; y++){
1649             for(x=0; x<s->mb_width; x++){
1650                 if(s->avctx->debug&FF_DEBUG_SKIP){
1651                     int count= s->mbskip_table[x + y*s->mb_stride];
1652                     if(count>9) count=9;
1653                     av_log(s->avctx, AV_LOG_DEBUG, "%1d", count);
1654                 }
1655                 if(s->avctx->debug&FF_DEBUG_QP){
1656                     av_log(s->avctx, AV_LOG_DEBUG, "%2d", pict->qscale_table[x + y*s->mb_stride]);
1657                 }
1658                 if(s->avctx->debug&FF_DEBUG_MB_TYPE){
1659                     int mb_type= pict->mb_type[x + y*s->mb_stride];
1660                     //Type & MV direction
1661                     if(IS_PCM(mb_type))
1662                         av_log(s->avctx, AV_LOG_DEBUG, "P");
1663                     else if(IS_INTRA(mb_type) && IS_ACPRED(mb_type))
1664                         av_log(s->avctx, AV_LOG_DEBUG, "A");
1665                     else if(IS_INTRA4x4(mb_type))
1666                         av_log(s->avctx, AV_LOG_DEBUG, "i");
1667                     else if(IS_INTRA16x16(mb_type))
1668                         av_log(s->avctx, AV_LOG_DEBUG, "I");
1669                     else if(IS_DIRECT(mb_type) && IS_SKIP(mb_type))
1670                         av_log(s->avctx, AV_LOG_DEBUG, "d");
1671                     else if(IS_DIRECT(mb_type))
1672                         av_log(s->avctx, AV_LOG_DEBUG, "D");
1673                     else if(IS_GMC(mb_type) && IS_SKIP(mb_type))
1674                         av_log(s->avctx, AV_LOG_DEBUG, "g");
1675                     else if(IS_GMC(mb_type))
1676                         av_log(s->avctx, AV_LOG_DEBUG, "G");
1677                     else if(IS_SKIP(mb_type))
1678                         av_log(s->avctx, AV_LOG_DEBUG, "S");
1679                     else if(!USES_LIST(mb_type, 1))
1680                         av_log(s->avctx, AV_LOG_DEBUG, ">");
1681                     else if(!USES_LIST(mb_type, 0))
1682                         av_log(s->avctx, AV_LOG_DEBUG, "<");
1683                     else{
1684                         assert(USES_LIST(mb_type, 0) && USES_LIST(mb_type, 1));
1685                         av_log(s->avctx, AV_LOG_DEBUG, "X");
1686                     }
1687                     
1688                     //segmentation
1689                     if(IS_8X8(mb_type))
1690                         av_log(s->avctx, AV_LOG_DEBUG, "+");
1691                     else if(IS_16X8(mb_type))
1692                         av_log(s->avctx, AV_LOG_DEBUG, "-");
1693                     else if(IS_8X16(mb_type))
1694                         av_log(s->avctx, AV_LOG_DEBUG, "¦");
1695                     else if(IS_INTRA(mb_type) || IS_16X16(mb_type))
1696                         av_log(s->avctx, AV_LOG_DEBUG, " ");
1697                     else
1698                         av_log(s->avctx, AV_LOG_DEBUG, "?");
1699                     
1700                         
1701                     if(IS_INTERLACED(mb_type) && s->codec_id == CODEC_ID_H264)
1702                         av_log(s->avctx, AV_LOG_DEBUG, "=");
1703                     else
1704                         av_log(s->avctx, AV_LOG_DEBUG, " ");
1705                 }
1706 //                av_log(s->avctx, AV_LOG_DEBUG, " ");
1707             }
1708             av_log(s->avctx, AV_LOG_DEBUG, "\n");
1709         }
1710     }
1711
1712     if((s->avctx->debug&(FF_DEBUG_VIS_QP|FF_DEBUG_VIS_MB_TYPE)) || (s->avctx->debug_mv)){
1713         const int shift= 1 + s->quarter_sample;
1714         int mb_y;
1715         uint8_t *ptr;
1716         int i;
1717         int h_chroma_shift, v_chroma_shift;
1718         s->low_delay=0; //needed to see the vectors without trashing the buffers
1719
1720         avcodec_get_chroma_sub_sample(s->avctx->pix_fmt, &h_chroma_shift, &v_chroma_shift);
1721         for(i=0; i<3; i++){
1722             memcpy(s->visualization_buffer[i], pict->data[i], (i==0) ? pict->linesize[i]*s->height:pict->linesize[i]*s->height >> v_chroma_shift);
1723             pict->data[i]= s->visualization_buffer[i];
1724         }
1725         pict->type= FF_BUFFER_TYPE_COPY;
1726         ptr= pict->data[0];
1727
1728         for(mb_y=0; mb_y<s->mb_height; mb_y++){
1729             int mb_x;
1730             for(mb_x=0; mb_x<s->mb_width; mb_x++){
1731                 const int mb_index= mb_x + mb_y*s->mb_stride;
1732                 if((s->avctx->debug_mv) && pict->motion_val){
1733                   int type;
1734                   for(type=0; type<3; type++){
1735                     int direction = 0;
1736                     switch (type) {
1737                       case 0: if ((!(s->avctx->debug_mv&FF_DEBUG_VIS_MV_P_FOR)) || (pict->pict_type!=FF_P_TYPE))
1738                                 continue;
1739                               direction = 0;
1740                               break;
1741                       case 1: if ((!(s->avctx->debug_mv&FF_DEBUG_VIS_MV_B_FOR)) || (pict->pict_type!=FF_B_TYPE))
1742                                 continue;
1743                               direction = 0;
1744                               break;
1745                       case 2: if ((!(s->avctx->debug_mv&FF_DEBUG_VIS_MV_B_BACK)) || (pict->pict_type!=FF_B_TYPE))
1746                                 continue;
1747                               direction = 1;
1748                               break;
1749                     }
1750                     if(!USES_LIST(pict->mb_type[mb_index], direction))
1751                         continue;
1752
1753                     //FIXME for h264
1754                     if(IS_8X8(pict->mb_type[mb_index])){
1755                       int i;
1756                       for(i=0; i<4; i++){
1757                         int sx= mb_x*16 + 4 + 8*(i&1);
1758                         int sy= mb_y*16 + 4 + 8*(i>>1);
1759                         int xy= mb_x*2 + (i&1) + (mb_y*2 + (i>>1))*s->b8_stride;
1760                         int mx= (pict->motion_val[direction][xy][0]>>shift) + sx;
1761                         int my= (pict->motion_val[direction][xy][1]>>shift) + sy;
1762                         draw_arrow(ptr, sx, sy, mx, my, s->width, s->height, s->linesize, 100);
1763                       }
1764                     }else if(IS_16X8(pict->mb_type[mb_index])){
1765                       int i;
1766                       for(i=0; i<2; i++){
1767                         int sx=mb_x*16 + 8;
1768                         int sy=mb_y*16 + 4 + 8*i;
1769                         int xy= mb_x*2 + (mb_y*2 + i)*s->b8_stride;
1770                         int mx=(pict->motion_val[direction][xy][0]>>shift);
1771                         int my=(pict->motion_val[direction][xy][1]>>shift);
1772                         
1773                         if(IS_INTERLACED(pict->mb_type[mb_index]))
1774                             my*=2;
1775                         
1776                         draw_arrow(ptr, sx, sy, mx+sx, my+sy, s->width, s->height, s->linesize, 100);
1777                       }
1778                     }else{
1779                       int sx= mb_x*16 + 8;
1780                       int sy= mb_y*16 + 8;
1781                       int xy= mb_x*2 + mb_y*2*s->b8_stride;
1782                       int mx= (pict->motion_val[direction][xy][0]>>shift) + sx;
1783                       int my= (pict->motion_val[direction][xy][1]>>shift) + sy;
1784                       draw_arrow(ptr, sx, sy, mx, my, s->width, s->height, s->linesize, 100);
1785                     }
1786                   }                  
1787                 }
1788                 if((s->avctx->debug&FF_DEBUG_VIS_QP) && pict->motion_val){
1789                     uint64_t c= (pict->qscale_table[mb_index]*128/31) * 0x0101010101010101ULL;
1790                     int y;
1791                     for(y=0; y<8; y++){
1792                         *(uint64_t*)(pict->data[1] + 8*mb_x + (8*mb_y + y)*pict->linesize[1])= c;
1793                         *(uint64_t*)(pict->data[2] + 8*mb_x + (8*mb_y + y)*pict->linesize[2])= c;
1794                     }
1795                 }
1796                 if((s->avctx->debug&FF_DEBUG_VIS_MB_TYPE) && pict->motion_val){
1797                     int mb_type= pict->mb_type[mb_index];
1798                     uint64_t u,v;
1799                     int y;
1800 #define COLOR(theta, r)\
1801 u= (int)(128 + r*cos(theta*3.141592/180));\
1802 v= (int)(128 + r*sin(theta*3.141592/180));
1803
1804                     
1805                     u=v=128;
1806                     if(IS_PCM(mb_type)){
1807                         COLOR(120,48)
1808                     }else if((IS_INTRA(mb_type) && IS_ACPRED(mb_type)) || IS_INTRA16x16(mb_type)){
1809                         COLOR(30,48)
1810                     }else if(IS_INTRA4x4(mb_type)){
1811                         COLOR(90,48)
1812                     }else if(IS_DIRECT(mb_type) && IS_SKIP(mb_type)){
1813 //                        COLOR(120,48)
1814                     }else if(IS_DIRECT(mb_type)){
1815                         COLOR(150,48)
1816                     }else if(IS_GMC(mb_type) && IS_SKIP(mb_type)){
1817                         COLOR(170,48)
1818                     }else if(IS_GMC(mb_type)){
1819                         COLOR(190,48)
1820                     }else if(IS_SKIP(mb_type)){
1821 //                        COLOR(180,48)
1822                     }else if(!USES_LIST(mb_type, 1)){
1823                         COLOR(240,48)
1824                     }else if(!USES_LIST(mb_type, 0)){
1825                         COLOR(0,48)
1826                     }else{
1827                         assert(USES_LIST(mb_type, 0) && USES_LIST(mb_type, 1));
1828                         COLOR(300,48)
1829                     }
1830
1831                     u*= 0x0101010101010101ULL;
1832                     v*= 0x0101010101010101ULL;
1833                     for(y=0; y<8; y++){
1834                         *(uint64_t*)(pict->data[1] + 8*mb_x + (8*mb_y + y)*pict->linesize[1])= u;
1835                         *(uint64_t*)(pict->data[2] + 8*mb_x + (8*mb_y + y)*pict->linesize[2])= v;
1836                     }
1837
1838                     //segmentation
1839                     if(IS_8X8(mb_type) || IS_16X8(mb_type)){
1840                         *(uint64_t*)(pict->data[0] + 16*mb_x + 0 + (16*mb_y + 8)*pict->linesize[0])^= 0x8080808080808080ULL;
1841                         *(uint64_t*)(pict->data[0] + 16*mb_x + 8 + (16*mb_y + 8)*pict->linesize[0])^= 0x8080808080808080ULL;
1842                     }
1843                     if(IS_8X8(mb_type) || IS_8X16(mb_type)){
1844                         for(y=0; y<16; y++)
1845                             pict->data[0][16*mb_x + 8 + (16*mb_y + y)*pict->linesize[0]]^= 0x80;
1846                     }
1847                         
1848                     if(IS_INTERLACED(mb_type) && s->codec_id == CODEC_ID_H264){
1849                         // hmm
1850                     }
1851                 }
1852                 s->mbskip_table[mb_index]=0;
1853             }
1854         }
1855     }
1856 }
1857
1858 #ifdef CONFIG_ENCODERS
1859
/**
 * Sum of the absolute differences between every sample of a 16x16 block
 * and one constant reference value.
 * @param src    top-left sample of the block
 * @param ref    value every sample is compared against
 * @param stride distance in bytes between two rows of the block
 * @return the accumulated absolute error
 */
static int get_sae(uint8_t *src, int ref, int stride){
    int total= 0;
    int row, col;

    for(row=0; row<16; row++){
        for(col=0; col<16; col++)
            total+= ABS(src[col + row*stride] - ref);
    }

    return total;
}
1872
1873 static int get_intra_count(MpegEncContext *s, uint8_t *src, uint8_t *ref, int stride){
1874     int x, y, w, h;
1875     int acc=0;
1876     
1877     w= s->width &~15;
1878     h= s->height&~15;
1879     
1880     for(y=0; y<h; y+=16){
1881         for(x=0; x<w; x+=16){
1882             int offset= x + y*stride;
1883             int sad = s->dsp.sad[0](NULL, src + offset, ref + offset, stride, 16);
1884             int mean= (s->dsp.pix_sum(src + offset, stride) + 128)>>8;
1885             int sae = get_sae(src + offset, mean, stride);
1886             
1887             acc+= sae + 500 < sad;
1888         }
1889     }
1890     return acc;
1891 }
1892
1893
1894 static int load_input_picture(MpegEncContext *s, AVFrame *pic_arg){
1895     AVFrame *pic=NULL;
1896     int i;
1897     const int encoding_delay= s->max_b_frames;
1898     int direct=1;
1899     
1900   if(pic_arg){
1901     if(encoding_delay && !(s->flags&CODEC_FLAG_INPUT_PRESERVED)) direct=0;
1902     if(pic_arg->linesize[0] != s->linesize) direct=0;
1903     if(pic_arg->linesize[1] != s->uvlinesize) direct=0;
1904     if(pic_arg->linesize[2] != s->uvlinesize) direct=0;
1905   
1906 //    av_log(AV_LOG_DEBUG, "%d %d %d %d\n",pic_arg->linesize[0], pic_arg->linesize[1], s->linesize, s->uvlinesize);
1907     
1908     if(direct){
1909         i= ff_find_unused_picture(s, 1);
1910
1911         pic= (AVFrame*)&s->picture[i];
1912         pic->reference= 3;
1913     
1914         for(i=0; i<4; i++){
1915             pic->data[i]= pic_arg->data[i];
1916             pic->linesize[i]= pic_arg->linesize[i];
1917         }
1918         alloc_picture(s, (Picture*)pic, 1);
1919     }else{
1920         int offset= 16;
1921         i= ff_find_unused_picture(s, 0);
1922
1923         pic= (AVFrame*)&s->picture[i];
1924         pic->reference= 3;
1925
1926         alloc_picture(s, (Picture*)pic, 0);
1927
1928         if(   pic->data[0] + offset == pic_arg->data[0] 
1929            && pic->data[1] + offset == pic_arg->data[1]
1930            && pic->data[2] + offset == pic_arg->data[2]){
1931        // empty
1932         }else{
1933             int h_chroma_shift, v_chroma_shift;
1934             avcodec_get_chroma_sub_sample(s->avctx->pix_fmt, &h_chroma_shift, &v_chroma_shift);
1935         
1936             for(i=0; i<3; i++){
1937                 int src_stride= pic_arg->linesize[i];
1938                 int dst_stride= i ? s->uvlinesize : s->linesize;
1939                 int h_shift= i ? h_chroma_shift : 0;
1940                 int v_shift= i ? v_chroma_shift : 0;
1941                 int w= s->width >>h_shift;
1942                 int h= s->height>>v_shift;
1943                 uint8_t *src= pic_arg->data[i];
1944                 uint8_t *dst= pic->data[i] + offset;
1945             
1946                 if(src_stride==dst_stride)
1947                     memcpy(dst, src, src_stride*h);
1948                 else{
1949                     while(h--){
1950                         memcpy(dst, src, w);
1951                         dst += dst_stride;
1952                         src += src_stride;
1953                     }
1954                 }
1955             }
1956         }
1957     }
1958     copy_picture_attributes(s, pic, pic_arg);
1959     
1960     pic->display_picture_number= s->input_picture_number++;
1961     if(pic->pts != AV_NOPTS_VALUE){ 
1962         s->user_specified_pts= pic->pts;
1963     }else{
1964         if(s->user_specified_pts){
1965             pic->pts= s->user_specified_pts + AV_TIME_BASE*(int64_t)s->avctx->frame_rate_base / s->avctx->frame_rate;
1966             av_log(s->avctx, AV_LOG_INFO, "Warning: AVFrame.pts=? trying to guess (%Ld)\n", pic->pts);
1967         }else{
1968             pic->pts= av_rescale(pic->display_picture_number*(int64_t)s->avctx->frame_rate_base, AV_TIME_BASE, s->avctx->frame_rate);
1969         }
1970     }
1971   }
1972   
1973     /* shift buffer entries */
1974     for(i=1; i<MAX_PICTURE_COUNT /*s->encoding_delay+1*/; i++)
1975         s->input_picture[i-1]= s->input_picture[i];
1976         
1977     s->input_picture[encoding_delay]= (Picture*)pic;
1978
1979     return 0;
1980 }
1981
1982 static void select_input_picture(MpegEncContext *s){
1983     int i;
1984
1985     for(i=1; i<MAX_PICTURE_COUNT; i++)
1986         s->reordered_input_picture[i-1]= s->reordered_input_picture[i];
1987     s->reordered_input_picture[MAX_PICTURE_COUNT-1]= NULL;
1988
1989     /* set next picture types & ordering */
1990     if(s->reordered_input_picture[0]==NULL && s->input_picture[0]){
1991         if(/*s->picture_in_gop_number >= s->gop_size ||*/ s->next_picture_ptr==NULL || s->intra_only){
1992             s->reordered_input_picture[0]= s->input_picture[0];
1993             s->reordered_input_picture[0]->pict_type= I_TYPE;
1994             s->reordered_input_picture[0]->coded_picture_number= s->coded_picture_number++;
1995         }else{
1996             int b_frames;
1997             
1998             if(s->flags&CODEC_FLAG_PASS2){
1999                 for(i=0; i<s->max_b_frames+1; i++){
2000                     int pict_num= s->input_picture[0]->display_picture_number + i;
2001                     int pict_type= s->rc_context.entry[pict_num].new_pict_type;
2002                     s->input_picture[i]->pict_type= pict_type;
2003                     
2004                     if(i + 1 >= s->rc_context.num_entries) break;
2005                 }
2006             }
2007
2008             if(s->input_picture[0]->pict_type){
2009                 /* user selected pict_type */
2010                 for(b_frames=0; b_frames<s->max_b_frames+1; b_frames++){
2011                     if(s->input_picture[b_frames]->pict_type!=B_TYPE) break;
2012                 }
2013             
2014                 if(b_frames > s->max_b_frames){
2015                     av_log(s->avctx, AV_LOG_ERROR, "warning, too many bframes in a row\n");
2016                     b_frames = s->max_b_frames;
2017                 }
2018             }else if(s->avctx->b_frame_strategy==0){
2019                 b_frames= s->max_b_frames;
2020                 while(b_frames && !s->input_picture[b_frames]) b_frames--;
2021             }else if(s->avctx->b_frame_strategy==1){
2022                 for(i=1; i<s->max_b_frames+1; i++){
2023                     if(s->input_picture[i] && s->input_picture[i]->b_frame_score==0){
2024                         s->input_picture[i]->b_frame_score= 
2025                             get_intra_count(s, s->input_picture[i  ]->data[0], 
2026                                                s->input_picture[i-1]->data[0], s->linesize) + 1;
2027                     }
2028                 }
2029                 for(i=0; i<s->max_b_frames; i++){
2030                     if(s->input_picture[i]==NULL || s->input_picture[i]->b_frame_score - 1 > s->mb_num/40) break;
2031                 }
2032                                 
2033                 b_frames= FFMAX(0, i-1);
2034                 
2035                 /* reset scores */
2036                 for(i=0; i<b_frames+1; i++){
2037                     s->input_picture[i]->b_frame_score=0;
2038                 }
2039             }else{
2040                 av_log(s->avctx, AV_LOG_ERROR, "illegal b frame strategy\n");
2041                 b_frames=0;
2042             }
2043
2044             emms_c();
2045 //static int b_count=0;
2046 //b_count+= b_frames;
2047 //av_log(s->avctx, AV_LOG_DEBUG, "b_frames: %d\n", b_count);
2048             if(s->picture_in_gop_number + b_frames >= s->gop_size){
2049                 if(s->flags & CODEC_FLAG_CLOSED_GOP)
2050                     b_frames=0;
2051                 s->input_picture[b_frames]->pict_type= I_TYPE;
2052             }
2053             
2054             if(   (s->flags & CODEC_FLAG_CLOSED_GOP)
2055                && b_frames
2056                && s->input_picture[b_frames]->pict_type== I_TYPE)
2057                 b_frames--;
2058
2059             s->reordered_input_picture[0]= s->input_picture[b_frames];
2060             if(s->reordered_input_picture[0]->pict_type != I_TYPE)
2061                 s->reordered_input_picture[0]->pict_type= P_TYPE;
2062             s->reordered_input_picture[0]->coded_picture_number= s->coded_picture_number++;
2063             for(i=0; i<b_frames; i++){
2064                 s->reordered_input_picture[i+1]= s->input_picture[i];
2065                 s->reordered_input_picture[i+1]->pict_type= B_TYPE;
2066                 s->reordered_input_picture[i+1]->coded_picture_number= s->coded_picture_number++;
2067             }
2068         }
2069     }
2070     
2071     if(s->reordered_input_picture[0]){
2072         s->reordered_input_picture[0]->reference= s->reordered_input_picture[0]->pict_type!=B_TYPE ? 3 : 0;
2073
2074         copy_picture(&s->new_picture, s->reordered_input_picture[0]);
2075
2076         if(s->reordered_input_picture[0]->type == FF_BUFFER_TYPE_SHARED){
2077             // input is a shared pix, so we cant modifiy it -> alloc a new one & ensure that the shared one is reuseable
2078         
2079             int i= ff_find_unused_picture(s, 0);
2080             Picture *pic= &s->picture[i];
2081
2082             /* mark us unused / free shared pic */
2083             for(i=0; i<4; i++)
2084                 s->reordered_input_picture[0]->data[i]= NULL;
2085             s->reordered_input_picture[0]->type= 0;
2086             
2087             pic->reference              = s->reordered_input_picture[0]->reference;
2088             
2089             alloc_picture(s, pic, 0);
2090
2091             copy_picture_attributes(s, (AVFrame*)pic, (AVFrame*)s->reordered_input_picture[0]);
2092
2093             s->current_picture_ptr= pic;
2094         }else{
2095             // input is not a shared pix -> reuse buffer for current_pix
2096
2097             assert(   s->reordered_input_picture[0]->type==FF_BUFFER_TYPE_USER 
2098                    || s->reordered_input_picture[0]->type==FF_BUFFER_TYPE_INTERNAL);
2099             
2100             s->current_picture_ptr= s->reordered_input_picture[0];
2101             for(i=0; i<4; i++){
2102                 s->new_picture.data[i]+=16;
2103             }
2104         }
2105         copy_picture(&s->current_picture, s->current_picture_ptr);
2106     
2107         s->picture_number= s->new_picture.display_picture_number;
2108 //printf("dpn:%d\n", s->picture_number);
2109     }else{
2110        memset(&s->new_picture, 0, sizeof(Picture));
2111     }
2112 }
2113
2114 int MPV_encode_picture(AVCodecContext *avctx,
2115                        unsigned char *buf, int buf_size, void *data)
2116 {
2117     MpegEncContext *s = avctx->priv_data;
2118     AVFrame *pic_arg = data;
2119     int i, stuffing_count;
2120
2121     if(avctx->pix_fmt != PIX_FMT_YUV420P){
2122         av_log(avctx, AV_LOG_ERROR, "this codec supports only YUV420P\n");
2123         return -1;
2124     }
2125     
2126     for(i=0; i<avctx->thread_count; i++){
2127         int start_y= s->thread_context[i]->start_mb_y;
2128         int   end_y= s->thread_context[i]->  end_mb_y;
2129         int h= s->mb_height;
2130         uint8_t *start= buf + buf_size*start_y/h;
2131         uint8_t *end  = buf + buf_size*  end_y/h;
2132
2133         init_put_bits(&s->thread_context[i]->pb, start, end - start);
2134     }
2135
2136     s->picture_in_gop_number++;
2137
2138     load_input_picture(s, pic_arg);
2139     
2140     select_input_picture(s);
2141     
2142     /* output? */
2143     if(s->new_picture.data[0]){
2144         s->pict_type= s->new_picture.pict_type;
2145 //emms_c();
2146 //printf("qs:%f %f %d\n", s->new_picture.quality, s->current_picture.quality, s->qscale);
2147         MPV_frame_start(s, avctx);
2148
2149         encode_picture(s, s->picture_number);
2150         
2151         avctx->real_pict_num  = s->picture_number;
2152         avctx->header_bits = s->header_bits;
2153         avctx->mv_bits     = s->mv_bits;
2154         avctx->misc_bits   = s->misc_bits;
2155         avctx->i_tex_bits  = s->i_tex_bits;
2156         avctx->p_tex_bits  = s->p_tex_bits;
2157         avctx->i_count     = s->i_count;
2158         avctx->p_count     = s->mb_num - s->i_count - s->skip_count; //FIXME f/b_count in avctx
2159         avctx->skip_count  = s->skip_count;
2160
2161         MPV_frame_end(s);
2162
2163         if (s->out_format == FMT_MJPEG)
2164             mjpeg_picture_trailer(s);
2165         
2166         if(s->flags&CODEC_FLAG_PASS1)
2167             ff_write_pass1_stats(s);
2168
2169         for(i=0; i<4; i++){
2170             avctx->error[i] += s->current_picture_ptr->error[i];
2171         }
2172
2173         flush_put_bits(&s->pb);
2174         s->frame_bits  = put_bits_count(&s->pb);
2175
2176         stuffing_count= ff_vbv_update(s, s->frame_bits);
2177         if(stuffing_count){
2178             switch(s->codec_id){
2179             case CODEC_ID_MPEG1VIDEO:
2180             case CODEC_ID_MPEG2VIDEO:
2181                 while(stuffing_count--){
2182                     put_bits(&s->pb, 8, 0);
2183                 }
2184             break;
2185             case CODEC_ID_MPEG4:
2186                 put_bits(&s->pb, 16, 0);
2187                 put_bits(&s->pb, 16, 0x1C3);
2188                 stuffing_count -= 4;
2189                 while(stuffing_count--){
2190                     put_bits(&s->pb, 8, 0xFF);
2191                 }
2192             break;
2193             default:
2194                 av_log(s->avctx, AV_LOG_ERROR, "vbv buffer overflow\n");
2195             }
2196             flush_put_bits(&s->pb);
2197             s->frame_bits  = put_bits_count(&s->pb);
2198         }
2199
2200         /* update mpeg1/2 vbv_delay for CBR */    
2201         if(s->avctx->rc_max_rate && s->avctx->rc_min_rate == s->avctx->rc_max_rate && s->out_format == FMT_MPEG1
2202            && 90000LL * (avctx->rc_buffer_size-1) <= s->avctx->rc_max_rate*0xFFFFLL){
2203             int vbv_delay;
2204
2205             assert(s->repeat_first_field==0);
2206             
2207             vbv_delay= lrintf(90000 * s->rc_context.buffer_index / s->avctx->rc_max_rate);
2208             assert(vbv_delay < 0xFFFF);
2209
2210             s->vbv_delay_ptr[0] &= 0xF8;
2211             s->vbv_delay_ptr[0] |= vbv_delay>>13;
2212             s->vbv_delay_ptr[1]  = vbv_delay>>5;
2213             s->vbv_delay_ptr[2] &= 0x07;
2214             s->vbv_delay_ptr[2] |= vbv_delay<<3;
2215         }
2216         s->total_bits += s->frame_bits;
2217         avctx->frame_bits  = s->frame_bits;
2218     }else{
2219         assert((pbBufPtr(&s->pb) == s->pb.buf));
2220         s->frame_bits=0;
2221     }
2222     assert((s->frame_bits&7)==0);
2223     
2224     return s->frame_bits/8;
2225 }
2226
2227 #endif //CONFIG_ENCODERS
2228
2229 static inline void gmc1_motion(MpegEncContext *s,
2230                                uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2231                                uint8_t **ref_picture)
2232 {
2233     uint8_t *ptr;
2234     int offset, src_x, src_y, linesize, uvlinesize;
2235     int motion_x, motion_y;
2236     int emu=0;
2237
2238     motion_x= s->sprite_offset[0][0];
2239     motion_y= s->sprite_offset[0][1];
2240     src_x = s->mb_x * 16 + (motion_x >> (s->sprite_warping_accuracy+1));
2241     src_y = s->mb_y * 16 + (motion_y >> (s->sprite_warping_accuracy+1));
2242     motion_x<<=(3-s->sprite_warping_accuracy);
2243     motion_y<<=(3-s->sprite_warping_accuracy);
2244     src_x = clip(src_x, -16, s->width);
2245     if (src_x == s->width)
2246         motion_x =0;
2247     src_y = clip(src_y, -16, s->height);
2248     if (src_y == s->height)
2249         motion_y =0;
2250
2251     linesize = s->linesize;
2252     uvlinesize = s->uvlinesize;
2253     
2254     ptr = ref_picture[0] + (src_y * linesize) + src_x;
2255
2256     if(s->flags&CODEC_FLAG_EMU_EDGE){
2257         if(   (unsigned)src_x >= s->h_edge_pos - 17
2258            || (unsigned)src_y >= s->v_edge_pos - 17){
2259             ff_emulated_edge_mc(s->edge_emu_buffer, ptr, linesize, 17, 17, src_x, src_y, s->h_edge_pos, s->v_edge_pos);
2260             ptr= s->edge_emu_buffer;
2261         }
2262     }
2263     
2264     if((motion_x|motion_y)&7){
2265         s->dsp.gmc1(dest_y  , ptr  , linesize, 16, motion_x&15, motion_y&15, 128 - s->no_rounding);
2266         s->dsp.gmc1(dest_y+8, ptr+8, linesize, 16, motion_x&15, motion_y&15, 128 - s->no_rounding);
2267     }else{
2268         int dxy;
2269         
2270         dxy= ((motion_x>>3)&1) | ((motion_y>>2)&2);
2271         if (s->no_rounding){
2272             s->dsp.put_no_rnd_pixels_tab[0][dxy](dest_y, ptr, linesize, 16);
2273         }else{
2274             s->dsp.put_pixels_tab       [0][dxy](dest_y, ptr, linesize, 16);
2275         }
2276     }
2277     
2278     if(s->flags&CODEC_FLAG_GRAY) return;
2279
2280     motion_x= s->sprite_offset[1][0];
2281     motion_y= s->sprite_offset[1][1];
2282     src_x = s->mb_x * 8 + (motion_x >> (s->sprite_warping_accuracy+1));
2283     src_y = s->mb_y * 8 + (motion_y >> (s->sprite_warping_accuracy+1));
2284     motion_x<<=(3-s->sprite_warping_accuracy);
2285     motion_y<<=(3-s->sprite_warping_accuracy);
2286     src_x = clip(src_x, -8, s->width>>1);
2287     if (src_x == s->width>>1)
2288         motion_x =0;
2289     src_y = clip(src_y, -8, s->height>>1);
2290     if (src_y == s->height>>1)
2291         motion_y =0;
2292
2293     offset = (src_y * uvlinesize) + src_x;
2294     ptr = ref_picture[1] + offset;
2295     if(s->flags&CODEC_FLAG_EMU_EDGE){
2296         if(   (unsigned)src_x >= (s->h_edge_pos>>1) - 9
2297            || (unsigned)src_y >= (s->v_edge_pos>>1) - 9){
2298             ff_emulated_edge_mc(s->edge_emu_buffer, ptr, uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
2299             ptr= s->edge_emu_buffer;
2300             emu=1;
2301         }
2302     }
2303     s->dsp.gmc1(dest_cb, ptr, uvlinesize, 8, motion_x&15, motion_y&15, 128 - s->no_rounding);
2304     
2305     ptr = ref_picture[2] + offset;
2306     if(emu){
2307         ff_emulated_edge_mc(s->edge_emu_buffer, ptr, uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
2308         ptr= s->edge_emu_buffer;
2309     }
2310     s->dsp.gmc1(dest_cr, ptr, uvlinesize, 8, motion_x&15, motion_y&15, 128 - s->no_rounding);
2311     
2312     return;
2313 }
2314
2315 static inline void gmc_motion(MpegEncContext *s,
2316                                uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2317                                uint8_t **ref_picture)
2318 {
2319     uint8_t *ptr;
2320     int linesize, uvlinesize;
2321     const int a= s->sprite_warping_accuracy;
2322     int ox, oy;
2323
2324     linesize = s->linesize;
2325     uvlinesize = s->uvlinesize;
2326
2327     ptr = ref_picture[0];
2328
2329     ox= s->sprite_offset[0][0] + s->sprite_delta[0][0]*s->mb_x*16 + s->sprite_delta[0][1]*s->mb_y*16;
2330     oy= s->sprite_offset[0][1] + s->sprite_delta[1][0]*s->mb_x*16 + s->sprite_delta[1][1]*s->mb_y*16;
2331
2332     s->dsp.gmc(dest_y, ptr, linesize, 16,
2333            ox, 
2334            oy, 
2335            s->sprite_delta[0][0], s->sprite_delta[0][1],
2336            s->sprite_delta[1][0], s->sprite_delta[1][1], 
2337            a+1, (1<<(2*a+1)) - s->no_rounding,
2338            s->h_edge_pos, s->v_edge_pos);
2339     s->dsp.gmc(dest_y+8, ptr, linesize, 16,
2340            ox + s->sprite_delta[0][0]*8, 
2341            oy + s->sprite_delta[1][0]*8, 
2342            s->sprite_delta[0][0], s->sprite_delta[0][1],
2343            s->sprite_delta[1][0], s->sprite_delta[1][1], 
2344            a+1, (1<<(2*a+1)) - s->no_rounding,
2345            s->h_edge_pos, s->v_edge_pos);
2346
2347     if(s->flags&CODEC_FLAG_GRAY) return;
2348
2349     ox= s->sprite_offset[1][0] + s->sprite_delta[0][0]*s->mb_x*8 + s->sprite_delta[0][1]*s->mb_y*8;
2350     oy= s->sprite_offset[1][1] + s->sprite_delta[1][0]*s->mb_x*8 + s->sprite_delta[1][1]*s->mb_y*8;
2351
2352     ptr = ref_picture[1];
2353     s->dsp.gmc(dest_cb, ptr, uvlinesize, 8,
2354            ox, 
2355            oy, 
2356            s->sprite_delta[0][0], s->sprite_delta[0][1],
2357            s->sprite_delta[1][0], s->sprite_delta[1][1], 
2358            a+1, (1<<(2*a+1)) - s->no_rounding,
2359            s->h_edge_pos>>1, s->v_edge_pos>>1);
2360     
2361     ptr = ref_picture[2];
2362     s->dsp.gmc(dest_cr, ptr, uvlinesize, 8,
2363            ox, 
2364            oy, 
2365            s->sprite_delta[0][0], s->sprite_delta[0][1],
2366            s->sprite_delta[1][0], s->sprite_delta[1][1], 
2367            a+1, (1<<(2*a+1)) - s->no_rounding,
2368            s->h_edge_pos>>1, s->v_edge_pos>>1);
2369 }
2370
/**
 * Copies a rectangular area of samples to a temporary buffer and replicates the border samples.
 *
 * Samples of the block that lie inside the source plane are copied; the
 * remaining ones are filled by repeating the nearest copied row/column.
 * If the source plane is empty (w <= 0 or h <= 0) there is nothing to
 * replicate from and buf is left untouched.
 *
 * @param buf destination buffer
 * @param src source buffer, pointing at sample (src_x, src_y) of the plane
 * @param linesize number of bytes between 2 vertically adjacent samples in both the source and destination buffers
 * @param block_w width of block
 * @param block_h height of block
 * @param src_x x coordinate of the top left sample of the block in the source buffer
 * @param src_y y coordinate of the top left sample of the block in the source buffer
 * @param w width of the source buffer
 * @param h height of the source buffer
 */
void ff_emulated_edge_mc(uint8_t *buf, uint8_t *src, int linesize, int block_w, int block_h,
                                    int src_x, int src_y, int w, int h){
    int x, y;
    int start_y, start_x, end_y, end_x;

    /* an empty plane has no sample to replicate; without this guard the
       fill loops below would only smear stale contents of buf around */
    if(w <= 0 || h <= 0)
        return;

    /* block entirely outside the plane: move it so that exactly one
       row/column of the plane is still covered */
    if(src_y>= h){
        src+= (h-1-src_y)*linesize;
        src_y=h-1;
    }else if(src_y<=-block_h){
        src+= (1-block_h-src_y)*linesize;
        src_y=1-block_h;
    }
    if(src_x>= w){
        src+= (w-1-src_x);
        src_x=w-1;
    }else if(src_x<=-block_w){
        src+= (1-block_w-src_x);
        src_x=1-block_w;
    }

    /* [start, end) is the part of the block that overlaps the plane */
    start_y= src_y < 0 ? -src_y : 0;
    start_x= src_x < 0 ? -src_x : 0;
    end_y= h-src_y < block_h ? h-src_y : block_h;
    end_x= w-src_x < block_w ? w-src_x : block_w;

    // copy existing part
    for(y=start_y; y<end_y; y++){
        for(x=start_x; x<end_x; x++){
            buf[x + y*linesize]= src[x + y*linesize];
        }
    }

    //top
    for(y=0; y<start_y; y++){
        for(x=start_x; x<end_x; x++){
            buf[x + y*linesize]= buf[x + start_y*linesize];
        }
    }

    //bottom
    for(y=end_y; y<block_h; y++){
        for(x=start_x; x<end_x; x++){
            buf[x + y*linesize]= buf[x + (end_y-1)*linesize];
        }
    }

    /* rows are complete between start_x and end_x now, extend them sideways */
    for(y=0; y<block_h; y++){
       //left
        for(x=0; x<start_x; x++){
            buf[x + y*linesize]= buf[start_x + y*linesize];
        }

       //right
        for(x=end_x; x<block_w; x++){
            buf[x + y*linesize]= buf[end_x - 1 + y*linesize];
        }
    }
}
2441
2442 static inline int hpel_motion(MpegEncContext *s, 
2443                                   uint8_t *dest, uint8_t *src,
2444                                   int field_based, int field_select,
2445                                   int src_x, int src_y,
2446                                   int width, int height, int stride,
2447                                   int h_edge_pos, int v_edge_pos,
2448                                   int w, int h, op_pixels_func *pix_op,
2449                                   int motion_x, int motion_y)
2450 {
2451     int dxy;
2452     int emu=0;
2453
2454     dxy = ((motion_y & 1) << 1) | (motion_x & 1);
2455     src_x += motion_x >> 1;
2456     src_y += motion_y >> 1;
2457                 
2458     /* WARNING: do no forget half pels */
2459     src_x = clip(src_x, -16, width); //FIXME unneeded for emu?
2460     if (src_x == width)
2461         dxy &= ~1;
2462     src_y = clip(src_y, -16, height);
2463     if (src_y == height)
2464         dxy &= ~2;
2465     src += src_y * stride + src_x;
2466
2467     if(s->unrestricted_mv && (s->flags&CODEC_FLAG_EMU_EDGE)){
2468         if(   (unsigned)src_x > h_edge_pos - (motion_x&1) - w
2469            || (unsigned)src_y > v_edge_pos - (motion_y&1) - h){
2470             ff_emulated_edge_mc(s->edge_emu_buffer, src, s->linesize, w+1, (h+1)<<field_based,
2471                              src_x, src_y<<field_based, h_edge_pos, s->v_edge_pos);
2472             src= s->edge_emu_buffer;
2473             emu=1;
2474         }
2475     }
2476     if(field_select)
2477         src += s->linesize;
2478     pix_op[dxy](dest, src, stride, h);
2479     return emu;
2480 }
2481
2482 /* apply one mpeg motion vector to the three components */
2483 static always_inline void mpeg_motion(MpegEncContext *s,
2484                                uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2485                                int field_based, int bottom_field, int field_select,
2486                                uint8_t **ref_picture, op_pixels_func (*pix_op)[4],
2487                                int motion_x, int motion_y, int h)
2488 {
2489     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
2490     int dxy, uvdxy, mx, my, src_x, src_y, uvsrc_x, uvsrc_y, v_edge_pos, uvlinesize, linesize;
2491     
2492 #if 0    
2493 if(s->quarter_sample)
2494 {
2495     motion_x>>=1;
2496     motion_y>>=1;
2497 }
2498 #endif
2499
2500     v_edge_pos = s->v_edge_pos >> field_based;
2501     linesize   = s->current_picture.linesize[0] << field_based;
2502     uvlinesize = s->current_picture.linesize[1] << field_based;
2503
2504     dxy = ((motion_y & 1) << 1) | (motion_x & 1);
2505     src_x = s->mb_x* 16               + (motion_x >> 1);
2506     src_y =(s->mb_y<<(4-field_based)) + (motion_y >> 1);
2507
2508     if (s->out_format == FMT_H263) {
2509         if((s->workaround_bugs & FF_BUG_HPEL_CHROMA) && field_based){
2510             mx = (motion_x>>1)|(motion_x&1);
2511             my = motion_y >>1;
2512             uvdxy = ((my & 1) << 1) | (mx & 1);
2513             uvsrc_x = s->mb_x* 8               + (mx >> 1);
2514             uvsrc_y = (s->mb_y<<(3-field_based)) + (my >> 1);
2515         }else{
2516             uvdxy = dxy | (motion_y & 2) | ((motion_x & 2) >> 1);
2517             uvsrc_x = src_x>>1;
2518             uvsrc_y = src_y>>1;
2519         }
2520     }else if(s->out_format == FMT_H261){//even chroma mv's are full pel in H261
2521         mx = motion_x / 4;
2522         my = motion_y / 4;
2523         uvdxy = 0;
2524         uvsrc_x = s->mb_x*8 + mx;
2525         uvsrc_y = s->mb_y*8 + my;
2526     } else {
2527         if(s->chroma_y_shift){
2528             mx = motion_x / 2;
2529             my = motion_y / 2;
2530             uvdxy = ((my & 1) << 1) | (mx & 1);
2531             uvsrc_x = s->mb_x* 8               + (mx >> 1);
2532             uvsrc_y = (s->mb_y<<(3-field_based)) + (my >> 1);
2533         } else {
2534             if(s->chroma_x_shift){
2535             //Chroma422
2536                 mx = motion_x / 2;
2537                 uvdxy = ((motion_y & 1) << 1) | (mx & 1);
2538                 uvsrc_x = s->mb_x* 8           + (mx >> 1);
2539                 uvsrc_y = src_y;
2540             } else {
2541             //Chroma444
2542                 uvdxy = dxy;
2543                 uvsrc_x = src_x;
2544                 uvsrc_y = src_y;
2545             }
2546         }
2547     }
2548
2549     ptr_y  = ref_picture[0] + src_y * linesize + src_x;
2550     ptr_cb = ref_picture[1] + uvsrc_y * uvlinesize + uvsrc_x;
2551     ptr_cr = ref_picture[2] + uvsrc_y * uvlinesize + uvsrc_x;
2552
2553     if(   (unsigned)src_x > s->h_edge_pos - (motion_x&1) - 16
2554        || (unsigned)src_y >    v_edge_pos - (motion_y&1) - h){
2555             if(s->codec_id == CODEC_ID_MPEG2VIDEO ||
2556                s->codec_id == CODEC_ID_MPEG1VIDEO){
2557                 av_log(s->avctx,AV_LOG_DEBUG,"MPEG motion vector out of boundary\n");
2558                 return ;
2559             }
2560             ff_emulated_edge_mc(s->edge_emu_buffer, ptr_y, s->linesize, 17, 17+field_based,
2561                              src_x, src_y<<field_based, s->h_edge_pos, s->v_edge_pos);
2562             ptr_y = s->edge_emu_buffer;
2563             if(!(s->flags&CODEC_FLAG_GRAY)){
2564                 uint8_t *uvbuf= s->edge_emu_buffer+18*s->linesize;
2565                 ff_emulated_edge_mc(uvbuf  , ptr_cb, s->uvlinesize, 9, 9+field_based, 
2566                                  uvsrc_x, uvsrc_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
2567                 ff_emulated_edge_mc(uvbuf+16, ptr_cr, s->uvlinesize, 9, 9+field_based, 
2568                                  uvsrc_x, uvsrc_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
2569                 ptr_cb= uvbuf;
2570                 ptr_cr= uvbuf+16;
2571             }
2572     }
2573
2574     if(bottom_field){ //FIXME use this for field pix too instead of the obnoxious hack which changes picture.data
2575         dest_y += s->linesize;
2576         dest_cb+= s->uvlinesize;
2577         dest_cr+= s->uvlinesize;
2578     }
2579
2580     if(field_select){
2581         ptr_y += s->linesize;
2582         ptr_cb+= s->uvlinesize;
2583         ptr_cr+= s->uvlinesize;
2584     }
2585
2586     pix_op[0][dxy](dest_y, ptr_y, linesize, h);
2587     
2588     if(!(s->flags&CODEC_FLAG_GRAY)){
2589         pix_op[s->chroma_x_shift][uvdxy](dest_cb, ptr_cb, uvlinesize, h >> s->chroma_y_shift);
2590         pix_op[s->chroma_x_shift][uvdxy](dest_cr, ptr_cr, uvlinesize, h >> s->chroma_y_shift);
2591     }
2592 }
2593 //FIXME move to dsputil, avg variant, 16x16 version
2594 static inline void put_obmc(uint8_t *dst, uint8_t *src[5], int stride){
2595     int x;
2596     uint8_t * const top   = src[1];
2597     uint8_t * const left  = src[2];
2598     uint8_t * const mid   = src[0];
2599     uint8_t * const right = src[3];
2600     uint8_t * const bottom= src[4];
2601 #define OBMC_FILTER(x, t, l, m, r, b)\
2602     dst[x]= (t*top[x] + l*left[x] + m*mid[x] + r*right[x] + b*bottom[x] + 4)>>3
2603 #define OBMC_FILTER4(x, t, l, m, r, b)\
2604     OBMC_FILTER(x         , t, l, m, r, b);\
2605     OBMC_FILTER(x+1       , t, l, m, r, b);\
2606     OBMC_FILTER(x  +stride, t, l, m, r, b);\
2607     OBMC_FILTER(x+1+stride, t, l, m, r, b);
2608     
2609     x=0;
2610     OBMC_FILTER (x  , 2, 2, 4, 0, 0);
2611     OBMC_FILTER (x+1, 2, 1, 5, 0, 0);
2612     OBMC_FILTER4(x+2, 2, 1, 5, 0, 0);
2613     OBMC_FILTER4(x+4, 2, 0, 5, 1, 0);
2614     OBMC_FILTER (x+6, 2, 0, 5, 1, 0);
2615     OBMC_FILTER (x+7, 2, 0, 4, 2, 0);
2616     x+= stride;
2617     OBMC_FILTER (x  , 1, 2, 5, 0, 0);
2618     OBMC_FILTER (x+1, 1, 2, 5, 0, 0);
2619     OBMC_FILTER (x+6, 1, 0, 5, 2, 0);
2620     OBMC_FILTER (x+7, 1, 0, 5, 2, 0);
2621     x+= stride;
2622     OBMC_FILTER4(x  , 1, 2, 5, 0, 0);
2623     OBMC_FILTER4(x+2, 1, 1, 6, 0, 0);
2624     OBMC_FILTER4(x+4, 1, 0, 6, 1, 0);
2625     OBMC_FILTER4(x+6, 1, 0, 5, 2, 0);
2626     x+= 2*stride;
2627     OBMC_FILTER4(x  , 0, 2, 5, 0, 1);
2628     OBMC_FILTER4(x+2, 0, 1, 6, 0, 1);
2629     OBMC_FILTER4(x+4, 0, 0, 6, 1, 1);
2630     OBMC_FILTER4(x+6, 0, 0, 5, 2, 1);
2631     x+= 2*stride;
2632     OBMC_FILTER (x  , 0, 2, 5, 0, 1);
2633     OBMC_FILTER (x+1, 0, 2, 5, 0, 1);
2634     OBMC_FILTER4(x+2, 0, 1, 5, 0, 2);
2635     OBMC_FILTER4(x+4, 0, 0, 5, 1, 2);
2636     OBMC_FILTER (x+6, 0, 0, 5, 2, 1);
2637     OBMC_FILTER (x+7, 0, 0, 5, 2, 1);
2638     x+= stride;
2639     OBMC_FILTER (x  , 0, 2, 4, 0, 2);
2640     OBMC_FILTER (x+1, 0, 1, 5, 0, 2);
2641     OBMC_FILTER (x+6, 0, 0, 5, 1, 2);
2642     OBMC_FILTER (x+7, 0, 0, 4, 2, 2);
2643 }
2644
2645 /* obmc for 1 8x8 luma block */
2646 static inline void obmc_motion(MpegEncContext *s,
2647                                uint8_t *dest, uint8_t *src,
2648                                int src_x, int src_y,
2649                                op_pixels_func *pix_op,
2650                                int16_t mv[5][2]/* mid top left right bottom*/)
2651 #define MID    0
2652 {
2653     int i;
2654     uint8_t *ptr[5];
2655     
2656     assert(s->quarter_sample==0);
2657     
2658     for(i=0; i<5; i++){
2659         if(i && mv[i][0]==mv[MID][0] && mv[i][1]==mv[MID][1]){
2660             ptr[i]= ptr[MID];
2661         }else{
2662             ptr[i]= s->obmc_scratchpad + 8*(i&1) + s->linesize*8*(i>>1);
2663             hpel_motion(s, ptr[i], src, 0, 0,
2664                         src_x, src_y,
2665                         s->width, s->height, s->linesize,
2666                         s->h_edge_pos, s->v_edge_pos,
2667                         8, 8, pix_op,
2668                         mv[i][0], mv[i][1]);
2669         }
2670     }
2671
2672     put_obmc(dest, ptr, s->linesize);                
2673 }
2674
2675 static inline void qpel_motion(MpegEncContext *s,
2676                                uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2677                                int field_based, int bottom_field, int field_select,
2678                                uint8_t **ref_picture, op_pixels_func (*pix_op)[4],
2679                                qpel_mc_func (*qpix_op)[16],
2680                                int motion_x, int motion_y, int h)
2681 {
2682     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
2683     int dxy, uvdxy, mx, my, src_x, src_y, uvsrc_x, uvsrc_y, v_edge_pos, linesize, uvlinesize;
2684
2685     dxy = ((motion_y & 3) << 2) | (motion_x & 3);
2686     src_x = s->mb_x *  16                 + (motion_x >> 2);
2687     src_y = s->mb_y * (16 >> field_based) + (motion_y >> 2);
2688
2689     v_edge_pos = s->v_edge_pos >> field_based;
2690     linesize = s->linesize << field_based;
2691     uvlinesize = s->uvlinesize << field_based;
2692     
2693     if(field_based){
2694         mx= motion_x/2;
2695         my= motion_y>>1;
2696     }else if(s->workaround_bugs&FF_BUG_QPEL_CHROMA2){
2697         static const int rtab[8]= {0,0,1,1,0,0,0,1};
2698         mx= (motion_x>>1) + rtab[motion_x&7];
2699         my= (motion_y>>1) + rtab[motion_y&7];
2700     }else if(s->workaround_bugs&FF_BUG_QPEL_CHROMA){
2701         mx= (motion_x>>1)|(motion_x&1);
2702         my= (motion_y>>1)|(motion_y&1);
2703     }else{
2704         mx= motion_x/2;
2705         my= motion_y/2;
2706     }
2707     mx= (mx>>1)|(mx&1);
2708     my= (my>>1)|(my&1);
2709
2710     uvdxy= (mx&1) | ((my&1)<<1);
2711     mx>>=1;
2712     my>>=1;
2713
2714     uvsrc_x = s->mb_x *  8                 + mx;
2715     uvsrc_y = s->mb_y * (8 >> field_based) + my;
2716
2717     ptr_y  = ref_picture[0] +   src_y *   linesize +   src_x;
2718     ptr_cb = ref_picture[1] + uvsrc_y * uvlinesize + uvsrc_x;
2719     ptr_cr = ref_picture[2] + uvsrc_y * uvlinesize + uvsrc_x;
2720
2721     if(   (unsigned)src_x > s->h_edge_pos - (motion_x&3) - 16 
2722        || (unsigned)src_y >    v_edge_pos - (motion_y&3) - h  ){
2723         ff_emulated_edge_mc(s->edge_emu_buffer, ptr_y, s->linesize, 17, 17+field_based, 
2724                          src_x, src_y<<field_based, s->h_edge_pos, s->v_edge_pos);
2725         ptr_y= s->edge_emu_buffer;
2726         if(!(s->flags&CODEC_FLAG_GRAY)){
2727             uint8_t *uvbuf= s->edge_emu_buffer + 18*s->linesize;
2728             ff_emulated_edge_mc(uvbuf, ptr_cb, s->uvlinesize, 9, 9 + field_based, 
2729                              uvsrc_x, uvsrc_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
2730             ff_emulated_edge_mc(uvbuf + 16, ptr_cr, s->uvlinesize, 9, 9 + field_based, 
2731                              uvsrc_x, uvsrc_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
2732             ptr_cb= uvbuf;
2733             ptr_cr= uvbuf + 16;
2734         }
2735     }
2736
2737     if(!field_based)
2738         qpix_op[0][dxy](dest_y, ptr_y, linesize);
2739     else{
2740         if(bottom_field){
2741             dest_y += s->linesize;
2742             dest_cb+= s->uvlinesize;
2743             dest_cr+= s->uvlinesize;
2744         }
2745
2746         if(field_select){
2747             ptr_y  += s->linesize;
2748             ptr_cb += s->uvlinesize;
2749             ptr_cr += s->uvlinesize;
2750         }
2751         //damn interlaced mode
2752         //FIXME boundary mirroring is not exactly correct here
2753         qpix_op[1][dxy](dest_y  , ptr_y  , linesize);
2754         qpix_op[1][dxy](dest_y+8, ptr_y+8, linesize);
2755     }
2756     if(!(s->flags&CODEC_FLAG_GRAY)){
2757         pix_op[1][uvdxy](dest_cr, ptr_cr, uvlinesize, h >> 1);
2758         pix_op[1][uvdxy](dest_cb, ptr_cb, uvlinesize, h >> 1);
2759     }
2760 }
2761
2762 inline int ff_h263_round_chroma(int x){
2763     if (x >= 0)
2764         return  (h263_chroma_roundtab[x & 0xf] + ((x >> 3) & ~1));
2765     else {
2766         x = -x;
2767         return -(h263_chroma_roundtab[x & 0xf] + ((x >> 3) & ~1));
2768     }
2769 }
2770
2771 /**
2772  * h263 chorma 4mv motion compensation.
2773  */
2774 static inline void chroma_4mv_motion(MpegEncContext *s,
2775                                      uint8_t *dest_cb, uint8_t *dest_cr,
2776                                      uint8_t **ref_picture,
2777                                      op_pixels_func *pix_op,
2778                                      int mx, int my){
2779     int dxy, emu=0, src_x, src_y, offset;
2780     uint8_t *ptr;
2781     
2782     /* In case of 8X8, we construct a single chroma motion vector
2783        with a special rounding */
2784     mx= ff_h263_round_chroma(mx);
2785     my= ff_h263_round_chroma(my);
2786     
2787     dxy = ((my & 1) << 1) | (mx & 1);
2788     mx >>= 1;
2789     my >>= 1;
2790
2791     src_x = s->mb_x * 8 + mx;
2792     src_y = s->mb_y * 8 + my;
2793     src_x = clip(src_x, -8, s->width/2);
2794     if (src_x == s->width/2)
2795         dxy &= ~1;
2796     src_y = clip(src_y, -8, s->height/2);
2797     if (src_y == s->height/2)
2798         dxy &= ~2;
2799     
2800     offset = (src_y * (s->uvlinesize)) + src_x;
2801     ptr = ref_picture[1] + offset;
2802     if(s->flags&CODEC_FLAG_EMU_EDGE){
2803         if(   (unsigned)src_x > (s->h_edge_pos>>1) - (dxy &1) - 8
2804            || (unsigned)src_y > (s->v_edge_pos>>1) - (dxy>>1) - 8){
2805             ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
2806             ptr= s->edge_emu_buffer;
2807             emu=1;
2808         }
2809     }
2810     pix_op[dxy](dest_cb, ptr, s->uvlinesize, 8);
2811
2812     ptr = ref_picture[2] + offset;
2813     if(emu){
2814         ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
2815         ptr= s->edge_emu_buffer;
2816     }
2817     pix_op[dxy](dest_cr, ptr, s->uvlinesize, 8);
2818 }
2819
2820 /**
2821  * motion compensation of a single macroblock
2822  * @param s context
2823  * @param dest_y luma destination pointer
2824  * @param dest_cb chroma cb/u destination pointer
2825  * @param dest_cr chroma cr/v destination pointer
2826  * @param dir direction (0->forward, 1->backward)
2827  * @param ref_picture array[3] of pointers to the 3 planes of the reference picture
2828  * @param pix_op halfpel motion compensation function (average or put normally)
2829  * @param qpix_op qpel motion compensation function (average or put normally)
2830  * the motion vectors are taken from s->mv and the MV type from s->mv_type
2831  */
2832 static inline void MPV_motion(MpegEncContext *s, 
2833                               uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2834                               int dir, uint8_t **ref_picture, 
2835                               op_pixels_func (*pix_op)[4], qpel_mc_func (*qpix_op)[16])
2836 {
2837     int dxy, mx, my, src_x, src_y, motion_x, motion_y;
2838     int mb_x, mb_y, i;
2839     uint8_t *ptr, *dest;
2840
2841     mb_x = s->mb_x;
2842     mb_y = s->mb_y;
2843
         /* OBMC path (never taken for B pictures); it returns before the mv_type switch below */
2844     if(s->obmc && s->pict_type != B_TYPE){
             /* 4x4 cache of 8x8-block vectors: rows/columns 1..2 hold the current
                macroblock, the outer entries hold the neighbouring vectors */
2845         int16_t mv_cache[4][4][2];
2846         const int xy= s->mb_x + s->mb_y*s->mb_stride;
2847         const int mot_stride= s->b8_stride;
2848         const int mot_xy= mb_x*2 + mb_y*2*mot_stride;
2849
2850         assert(!s->mb_skiped);
2851                 
             /* each memcpy moves 4 int16_t, i.e. two vectors; the row below the
                macroblock (row 3) is filled from the same source as row 2 */
2852         memcpy(mv_cache[1][1], s->current_picture.motion_val[0][mot_xy           ], sizeof(int16_t)*4);
2853         memcpy(mv_cache[2][1], s->current_picture.motion_val[0][mot_xy+mot_stride], sizeof(int16_t)*4);
2854         memcpy(mv_cache[3][1], s->current_picture.motion_val[0][mot_xy+mot_stride], sizeof(int16_t)*4);
2855
             /* row above: on the first macroblock row or next to an intra macroblock
                the current macroblock's own top row is used instead */
2856         if(mb_y==0 || IS_INTRA(s->current_picture.mb_type[xy-s->mb_stride])){
2857             memcpy(mv_cache[0][1], mv_cache[1][1], sizeof(int16_t)*4);
2858         }else{
2859             memcpy(mv_cache[0][1], s->current_picture.motion_val[0][mot_xy-mot_stride], sizeof(int16_t)*4);
2860         }
2861
             /* left column, same substitution rule; each int32_t access moves one
                vector (two int16_t) at once */
2862         if(mb_x==0 || IS_INTRA(s->current_picture.mb_type[xy-1])){
2863             *(int32_t*)mv_cache[1][0]= *(int32_t*)mv_cache[1][1];
2864             *(int32_t*)mv_cache[2][0]= *(int32_t*)mv_cache[2][1];
2865         }else{
2866             *(int32_t*)mv_cache[1][0]= *(int32_t*)s->current_picture.motion_val[0][mot_xy-1];
2867             *(int32_t*)mv_cache[2][0]= *(int32_t*)s->current_picture.motion_val[0][mot_xy-1+mot_stride];
2868         }
2869
             /* right column, same substitution rule */
2870         if(mb_x+1>=s->mb_width || IS_INTRA(s->current_picture.mb_type[xy+1])){
2871             *(int32_t*)mv_cache[1][3]= *(int32_t*)mv_cache[1][2];
2872             *(int32_t*)mv_cache[2][3]= *(int32_t*)mv_cache[2][2];
2873         }else{
2874             *(int32_t*)mv_cache[1][3]= *(int32_t*)s->current_picture.motion_val[0][mot_xy+2];
2875             *(int32_t*)mv_cache[2][3]= *(int32_t*)s->current_picture.motion_val[0][mot_xy+2+mot_stride];
2876         }
2877         
             /* mx/my accumulate the four luma vectors for the chroma vector */
2878         mx = 0;
2879         my = 0;
2880         for(i=0;i<4;i++) {
2881             const int x= (i&1)+1;
2882             const int y= (i>>1)+1;
                 /* order expected by obmc_motion(): mid, top, left, right, bottom */
2883             int16_t mv[5][2]= {
2884                 {mv_cache[y][x  ][0], mv_cache[y][x  ][1]},
2885                 {mv_cache[y-1][x][0], mv_cache[y-1][x][1]},
2886                 {mv_cache[y][x-1][0], mv_cache[y][x-1][1]},
2887                 {mv_cache[y][x+1][0], mv_cache[y][x+1][1]},
2888                 {mv_cache[y+1][x][0], mv_cache[y+1][x][1]}};
2889             //FIXME cleanup
2890             obmc_motion(s, dest_y + ((i & 1) * 8) + (i >> 1) * 8 * s->linesize,
2891                         ref_picture[0],
2892                         mb_x * 16 + (i & 1) * 8, mb_y * 16 + (i >>1) * 8,
2893                         pix_op[1],
2894                         mv);
2895
2896             mx += mv[0][0];
2897             my += mv[0][1];
2898         }
2899         if(!(s->flags&CODEC_FLAG_GRAY))
2900             chroma_4mv_motion(s, dest_cb, dest_cr, ref_picture, pix_op[1], mx, my);
2901
2902         return;
2903     }
2904    
2905     switch(s->mv_type) {
         /* one vector for the whole macroblock */
2906     case MV_TYPE_16X16:
2907 #ifdef CONFIG_RISKY
             /* mcsel selects global motion compensation: gmc1_motion() when there is
                exactly one real sprite warping point, gmc_motion() otherwise */
2908         if(s->mcsel){
2909             if(s->real_sprite_warping_points==1){
2910                 gmc1_motion(s, dest_y, dest_cb, dest_cr,
2911                             ref_picture);
2912             }else{
2913                 gmc_motion(s, dest_y, dest_cb, dest_cr,
2914                             ref_picture);
2915             }
2916         }else if(s->quarter_sample){
2917             qpel_motion(s, dest_y, dest_cb, dest_cr, 
2918                         0, 0, 0,
2919                         ref_picture, pix_op, qpix_op,
2920                         s->mv[dir][0][0], s->mv[dir][0][1], 16);
2921         }else if(s->mspel){
2922             ff_mspel_motion(s, dest_y, dest_cb, dest_cr,
2923                         ref_picture, pix_op,
2924                         s->mv[dir][0][0], s->mv[dir][0][1], 16);
2925         }else
2926 #endif
2927         {
2928             mpeg_motion(s, dest_y, dest_cb, dest_cr, 
2929                         0, 0, 0,
2930                         ref_picture, pix_op,
2931                         s->mv[dir][0][0], s->mv[dir][0][1], 16);
2932         }           
2933         break;
         /* four vectors, one per 8x8 luma block; chroma uses one vector built from their sum */
2934     case MV_TYPE_8X8:
2935         mx = 0;
2936         my = 0;
2937         if(s->quarter_sample){
2938             for(i=0;i<4;i++) {
2939                 motion_x = s->mv[dir][i][0];
2940                 motion_y = s->mv[dir][i][1];
2941
2942                 dxy = ((motion_y & 3) << 2) | (motion_x & 3);
2943                 src_x = mb_x * 16 + (motion_x >> 2) + (i & 1) * 8;
2944                 src_y = mb_y * 16 + (motion_y >> 2) + (i >>1) * 8;
2945                     
2946                 /* WARNING: do no forget half pels */
2947                 src_x = clip(src_x, -16, s->width);
2948                 if (src_x == s->width)
2949                     dxy &= ~3;
2950                 src_y = clip(src_y, -16, s->height);
2951                 if (src_y == s->height)
2952                     dxy &= ~12;
2953                     
2954                 ptr = ref_picture[0] + (src_y * s->linesize) + (src_x);
2955                 if(s->flags&CODEC_FLAG_EMU_EDGE){
2956                     if(   (unsigned)src_x > s->h_edge_pos - (motion_x&3) - 8 
2957                        || (unsigned)src_y > s->v_edge_pos - (motion_y&3) - 8 ){
2958                         ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->linesize, 9, 9, src_x, src_y, s->h_edge_pos, s->v_edge_pos);
2959                         ptr= s->edge_emu_buffer;
2960                     }
2961                 }
2962                 dest = dest_y + ((i & 1) * 8) + (i >> 1) * 8 * s->linesize;
2963                 qpix_op[1][dxy](dest, ptr, s->linesize);
2964
                     /* quarter-pel components are halved before being summed for chroma */
2965                 mx += s->mv[dir][i][0]/2;
2966                 my += s->mv[dir][i][1]/2;
2967             }
2968         }else{
2969             for(i=0;i<4;i++) {
2970                 hpel_motion(s, dest_y + ((i & 1) * 8) + (i >> 1) * 8 * s->linesize,
2971                             ref_picture[0], 0, 0,
2972                             mb_x * 16 + (i & 1) * 8, mb_y * 16 + (i >>1) * 8,
2973                             s->width, s->height, s->linesize,
2974                             s->h_edge_pos, s->v_edge_pos,
2975                             8, 8, pix_op[1],
2976                             s->mv[dir][i][0], s->mv[dir][i][1]);
2977
2978                 mx += s->mv[dir][i][0];
2979                 my += s->mv[dir][i][1];
2980             }
2981         }
2982
2983         if(!(s->flags&CODEC_FLAG_GRAY))
2984             chroma_4mv_motion(s, dest_cb, dest_cr, ref_picture, pix_op[1], mx, my);
2985         break;
2986     case MV_TYPE_FIELD:
2987         if (s->picture_structure == PICT_FRAME) {
                 /* frame picture: two predictions with field_based=1; the second
                    argument after it is bottom_field, followed by the reference
                    field chosen by s->field_select */
2988             if(s->quarter_sample){
2989                 for(i=0; i<2; i++){
2990                     qpel_motion(s, dest_y, dest_cb, dest_cr,
2991                                 1, i, s->field_select[dir][i],
2992                                 ref_picture, pix_op, qpix_op,
2993                                 s->mv[dir][i][0], s->mv[dir][i][1], 8);
2994                 }
2995             }else{
2996                 /* top field */       
2997                 mpeg_motion(s, dest_y, dest_cb, dest_cr,
2998                             1, 0, s->field_select[dir][0],
2999                             ref_picture, pix_op,
3000                             s->mv[dir][0][0], s->mv[dir][0][1], 8);
3001                 /* bottom field */
3002                 mpeg_motion(s, dest_y, dest_cb, dest_cr,
3003                             1, 1, s->field_select[dir][1],
3004                             ref_picture, pix_op,
3005                             s->mv[dir][1][0], s->mv[dir][1][1], 8);
3006             }
3007         } else {
                 /* not a frame picture: for a non-B second field whose selected
                    reference differs from picture_structure, predict from the
                    picture currently being processed
                    NOTE(review): presumably field_select+1 equals the PICT_*_FIELD
                    value of the selected field - confirm against the header */
3008             if(s->picture_structure != s->field_select[dir][0] + 1 && s->pict_type != B_TYPE && !s->first_field){
3009                 ref_picture= s->current_picture_ptr->data;
3010             } 
3011
3012             mpeg_motion(s, dest_y, dest_cb, dest_cr,
3013                         0, 0, s->field_select[dir][0],
3014                         ref_picture, pix_op,
3015                         s->mv[dir][0][0], s->mv[dir][0][1], 16);
3016         }
3017         break;
         /* two vectors, each predicting 8 lines; the reference is chosen per vector */
3018     case MV_TYPE_16X8:
3019         for(i=0; i<2; i++){
3020             uint8_t ** ref2picture;
3021
                 /* logical inverse of the reference test in the MV_TYPE_FIELD field-picture branch above */
3022             if(s->picture_structure == s->field_select[dir][i] + 1 || s->pict_type == B_TYPE || s->first_field){
3023                 ref2picture= ref_picture;
3024             }else{
3025                 ref2picture= s->current_picture_ptr->data;
3026             } 
3027
                 /* 16*i half-pel units move the source 8 lines down for the second vector */
3028             mpeg_motion(s, dest_y, dest_cb, dest_cr, 
3029                         0, 0, s->field_select[dir][i],
3030                         ref2picture, pix_op,
3031                         s->mv[dir][i][0], s->mv[dir][i][1] + 16*i, 8);
3032                 
3033             dest_y += 16*s->linesize;
3034             dest_cb+= (16>>s->chroma_y_shift)*s->uvlinesize;
3035             dest_cr+= (16>>s->chroma_y_shift)*s->uvlinesize;
3036         }        
3037         break;
         /* two passes over the same block: the first uses the caller's pix_op,
            the second averages on top of it */
3038     case MV_TYPE_DMV:
3039         if(s->picture_structure == PICT_FRAME){
3040             for(i=0; i<2; i++){
3041                 int j;
                     /* j is the destination field, j^i the source field */
3042                 for(j=0; j<2; j++){
3043                     mpeg_motion(s, dest_y, dest_cb, dest_cr,
3044                                 1, j, j^i,
3045                                 ref_picture, pix_op,
3046                                 s->mv[dir][2*i + j][0], s->mv[dir][2*i + j][1], 8);
3047                 }
3048                 pix_op = s->dsp.avg_pixels_tab; 
3049             }
3050         }else{
3051             for(i=0; i<2; i++){
3052                 mpeg_motion(s, dest_y, dest_cb, dest_cr, 
3053                             0, 0, s->picture_structure != i+1,
3054                             ref_picture, pix_op,
3055                             s->mv[dir][2*i][0],s->mv[dir][2*i][1],16);
3056
3057                 // after put we make avg of the same block
3058                 pix_op=s->dsp.avg_pixels_tab; 
3059
3060                 //opposite parity is always in the same frame if this is second field
3061                 if(!s->first_field){
3062                     ref_picture = s->current_picture_ptr->data;    
3063                 }
3064             }
3065         }
3066     break;
3067     default: assert(0);
3068     }
3069 }
3070
3071
3072 /* runs the intra dequantizer on block[] (block number i) and puts its IDCT to dest[] */
3073 static inline void put_dct(MpegEncContext *s, DCTELEM *block, int i,
3074                            uint8_t *dest, int line_size, int qscale)
3075 {
3076     s->dct_unquantize_intra(s, block, i, qscale); /* must happen before the idct below */
3077     s->dsp.idct_put(dest, line_size, block);
3078 }
3079
3080 /* add the IDCT of block[] (block number i) to dest[]; no dequantization is done here */
3081 static inline void add_dct(MpegEncContext *s, DCTELEM *block, int i,
3082                            uint8_t *dest, int line_size)
3083 {
3084     if (s->block_last_index[i] < 0)
3085         return; /* negative last index: nothing to add for this block */
3086     s->dsp.idct_add(dest, line_size, block);
3087 }
3088 /* runs the inter dequantizer on block[] (block number i) and adds its IDCT to dest[]; skipped if block_last_index[i] is negative */
3089 static inline void add_dequant_dct(MpegEncContext *s, DCTELEM *block, int i,
3090                                    uint8_t *dest, int line_size, int qscale)
3091 {
3092     if (s->block_last_index[i] < 0)
3093         return;
3094
3095     s->dct_unquantize_inter(s, block, i, qscale);
3096     s->dsp.idct_add(dest, line_size, block);
3097 }
3098
3099 /**
3100  * resets the dc, ac and coded_block entries of the current MB and clears its mbintra_table flag
3101  */
3102 void ff_clean_intra_table_entries(MpegEncContext *s)
3103 {
3104     const int luma_wrap = s->b8_stride;
3105     const int luma_xy   = s->block_index[0];
3106     const int mb_xy     = s->mb_x + s->mb_y * s->mb_stride;
3107
3108     /* luma dc: the 2x2 entries starting at block_index[0] */
3109     s->dc_val[0][luma_xy                ] = 1024;
3110     s->dc_val[0][luma_xy + 1            ] = 1024;
3111     s->dc_val[0][luma_xy     + luma_wrap] = 1024;
3112     s->dc_val[0][luma_xy + 1 + luma_wrap] = 1024;
3113     /* luma ac pred: 32 int16_t are cleared from the first entry of each of the two rows */
3114     memset(s->ac_val[0][luma_xy            ], 0, 32 * sizeof(int16_t));
3115     memset(s->ac_val[0][luma_xy + luma_wrap], 0, 32 * sizeof(int16_t));
3116     if (s->msmpeg4_version>=3) {
3117         s->coded_block[luma_xy                ] = 0;
3118         s->coded_block[luma_xy + 1            ] = 0;
3119         s->coded_block[luma_xy     + luma_wrap] = 0;
3120         s->coded_block[luma_xy + 1 + luma_wrap] = 0;
3121     }
3122
3123     /* chroma dc, indexed per MB */
3124     s->dc_val[1][mb_xy] = 1024;
3125     s->dc_val[2][mb_xy] = 1024;
3126     /* chroma ac pred */
3127     memset(s->ac_val[1][mb_xy], 0, 16 * sizeof(int16_t));
3128     memset(s->ac_val[2][mb_xy], 0, 16 * sizeof(int16_t));
3129     s->mbintra_table[mb_xy]= 0;
3130 }
3131
3132 /* generic function called after a macroblock has been parsed by the
3133    decoder or after it has been encoded by the encoder.
3134    It records qscale, updates the DC/intra prediction state and reconstructs the MB into s->dest[] (MC + IDCT).
3135    Important variables used:
3136    s->mb_intra : true if intra macroblock
3137    s->mv_dir   : motion vector direction
3138    s->mv_type  : motion vector type
3139    s->mv       : motion vector
3140    s->interlaced_dct : true if interlaced dct used (mpeg2)
3141  */
3142 void MPV_decode_mb(MpegEncContext *s, DCTELEM block[12][64])
3143 {
3144     int mb_x, mb_y;
3145     const int mb_xy = s->mb_y * s->mb_stride + s->mb_x; /* position of this MB in the per-MB tables */
3146 #ifdef HAVE_XVMC
3147     if(s->avctx->xvmc_acceleration){
3148         XVMC_decode_mb(s);//xvmc uses pblocks
3149         return;
3150     }
3151 #endif
3152     /* NOTE(review): mb_x and mb_y are assigned here but not read again in this function */
3153     mb_x = s->mb_x;
3154     mb_y = s->mb_y;
3155
3156     if(s->avctx->debug&FF_DEBUG_DCT_COEFF) {
3157        /* save DCT coefficients (6 blocks of 64 per MB), read through the idct permutation */
3158        int i,j;
3159        DCTELEM *dct = &s->current_picture.dct_coeff[mb_xy*64*6];
3160        for(i=0; i<6; i++)
3161            for(j=0; j<64; j++)
3162                *dct++ = block[i][s->dsp.idct_permutation[j]];
3163     }
3164
3165     s->current_picture.qscale_table[mb_xy]= s->qscale;
3166
3167     /* update DC predictors for P macroblocks */
3168     if (!s->mb_intra) {
3169         if (s->h263_pred || s->h263_aic) {
3170             if(s->mbintra_table[mb_xy]) /* flag was set by an earlier intra MB: reset its entries */
3171                 ff_clean_intra_table_entries(s);
3172         } else {
3173             s->last_dc[0] =
3174             s->last_dc[1] =
3175             s->last_dc[2] = 128 << s->intra_dc_precision;
3176         }
3177     }
3178     else if (s->h263_pred || s->h263_aic)
3179         s->mbintra_table[mb_xy]=1; /* cleared again by ff_clean_intra_table_entries() */
3180     /* reconstruct the MB, except while encoding intra only streams or B frames without PSNR calculation */
3181     if ((s->flags&CODEC_FLAG_PSNR) || !(s->encoding && (s->intra_only || s->pict_type==B_TYPE))) { //FIXME precalc
3182         uint8_t *dest_y, *dest_cb, *dest_cr;
3183         int dct_linesize, dct_offset;
3184         op_pixels_func (*op_pix)[4];
3185         qpel_mc_func (*op_qpix)[16];
3186         const int linesize= s->current_picture.linesize[0]; //not s->linesize as this would be wrong for field pics
3187         const int uvlinesize= s->current_picture.linesize[1];
3188         const int readable= s->pict_type != B_TYPE || s->encoding || s->avctx->draw_horiz_band; /* if 0, the MB is built in s->b_scratchpad and copied to s->dest[] at the end */
3189
3190         /* avoid copy if macroblock skipped in last frame too */
3191         /* skip only during decoding as we might trash the buffers during encoding a bit */
3192         if(!s->encoding){
3193             uint8_t *mbskip_ptr = &s->mbskip_table[mb_xy];
3194             const int age= s->current_picture.age;
3195
3196             assert(age);
3197
3198             if (s->mb_skiped) {
3199                 s->mb_skiped= 0;
3200                 assert(s->pict_type!=I_TYPE);
3201  
3202                 (*mbskip_ptr) ++; /* indicate that this time we skipped it */
3203                 if(*mbskip_ptr >99) *mbskip_ptr= 99; /* the counter saturates at 99 */
3204
3205                 /* if previous was skipped too, then nothing to do !  */
3206                 if (*mbskip_ptr >= age && s->current_picture.reference){
3207                     return;
3208                 }
3209             } else if(!s->current_picture.reference){
3210                 (*mbskip_ptr) ++; /* increase counter so the age can be compared cleanly */
3211                 if(*mbskip_ptr >99) *mbskip_ptr= 99;
3212             } else{
3213                 *mbskip_ptr = 0; /* not skipped */
3214             }
3215         }
3216         /* interlaced dct: the rows of a block are 2 lines apart and the lower blocks start 1 line (not 8) below */
3217         dct_linesize = linesize << s->interlaced_dct;
3218         dct_offset =(s->interlaced_dct)? linesize : linesize*8;
3219
3220         if(readable){
3221             dest_y=  s->dest[0];
3222             dest_cb= s->dest[1];
3223             dest_cr= s->dest[2];
3224         }else{
3225             dest_y = s->b_scratchpad;
3226             dest_cb= s->b_scratchpad+16*linesize;
3227             dest_cr= s->b_scratchpad+32*linesize;
3228         }
3229         if (!s->mb_intra) {
3230             /* motion handling */
3231             /* decoding or more than one mb_type (MC was already done otherwise) */
3232             if(!s->encoding){
3233                 if ((!s->no_rounding) || s->pict_type==B_TYPE){                
3234                     op_pix = s->dsp.put_pixels_tab;
3235                     op_qpix= s->dsp.put_qpel_pixels_tab;
3236                 }else{
3237                     op_pix = s->dsp.put_no_rnd_pixels_tab;
3238                     op_qpix= s->dsp.put_no_rnd_qpel_pixels_tab;
3239                 }
3240                 /* after a forward prediction the avg functions are selected, so a backward one is averaged onto it */
3241                 if (s->mv_dir & MV_DIR_FORWARD) {
3242                     MPV_motion(s, dest_y, dest_cb, dest_cr, 0, s->last_picture.data, op_pix, op_qpix);
3243                     op_pix = s->dsp.avg_pixels_tab;
3244                     op_qpix= s->dsp.avg_qpel_pixels_tab;
3245                 }
3246                 if (s->mv_dir & MV_DIR_BACKWARD) {
3247                     MPV_motion(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.data, op_pix, op_qpix);
3248                 }
3249             }
3250
3251             /* skip dequant / idct if we are really late ;) */
3252             if(s->hurry_up>1) return;
3253
3254             /* add dct residue */
3255             if(s->encoding || !(   s->h263_msmpeg4 || s->codec_id==CODEC_ID_MPEG1VIDEO || s->codec_id==CODEC_ID_MPEG2VIDEO
3256                                 || (s->codec_id==CODEC_ID_MPEG4 && !s->mpeg_quant))){
3257                 add_dequant_dct(s, block[0], 0, dest_y, dct_linesize, s->qscale);
3258                 add_dequant_dct(s, block[1], 1, dest_y + 8, dct_linesize, s->qscale);
3259                 add_dequant_dct(s, block[2], 2, dest_y + dct_offset, dct_linesize, s->qscale);
3260                 add_dequant_dct(s, block[3], 3, dest_y + dct_offset + 8, dct_linesize, s->qscale);
3261
3262                 if(!(s->flags&CODEC_FLAG_GRAY)){
3263                     add_dequant_dct(s, block[4], 4, dest_cb, uvlinesize, s->chroma_qscale);
3264                     add_dequant_dct(s, block[5], 5, dest_cr, uvlinesize, s->chroma_qscale);
3265                 }
3266             } else if(s->codec_id != CODEC_ID_WMV2){ /* add_dct() does not dequantize; presumably these blocks were dequantized while parsing - TODO confirm */
3267                 add_dct(s, block[0], 0, dest_y, dct_linesize);
3268                 add_dct(s, block[1], 1, dest_y + 8, dct_linesize);
3269                 add_dct(s, block[2], 2, dest_y + dct_offset, dct_linesize);
3270                 add_dct(s, block[3], 3, dest_y + dct_offset + 8, dct_linesize);
3271
3272                 if(!(s->flags&CODEC_FLAG_GRAY)){
3273                     if(s->chroma_y_shift){//Chroma420
3274                         add_dct(s, block[4], 4, dest_cb, uvlinesize);
3275                         add_dct(s, block[5], 5, dest_cr, uvlinesize);
3276                     }else{
3277                         //chroma422
3278                         dct_linesize = uvlinesize << s->interlaced_dct;
3279                         dct_offset =(s->interlaced_dct)? uvlinesize : uvlinesize*8;
3280
3281                         add_dct(s, block[4], 4, dest_cb, dct_linesize);
3282                         add_dct(s, block[5], 5, dest_cr, dct_linesize);
3283                         add_dct(s, block[6], 6, dest_cb+dct_offset, dct_linesize);
3284                         add_dct(s, block[7], 7, dest_cr+dct_offset, dct_linesize);
3285                         if(!s->chroma_x_shift){//Chroma444
3286                             add_dct(s, block[8], 8, dest_cb+8, dct_linesize);
3287                             add_dct(s, block[9], 9, dest_cr+8, dct_linesize);
3288                             add_dct(s, block[10], 10, dest_cb+8+dct_offset, dct_linesize);
3289                             add_dct(s, block[11], 11, dest_cr+8+dct_offset, dct_linesize);
3290                         }
3291                     }
3292                 }//fi gray
3293             }
3294 #ifdef CONFIG_RISKY
3295             else{
3296                 ff_wmv2_add_mb(s, block, dest_y, dest_cb, dest_cr);
3297             }
3298 #endif
3299         } else {
3300             /* dct only in intra block */
3301             if(s->encoding || !(s->codec_id==CODEC_ID_MPEG1VIDEO || s->codec_id==CODEC_ID_MPEG2VIDEO)){
3302                 put_dct(s, block[0], 0, dest_y, dct_linesize, s->qscale);
3303                 put_dct(s, block[1], 1, dest_y + 8, dct_linesize, s->qscale);
3304                 put_dct(s, block[2], 2, dest_y + dct_offset, dct_linesize, s->qscale);
3305                 put_dct(s, block[3], 3, dest_y + dct_offset + 8, dct_linesize, s->qscale);
3306
3307                 if(!(s->flags&CODEC_FLAG_GRAY)){
3308                     put_dct(s, block[4], 4, dest_cb, uvlinesize, s->chroma_qscale);
3309                     put_dct(s, block[5], 5, dest_cr, uvlinesize, s->chroma_qscale);
3310                 }
3311             }else{ /* MPEG-1/2 decoding: the blocks go to idct_put without being dequantized here */
3312                 s->dsp.idct_put(dest_y                 , dct_linesize, block[0]);
3313                 s->dsp.idct_put(dest_y              + 8, dct_linesize, block[1]);
3314                 s->dsp.idct_put(dest_y + dct_offset    , dct_linesize, block[2]);
3315                 s->dsp.idct_put(dest_y + dct_offset + 8, dct_linesize, block[3]);
3316
3317                 if(!(s->flags&CODEC_FLAG_GRAY)){
3318                     if(s->chroma_y_shift){
3319                         s->dsp.idct_put(dest_cb, uvlinesize, block[4]);
3320                         s->dsp.idct_put(dest_cr, uvlinesize, block[5]);
3321                     }else{
3322                         /* no vertical chroma subsampling: 2 chroma blocks per plane vertically, strides taken from uvlinesize */
3323                         dct_linesize = uvlinesize << s->interlaced_dct;
3324                         dct_offset =(s->interlaced_dct)? uvlinesize : uvlinesize*8;
3325
3326                         s->dsp.idct_put(dest_cb,              dct_linesize, block[4]);
3327                         s->dsp.idct_put(dest_cr,              dct_linesize, block[5]);
3328                         s->dsp.idct_put(dest_cb + dct_offset, dct_linesize, block[6]);
3329                         s->dsp.idct_put(dest_cr + dct_offset, dct_linesize, block[7]);
3330                         if(!s->chroma_x_shift){//Chroma444
3331                             s->dsp.idct_put(dest_cb + 8,              dct_linesize, block[8]);
3332                             s->dsp.idct_put(dest_cr + 8,              dct_linesize, block[9]);
3333                             s->dsp.idct_put(dest_cb + 8 + dct_offset, dct_linesize, block[10]);
3334                             s->dsp.idct_put(dest_cr + 8 + dct_offset, dct_linesize, block[11]);
3335                         }
3336                     }
3337                 }//gray
3338             }
3339         }
3340         if(!readable){ /* the MB was built in the scratchpad: copy it to its place in the picture */
3341             s->dsp.put_pixels_tab[0][0](s->dest[0], dest_y ,   linesize,16);
3342             s->dsp.put_pixels_tab[s->chroma_x_shift][0](s->dest[1], dest_cb, uvlinesize,16 >> s->chroma_y_shift);
3343             s->dsp.put_pixels_tab[s->chroma_x_shift][0](s->dest[2], dest_cr, uvlinesize,16 >> s->chroma_y_shift);
3344         }
3345     }
3346 }
3347
3348 #ifdef CONFIG_ENCODERS
3349 /**
3350  * Drops the coefficients of block n if none of them is larger than 1 in magnitude and the
3351  * score of the +-1 ones stays below |threshold|.
3352  * @param threshold score limit; if negative the DC coefficient may be dropped too, otherwise it is kept
3353  */
3354 static inline void dct_single_coeff_elimination(MpegEncContext *s, int n, int threshold)
3355 {
3356     /* score of a +-1 coefficient, indexed by the number of zeros counted before it */
3357     static const char tab[64]=
3358         {3,2,2,1,1,1,1,1,
3359          1,1,1,1,1,1,1,1,
3360          1,1,1,1,1,1,1,1,
3361          0,0,0,0,0,0,0,0,
3362          0,0,0,0,0,0,0,0,
3363          0,0,0,0,0,0,0,0,
3364          0,0,0,0,0,0,0,0,
3365          0,0,0,0,0,0,0,0};
3366     DCTELEM * const coeffs= s->block[n];
3367     const int last= s->block_last_index[n];
3368     const int skip_dc= threshold >= 0;
3369     const int limit= skip_dc ? threshold : -threshold;
3370     int zero_run=0;
3371     int score=0;
3372     int i;
3373
3374     /* is everything we could set to zero already zero? */
3375     if(last < skip_dc) return;
3376
3377     for(i=0; i<=last; i++){
3378         const int level = ABS(coeffs[ s->intra_scantable.permutated[i] ]);
3379
3380         if(level > 1) return; /* a larger coefficient: leave the block as it is */
3381         if(level < 1){
3382             zero_run++;
3383             continue;
3384         }
3385         if(skip_dc && i==0) continue; /* the DC is kept, so it is not scored */
3386         score += tab[zero_run];
3387         zero_run= 0;
3388     }
3389     if(score >= limit) return;
3390
3391     /* cheap enough: clear everything (except the DC if it is kept) */
3392     for(i=skip_dc; i<=last; i++)
3393         coeffs[ s->intra_scantable.permutated[i] ]= 0;
3394
3395     /* only coeffs[0] can still be nonzero */
3396     s->block_last_index[n]= coeffs[0] ? 0 : -1;
3397 }
3398 /**
3399  * clamps the coefficients of block (scan positions up to last_index, the intra dc excluded) to s->min_qcoeff..s->max_qcoeff
3400  */
3401 static inline void clip_coeffs(MpegEncContext *s, DCTELEM *block, int last_index)
3402 {
3403     const int lo= s->min_qcoeff;
3404     const int hi= s->max_qcoeff;
3405     int clipped=0;
3406     int i;
3407
3408     /* the intra dc is never clipped, so intra blocks start at position 1 */
3409     for(i= s->mb_intra ? 1 : 0; i<=last_index; i++){
3410         const int j= s->intra_scantable.permutated[i];
3411         int level = block[j];
3412
3413         if(level > hi){
3414             level= hi;
3415             clipped++;
3416         }else if(level < lo){
3417             level= lo;
3418             clipped++;
3419         }
3420         block[j]= level;
3421     }
3422
3423     if(!clipped)
3424         return;
3425     /* the clipping is only reported in the simple MB decision mode */
3426     if(s->avctx->mb_decision == FF_MB_DECISION_SIMPLE)
3427         av_log(s->avctx, AV_LOG_INFO, "warning, cliping %d dct coefficents to %d..%d\n", clipped, lo, hi);
3428 }
3429
3430 #endif //CONFIG_ENCODERS
3431
3432 /**
3433  * calls the user's draw_horiz_band() callback, if one is set, for a band of h rows starting at row y
3434  * @param h is the normal height, this will be reduced automatically if needed for the last row
3435  */
3436 void ff_draw_horiz_band(MpegEncContext *s, int y, int h){
3437     AVFrame *src;
3438     int offset[4];
3439
3440     if(!s->avctx->draw_horiz_band)
3441         return;
3442
3443     if(s->picture_structure != PICT_FRAME){
3444         /* field picture: y and h are doubled */
3445         h <<= 1;
3446         y <<= 1;
3447         if(s->first_field && !(s->avctx->slice_flags&SLICE_FLAG_ALLOW_FIELD))
3448             return;
3449     }
3450
3451     h= FFMIN(h, s->height - y);
3452
3453     /* pick the picture that is handed to the callback */
3454     if(s->pict_type==B_TYPE || s->low_delay || (s->avctx->slice_flags&SLICE_FLAG_CODED_ORDER)){
3455         src= (AVFrame*)s->current_picture_ptr;
3456     }else if(s->last_picture_ptr){
3457         src= (AVFrame*)s->last_picture_ptr;
3458     }else{
3459         return;
3460     }
3461
3462     offset[0]= offset[1]= offset[2]= offset[3]= 0;
3463     if(!(s->pict_type==B_TYPE && s->picture_structure == PICT_FRAME && s->out_format != FMT_H264)){
3464         offset[0]= y * s->linesize;
3465         offset[1]=
3466         offset[2]= (y >> s->chroma_y_shift) * s->uvlinesize;
3467     }
3468
3469     emms_c();
3470
3471     s->avctx->draw_horiz_band(s->avctx, src, offset,
3472                               y, s->picture_structure, h);
3473 }
3474 /* initializes s->block_index[] and s->dest[] for MB row s->mb_y; note that all values refer to the position left of s->mb_x (mb_x - 1) */
3475 void ff_init_block_index(MpegEncContext *s){ //FIXME maybe rename
3476     const int linesize  = s->current_picture.linesize[0]; //not s->linesize as this would be wrong for field pics
3477     const int uvlinesize= s->current_picture.linesize[1];
3478     const int mb_x= s->mb_x;
3479     const int mb_y= s->mb_y;
3480     const int chroma_base= s->b8_stride*s->mb_height*2; /* offset added to both chroma indexes */
3481
3482     s->block_index[0]= s->b8_stride*(mb_y*2    ) + mb_x*2 - 2;
3483     s->block_index[1]= s->b8_stride*(mb_y*2    ) + mb_x*2 - 1;
3484     s->block_index[2]= s->b8_stride*(mb_y*2 + 1) + mb_x*2 - 2;
3485     s->block_index[3]= s->b8_stride*(mb_y*2 + 1) + mb_x*2 - 1;
3486     s->block_index[4]= s->mb_stride*(mb_y + 1)                + chroma_base + mb_x - 1;
3487     s->block_index[5]= s->mb_stride*(mb_y + s->mb_height + 2) + chroma_base + mb_x - 1;
3488     //block_index is not used by mpeg2, so it is not affected by chroma_format
3489
3490     s->dest[0] = s->current_picture.data[0] + ((mb_x - 1) << 4);
3491     s->dest[1] = s->current_picture.data[1] + ((mb_x - 1) << (4 - s->chroma_x_shift));
3492     s->dest[2] = s->current_picture.data[2] + ((mb_x - 1) << (4 - s->chroma_x_shift));
3493     if(s->pict_type==B_TYPE && s->avctx->draw_horiz_band && s->picture_structure==PICT_FRAME) return; /* no row offset is added in this case */
3494     s->dest[0] += (mb_y *   linesize) << 4;
3495     s->dest[1] += (mb_y * uvlinesize) << (4 - s->chroma_y_shift);
3496     s->dest[2] += (mb_y * uvlinesize) << (4 - s->chroma_y_shift);
3497 }
3498
3499 #ifdef CONFIG_ENCODERS
3500 /* fills weight[64] for an 8x8 block at ptr: 36*ff_sqrt(count*sum(v*v) - sum(v)^2)/count over the up to 3x3 pixels around each position, clipped to the block */
3501 static void get_vissual_weight(int16_t *weight, uint8_t *ptr, int stride){
3502     int x, y;
3503 //FIXME optimize
3504     for(y=0; y<8; y++){
3505         const int top   = FFMAX(y-1, 0);
3506         const int bottom= FFMIN(8, y+2);
3507         for(x=0; x<8; x++){
3508             const int left = FFMAX(x-1, 0);
3509             const int right= FFMIN(8, x+2);
3510             const int count= (bottom - top)*(right - left); /* number of pixels in the window */
3511             int sum=0, sqr=0, nx, ny;
3512             for(ny=top; ny<bottom; ny++){
3513                 const uint8_t *row= ptr + ny*stride;
3514                 for(nx=left; nx<right; nx++){
3515                     sum += row[nx];
3516                     sqr += row[nx]*row[nx];
3517                 }
3518             }
3519             weight[x + 8*y]= (36*ff_sqrt(count*sqr - sum*sum)) / count;
3520         }
3521     }
3522 }
3523 /* encodes the current MB: gets the source pixels (intra) or their difference to the MC prediction (inter) into s->block[], quantizes the 6 blocks and calls the codec specific MB writer; motion_x/motion_y are only passed on to that writer */
3524 static void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
3525 {
3526     int16_t weight[6][64]; /* only filled and used if quantizer_noise_shaping is set */
3527     DCTELEM orig[6][64]; /* copy of s->block[] taken before dct_quantize, for dct_quantize_refine() */
3528     const int mb_x= s->mb_x;
3529     const int mb_y= s->mb_y;
3530     int i;
3531     int skip_dct[6]; /* nonzero: block i is not quantized and gets block_last_index -1 */
3532     int dct_offset   = s->linesize*8; //default for progressive frames
3533     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
3534     int wrap_y, wrap_c;
3535     
3536     for(i=0; i<6; i++) skip_dct[i]=0;
3537     /* select the quantizer of this MB */
3538     if(s->adaptive_quant){
3539         const int last_qp= s->qscale;
3540         const int mb_xy= mb_x + mb_y*s->mb_stride;
3541
3542         s->lambda= s->lambda_table[mb_xy];
3543         update_qscale(s);
3544     
3545         if(!(s->flags&CODEC_FLAG_QP_RD)){
3546             s->dquant= s->qscale - last_qp;
3547
3548             if(s->out_format==FMT_H263){
3549                 s->dquant= clip(s->dquant, -2, 2); //FIXME RD
3550             
3551                 if(s->codec_id==CODEC_ID_MPEG4){        
3552                     if(!s->mb_intra){
3553                         if(s->pict_type == B_TYPE){
3554                             if(s->dquant&1) /* odd dquant is changed to the even value nearer to 0 */
3555                                 s->dquant= (s->dquant/2)*2;
3556                             if(s->mv_dir&MV_DIRECT)
3557                                 s->dquant= 0;
3558                         }
3559                         if(s->mv_type==MV_TYPE_8X8)
3560                             s->dquant=0;
3561                     }
3562                 }
3563             }
3564         }
3565         ff_set_qscale(s, last_qp + s->dquant);
3566     }else if(s->flags&CODEC_FLAG_QP_RD)
3567         ff_set_qscale(s, s->qscale + s->dquant);
3568     /* locate the source pixels of this MB in the input picture (8x8 chroma per MB) */
3569     wrap_y = s->linesize;
3570     wrap_c = s->uvlinesize;
3571     ptr_y = s->new_picture.data[0] + (mb_y * 16 * wrap_y) + mb_x * 16;
3572     ptr_cb = s->new_picture.data[1] + (mb_y * 8 * wrap_c) + mb_x * 8;
3573     ptr_cr = s->new_picture.data[2] + (mb_y * 8 * wrap_c) + mb_x * 8;
3574     /* the MB reaches beyond width/height: read from copies made by ff_emulated_edge_mc() in edge_emu_buffer instead */
3575     if(mb_x*16+16 > s->width || mb_y*16+16 > s->height){
3576         uint8_t *ebuf= s->edge_emu_buffer + 32;
3577         ff_emulated_edge_mc(ebuf            , ptr_y , wrap_y,16,16,mb_x*16,mb_y*16, s->width   , s->height);
3578         ptr_y= ebuf;
3579         ff_emulated_edge_mc(ebuf+18*wrap_y  , ptr_cb, wrap_c, 8, 8, mb_x*8, mb_y*8, s->width>>1, s->height>>1);
3580         ptr_cb= ebuf+18*wrap_y;
3581         ff_emulated_edge_mc(ebuf+18*wrap_y+8, ptr_cr, wrap_c, 8, 8, mb_x*8, mb_y*8, s->width>>1, s->height>>1);
3582         ptr_cr= ebuf+18*wrap_y+8;
3583     }
3584
3585     if (s->mb_intra) {
3586         if(s->flags&CODEC_FLAG_INTERLACED_DCT){
3587             int progressive_score, interlaced_score;
3588             /* the -400 biases the decision towards progressive dct */
3589             s->interlaced_dct=0;
3590             progressive_score= s->dsp.ildct_cmp[4](s, ptr_y           , NULL, wrap_y, 8) 
3591                               +s->dsp.ildct_cmp[4](s, ptr_y + wrap_y*8, NULL, wrap_y, 8) - 400;
3592
3593             if(progressive_score > 0){
3594                 interlaced_score = s->dsp.ildct_cmp[4](s, ptr_y           , NULL, wrap_y*2, 8) 
3595                                   +s->dsp.ildct_cmp[4](s, ptr_y + wrap_y  , NULL, wrap_y*2, 8);
3596                 if(progressive_score > interlaced_score){
3597                     s->interlaced_dct=1;
3598                     /* the lower luma blocks start 1 line down and every block uses each 2nd line */
3599                     dct_offset= wrap_y;
3600                     wrap_y<<=1;
3601                 }
3602             }
3603         }
3604         /* intra: the blocks hold the source pixels themselves */
3605         s->dsp.get_pixels(s->block[0], ptr_y                 , wrap_y);
3606         s->dsp.get_pixels(s->block[1], ptr_y              + 8, wrap_y);
3607         s->dsp.get_pixels(s->block[2], ptr_y + dct_offset    , wrap_y);
3608         s->dsp.get_pixels(s->block[3], ptr_y + dct_offset + 8, wrap_y);
3609
3610         if(s->flags&CODEC_FLAG_GRAY){
3611             skip_dct[4]= 1;
3612             skip_dct[5]= 1;
3613         }else{
3614             s->dsp.get_pixels(s->block[4], ptr_cb, wrap_c);
3615             s->dsp.get_pixels(s->block[5], ptr_cr, wrap_c);
3616         }
3617     }else{
3618         op_pixels_func (*op_pix)[4];
3619         qpel_mc_func (*op_qpix)[16];
3620         uint8_t *dest_y, *dest_cb, *dest_cr;
3621         /* inter: the prediction is built in s->dest[] and the blocks hold source minus prediction */
3622         dest_y  = s->dest[0];
3623         dest_cb = s->dest[1];
3624         dest_cr = s->dest[2];
3625
3626         if ((!s->no_rounding) || s->pict_type==B_TYPE){
3627             op_pix = s->dsp.put_pixels_tab;
3628             op_qpix= s->dsp.put_qpel_pixels_tab;
3629         }else{
3630             op_pix = s->dsp.put_no_rnd_pixels_tab;
3631             op_qpix= s->dsp.put_no_rnd_qpel_pixels_tab;
3632         }
3633
3634         if (s->mv_dir & MV_DIR_FORWARD) {
3635             MPV_motion(s, dest_y, dest_cb, dest_cr, 0, s->last_picture.data, op_pix, op_qpix);
3636             op_pix = s->dsp.avg_pixels_tab;
3637             op_qpix= s->dsp.avg_qpel_pixels_tab;
3638         }
3639         if (s->mv_dir & MV_DIR_BACKWARD) {
3640             MPV_motion(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.data, op_pix, op_qpix);
3641         }
3642
3643         if(s->flags&CODEC_FLAG_INTERLACED_DCT){
3644             int progressive_score, interlaced_score;
3645             /* same decision as in the intra case, but made on source versus prediction */
3646             s->interlaced_dct=0;
3647             progressive_score= s->dsp.ildct_cmp[0](s, dest_y           , ptr_y           , wrap_y, 8) 
3648                               +s->dsp.ildct_cmp[0](s, dest_y + wrap_y*8, ptr_y + wrap_y*8, wrap_y, 8) - 400;
3649             
3650             if(s->avctx->ildct_cmp == FF_CMP_VSSE) progressive_score -= 400;
3651
3652             if(progressive_score>0){
3653                 interlaced_score = s->dsp.ildct_cmp[0](s, dest_y           , ptr_y           , wrap_y*2, 8) 
3654                                   +s->dsp.ildct_cmp[0](s, dest_y + wrap_y  , ptr_y + wrap_y  , wrap_y*2, 8);
3655             
3656                 if(progressive_score > interlaced_score){
3657                     s->interlaced_dct=1;
3658             
3659                     dct_offset= wrap_y;
3660                     wrap_y<<=1;
3661                 }
3662             }
3663         }
3664         
3665         s->dsp.diff_pixels(s->block[0], ptr_y                 , dest_y                 , wrap_y);
3666         s->dsp.diff_pixels(s->block[1], ptr_y              + 8, dest_y              + 8, wrap_y);
3667         s->dsp.diff_pixels(s->block[2], ptr_y + dct_offset    , dest_y + dct_offset    , wrap_y);
3668         s->dsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8, dest_y + dct_offset + 8, wrap_y);
3669         
3670         if(s->flags&CODEC_FLAG_GRAY){
3671             skip_dct[4]= 1;
3672             skip_dct[5]= 1;
3673         }else{
3674             s->dsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
3675             s->dsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
3676         }
3677         /* pre quantization: if mc_mb_var is small, blocks with a SAD below 20*qscale are not coded */         
3678         if(s->current_picture.mc_mb_var[s->mb_stride*mb_y+ mb_x]<2*s->qscale*s->qscale){
3679             //FIXME optimize
3680             if(s->dsp.sad[1](NULL, ptr_y               , dest_y               , wrap_y, 8) < 20*s->qscale) skip_dct[0]= 1;
3681             if(s->dsp.sad[1](NULL, ptr_y            + 8, dest_y            + 8, wrap_y, 8) < 20*s->qscale) skip_dct[1]= 1;
3682             if(s->dsp.sad[1](NULL, ptr_y +dct_offset   , dest_y +dct_offset   , wrap_y, 8) < 20*s->qscale) skip_dct[2]= 1;
3683             if(s->dsp.sad[1](NULL, ptr_y +dct_offset+ 8, dest_y +dct_offset+ 8, wrap_y, 8) < 20*s->qscale) skip_dct[3]= 1;
3684             if(s->dsp.sad[1](NULL, ptr_cb              , dest_cb              , wrap_c, 8) < 20*s->qscale) skip_dct[4]= 1;
3685             if(s->dsp.sad[1](NULL, ptr_cr              , dest_cr              , wrap_c, 8) < 20*s->qscale) skip_dct[5]= 1;
3686         }
3687     }
3688     /* noise shaping: compute the weights from the source pixels and keep the blocks as they are before dct_quantize */
3689     if(s->avctx->quantizer_noise_shaping){
3690         if(!skip_dct[0]) get_vissual_weight(weight[0], ptr_y                 , wrap_y);
3691         if(!skip_dct[1]) get_vissual_weight(weight[1], ptr_y              + 8, wrap_y);
3692         if(!skip_dct[2]) get_vissual_weight(weight[2], ptr_y + dct_offset    , wrap_y);
3693         if(!skip_dct[3]) get_vissual_weight(weight[3], ptr_y + dct_offset + 8, wrap_y);
3694         if(!skip_dct[4]) get_vissual_weight(weight[4], ptr_cb                , wrap_c);
3695         if(!skip_dct[5]) get_vissual_weight(weight[5], ptr_cr                , wrap_c);
3696         memcpy(orig[0], s->block[0], sizeof(DCTELEM)*64*6);
3697     }
3698             
3699     /* DCT & quantize */
3700     assert(s->out_format!=FMT_MJPEG || s->qscale==8);
3701     {
3702         for(i=0;i<6;i++) {
3703             if(!skip_dct[i]){
3704                 int overflow;
3705                 s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow);
3706             // FIXME we could decide to change to quantizer instead of clipping
3707             // JS: I don't think that would be a good idea it could lower quality instead
3708             //     of improve it. Just INTRADC clipping deserves changes in quantizer
3709                 if (overflow) clip_coeffs(s, s->block[i], s->block_last_index[i]);
3710             }else
3711                 s->block_last_index[i]= -1;
3712         }
3713         if(s->avctx->quantizer_noise_shaping){
3714             for(i=0;i<6;i++) {
3715                 if(!skip_dct[i]){
3716                     s->block_last_index[i] = dct_quantize_refine(s, s->block[i], weight[i], orig[i], i, s->qscale);
3717                 }
3718             }
3719         }
3720         /* single coefficient elimination, inter MBs only: blocks 0-3 use the luma and 4-5 the chroma threshold */
3721         if(s->luma_elim_threshold && !s->mb_intra)
3722             for(i=0; i<4; i++)
3723                 dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
3724         if(s->chroma_elim_threshold && !s->mb_intra)
3725             for(i=4; i<6; i++)
3726                 dct_single_coeff_elimination(s, i, s->chroma_elim_threshold);
3727
3728         if(s->flags & CODEC_FLAG_CBP_RD){
3729             for(i=0;i<6;i++) {
3730                 if(s->block_last_index[i] == -1)
3731                     s->coded_score[i]= INT_MAX/256;
3732             }
3733         }
3734     }
3735     /* gray intra MB: the chroma blocks become dc only blocks with the value 1024/c_dc_scale (rounded) */
3736     if((s->flags&CODEC_FLAG_GRAY) && s->mb_intra){
3737         s->block_last_index[4]=
3738         s->block_last_index[5]= 0;
3739         s->block[4][0]=
3740         s->block[5][0]= (1024 + s->c_dc_scale/2)/ s->c_dc_scale;
3741     }
3742
3743     //non c quantize code returns incorrect block_last_index FIXME
3744     if(s->alternate_scan && s->dct_quantize != dct_quantize_c){
3745         for(i=0; i<6; i++){
3746             int j;
3747             if(s->block_last_index[i]>0){
3748                 for(j=63; j>0; j--){ /* search the last nonzero coefficient in scan order */
3749                     if(s->block[i][ s->intra_scantable.permutated[j] ]) break;
3750                 }
3751                 s->block_last_index[i]= j;
3752             }
3753         }
3754     }
3755
3756     /* huffman encode */
3757     switch(s->codec_id){ //FIXME funct ptr could be slightly faster
3758     case CODEC_ID_MPEG1VIDEO:
3759     case CODEC_ID_MPEG2VIDEO:
3760         mpeg1_encode_mb(s, s->block, motion_x, motion_y); break;
3761 #ifdef CONFIG_RISKY
3762     case CODEC_ID_MPEG4:
3763         mpeg4_encode_mb(s, s->block, motion_x, motion_y); break;
3764     case CODEC_ID_MSMPEG4V2:
3765     case CODEC_ID_MSMPEG4V3:
3766     case CODEC_ID_WMV1:
3767         msmpeg4_encode_mb(s, s->block, motion_x, motion_y); break;
3768     case CODEC_ID_WMV2:
3769          ff_wmv2_encode_mb(s, s->block, motion_x, motion_y); break;
3770     case CODEC_ID_H263:
3771     case CODEC_ID_H263P:
3772     case CODEC_ID_FLV1:
3773     case CODEC_ID_RV10:
3774         h263_encode_mb(s, s->block, motion_x, motion_y); break;
3775 #endif
3776     case CODEC_ID_MJPEG:
3777         mjpeg_encode_mb(s, s->block); break;
3778     default:
3779         assert(0);
3780     }
3781 }
3782
3783 #endif //CONFIG_ENCODERS
3784
3785 void ff_mpeg_flush(AVCodecContext *avctx){
3786     int i;
3787     MpegEncContext *s = avctx->priv_data;
3788     
3789     if(s==NULL || s->picture==NULL) 
3790         return;
3791     
3792     for(i=0; i<MAX_PICTURE_COUNT; i++){
3793        if(s->picture[i].data[0] && (   s->picture[i].type == FF_BUFFER_TYPE_INTERNAL
3794                                     || s->picture[i].type == FF_BUFFER_TYPE_USER))
3795         avctx->release_buffer(avctx, (AVFrame*)&s->picture[i]);
3796     }
3797     s->current_picture_ptr = s->last_picture_ptr = s->next_picture_ptr = NULL;
3798     
3799     s->parse_context.state= -1;
3800     s->parse_context.frame_start_found= 0;
3801     s->parse_context.overread= 0;
3802     s->parse_context.overread_index= 0;
3803     s->parse_context.index= 0;
3804     s->parse_context.last_index= 0;
3805     s->bitstream_buffer_size=0;
3806 }
3807
3808 #ifdef CONFIG_ENCODERS
3809 void ff_copy_bits(PutBitContext *pb, uint8_t *src, int length)
3810 {
3811     const uint16_t *srcw= (uint16_t*)src;
3812     int words= length>>4;
3813     int bits= length&15;
3814     int i;
3815
3816     if(length==0) return;
3817     
3818     if(words < 16){
3819         for(i=0; i<words; i++) put_bits(pb, 16, be2me_16(srcw[i]));
3820     }else if(put_bits_count(pb)&7){
3821         for(i=0; i<words; i++) put_bits(pb, 16, be2me_16(srcw[i]));
3822     }else{
3823         for(i=0; put_bits_count(pb)&31; i++)
3824             put_bits(pb, 8, src[i]);
3825         flush_put_bits(pb);
3826         memcpy(pbBufPtr(pb), src+i, 2*words-i);
3827         skip_put_bytes(pb, 2*words-i);
3828     }
3829         
3830     put_bits(pb, bits, be2me_16(srcw[words])>>(16-bits));
3831 }
3832
3833 static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int type){
3834     int i;
3835
3836     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster then a loop?
3837
3838     /* mpeg1 */
3839     d->mb_skip_run= s->mb_skip_run;
3840     for(i=0; i<3; i++)
3841         d->last_dc[i]= s->last_dc[i];
3842     
3843     /* statistics */
3844     d->mv_bits= s->mv_bits;
3845     d->i_tex_bits= s->i_tex_bits;
3846     d->p_tex_bits= s->p_tex_bits;
3847     d->i_count= s->i_count;
3848     d->f_count= s->f_count;
3849     d->b_count= s->b_count;
3850     d->skip_count= s->skip_count;
3851     d->misc_bits= s->misc_bits;
3852     d->last_bits= 0;
3853
3854     d->mb_skiped= 0;
3855     d->qscale= s->qscale;
3856     d->dquant= s->dquant;
3857 }
3858
3859 static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){
3860     int i;
3861
3862     memcpy(d->mv, s->mv, 2*4*2*sizeof(int)); 
3863     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster then a loop?
3864     
3865     /* mpeg1 */
3866     d->mb_skip_run= s->mb_skip_run;
3867     for(i=0; i<3; i++)
3868         d->last_dc[i]= s->last_dc[i];
3869     
3870     /* statistics */
3871     d->mv_bits= s->mv_bits;
3872     d->i_tex_bits= s->i_tex_bits;
3873     d->p_tex_bits= s->p_tex_bits;
3874     d->i_count= s->i_count;
3875     d->f_count= s->f_count;
3876     d->b_count= s->b_count;
3877     d->skip_count= s->skip_count;
3878     d->misc_bits= s->misc_bits;
3879
3880     d->mb_intra= s->mb_intra;
3881     d->mb_skiped= s->mb_skiped;
3882     d->mv_type= s->mv_type;
3883     d->mv_dir= s->mv_dir;
3884     d->pb= s->pb;
3885     if(s->data_partitioning){
3886         d->pb2= s->pb2;
3887         d->tex_pb= s->tex_pb;
3888     }
3889     d->block= s->block;
3890     for(i=0; i<6; i++)
3891         d->block_last_index[i]= s->block_last_index[i];
3892     d->interlaced_dct= s->interlaced_dct;
3893     d->qscale= s->qscale;
3894 }
3895
3896 static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type, 
3897                            PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2],
3898                            int *dmin, int *next_block, int motion_x, int motion_y)
3899 {
3900     int score;
3901     uint8_t *dest_backup[3];
3902     
3903     copy_context_before_encode(s, backup, type);
3904
3905     s->block= s->blocks[*next_block];
3906     s->pb= pb[*next_block];
3907     if(s->data_partitioning){
3908         s->pb2   = pb2   [*next_block];
3909         s->tex_pb= tex_pb[*next_block];
3910     }
3911     
3912     if(*next_block){
3913         memcpy(dest_backup, s->dest, sizeof(s->dest));
3914         s->dest[0] = s->rd_scratchpad;
3915         s->dest[1] = s->rd_scratchpad + 16*s->linesize;
3916         s->dest[2] = s->rd_scratchpad + 16*s->linesize + 8;
3917         assert(s->linesize >= 32); //FIXME
3918     }
3919
3920     encode_mb(s, motion_x, motion_y);
3921     
3922     score= put_bits_count(&s->pb);
3923     if(s->data_partitioning){
3924         score+= put_bits_count(&s->pb2);
3925         score+= put_bits_count(&s->tex_pb);
3926     }
3927    
3928     if(s->avctx->mb_decision == FF_MB_DECISION_RD){
3929         MPV_decode_mb(s, s->block);
3930
3931         score *= s->lambda2;
3932         score += sse_mb(s) << FF_LAMBDA_SHIFT;
3933     }
3934     
3935     if(*next_block){
3936         memcpy(s->dest, dest_backup, sizeof(s->dest));
3937     }
3938
3939     if(score<*dmin){
3940         *dmin= score;
3941         *next_block^=1;
3942
3943         copy_context_after_encode(best, s, type);
3944     }
3945 }
3946                 
3947 static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride){
3948     uint32_t *sq = squareTbl + 256;
3949     int acc=0;
3950     int x,y;
3951     
3952     if(w==16 && h==16) 
3953         return s->dsp.sse[0](NULL, src1, src2, stride, 16);
3954     else if(w==8 && h==8)
3955         return s->dsp.sse[1](NULL, src1, src2, stride, 8);
3956     
3957     for(y=0; y<h; y++){
3958         for(x=0; x<w; x++){
3959             acc+= sq[src1[x + y*stride] - src2[x + y*stride]];
3960         } 
3961     }
3962     
3963     assert(acc>=0);
3964     
3965     return acc;
3966 }
3967
3968 static int sse_mb(MpegEncContext *s){
3969     int w= 16;
3970     int h= 16;
3971
3972     if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
3973     if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
3974
3975     if(w==16 && h==16)
3976       if(s->avctx->mb_cmp == FF_CMP_NSSE){
3977         return  s->dsp.nsse[0](s, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
3978                +s->dsp.nsse[1](s, s->new_picture.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
3979                +s->dsp.nsse[1](s, s->new_picture.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
3980       }else{
3981         return  s->dsp.sse[0](NULL, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
3982                +s->dsp.sse[1](NULL, s->new_picture.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
3983                +s->dsp.sse[1](NULL, s->new_picture.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
3984       }
3985     else
3986         return  sse(s, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], w, h, s->linesize)
3987                +sse(s, s->new_picture.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize)
3988                +sse(s, s->new_picture.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize);
3989 }
3990
3991 static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){
3992     MpegEncContext *s= arg;
3993
3994     
3995     s->me.pre_pass=1;
3996     s->me.dia_size= s->avctx->pre_dia_size;
3997     s->first_slice_line=1;
3998     for(s->mb_y= s->end_mb_y-1; s->mb_y >= s->start_mb_y; s->mb_y--) {
3999         for(s->mb_x=s->mb_width-1; s->mb_x >=0 ;s->mb_x--) {
4000             ff_pre_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
4001         }
4002         s->first_slice_line=0;
4003     }
4004     
4005     s->me.pre_pass=0;
4006     
4007     return 0;
4008 }
4009
4010 static int estimate_motion_thread(AVCodecContext *c, void *arg){
4011     MpegEncContext *s= arg;
4012
4013     s->me.dia_size= s->avctx->dia_size;
4014     s->first_slice_line=1;
4015     for(s->mb_y= s->start_mb_y; s->mb_y < s->end_mb_y; s->mb_y++) {
4016         s->mb_x=0; //for block init below
4017         ff_init_block_index(s);
4018         for(s->mb_x=0; s->mb_x < s->mb_width; s->mb_x++) {
4019             s->block_index[0]+=2;
4020             s->block_index[1]+=2;
4021             s->block_index[2]+=2;
4022             s->block_index[3]+=2;
4023             
4024             /* compute motion vector & mb_type and store in context */
4025             if(s->pict_type==B_TYPE)
4026                 ff_estimate_b_frame_motion(s, s->mb_x, s->mb_y);
4027             else
4028                 ff_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
4029         }
4030         s->first_slice_line=0;
4031     }
4032     return 0;
4033 }
4034
4035 static int mb_var_thread(AVCodecContext *c, void *arg){
4036     MpegEncContext *s= arg;
4037     int mb_x, mb_y;
4038
4039     for(mb_y=s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
4040         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
4041             int xx = mb_x * 16;
4042             int yy = mb_y * 16;
4043             uint8_t *pix = s->new_picture.data[0] + (yy * s->linesize) + xx;
4044             int varc;
4045             int sum = s->dsp.pix_sum(pix, s->linesize);
4046     
4047             varc = (s->dsp.pix_norm1(pix, s->linesize) - (((unsigned)(sum*sum))>>8) + 500 + 128)>>8;
4048
4049             s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc;
4050             s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
4051             s->me.mb_var_sum_temp    += varc;
4052         }
4053     }
4054     return 0;
4055 }
4056
4057 static void write_slice_end(MpegEncContext *s){
4058     if(s->codec_id==CODEC_ID_MPEG4){
4059         if(s->partitioned_frame){
4060             ff_mpeg4_merge_partitions(s);
4061         }
4062     
4063         ff_mpeg4_stuffing(&s->pb);
4064     }else if(s->out_format == FMT_MJPEG){
4065         ff_mjpeg_stuffing(&s->pb);
4066     }
4067
4068     align_put_bits(&s->pb);
4069     flush_put_bits(&s->pb);
4070 }
4071
4072 static int encode_thread(AVCodecContext *c, void *arg){
4073     MpegEncContext *s= arg;
4074     int mb_x, mb_y, pdif = 0;
4075     int i, j;
4076     MpegEncContext best_s, backup_s;
4077     uint8_t bit_buf[2][3000];
4078     uint8_t bit_buf2[2][3000];
4079     uint8_t bit_buf_tex[2][3000];
4080     PutBitContext pb[2], pb2[2], tex_pb[2];
4081 //printf("%d->%d\n", s->resync_mb_y, s->end_mb_y);
4082
4083     for(i=0; i<2; i++){
4084         init_put_bits(&pb    [i], bit_buf    [i], 3000);
4085         init_put_bits(&pb2   [i], bit_buf2   [i], 3000);
4086         init_put_bits(&tex_pb[i], bit_buf_tex[i], 3000);
4087     }
4088
4089     s->last_bits= put_bits_count(&s->pb);
4090     s->mv_bits=0;
4091     s->misc_bits=0;
4092     s->i_tex_bits=0;
4093     s->p_tex_bits=0;
4094     s->i_count=0;
4095     s->f_count=0;
4096     s->b_count=0;
4097     s->skip_count=0;
4098
4099     for(i=0; i<3; i++){
4100         /* init last dc values */
4101         /* note: quant matrix value (8) is implied here */
4102         s->last_dc[i] = 128 << s->intra_dc_precision;
4103         
4104         s->current_picture_ptr->error[i] = 0;
4105     }
4106     s->mb_skip_run = 0;
4107     memset(s->last_mv, 0, sizeof(s->last_mv));
4108      
4109     s->last_mv_dir = 0;
4110
4111 #ifdef CONFIG_RISKY
4112     switch(s->codec_id){
4113     case CODEC_ID_H263:
4114     case CODEC_ID_H263P:
4115     case CODEC_ID_FLV1:
4116         s->gob_index = ff_h263_get_gob_height(s);
4117         break;
4118     case CODEC_ID_MPEG4:
4119         if(s->partitioned_frame)
4120             ff_mpeg4_init_partitions(s);
4121         break;
4122     }
4123 #endif
4124
4125     s->resync_mb_x=0;
4126     s->resync_mb_y=0; 
4127     s->first_slice_line = 1;
4128     s->ptr_lastgob = s->pb.buf;
4129     for(mb_y= s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
4130 //    printf("row %d at %X\n", s->mb_y, (int)s);
4131         s->mb_x=0;
4132         s->mb_y= mb_y;
4133
4134         ff_set_qscale(s, s->qscale);
4135         ff_init_block_index(s);
4136         
4137         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
4138             const int xy= mb_y*s->mb_stride + mb_x;
4139             int mb_type= s->mb_type[xy];
4140 //            int d;
4141             int dmin= INT_MAX;
4142             int dir;
4143
4144             s->mb_x = mb_x;
4145             ff_update_block_index(s);
4146
4147             /* write gob / video packet header  */
4148 #ifdef CONFIG_RISKY
4149             if(s->rtp_mode){
4150                 int current_packet_size, is_gob_start;
4151                 
4152                 current_packet_size= ((put_bits_count(&s->pb)+7)>>3) - (s->ptr_lastgob - s->pb.buf);
4153                 
4154                 is_gob_start= s->avctx->rtp_payload_size && current_packet_size >= s->avctx->rtp_payload_size && mb_y + mb_x>0; 
4155                 
4156                 if(s->start_mb_y == mb_y && mb_y > 0 && mb_x==0) is_gob_start=1;
4157                 
4158                 switch(s->codec_id){
4159                 case CODEC_ID_H263:
4160                 case CODEC_ID_H263P:
4161                     if(!s->h263_slice_structured)
4162                         if(s->mb_x || s->mb_y%s->gob_index) is_gob_start=0;
4163                     break;
4164                 case CODEC_ID_MPEG2VIDEO:
4165                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
4166                 case CODEC_ID_MPEG1VIDEO:
4167                     if(s->mb_skip_run) is_gob_start=0;
4168                     break;
4169                 }
4170
4171                 if(is_gob_start){
4172                     if(s->start_mb_y != mb_y || mb_x!=0){
4173                         write_slice_end(s);
4174
4175                         if(s->codec_id==CODEC_ID_MPEG4 && s->partitioned_frame){
4176                             ff_mpeg4_init_partitions(s);
4177                         }
4178                     }
4179                 
4180                     assert((put_bits_count(&s->pb)&7) == 0);
4181                     current_packet_size= pbBufPtr(&s->pb) - s->ptr_lastgob;
4182                     
4183                     if(s->avctx->error_rate && s->resync_mb_x + s->resync_mb_y > 0){
4184                         int r= put_bits_count(&s->pb)/8 + s->picture_number + s->codec_id + s->mb_x + s->mb_y;
4185                         int d= 100 / s->avctx->error_rate;
4186                         if(r % d == 0){
4187                             current_packet_size=0;
4188 #ifndef ALT_BITSTREAM_WRITER
4189                             s->pb.buf_ptr= s->ptr_lastgob;
4190 #endif
4191                             assert(pbBufPtr(&s->pb) == s->ptr_lastgob);
4192                         }
4193                     }
4194         
4195                     if (s->avctx->rtp_callback)
4196                         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, current_packet_size, 0);
4197                     
4198                     switch(s->codec_id){
4199                     case CODEC_ID_MPEG4:
4200                         ff_mpeg4_encode_video_packet_header(s);
4201                         ff_mpeg4_clean_buffers(s);
4202                     break;
4203                     case CODEC_ID_MPEG1VIDEO:
4204                     case CODEC_ID_MPEG2VIDEO:
4205                         ff_mpeg1_encode_slice_header(s);
4206                         ff_mpeg1_clean_buffers(s);
4207                     break;
4208                     case CODEC_ID_H263:
4209                     case CODEC_ID_H263P:
4210                         h263_encode_gob_header(s, mb_y);                       
4211                     break;
4212                     }
4213
4214                     if(s->flags&CODEC_FLAG_PASS1){
4215                         int bits= put_bits_count(&s->pb);
4216                         s->misc_bits+= bits - s->last_bits;
4217                         s->last_bits= bits;
4218                     }
4219     
4220                     s->ptr_lastgob += current_packet_size;
4221                     s->first_slice_line=1;
4222                     s->resync_mb_x=mb_x;
4223                     s->resync_mb_y=mb_y;
4224                 }
4225             }
4226 #endif
4227
4228             if(  (s->resync_mb_x   == s->mb_x)
4229                && s->resync_mb_y+1 == s->mb_y){
4230                 s->first_slice_line=0; 
4231             }
4232
4233             s->mb_skiped=0;
4234             s->dquant=0; //only for QP_RD
4235
4236             if(mb_type & (mb_type-1) || (s->flags & CODEC_FLAG_QP_RD)){ // more than 1 MB type possible
4237                 int next_block=0;
4238                 int pb_bits_count, pb2_bits_count, tex_pb_bits_count;
4239
4240                 copy_context_before_encode(&backup_s, s, -1);
4241                 backup_s.pb= s->pb;
4242                 best_s.data_partitioning= s->data_partitioning;
4243                 best_s.partitioned_frame= s->partitioned_frame;
4244                 if(s->data_partitioning){
4245                     backup_s.pb2= s->pb2;
4246                     backup_s.tex_pb= s->tex_pb;
4247                 }
4248
4249                 if(mb_type&CANDIDATE_MB_TYPE_INTER){
4250                     s->mv_dir = MV_DIR_FORWARD;
4251                     s->mv_type = MV_TYPE_16X16;
4252                     s->mb_intra= 0;
4253                     s->mv[0][0][0] = s->p_mv_table[xy][0];
4254                     s->mv[0][0][1] = s->p_mv_table[xy][1];
4255                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER, pb, pb2, tex_pb, 
4256                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
4257                 }
4258                 if(mb_type&CANDIDATE_MB_TYPE_INTER_I){ 
4259                     s->mv_dir = MV_DIR_FORWARD;
4260                     s->mv_type = MV_TYPE_FIELD;
4261                     s->mb_intra= 0;
4262                     for(i=0; i<2; i++){
4263                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
4264                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
4265                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
4266                     }
4267                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER_I, pb, pb2, tex_pb, 
4268                                  &dmin, &next_block, 0, 0);
4269                 }
4270                 if(mb_type&CANDIDATE_MB_TYPE_SKIPED){
4271                     s->mv_dir = MV_DIR_FORWARD;
4272                     s->mv_type = MV_TYPE_16X16;
4273                     s->mb_intra= 0;
4274                     s->mv[0][0][0] = 0;
4275                     s->mv[0][0][1] = 0;
4276                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_SKIPED, pb, pb2, tex_pb, 
4277                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
4278                 }
4279                 if(mb_type&CANDIDATE_MB_TYPE_INTER4V){                 
4280                     s->mv_dir = MV_DIR_FORWARD;
4281                     s->mv_type = MV_TYPE_8X8;
4282                     s->mb_intra= 0;
4283                     for(i=0; i<4; i++){
4284                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
4285                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
4286                     }
4287                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER4V, pb, pb2, tex_pb, 
4288                                  &dmin, &next_block, 0, 0);
4289                 }
4290                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD){
4291                     s->mv_dir = MV_DIR_FORWARD;
4292                     s->mv_type = MV_TYPE_16X16;
4293                     s->mb_intra= 0;
4294                     s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
4295                     s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
4296                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD, pb, pb2, tex_pb, 
4297                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
4298                 }
4299                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD){
4300                     s->mv_dir = MV_DIR_BACKWARD;
4301                     s->mv_type = MV_TYPE_16X16;
4302                     s->mb_intra= 0;
4303                     s->mv[1][0][0] = s->b_back_mv_table[xy][0];
4304                     s->mv[1][0][1] = s->b_back_mv_table[xy][1];
4305                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD, pb, pb2, tex_pb, 
4306                                  &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]);
4307                 }
4308                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR){
4309                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
4310                     s->mv_type = MV_TYPE_16X16;
4311                     s->mb_intra= 0;
4312                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
4313                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
4314                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
4315                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
4316                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR, pb, pb2, tex_pb, 
4317                                  &dmin, &next_block, 0, 0);
4318                 }
4319                 if(mb_type&CANDIDATE_MB_TYPE_DIRECT){
4320                     int mx= s->b_direct_mv_table[xy][0];
4321                     int my= s->b_direct_mv_table[xy][1];
4322                     
4323                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
4324                     s->mb_intra= 0;
4325 #ifdef CONFIG_RISKY
4326                     ff_mpeg4_set_direct_mv(s, mx, my);
4327 #endif
4328                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb, 
4329                                  &dmin, &next_block, mx, my);
4330                 }
4331                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD_I){ 
4332                     s->mv_dir = MV_DIR_FORWARD;
4333                     s->mv_type = MV_TYPE_FIELD;
4334                     s->mb_intra= 0;
4335                     for(i=0; i<2; i++){
4336                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
4337                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
4338                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
4339                     }
4340                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD_I, pb, pb2, tex_pb, 
4341                                  &dmin, &next_block, 0, 0);
4342                 }
4343                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD_I){ 
4344                     s->mv_dir = MV_DIR_BACKWARD;
4345                     s->mv_type = MV_TYPE_FIELD;
4346                     s->mb_intra= 0;
4347                     for(i=0; i<2; i++){
4348                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
4349                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
4350                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
4351                     }
4352                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD_I, pb, pb2, tex_pb, 
4353                                  &dmin, &next_block, 0, 0);
4354                 }
4355                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR_I){ 
4356                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
4357                     s->mv_type = MV_TYPE_FIELD;
4358                     s->mb_intra= 0;
4359                     for(dir=0; dir<2; dir++){
4360                         for(i=0; i<2; i++){
4361                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
4362                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
4363                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
4364                         }
4365                     }
4366                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR_I, pb, pb2, tex_pb, 
4367                                  &dmin, &next_block, 0, 0);
4368                 }
4369                 if(mb_type&CANDIDATE_MB_TYPE_INTRA){
4370                     s->mv_dir = 0;
4371                     s->mv_type = MV_TYPE_16X16;
4372                     s->mb_intra= 1;
4373                     s->mv[0][0][0] = 0;
4374                     s->mv[0][0][1] = 0;
4375                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTRA, pb, pb2, tex_pb, 
4376                                  &dmin, &next_block, 0, 0);
4377                     if(s->h263_pred || s->h263_aic){
4378                         if(best_s.mb_intra)
4379                             s->mbintra_table[mb_x + mb_y*s->mb_stride]=1;
4380                         else
4381                             ff_clean_intra_table_entries(s); //old mode?
4382                     }
4383                 }
4384
4385                 if(s->flags & CODEC_FLAG_QP_RD){
4386                     if(best_s.mv_type==MV_TYPE_16X16 && !(best_s.mv_dir&MV_DIRECT)){
4387                         const int last_qp= backup_s.qscale;
4388                         int dquant, dir, qp, dc[6];
4389                         DCTELEM ac[6][16];
4390                         const int mvdir= (best_s.mv_dir&MV_DIR_BACKWARD) ? 1 : 0;
4391                         
4392                         assert(backup_s.dquant == 0);
4393
4394                         //FIXME intra
4395                         s->mv_dir= best_s.mv_dir;
4396                         s->mv_type = MV_TYPE_16X16;
4397                         s->mb_intra= best_s.mb_intra;
4398                         s->mv[0][0][0] = best_s.mv[0][0][0];
4399                         s->mv[0][0][1] = best_s.mv[0][0][1];
4400                         s->mv[1][0][0] = best_s.mv[1][0][0];
4401                         s->mv[1][0][1] = best_s.mv[1][0][1];
4402                         
4403                         dir= s->pict_type == B_TYPE ? 2 : 1;
4404                         if(last_qp + dir > s->avctx->qmax) dir= -dir;
4405                         for(dquant= dir; dquant<=2 && dquant>=-2; dquant += dir){
4406                             qp= last_qp + dquant;
4407                             if(qp < s->avctx->qmin || qp > s->avctx->qmax)
4408                                 break;
4409                             backup_s.dquant= dquant;
4410                             if(s->mb_intra){
4411                                 for(i=0; i<6; i++){
4412                                     dc[i]= s->dc_val[0][ s->block_index[i] ];
4413                                     memcpy(ac[i], s->ac_val[0][s->block_index[i]], sizeof(DCTELEM)*16);
4414                                 }
4415                             }
4416
4417                             encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb, 
4418                                          &dmin, &next_block, s->mv[mvdir][0][0], s->mv[mvdir][0][1]);
4419                             if(best_s.qscale != qp){
4420                                 if(s->mb_intra){
4421                                     for(i=0; i<6; i++){
4422                                         s->dc_val[0][ s->block_index[i] ]= dc[i];
4423                                         memcpy(s->ac_val[0][s->block_index[i]], ac[i], sizeof(DCTELEM)*16);
4424                                     }
4425                                 }
4426                                 if(dir > 0 && dquant==dir){
4427                                     dquant= 0;
4428                                     dir= -dir;
4429                                 }else
4430                                     break;
4431                             }
4432                         }
4433                         qp= best_s.qscale;
4434                         s->current_picture.qscale_table[xy]= qp;
4435                     }
4436                 }
4437
4438                 copy_context_after_encode(s, &best_s, -1);
4439                 
4440                 pb_bits_count= put_bits_count(&s->pb);
4441                 flush_put_bits(&s->pb);
4442                 ff_copy_bits(&backup_s.pb, bit_buf[next_block^1], pb_bits_count);
4443                 s->pb= backup_s.pb;
4444                 
4445                 if(s->data_partitioning){
4446                     pb2_bits_count= put_bits_count(&s->pb2);
4447                     flush_put_bits(&s->pb2);
4448                     ff_copy_bits(&backup_s.pb2, bit_buf2[next_block^1], pb2_bits_count);
4449                     s->pb2= backup_s.pb2;
4450                     
4451                     tex_pb_bits_count= put_bits_count(&s->tex_pb);
4452                     flush_put_bits(&s->tex_pb);
4453                     ff_copy_bits(&backup_s.tex_pb, bit_buf_tex[next_block^1], tex_pb_bits_count);
4454                     s->tex_pb= backup_s.tex_pb;
4455                 }
4456                 s->last_bits= put_bits_count(&s->pb);
4457                
4458 #ifdef CONFIG_RISKY
4459                 if (s->out_format == FMT_H263 && s->pict_type!=B_TYPE)
4460                     ff_h263_update_motion_val(s);
4461 #endif
4462         
4463                 if(next_block==0){ //FIXME 16 vs linesize16
4464                     s->dsp.put_pixels_tab[0][0](s->dest[0], s->rd_scratchpad                     , s->linesize  ,16);
4465                     s->dsp.put_pixels_tab[1][0](s->dest[1], s->rd_scratchpad + 16*s->linesize    , s->uvlinesize, 8);
4466                     s->dsp.put_pixels_tab[1][0](s->dest[2], s->rd_scratchpad + 16*s->linesize + 8, s->uvlinesize, 8);
4467                 }
4468
4469                 if(s->avctx->mb_decision == FF_MB_DECISION_BITS)
4470                     MPV_decode_mb(s, s->block);
4471             } else {
4472                 int motion_x, motion_y;
4473                 s->mv_type=MV_TYPE_16X16;
4474                 // only one MB-Type possible
4475                 
4476                 switch(mb_type){
4477                 case CANDIDATE_MB_TYPE_INTRA:
4478                     s->mv_dir = 0;
4479                     s->mb_intra= 1;
4480                     motion_x= s->mv[0][0][0] = 0;
4481                     motion_y= s->mv[0][0][1] = 0;
4482                     break;
4483                 case CANDIDATE_MB_TYPE_INTER:
4484                     s->mv_dir = MV_DIR_FORWARD;
4485                     s->mb_intra= 0;
4486                     motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
4487                     motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
4488                     break;
4489                 case CANDIDATE_MB_TYPE_INTER_I:
4490                     s->mv_dir = MV_DIR_FORWARD;
4491                     s->mv_type = MV_TYPE_FIELD;
4492                     s->mb_intra= 0;
4493                     for(i=0; i<2; i++){
4494                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
4495                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
4496                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
4497                     }
4498                     motion_x = motion_y = 0;
4499                     break;
4500                 case CANDIDATE_MB_TYPE_INTER4V:
4501                     s->mv_dir = MV_DIR_FORWARD;
4502                     s->mv_type = MV_TYPE_8X8;
4503                     s->mb_intra= 0;
4504                     for(i=0; i<4; i++){
4505                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
4506                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
4507                     }
4508                     motion_x= motion_y= 0;
4509                     break;
4510                 case CANDIDATE_MB_TYPE_DIRECT:
4511                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
4512                     s->mb_intra= 0;
4513                     motion_x=s->b_direct_mv_table[xy][0];
4514                     motion_y=s->b_direct_mv_table[xy][1];
4515 #ifdef CONFIG_RISKY
4516                     ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
4517 #endif
4518                     break;
4519                 case CANDIDATE_MB_TYPE_BIDIR:
4520                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
4521                     s->mb_intra= 0;
4522                     motion_x=0;
4523                     motion_y=0;
4524                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
4525                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
4526                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
4527                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
4528                     break;
4529                 case CANDIDATE_MB_TYPE_BACKWARD:
4530                     s->mv_dir = MV_DIR_BACKWARD;
4531                     s->mb_intra= 0;
4532                     motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
4533                     motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
4534                     break;
4535                 case CANDIDATE_MB_TYPE_FORWARD:
4536                     s->mv_dir = MV_DIR_FORWARD;
4537                     s->mb_intra= 0;
4538                     motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
4539                     motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
4540 //                    printf(" %d %d ", motion_x, motion_y);
4541                     break;
4542                 case CANDIDATE_MB_TYPE_FORWARD_I:
4543                     s->mv_dir = MV_DIR_FORWARD;
4544                     s->mv_type = MV_TYPE_FIELD;
4545                     s->mb_intra= 0;
4546                     for(i=0; i<2; i++){
4547                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
4548                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
4549                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
4550                     }
4551                     motion_x=motion_y=0;
4552                     break;
4553                 case CANDIDATE_MB_TYPE_BACKWARD_I:
4554                     s->mv_dir = MV_DIR_BACKWARD;
4555                     s->mv_type = MV_TYPE_FIELD;
4556                     s->mb_intra= 0;
4557                     for(i=0; i<2; i++){
4558                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
4559                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
4560                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
4561                     }
4562                     motion_x=motion_y=0;
4563                     break;
4564                 case CANDIDATE_MB_TYPE_BIDIR_I:
4565                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
4566                     s->mv_type = MV_TYPE_FIELD;
4567                     s->mb_intra= 0;
4568                     for(dir=0; dir<2; dir++){
4569                         for(i=0; i<2; i++){
4570                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
4571                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
4572                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
4573                         }
4574                     }
4575                     motion_x=motion_y=0;
4576                     break;
4577                 default:
4578                     motion_x=motion_y=0; //gcc warning fix
4579                     av_log(s->avctx, AV_LOG_ERROR, "illegal MB type\n");
4580                 }
4581
4582                 encode_mb(s, motion_x, motion_y);
4583
4584                 // RAL: Update last macroblock type
4585                 s->last_mv_dir = s->mv_dir;
4586             
4587 #ifdef CONFIG_RISKY
4588                 if (s->out_format == FMT_H263 && s->pict_type!=B_TYPE)
4589                     ff_h263_update_motion_val(s);
4590 #endif
4591                 
4592                 MPV_decode_mb(s, s->block);
4593             }
4594
4595             /* clean the MV table in IPS frames for direct mode in B frames */
4596             if(s->mb_intra /* && I,P,S_TYPE */){
4597                 s->p_mv_table[xy][0]=0;
4598                 s->p_mv_table[xy][1]=0;
4599             }
4600             
4601             if(s->flags&CODEC_FLAG_PSNR){
4602                 int w= 16;
4603                 int h= 16;
4604
4605                 if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
4606                 if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
4607
4608                 s->current_picture_ptr->error[0] += sse(
4609                     s, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
4610                     s->dest[0], w, h, s->linesize);
4611                 s->current_picture_ptr->error[1] += sse(
4612                     s, s->new_picture.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,
4613                     s->dest[1], w>>1, h>>1, s->uvlinesize);
4614                 s->current_picture_ptr->error[2] += sse(
4615                     s, s->new_picture    .data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,
4616                     s->dest[2], w>>1, h>>1, s->uvlinesize);
4617             }
4618             if(s->loop_filter)
4619                 ff_h263_loop_filter(s);
4620 //printf("MB %d %d bits\n", s->mb_x+s->mb_y*s->mb_stride, put_bits_count(&s->pb));
4621         }
4622     }
4623
4624 #ifdef CONFIG_RISKY
4625     //not beautiful here, but we must write it before flushing so it has to be here
4626     if (s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type == I_TYPE)
4627         msmpeg4_encode_ext_header(s);
4628 #endif
4629
4630     write_slice_end(s);
4631
4632     /* Send the last GOB if RTP */    
4633     if (s->avctx->rtp_callback) {
4634         pdif = pbBufPtr(&s->pb) - s->ptr_lastgob;
4635         /* Call the RTP callback to send the last GOB */
4636         emms_c();
4637         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, pdif, 0);
4638     }
4639
4640     return 0;
4641 }
4642
4643 #define MERGE(field) dst->field += src->field; src->field=0
4644 static void merge_context_after_me(MpegEncContext *dst, MpegEncContext *src){
4645     MERGE(me.scene_change_score);
4646     MERGE(me.mc_mb_var_sum_temp);
4647     MERGE(me.mb_var_sum_temp);
4648 }
4649
4650 static void merge_context_after_encode(MpegEncContext *dst, MpegEncContext *src){
4651     int i;
4652
4653     MERGE(dct_count[0]); //note, the other dct vars are not part of the context
4654     MERGE(dct_count[1]);
4655     MERGE(mv_bits);
4656     MERGE(i_tex_bits);
4657     MERGE(p_tex_bits);
4658     MERGE(i_count);
4659     MERGE(f_count);
4660     MERGE(b_count);
4661     MERGE(skip_count);
4662     MERGE(misc_bits);
4663     MERGE(error_count);
4664     MERGE(padding_bug_score);
4665
4666     if(dst->avctx->noise_reduction){
4667         for(i=0; i<64; i++){
4668             MERGE(dct_error_sum[0][i]);
4669             MERGE(dct_error_sum[1][i]);
4670         }
4671     }
4672     
4673     assert(put_bits_count(&src->pb) % 8 ==0);
4674     assert(put_bits_count(&dst->pb) % 8 ==0);
4675     ff_copy_bits(&dst->pb, src->pb.buf, put_bits_count(&src->pb));
4676     flush_put_bits(&dst->pb);
4677 }
4678
4679 static void encode_picture(MpegEncContext *s, int picture_number)
4680 {
4681     int i;
4682     int bits;
4683
4684     s->picture_number = picture_number;
4685     
4686     /* Reset the average MB variance */
4687     s->me.mb_var_sum_temp    =
4688     s->me.mc_mb_var_sum_temp = 0;
4689
4690 #ifdef CONFIG_RISKY
4691     /* we need to initialize some time vars before we can encode b-frames */
4692     // RAL: Condition added for MPEG1VIDEO
4693     if (s->codec_id == CODEC_ID_MPEG1VIDEO || s->codec_id == CODEC_ID_MPEG2VIDEO || (s->h263_pred && !s->h263_msmpeg4))
4694         ff_set_mpeg4_time(s, s->picture_number);  //FIXME rename and use has_b_frames or similar
4695 #endif
4696         
4697     s->me.scene_change_score=0;
4698     
4699 //    s->lambda= s->current_picture_ptr->quality; //FIXME qscale / ... stuff for ME ratedistoration
4700     
4701     if(s->pict_type==I_TYPE){
4702         if(s->msmpeg4_version >= 3) s->no_rounding=1;
4703         else                        s->no_rounding=0;
4704     }else if(s->pict_type!=B_TYPE){
4705         if(s->flipflop_rounding || s->codec_id == CODEC_ID_H263P || s->codec_id == CODEC_ID_MPEG4)
4706             s->no_rounding ^= 1;          
4707     }
4708     
4709     s->mb_intra=0; //for the rate distoration & bit compare functions
4710     for(i=1; i<s->avctx->thread_count; i++){
4711         ff_update_duplicate_context(s->thread_context[i], s);
4712     }
4713
4714     ff_init_me(s);
4715
4716     /* Estimate motion for every MB */
4717     if(s->pict_type != I_TYPE){
4718         if(s->pict_type != B_TYPE && s->avctx->me_threshold==0){
4719             if((s->avctx->pre_me && s->last_non_b_pict_type==I_TYPE) || s->avctx->pre_me==2){
4720                 s->avctx->execute(s->avctx, pre_estimate_motion_thread, (void**)&(s->thread_context[0]), NULL, s->avctx->thread_count);
4721             }
4722         }
4723
4724         s->avctx->execute(s->avctx, estimate_motion_thread, (void**)&(s->thread_context[0]), NULL, s->avctx->thread_count);
4725     }else /* if(s->pict_type == I_TYPE) */{
4726         /* I-Frame */
4727         for(i=0; i<s->mb_stride*s->mb_height; i++)
4728             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
4729         
4730         if(!s->fixed_qscale){
4731             /* finding spatial complexity for I-frame rate control */
4732             s->avctx->execute(s->avctx, mb_var_thread, (void**)&(s->thread_context[0]), NULL, s->avctx->thread_count);
4733         }
4734     }
4735     for(i=1; i<s->avctx->thread_count; i++){
4736         merge_context_after_me(s, s->thread_context[i]);
4737     }
4738     s->current_picture.mc_mb_var_sum= s->current_picture_ptr->mc_mb_var_sum= s->me.mc_mb_var_sum_temp;
4739     s->current_picture.   mb_var_sum= s->current_picture_ptr->   mb_var_sum= s->me.   mb_var_sum_temp;
4740     emms_c();
4741
4742     if(s->me.scene_change_score > s->avctx->scenechange_threshold && s->pict_type == P_TYPE){
4743         s->pict_type= I_TYPE;
4744         for(i=0; i<s->mb_stride*s->mb_height; i++)
4745             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
4746 //printf("Scene change detected, encoding as I Frame %d %d\n", s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
4747     }
4748
4749     if(!s->umvplus){
4750         if(s->pict_type==P_TYPE || s->pict_type==S_TYPE) {
4751             s->f_code= ff_get_best_fcode(s, s->p_mv_table, CANDIDATE_MB_TYPE_INTER);
4752
4753             if(s->flags & CODEC_FLAG_INTERLACED_ME){
4754                 int a,b;
4755                 a= ff_get_best_fcode(s, s->p_field_mv_table[0][0], CANDIDATE_MB_TYPE_INTER_I); //FIXME field_select
4756                 b= ff_get_best_fcode(s, s->p_field_mv_table[1][1], CANDIDATE_MB_TYPE_INTER_I);
4757                 s->f_code= FFMAX(s->f_code, FFMAX(a,b));
4758             }
4759                     
4760             ff_fix_long_p_mvs(s);
4761             ff_fix_long_mvs(s, NULL, 0, s->p_mv_table, s->f_code, CANDIDATE_MB_TYPE_INTER, 0);
4762             if(s->flags & CODEC_FLAG_INTERLACED_ME){
4763                 int j;
4764                 for(i=0; i<2; i++){
4765                     for(j=0; j<2; j++)
4766                         ff_fix_long_mvs(s, s->p_field_select_table[i], j, 
4767                                         s->p_field_mv_table[i][j], s->f_code, CANDIDATE_MB_TYPE_INTER_I, 0);
4768                 }
4769             }
4770         }
4771
4772         if(s->pict_type==B_TYPE){
4773             int a, b;
4774
4775             a = ff_get_best_fcode(s, s->b_forw_mv_table, CANDIDATE_MB_TYPE_FORWARD);
4776             b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, CANDIDATE_MB_TYPE_BIDIR);
4777             s->f_code = FFMAX(a, b);
4778
4779             a = ff_get_best_fcode(s, s->b_back_mv_table, CANDIDATE_MB_TYPE_BACKWARD);
4780             b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, CANDIDATE_MB_TYPE_BIDIR);
4781             s->b_code = FFMAX(a, b);
4782
4783             ff_fix_long_mvs(s, NULL, 0, s->b_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_FORWARD, 1);
4784             ff_fix_long_mvs(s, NULL, 0, s->b_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BACKWARD, 1);
4785             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_BIDIR, 1);
4786             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BIDIR, 1);
4787             if(s->flags & CODEC_FLAG_INTERLACED_ME){
4788                 int dir, j;
4789                 for(dir=0; dir<2; dir++){
4790                     for(i=0; i<2; i++){
4791                         for(j=0; j<2; j++){
4792                             int type= dir ? (CANDIDATE_MB_TYPE_BACKWARD_I|CANDIDATE_MB_TYPE_BIDIR_I) 
4793                                           : (CANDIDATE_MB_TYPE_FORWARD_I |CANDIDATE_MB_TYPE_BIDIR_I);
4794                             ff_fix_long_mvs(s, s->b_field_select_table[dir][i], j, 
4795                                             s->b_field_mv_table[dir][i][j], dir ? s->b_code : s->f_code, type, 1);
4796                         }
4797                     }
4798                 }
4799             }
4800         }
4801     }
4802
4803     if (!s->fixed_qscale) 
4804         s->current_picture.quality = ff_rate_estimate_qscale(s); //FIXME pic_ptr
4805
4806     if(s->adaptive_quant){
4807 #ifdef CONFIG_RISKY
4808         switch(s->codec_id){
4809         case CODEC_ID_MPEG4:
4810             ff_clean_mpeg4_qscales(s);
4811             break;
4812         case CODEC_ID_H263:
4813         case CODEC_ID_H263P:
4814         case CODEC_ID_FLV1:
4815             ff_clean_h263_qscales(s);
4816             break;
4817         }
4818 #endif
4819
4820         s->lambda= s->lambda_table[0];
4821         //FIXME broken
4822     }else
4823         s->lambda= s->current_picture.quality;
4824 //printf("%d %d\n", s->avctx->global_quality, s->current_picture.quality);
4825     update_qscale(s);
4826     
4827     if(s->qscale < 3 && s->max_qcoeff<=128 && s->pict_type==I_TYPE && !(s->flags & CODEC_FLAG_QSCALE)) 
4828         s->qscale= 3; //reduce cliping problems
4829         
4830     if (s->out_format == FMT_MJPEG) {
4831         /* for mjpeg, we do include qscale in the matrix */
4832         s->intra_matrix[0] = ff_mpeg1_default_intra_matrix[0];
4833         for(i=1;i<64;i++){
4834             int j= s->dsp.idct_permutation[i];
4835
4836             s->intra_matrix[j] = CLAMP_TO_8BIT((ff_mpeg1_default_intra_matrix[i] * s->qscale) >> 3);
4837         }
4838         convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16, 
4839                        s->intra_matrix, s->intra_quant_bias, 8, 8);
4840         s->qscale= 8;
4841     }
4842     
4843     //FIXME var duplication
4844     s->current_picture.key_frame= s->pict_type == I_TYPE; //FIXME pic_ptr
4845     s->current_picture.pict_type= s->pict_type;
4846
4847     if(s->current_picture.key_frame)
4848         s->picture_in_gop_number=0;
4849
4850     s->last_bits= put_bits_count(&s->pb);
4851     switch(s->out_format) {
4852     case FMT_MJPEG:
4853         mjpeg_picture_header(s);
4854         break;
4855 #ifdef CONFIG_RISKY
4856     case FMT_H263:
4857         if (s->codec_id == CODEC_ID_WMV2) 
4858             ff_wmv2_encode_picture_header(s, picture_number);
4859         else if (s->h263_msmpeg4) 
4860             msmpeg4_encode_picture_header(s, picture_number);
4861         else if (s->h263_pred)
4862             mpeg4_encode_picture_header(s, picture_number);
4863         else if (s->codec_id == CODEC_ID_RV10) 
4864             rv10_encode_picture_header(s, picture_number);
4865         else if (s->codec_id == CODEC_ID_FLV1)
4866             ff_flv_encode_picture_header(s, picture_number);
4867         else
4868             h263_encode_picture_header(s, picture_number);
4869         break;
4870 #endif
4871     case FMT_MPEG1:
4872         mpeg1_encode_picture_header(s, picture_number);
4873         break;
4874     case FMT_H264:
4875         break;
4876     default:
4877         assert(0);
4878     }
4879     bits= put_bits_count(&s->pb);
4880     s->header_bits= bits - s->last_bits;
4881         
4882     for(i=1; i<s->avctx->thread_count; i++){
4883         update_duplicate_context_after_me(s->thread_context[i], s);
4884     }
4885     s->avctx->execute(s->avctx, encode_thread, (void**)&(s->thread_context[0]), NULL, s->avctx->thread_count);
4886     for(i=1; i<s->avctx->thread_count; i++){
4887         merge_context_after_encode(s, s->thread_context[i]);
4888     }
4889     emms_c();
4890 }
4891
4892 #endif //CONFIG_ENCODERS
4893
4894 static void  denoise_dct_c(MpegEncContext *s, DCTELEM *block){
4895     const int intra= s->mb_intra;
4896     int i;
4897
4898     s->dct_count[intra]++;
4899
4900     for(i=0; i<64; i++){
4901         int level= block[i];
4902
4903         if(level){
4904             if(level>0){
4905                 s->dct_error_sum[intra][i] += level;
4906                 level -= s->dct_offset[intra][i];
4907                 if(level<0) level=0;
4908             }else{
4909                 s->dct_error_sum[intra][i] -= level;
4910                 level += s->dct_offset[intra][i];
4911                 if(level>0) level=0;
4912             }
4913             block[i]= level;
4914         }
4915     }
4916 }
4917
4918 #ifdef CONFIG_ENCODERS
4919
/**
 * Transforms and quantizes one block, choosing the quantized levels with a
 * dynamic programming search that minimizes distortion + lambda * bits.
 *
 * For every coefficient up to the last one that quantizes to a nonzero
 * value, at most two candidate levels are considered. Each candidate is
 * combined with every surviving earlier end position (which fixes the zero
 * run before it) and scored using the VLC length tables of the context.
 *
 * @param s        encoder context; supplies the fdct, scan tables, quant
 *                 matrices, VLC length tables and lambda2, and receives the
 *                 resulting score in s->coded_score[n]
 * @param block    64 samples on entry; transformed in place and overwritten
 *                 with the chosen quantized levels
 * @param n        block number; for intra blocks n < 4 selects y_dc_scale,
 *                 anything else c_dc_scale
 * @param qscale   quantizer; also the index into q_intra_matrix/q_inter_matrix
 * @param overflow set to nonzero when s->max_qcoeff is smaller than the
 *                 bitwise OR of all quantized magnitudes
 * @return scan index of the last nonzero coefficient that was kept; a value
 *         below the first coded index (0 for intra, -1 for inter) means that
 *         no coefficient from that index on was kept
 */
4920 static int dct_quantize_trellis_c(MpegEncContext *s, 
4921                         DCTELEM *block, int n,
4922                         int qscale, int *overflow){
4923     const int *qmat;
4924     const uint8_t *scantable= s->intra_scantable.scantable;
4925     const uint8_t *perm_scantable= s->intra_scantable.permutated;
4926     int max=0;
4927     unsigned int threshold1, threshold2;
4928     int bias=0;
         /* per end position (index i+1 describes the path ending at scan index i):
            zero run before the coefficient, chosen level and accumulated score */
4929     int run_tab[65];
4930     int level_tab[65];
4931     int score_tab[65];
         /* end positions that are still worth continuing from */
4932     int survivor[65];
4933     int survivor_count;
         /* best candidate for the final coded coefficient found so far */
4934     int last_run=0;
4935     int last_level=0;
4936     int last_score= 0;
4937     int last_i;
         /* candidate levels per scan index and how many of them are valid */
4938     int coeff[2][64];
4939     int coeff_count[64];
4940     int qmul, qadd, start_i, last_non_zero, i, dc;
4941     const int esc_length= s->ac_esc_length;
4942     uint8_t * length;
4943     uint8_t * last_length;
4944     const int lambda= s->lambda2 >> (FF_LAMBDA_SHIFT - 6);
4945         
         /* forward transform in place, followed by the optional denoise pass */
4946     s->dsp.fdct (block);
4947     
4948     if(s->dct_error_sum)
4949         s->denoise_dct(s, block);
4950     qmul= qscale*16;
4951     qadd= ((qscale-1)|1)*8;
4952
4953     if (s->mb_intra) {
4954         int q;
4955         if (!s->h263_aic) {
4956             if (n < 4)
4957                 q = s->y_dc_scale;
4958             else
4959                 q = s->c_dc_scale;
4960             q = q << 3;
4961         } else{
4962             /* For AIC we skip quant/dequant of INTRADC */
4963             q = 1 << 3;
4964             qadd=0;
4965         }
4966             
             /* the intra DC is quantized directly here; the search below starts at index 1 */
4967         /* note: block[0] is assumed to be positive */
4968         block[0] = (block[0] + (q >> 1)) / q;
4969         start_i = 1;
4970         last_non_zero = 0;
4971         qmat = s->q_intra_matrix[qscale];
4972         if(s->mpeg_quant || s->out_format == FMT_MPEG1)
4973             bias= 1<<(QMAT_SHIFT-1);
4974         length     = s->intra_ac_vlc_length;
4975         last_length= s->intra_ac_vlc_last_length;
4976     } else {
4977         start_i = 0;
4978         last_non_zero = -1;
4979         qmat = s->q_inter_matrix[qscale];
4980         length     = s->inter_ac_vlc_length;
4981         last_length= s->inter_ac_vlc_last_length;
4982     }
4983     last_i= start_i;
4984
         /* scaled values with |level| <= threshold1 quantize to zero; with
            threshold2 = 2*threshold1 the test |level| > threshold1 becomes the
            single unsigned compare (unsigned)(level+threshold1) > threshold2 */
4985     threshold1= (1<<QMAT_SHIFT) - bias - 1;
4986     threshold2= (threshold1<<1);
4987
         /* scan backwards for the last coefficient that quantizes to nonzero */
4988     for(i=63; i>=start_i; i--) {
4989         const int j = scantable[i];
4990         int level = block[j] * qmat[j];
4991
4992         if(((unsigned)(level+threshold1))>threshold2){
4993             last_non_zero = i;
4994             break;
4995         }
4996     }
4997
         /* build the candidate levels for every scan index up to last_non_zero */
4998     for(i=start_i; i<=last_non_zero; i++) {
4999         const int j = scantable[i];
5000         int level = block[j] * qmat[j];
5001
5002 //        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
5003 //           || bias-level >= (1<<(QMAT_SHIFT - 3))){
5004         if(((unsigned)(level+threshold1))>threshold2){
                 /* candidates: the quantized level and the level one step closer
                    to zero; from here on level holds the quantized magnitude */
5005             if(level>0){
5006                 level= (bias + level)>>QMAT_SHIFT;
5007                 coeff[0][i]= level;
5008                 coeff[1][i]= level-1;
5009 //                coeff[2][k]= level-2;
5010             }else{
5011                 level= (bias - level)>>QMAT_SHIFT;
5012                 coeff[0][i]= -level;
5013                 coeff[1][i]= -level+1;
5014 //                coeff[2][k]= -level+2;
5015             }
                 /* a magnitude of 1 leaves a single candidate, the second one would be 0 */
5016             coeff_count[i]= FFMIN(level, 2);
5017             assert(coeff_count[i]);
5018             max |=level;
5019         }else{
                 /* quantizes to zero: the only nonzero candidate is +1 or -1, taken
                    from the sign of level (assumes a 32 bit int with an arithmetic
                    right shift -- TODO confirm) */
5020             coeff[0][i]= (level>>31)|1;
5021             coeff_count[i]= 1;
5022         }
5023     }
5024     
5025     *overflow= s->max_qcoeff < max; //overflow might have happened
5026     
         /* nothing quantizes to nonzero: clear the coefficients and stop */
5027     if(last_non_zero < start_i){
5028         memset(block + start_i, 0, (64-start_i)*sizeof(DCTELEM));
5029         return last_non_zero;
5030     }
5031
5032     score_tab[start_i]= 0;
5033     survivor[0]= start_i;
5034     survivor_count= 1;
5035     
         /* forward pass: score_tab[i+1] becomes the best score of any path that
            codes a nonzero level at scan index i */
5036     for(i=start_i; i<=last_non_zero; i++){
5037         int level_index, j;
5038         const int dct_coeff= ABS(block[ scantable[i] ]);
5039         const int zero_distoration= dct_coeff*dct_coeff;
5040         int best_score=256*256*256*120;
5041         for(level_index=0; level_index < coeff_count[i]; level_index++){
5042             int distoration;
5043             int level= coeff[level_index][i];
5044             const int alevel= ABS(level);
5045             int unquant_coeff;
5046             
5047             assert(level);
5048
                 /* magnitude this candidate would be reconstructed to */
5049             if(s->out_format == FMT_H263){
5050                 unquant_coeff= alevel*qmul + qadd;
5051             }else{ //MPEG1
5052                 j= s->dsp.idct_permutation[ scantable[i] ]; //FIXME optimize
5053                 if(s->mb_intra){
5054                         unquant_coeff = (int)(  alevel  * qscale * s->intra_matrix[j]) >> 3;
5055                         unquant_coeff =   (unquant_coeff - 1) | 1;
5056                 }else{
5057                         unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[j])) >> 4;
5058                         unquant_coeff =   (unquant_coeff - 1) | 1;
5059                 }
5060                 unquant_coeff<<= 3;
5061             }
5062
                 /* squared error of the candidate minus the squared error of coding zero */
5063             distoration= (unquant_coeff - dct_coeff) * (unquant_coeff - dct_coeff) - zero_distoration;
                 /* levels in -64..63 are looked up in the length tables with a +64
                    offset, everything else is charged the escape length */
5064             level+=64;
5065             if((level&(~127)) == 0){
5066                 for(j=survivor_count-1; j>=0; j--){
5067                     int run= i - survivor[j];
5068                     int score= distoration + length[UNI_AC_ENC_INDEX(run, level)]*lambda;
5069                     score += score_tab[i-run];
5070                     
5071                     if(score < best_score){
5072                         best_score= score;
5073                         run_tab[i+1]= run;
5074                         level_tab[i+1]= level-64;
5075                     }
5076                 }
5077
                     /* H.263 output: also score this candidate as the final coded
                        coefficient, using the last_length table */
5078                 if(s->out_format == FMT_H263){
5079                     for(j=survivor_count-1; j>=0; j--){
5080                         int run= i - survivor[j];
5081                         int score= distoration + last_length[UNI_AC_ENC_INDEX(run, level)]*lambda;
5082                         score += score_tab[i-run];
5083                         if(score < last_score){
5084                             last_score= score;
5085                             last_run= run;
5086                             last_level= level-64;
5087                             last_i= i+1;
5088                         }
5089                     }
5090                 }
5091             }else{
5092                 distoration += esc_length*lambda;
5093                 for(j=survivor_count-1; j>=0; j--){
5094                     int run= i - survivor[j];
5095                     int score= distoration + score_tab[i-run];
5096                     
5097                     if(score < best_score){
5098                         best_score= score;
5099                         run_tab[i+1]= run;
5100                         level_tab[i+1]= level-64;
5101                     }
5102                 }
5103
5104                 if(s->out_format == FMT_H263){
5105                   for(j=survivor_count-1; j>=0; j--){
5106                         int run= i - survivor[j];
5107                         int score= distoration + score_tab[i-run];
5108                         if(score < last_score){
5109                             last_score= score;
5110                             last_run= run;
5111                             last_level= level-64;
5112                             last_i= i+1;
5113                         }
5114                     }
5115                 }
5116             }
5117         }
5118         
5119         score_tab[i+1]= best_score;
5120
             /* drop trailing survivors whose score is worse than the new one; when
                last_non_zero > 27 a margin of lambda is allowed */
5121         //Note: there is a vlc code in mpeg4 which is 1 bit shorter than another one with a shorter run and the same level
5122         if(last_non_zero <= 27){
5123             for(; survivor_count; survivor_count--){
5124                 if(score_tab[ survivor[survivor_count-1] ] <= best_score)
5125                     break;
5126             }
5127         }else{
5128             for(; survivor_count; survivor_count--){
5129                 if(score_tab[ survivor[survivor_count-1] ] <= best_score + lambda)
5130                     break;
5131             }
5132         }
5133
5134         survivor[ survivor_count++ ]= i+1;
5135     }
5136
         /* other output formats: pick the end position afterwards, charging
            2*lambda for every nonzero end index */
5137     if(s->out_format != FMT_H263){
5138         last_score= 256*256*256*120;
5139         for(i= survivor[0]; i<=last_non_zero + 1; i++){
5140             int score= score_tab[i];
5141             if(i) score += lambda*2; //FIXME exacter?
5142
5143             if(score < last_score){
5144                 last_score= score;
5145                 last_i= i;
5146                 last_level= level_tab[i];
5147                 last_run= run_tab[i];
5148             }
5149         }
5150     }
5151
5152     s->coded_score[n] = last_score;
5153     
         /* block[0] is read before the coefficients are cleared below */
5154     dc= ABS(block[0]);
5155     last_non_zero= last_i - 1;
5156     memset(block + start_i, 0, (64-start_i)*sizeof(DCTELEM));
5157     
5158     if(last_non_zero < start_i)
5159         return last_non_zero;
5160
         /* block without separately coded DC in which only index 0 survived:
            decide again between its candidates and dropping it completely */
5161     if(last_non_zero == 0 && start_i == 0){
5162         int best_level= 0;
5163         int best_score= dc * dc;
5164         
5165         for(i=0; i<coeff_count[0]; i++){
5166             int level= coeff[i][0];
5167             int alevel= ABS(level);
5168             int unquant_coeff, score, distortion;
5169
5170             if(s->out_format == FMT_H263){
5171                     unquant_coeff= (alevel*qmul + qadd)>>3;
5172             }else{ //MPEG1
5173                     unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[0])) >> 4;
5174                     unquant_coeff =   (unquant_coeff - 1) | 1;
5175             }
5176             unquant_coeff = (unquant_coeff + 4) >> 3;
5177             unquant_coeff<<= 3 + 3;
5178
5179             distortion= (unquant_coeff - dc) * (unquant_coeff - dc);
5180             level+=64;
5181             if((level&(~127)) == 0) score= distortion + last_length[UNI_AC_ENC_INDEX(0, level)]*lambda;
5182             else                    score= distortion + esc_length*lambda;
5183
5184             if(score < best_score){
5185                 best_score= score;
5186                 best_level= level - 64;
5187             }
5188         }
5189         block[0]= best_level;
5190         s->coded_score[n] = best_score - dc*dc;
5191         if(best_level == 0) return -1;
5192         else                return last_non_zero;
5193     }
5194
         /* walk back from the chosen end position along run_tab/level_tab and
            store the selected levels at their permuted scan positions */
5195     i= last_i;
5196     assert(last_level);
5197
5198     block[ perm_scantable[last_non_zero] ]= last_level;
5199     i -= last_run + 1;
5200     
5201     for(; i>start_i; i -= run_tab[i] + 1){
5202         block[ perm_scantable[i-1] ]= level_tab[i];
5203     }
5204
5205     return last_non_zero;
5206 }
5207
5208 //#define REFINE_STATS 1
5209 static int16_t basis[64][64];
5210
5211 static void build_basis(uint8_t *perm){
5212     int i, j, x, y;
5213     emms_c();
5214     for(i=0; i<8; i++){
5215         for(j=0; j<8; j++){
5216             for(y=0; y<8; y++){
5217                 for(x=0; x<8; x++){
5218                     double s= 0.25*(1<<BASIS_SHIFT);
5219                     int index= 8*i + j;
5220                     int perm_index= perm[index];
5221                     if(i==0) s*= sqrt(0.5);
5222                     if(j==0) s*= sqrt(0.5);
5223                     basis[perm_index][8*x + y]= lrintf(s * cos((M_PI/8.0)*i*(x+0.5)) * cos((M_PI/8.0)*j*(y+0.5)));
5224                 }
5225             }
5226         }
5227     }
5228 }
5229
/**
 * Iteratively refine the already-quantized levels of one 8x8 block.
 *
 * The function keeps a residual rem[] that starts as (dc - orig << RECON_SHIFT)
 * and has the dequantized value of every nonzero level added through
 * s->dsp.add_8x8basis() (presumably rem = reconstruction - original in the
 * basis[] domain -- confirm against add_8x8basis). It then loops: every
 * candidate change of a single level by +1 or -1 is scored as
 *   lambda * (VLC length difference) + s->dsp.try_8x8basis(...)
 * and the best candidate that beats the "no change" score is applied.
 * The loop ends when no candidate improves the score.
 *
 * @param s       encoder context; reads mb_intra, h263_aic, the DC scales,
 *                the quantization matrices, the AC VLC length tables,
 *                lambda2 and avctx->quantizer_noise_shaping
 * @param block   quantized levels, indexed through the permutated intra
 *                scantable; modified in place
 * @param weight  64 per-coefficient weights; overwritten here with values
 *                remapped to 16..63
 * @param orig    64 original values the reconstruction is compared against
 * @param n       block index; selects y_dc_scale (n < 4) or c_dc_scale and
 *                the entry of s->block_last_index[] to start from
 * @param qscale  quantizer; qmul = 2*qscale and qadd = (qscale-1)|1 are
 *                derived from it
 * @return index (in scan order) of the last nonzero level after refinement
 */
5230 static int dct_quantize_refine(MpegEncContext *s, //FIXME breaks denoise?
5231                         DCTELEM *block, int16_t *weight, DCTELEM *orig,
5232                         int n, int qscale){
5233     int16_t rem[64];
5234     DCTELEM d1[64];
5235     const int *qmat;
5236     const uint8_t *scantable= s->intra_scantable.scantable;
5237     const uint8_t *perm_scantable= s->intra_scantable.permutated;
5238 //    unsigned int threshold1, threshold2;
5239 //    int bias=0;
5240     int run_tab[65];
5241     int prev_run=0;
5242     int prev_level=0;
5243     int qmul, qadd, start_i, last_non_zero, i, dc;
5244     uint8_t * length;
5245     uint8_t * last_length;
5246     int lambda;
5247     int rle_index, run, q, sum;
/* Debug-only counters; compiled in only when REFINE_STATS is defined. */
5248 #ifdef REFINE_STATS
5249 static int count=0;
5250 static int after_last=0;
5251 static int to_zero=0;
5252 static int from_zero=0;
5253 static int raise=0;
5254 static int lower=0;
5255 static int messed_sign=0;
5256 #endif
5257
    /* Build the basis table lazily: basis[0][0] is still 0 before the first call. */
5258     if(basis[0][0] == 0)
5259         build_basis(s->dsp.idct_permutation);
5260     
    /* Dequantization used throughout this function: coeff = qmul*level +/- qadd. */
5261     qmul= qscale*2;
5262     qadd= (qscale-1)|1;
5263     if (s->mb_intra) {
5264         if (!s->h263_aic) {
5265             if (n < 4)
5266                 q = s->y_dc_scale;
5267             else
5268                 q = s->c_dc_scale;
5269         } else{
5270             /* For AIC we skip quant/dequant of INTRADC */
5271             q = 1;
5272             qadd=0;
5273         }
5274         q <<= RECON_SHIFT-3;
5275         /* note: block[0] is assumed to be positive */
5276         dc= block[0]*q;
5277 //        block[0] = (block[0] + (q >> 1)) / q;
        /* Intra: the DC level is handled separately, AC search starts at index 1. */
5278         start_i = 1;
5279         qmat = s->q_intra_matrix[qscale];
5280 //        if(s->mpeg_quant || s->out_format == FMT_MPEG1)
5281 //            bias= 1<<(QMAT_SHIFT-1);
5282         length     = s->intra_ac_vlc_length;
5283         last_length= s->intra_ac_vlc_last_length;
5284     } else {
5285         dc= 0;
5286         start_i = 0;
5287         qmat = s->q_inter_matrix[qscale];
5288         length     = s->inter_ac_vlc_length;
5289         last_length= s->inter_ac_vlc_last_length;
5290     }
5291     last_non_zero = s->block_last_index[n];
5292
5293 #ifdef REFINE_STATS
5294 {START_TIMER
5295 #endif
    /* Seed the residual: scaled DC plus half of 1<<RECON_SHIFT, minus the scaled original. */
5296     dc += (1<<(RECON_SHIFT-1));
5297     for(i=0; i<64; i++){
5298         rem[i]= dc - (orig[i]<<RECON_SHIFT); //FIXME  use orig directly instead of copying to rem[]
5299     }
5300 #ifdef REFINE_STATS
5301 STOP_TIMER("memset rem[]")}
5302 #endif
    /* Remap every weight in place (16..63 per the note below) and sum the squares. */
5303     sum=0;
5304     for(i=0; i<64; i++){
5305         int one= 36;
5306         int qns=4;
5307         int w;
5308
5309         w= ABS(weight[i]) + qns*one;
5310         w= 15 + (48*qns*one + w/2)/w; // 16 .. 63
5311
5312         weight[i] = w;
5313 //        w=weight[i] = (63*qns + (w/2)) / w;
5314          
5315         assert(w>0);
5316         assert(w<(1<<6));
5317         sum += w*w;
5318     }
    /* Rate multiplier: lambda2 scaled by the sum of squared weights. */
5319     lambda= sum*(uint64_t)s->lambda2 >> (FF_LAMBDA_SHIFT - 6 + 6 + 6 + 6);
5320 #ifdef REFINE_STATS
5321 {START_TIMER
5322 #endif
    /* Record the zero-run before each nonzero level in run_tab[] and add the
       dequantized value of each nonzero level into rem[]. */
5323     run=0;
5324     rle_index=0;
5325     for(i=start_i; i<=last_non_zero; i++){
5326         int j= perm_scantable[i];
5327         const int level= block[j];
5328         int coeff;
5329         
5330         if(level){
5331             if(level<0) coeff= qmul*level - qadd;
5332             else        coeff= qmul*level + qadd;
5333             run_tab[rle_index++]=run;
5334             run=0;
5335
5336             s->dsp.add_8x8basis(rem, basis[j], coeff);
5337         }else{
5338             run++;
5339         }
5340     }
5341 #ifdef REFINE_STATS
5342 if(last_non_zero>0){
5343 STOP_TIMER("init rem[]")
5344 }
5345 }
5346
5347 {START_TIMER
5348 #endif
    /* Main search: each pass applies at most one +/-1 change, the best one found. */
5349     for(;;){
        /* Baseline: score of the current state with a zero change. */
5350         int best_score=s->dsp.try_8x8basis(rem, weight, basis[0], 0);
5351         int best_coeff=0;
5352         int best_change=0;
5353         int run2, best_unquant_change=0, analyze_gradient;
5354 #ifdef REFINE_STATS
5355 {START_TIMER
5356 #endif
5357         analyze_gradient = last_non_zero > 2 || s->avctx->quantizer_noise_shaping >= 3;
5358
        /* d1[] = forward DCT of the weighted residual; its sign is consulted below
           before a zero level is turned into +/-1. */
5359         if(analyze_gradient){
5360 #ifdef REFINE_STATS
5361 {START_TIMER
5362 #endif
5363             for(i=0; i<64; i++){
5364                 int w= weight[i];
5365             
5366                 d1[i] = (rem[i]*w*w + (1<<(RECON_SHIFT+12-1)))>>(RECON_SHIFT+12);
5367             }
5368 #ifdef REFINE_STATS
5369 STOP_TIMER("rem*w*w")}
5370 {START_TIMER
5371 #endif
5372             s->dsp.fdct(d1);
5373 #ifdef REFINE_STATS
5374 STOP_TIMER("dct")}
5375 #endif
5376         }
5377
        /* Intra only (start_i == 1): try DC level +/-1; no bit-cost term is added here. */
5378         if(start_i){
5379             const int level= block[0];
5380             int change, old_coeff;
5381
5382             assert(s->mb_intra);
5383             
5384             old_coeff= q*level;
5385             
5386             for(change=-1; change<=1; change+=2){
5387                 int new_level= level + change;
5388                 int score, new_coeff;
5389                 
5390                 new_coeff= q*new_level;
5391                 if(new_coeff >= 2048 || new_coeff < 0)
5392                     continue;
5393
5394                 score= s->dsp.try_8x8basis(rem, weight, basis[0], new_coeff - old_coeff);
5395                 if(score<best_score){
5396                     best_score= score;
5397                     best_coeff= 0;
5398                     best_change= change;
5399                     best_unquant_change= new_coeff - old_coeff;
5400                 }
5401             }
5402         }
5403         
        /* run     = zeros seen since the previous nonzero level,
           run2    = zeros remaining before the next nonzero level (from run_tab[]),
           prev_*  = run/level (+64 biased, 0 if out of table range) of the previous nonzero level. */
5404         run=0;
5405         rle_index=0;
5406         run2= run_tab[rle_index++];
5407         prev_level=0;
5408         prev_run=0;
5409
5410         for(i=start_i; i<64; i++){
5411             int j= perm_scantable[i];
5412             const int level= block[j];
5413             int change, old_coeff;
5414
            /* Below noise-shaping level 3, look at most one position past the last nonzero level. */
5415             if(s->avctx->quantizer_noise_shaping < 3 && i > last_non_zero + 1)
5416                 break;
5417
5418             if(level){
5419                 if(level<0) old_coeff= qmul*level - qadd;
5420                 else        old_coeff= qmul*level + qadd;
5421                 run2= run_tab[rle_index++]; //FIXME ! maybe after last
5422             }else{
5423                 old_coeff=0;
5424                 run2--;
5425                 assert(run2>=0 || i >= last_non_zero );
5426             }
5427             
5428             for(change=-1; change<=1; change+=2){
5429                 int new_level= level + change;
5430                 int score, new_coeff, unquant_change;
5431                 
5432                 score=0;
                /* Below noise-shaping level 2 only magnitude-reducing changes are tried. */
5433                 if(s->avctx->quantizer_noise_shaping < 2 && ABS(new_level) > ABS(level))
5434                    continue;
5435
5436                 if(new_level){
5437                     if(new_level<0) new_coeff= qmul*new_level - qadd;
5438                     else            new_coeff= qmul*new_level + qadd;
5439                     if(new_coeff >= 2048 || new_coeff <= -2048)
5440                         continue;
5441                     //FIXME check for overflow
5442                     
5443                     if(level){
                        /* Nonzero -> nonzero: same run, different level. length[] is used
                           before the last nonzero position, last_length[] at it. */
5444                         if(level < 63 && level > -63){
5445                             if(i < last_non_zero)
5446                                 score +=   length[UNI_AC_ENC_INDEX(run, new_level+64)]
5447                                          - length[UNI_AC_ENC_INDEX(run, level+64)];
5448                             else
5449                                 score +=   last_length[UNI_AC_ENC_INDEX(run, new_level+64)]
5450                                          - last_length[UNI_AC_ENC_INDEX(run, level+64)];
5451                         }
5452                     }else{
                        /* Zero -> +/-1: a new coefficient is inserted. */
5453                         assert(ABS(new_level)==1);
5454                         
                        /* Skip when the gradient is nonzero and has the same sign as new_level. */
5455                         if(analyze_gradient){
5456                             int g= d1[ scantable[i] ];
5457                             if(g && (g^new_level) >= 0)
5458                                 continue;
5459                         }
5460
5461                         if(i < last_non_zero){
                            /* The run leading to the next nonzero level is split into
                               (run) for the new coefficient and (run2) for the next one. */
5462                             int next_i= i + run2 + 1;
5463                             int next_level= block[ perm_scantable[next_i] ] + 64;
5464                             
5465                             if(next_level&(~127))
5466                                 next_level= 0;
5467
5468                             if(next_i < last_non_zero)
5469                                 score +=   length[UNI_AC_ENC_INDEX(run, 65)]
5470                                          + length[UNI_AC_ENC_INDEX(run2, next_level)]
5471                                          - length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
5472                             else
5473                                 score +=  length[UNI_AC_ENC_INDEX(run, 65)]
5474                                         + last_length[UNI_AC_ENC_INDEX(run2, next_level)]
5475                                         - last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
5476                         }else{
                            /* The new coefficient becomes the last one; the previous nonzero
                               level switches from the last_length[] to the length[] table. */
5477                             score += last_length[UNI_AC_ENC_INDEX(run, 65)];
5478                             if(prev_level){
5479                                 score +=  length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
5480                                         - last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
5481                             }
5482                         }
5483                     }
5484                 }else{
                    /* +/-1 -> zero: a coefficient is removed and its run merges with the next one. */
5485                     new_coeff=0;
5486                     assert(ABS(level)==1);
5487
5488                     if(i < last_non_zero){
5489                         int next_i= i + run2 + 1;
5490                         int next_level= block[ perm_scantable[next_i] ] + 64;
5491                             
5492                         if(next_level&(~127))
5493                             next_level= 0;
5494
5495                         if(next_i < last_non_zero)
5496                             score +=   length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
5497                                      - length[UNI_AC_ENC_INDEX(run2, next_level)]
5498                                      - length[UNI_AC_ENC_INDEX(run, 65)];
5499                         else
5500                             score +=   last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
5501                                      - last_length[UNI_AC_ENC_INDEX(run2, next_level)]
5502                                      - length[UNI_AC_ENC_INDEX(run, 65)];
5503                     }else{
                        /* The removed coefficient was the last one; the previous nonzero
                           level switches from the length[] to the last_length[] table. */
5504                         score += -last_length[UNI_AC_ENC_INDEX(run, 65)];
5505                         if(prev_level){
5506                             score +=  last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
5507                                     - length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
5508                         }
5509                     }
5510                 }
5511                 
                /* Total score = lambda * table-length delta + result of try_8x8basis()
                   for the change in the dequantized value. */
5512                 score *= lambda;
5513
5514                 unquant_change= new_coeff - old_coeff;
5515                 assert((score < 100*lambda && score > -100*lambda) || lambda==0);
5516                 
5517                 score+= s->dsp.try_8x8basis(rem, weight, basis[j], unquant_change);
5518                 if(score<best_score){
5519                     best_score= score;
5520                     best_coeff= i;
5521                     best_change= change;
5522                     best_unquant_change= unquant_change;
5523                 }
5524             }
5525             if(level){
5526                 prev_level= level + 64;
5527                 if(prev_level&(~127))
5528                     prev_level= 0;
5529                 prev_run= run;
5530                 run=0;
5531             }else{
5532                 run++;
5533             }
5534         }
5535 #ifdef REFINE_STATS
5536 STOP_TIMER("iterative step")}
5537 #endif
5538
        /* Apply the winning change, or stop when nothing beat the baseline. */
5539         if(best_change){
5540             int j= perm_scantable[ best_coeff ];
5541             
5542             block[j] += best_change;
5543             
5544             if(best_coeff > last_non_zero){
5545                 last_non_zero= best_coeff;
5546                 assert(block[j]);
5547 #ifdef REFINE_STATS
5548 after_last++;
5549 #endif
5550             }else{
5551 #ifdef REFINE_STATS
5552 if(block[j]){
5553     if(block[j] - best_change){
5554         if(ABS(block[j]) > ABS(block[j] - best_change)){
5555             raise++;
5556         }else{
5557             lower++;
5558         }
5559     }else{
5560         from_zero++;
5561     }
5562 }else{
5563     to_zero++;
5564 }
5565 #endif
                /* The change may have zeroed the last level: walk back to the new last nonzero one. */
5566                 for(; last_non_zero>=start_i; last_non_zero--){
5567                     if(block[perm_scantable[last_non_zero]])
5568                         break;
5569                 }
5570             }
5571 #ifdef REFINE_STATS
5572 count++;
5573 if(256*256*256*64 % count == 0){
5574     printf("after_last:%d to_zero:%d from_zero:%d raise:%d lower:%d sign:%d xyp:%d/%d/%d\n", after_last, to_zero, from_zero, raise, lower, messed_sign, s->mb_x, s->mb_y, s->picture_number);
5575 }
5576 #endif
            /* Rebuild run_tab[] for the modified block. The loop-local j below shadows
               the outer j, which still names the changed coefficient afterwards. */
5577             run=0;
5578             rle_index=0;
5579             for(i=start_i; i<=last_non_zero; i++){
5580                 int j= perm_scantable[i];
5581                 const int level= block[j];
5582         
5583                  if(level){
5584                      run_tab[rle_index++]=run;
5585                      run=0;
5586                  }else{
5587                      run++;
5588                  }
5589             }
5590             
            /* Fold the applied change into the residual for the next pass. */
5591             s->dsp.add_8x8basis(rem, basis[j], best_unquant_change);
5592         }else{
5593             break;
5594         }
5595     }
5596 #ifdef REFINE_STATS
5597 if(last_non_zero>0){
5598 STOP_TIMER("iterative search")
5599 }
5600 }
5601 #endif
5602
5603     return last_non_zero;
5604 }
5605
5606 static int dct_quantize_c(MpegEncContext *s, 
5607                         DCTELEM *block, int n,
5608                         int qscale, int *overflow)
5609 {
5610     int i, j, level, last_non_zero, q, start_i;
5611     const int *qmat;
5612     const uint8_t *scantable= s->intra_scantable.scantable;
5613     int bias;
5614     int max=0;
5615     unsigned int threshold1, threshold2;
5616
5617     s->dsp.fdct (block);
5618
5619     if(s->dct_error_sum)
5620         s->denoise_dct(s, block);
5621
5622     if (s->mb_intra) {
5623         if (!s->h263_aic) {
5624             if (n < 4)
5625                 q = s->y_dc_scale;
5626             else
5627                 q = s->c_dc_scale;
5628             q = q << 3;
5629         } else
5630             /* For AIC we skip quant/dequant of INTRADC */
5631             q = 1 << 3;
5632             
5633         /* note: block[0] is assumed to be positive */
5634         block[0] = (block[0] + (q >> 1)) / q;
5635         start_i = 1;
5636         last_non_zero = 0;
5637         qmat = s->q_intra_matrix[qscale];
5638         bias= s->intra_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
5639     } else {
5640         start_i = 0;
5641         last_non_zero = -1;
5642         qmat = s->q_inter_matrix[qscale];
5643         bias= s->inter_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
5644     }
5645     threshold1= (1<<QMAT_SHIFT) - bias - 1;
5646     threshold2= (threshold1<<1);
5647     for(i=63;i>=start_i;i--) {
5648         j = scantable[i];
5649         level = block[j] * qmat[j];
5650
5651         if(((unsigned)(level+threshold1))>threshold2){
5652             last_non_zero = i;
5653             break;
5654         }else{
5655             block[j]=0;
5656         }
5657     }
5658     for(i=start_i; i<=last_non_zero; i++) {
5659         j = scantable[i];
5660         level = block[j] * qmat[j];
5661
5662 //        if(   bias+level >= (1<<QMAT_SHIFT)
5663 //           || bias-level >= (1<<QMAT_SHIFT)){
5664         if(((unsigned)(level+threshold1))>threshold2){
5665             if(level>0){
5666                 level= (bias + level)>>QMAT_SHIFT;
5667                 block[j]= level;
5668             }else{
5669                 level= (bias - level)>>QMAT_SHIFT;
5670                 block[j]= -level;
5671             }
5672             max |=level;
5673         }else{
5674             block[j]=0;
5675         }
5676     }
5677     *overflow= s->max_qcoeff < max; //overflow might have happend
5678     
5679     /* we need this permutation so that we correct the IDCT, we only permute the !=0 elements */
5680     if (s->dsp.idct_permutation_type != FF_NO_IDCT_PERM)
5681         ff_block_permute(block, s->dsp.idct_permutation, scantable, last_non_zero);
5682
5683     return last_non_zero;
5684 }
5685
5686 #endif //CONFIG_ENCODERS
5687
5688 static void dct_unquantize_mpeg1_intra_c(MpegEncContext *s, 
5689                                    DCTELEM *block, int n, int qscale)
5690 {
5691     int i, level, nCoeffs;
5692     const uint16_t *quant_matrix;
5693
5694     nCoeffs= s->block_last_index[n];
5695     
5696     if (n < 4) 
5697         block[0] = block[0] * s->y_dc_scale;
5698     else
5699         block[0] = block[0] * s->c_dc_scale;
5700     /* XXX: only mpeg1 */
5701     quant_matrix = s->intra_matrix;
5702     for(i=1;i<=nCoeffs;i++) {
5703         int j= s->intra_scantable.permutated[i];
5704         level = block[j];
5705         if (level) {
5706             if (level < 0) {
5707                 level = -level;
5708                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
5709                 level = (level - 1) | 1;
5710                 level = -level;
5711             } else {
5712                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
5713                 level = (level - 1) | 1;
5714             }
5715             block[j] = level;
5716         }
5717     }
5718 }
5719
5720 static void dct_unquantize_mpeg1_inter_c(MpegEncContext *s, 
5721                                    DCTELEM *block, int n, int qscale)
5722 {
5723     int i, level, nCoeffs;
5724     const uint16_t *quant_matrix;
5725
5726     nCoeffs= s->block_last_index[n];
5727     
5728     quant_matrix = s->inter_matrix;
5729     for(i=0; i<=nCoeffs; i++) {
5730         int j= s->intra_scantable.permutated[i];
5731         level = block[j];
5732         if (level) {
5733             if (level < 0) {
5734                 level = -level;
5735                 level = (((level << 1) + 1) * qscale *
5736                          ((int) (quant_matrix[j]))) >> 4;
5737                 level = (level - 1) | 1;
5738                 level = -level;
5739             } else {
5740                 level = (((level << 1) + 1) * qscale *
5741                          ((int) (quant_matrix[j]))) >> 4;
5742                 level = (level - 1) | 1;
5743             }
5744             block[j] = level;
5745         }
5746     }
5747 }
5748
5749 static void dct_unquantize_mpeg2_intra_c(MpegEncContext *s, 
5750                                    DCTELEM *block, int n, int qscale)
5751 {
5752     int i, level, nCoeffs;
5753     const uint16_t *quant_matrix;
5754
5755     if(s->alternate_scan) nCoeffs= 63;
5756     else nCoeffs= s->block_last_index[n];
5757     
5758     if (n < 4) 
5759         block[0] = block[0] * s->y_dc_scale;
5760     else
5761         block[0] = block[0] * s->c_dc_scale;
5762     quant_matrix = s->intra_matrix;
5763     for(i=1;i<=nCoeffs;i++) {
5764         int j= s->intra_scantable.permutated[i];
5765         level = block[j];
5766         if (level) {
5767             if (level < 0) {
5768                 level = -level;
5769                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
5770                 level = -level;
5771             } else {
5772                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
5773             }
5774             block[j] = level;
5775         }
5776     }
5777 }
5778
5779 static void dct_unquantize_mpeg2_inter_c(MpegEncContext *s, 
5780                                    DCTELEM *block, int n, int qscale)
5781 {
5782     int i, level, nCoeffs;
5783     const uint16_t *quant_matrix;
5784     int sum=-1;
5785
5786     if(s->alternate_scan) nCoeffs= 63;
5787     else nCoeffs= s->block_last_index[n];
5788     
5789     quant_matrix = s->inter_matrix;
5790     for(i=0; i<=nCoeffs; i++) {
5791         int j= s->intra_scantable.permutated[i];
5792         level = block[j];
5793         if (level) {
5794             if (level < 0) {
5795                 level = -level;
5796                 level = (((level << 1) + 1) * qscale *
5797                          ((int) (quant_matrix[j]))) >> 4;
5798                 level = -level;
5799             } else {
5800                 level = (((level << 1) + 1) * qscale *
5801                          ((int) (quant_matrix[j]))) >> 4;
5802             }
5803             block[j] = level;
5804             sum+=level;
5805         }
5806     }
5807     block[63]^=sum&1;
5808 }
5809
5810 static void dct_unquantize_h263_intra_c(MpegEncContext *s, 
5811                                   DCTELEM *block, int n, int qscale)
5812 {
5813     int i, level, qmul, qadd;
5814     int nCoeffs;
5815     
5816     assert(s->block_last_index[n]>=0);
5817     
5818     qmul = qscale << 1;
5819     
5820     if (!s->h263_aic) {
5821         if (n < 4) 
5822             block[0] = block[0] * s->y_dc_scale;
5823         else
5824             block[0] = block[0] * s->c_dc_scale;
5825         qadd = (qscale - 1) | 1;
5826     }else{
5827         qadd = 0;
5828     }
5829     if(s->ac_pred)
5830         nCoeffs=63;
5831     else
5832         nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
5833
5834     for(i=1; i<=nCoeffs; i++) {
5835         level = block[i];
5836         if (level) {
5837             if (level < 0) {
5838                 level = level * qmul - qadd;
5839             } else {
5840                 level = level * qmul + qadd;
5841             }
5842             block[i] = level;
5843         }
5844     }
5845 }
5846
5847 static void dct_unquantize_h263_inter_c(MpegEncContext *s, 
5848                                   DCTELEM *block, int n, int qscale)
5849 {
5850     int i, level, qmul, qadd;
5851     int nCoeffs;
5852     
5853     assert(s->block_last_index[n]>=0);
5854     
5855     qadd = (qscale - 1) | 1;
5856     qmul = qscale << 1;
5857     
5858     nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
5859
5860     for(i=0; i<=nCoeffs; i++) {
5861         level = block[i];
5862         if (level) {
5863             if (level < 0) {
5864                 level = level * qmul - qadd;
5865             } else {
5866                 level = level * qmul + qadd;
5867             }
5868             block[i] = level;
5869         }
5870     }
5871 }
5872
5873 static void dct_unquantize_h261_intra_c(MpegEncContext *s, 
5874                                   DCTELEM *block, int n, int qscale)
5875 {
5876     int i, level, even;
5877     int nCoeffs;
5878     
5879     assert(s->block_last_index[n]>=0);
5880     
5881     if (n < 4) 
5882         block[0] = block[0] * s->y_dc_scale;
5883     else
5884         block[0] = block[0] * s->c_dc_scale;
5885     even = (qscale & 1)^1;
5886     nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
5887
5888     for(i=1; i<=nCoeffs; i++){
5889         level = block[i];
5890         if (level){
5891             if (level < 0){
5892                 level = qscale * ((level << 1) - 1) + even;
5893             }else{
5894                 level = qscale * ((level << 1) + 1) - even;
5895             }
5896         }
5897         block[i] = level;
5898     }
5899 }
5900
5901 static void dct_unquantize_h261_inter_c(MpegEncContext *s, 
5902                                   DCTELEM *block, int n, int qscale)
5903 {
5904     int i, level, even;
5905     int nCoeffs;
5906     
5907     assert(s->block_last_index[n]>=0);
5908
5909     even = (qscale & 1)^1;
5910     
5911     nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
5912
5913     for(i=0; i<=nCoeffs; i++){
5914         level = block[i];
5915         if (level){
5916             if (level < 0){
5917                 level = qscale * ((level << 1) - 1) + even;
5918             }else{
5919                 level = qscale * ((level << 1) + 1) - even;
5920             }
5921         }
5922         block[i] = level;
5923     }
5924 }
5925
5926 static const AVOption mpeg4_options[] =
5927 {
5928     AVOPTION_CODEC_INT("bitrate", "desired video bitrate", bit_rate, 4, 240000000, 800000),
5929     AVOPTION_CODEC_INT("ratetol", "number of bits the bitstream is allowed to diverge from the reference"
5930                        "the reference can be CBR (for CBR pass1) or VBR (for pass2)",
5931                        bit_rate_tolerance, 4, 240000000, 8000),
5932     AVOPTION_CODEC_INT("qmin", "minimum quantizer", qmin, 1, 31, 2),
5933     AVOPTION_CODEC_INT("qmax", "maximum quantizer", qmax, 1, 31, 31),
5934     AVOPTION_CODEC_STRING("rc_eq", "rate control equation",
5935                           rc_eq, "tex^qComp,option1,options2", 0),
5936     AVOPTION_CODEC_INT("rc_minrate", "rate control minimum bitrate",
5937                        rc_min_rate, 4, 24000000, 0),
5938     AVOPTION_CODEC_INT("rc_maxrate", "rate control maximum bitrate",
5939                        rc_max_rate, 4, 24000000, 0),
5940     AVOPTION_CODEC_DOUBLE("rc_buf_aggresivity", "rate control buffer aggresivity",
5941                           rc_buffer_aggressivity, 4, 24000000, 0),
5942     AVOPTION_CODEC_DOUBLE("rc_initial_cplx", "initial complexity for pass1 ratecontrol",
5943                           rc_initial_cplx, 0., 9999999., 0),
5944     AVOPTION_CODEC_DOUBLE("i_quant_factor", "qscale factor between p and i frames",
5945                           i_quant_factor, 0., 0., 0),
5946     AVOPTION_CODEC_DOUBLE("i_quant_offset", "qscale offset between p and i frames",
5947                           i_quant_factor, -999999., 999999., 0),
5948     AVOPTION_CODEC_INT("dct_algo", "dct alghorithm",
5949                        dct_algo, 0, 5, 0), // fixme - "Auto,FastInt,Int,MMX,MLib,Altivec"
5950     AVOPTION_CODEC_DOUBLE("lumi_masking", "luminance masking",
5951                           lumi_masking, 0., 999999., 0),
5952     AVOPTION_CODEC_DOUBLE("temporal_cplx_masking", "temporary complexity masking",
5953                           temporal_cplx_masking, 0., 999999., 0),
5954     AVOPTION_CODEC_DOUBLE("spatial_cplx_masking", "spatial complexity masking",
5955                           spatial_cplx_masking, 0., 999999., 0),
5956     AVOPTION_CODEC_DOUBLE("p_masking", "p block masking",
5957                           p_masking, 0., 999999., 0),
5958     AVOPTION_CODEC_DOUBLE("dark_masking", "darkness masking",
5959                           dark_masking, 0., 999999., 0),
5960     AVOPTION_CODEC_INT("idct_algo", "idct alghorithm",
5961                        idct_algo, 0, 8, 0), // fixme - "Auto,Int,Simple,SimpleMMX,LibMPEG2MMX,PS2,MLib,ARM,Altivec"
5962
5963     AVOPTION_CODEC_INT("mb_qmin", "minimum MB quantizer",
5964                        mb_qmin, 0, 8, 0),
5965     AVOPTION_CODEC_INT("mb_qmax", "maximum MB quantizer",
5966                        mb_qmin, 0, 8, 0),
5967
5968     AVOPTION_CODEC_INT("me_cmp", "ME compare function",
5969                        me_cmp, 0, 24000000, 0),
5970     AVOPTION_CODEC_INT("me_sub_cmp", "subpixel ME compare function",
5971                        me_sub_cmp, 0, 24000000, 0),
5972
5973
5974     AVOPTION_CODEC_INT("dia_size", "ME diamond size & shape",
5975                        dia_size, 0, 24000000, 0),
5976     AVOPTION_CODEC_INT("last_predictor_count", "amount of previous MV predictors",
5977                        last_predictor_count, 0, 24000000, 0),
5978
5979     AVOPTION_CODEC_INT("pre_me", "pre pass for ME",
5980                        pre_me, 0, 24000000, 0),
5981     AVOPTION_CODEC_INT("me_pre_cmp", "ME pre pass compare function",
5982                        me_pre_cmp, 0, 24000000, 0),
5983
5984     AVOPTION_CODEC_INT("me_range", "maximum ME search range",
5985                        me_range, 0, 24000000, 0),
5986     AVOPTION_CODEC_INT("pre_dia_size", "ME pre pass diamod size & shape",
5987                        pre_dia_size, 0, 24000000, 0),
5988     AVOPTION_CODEC_INT("me_subpel_quality", "subpel ME quality",
5989                        me_subpel_quality, 0, 24000000, 0),
5990     AVOPTION_CODEC_INT("me_range", "maximum ME search range",
5991                        me_range, 0, 24000000, 0),
5992     AVOPTION_CODEC_FLAG("psnr", "calculate PSNR of compressed frames",
5993                         flags, CODEC_FLAG_PSNR, 0),
5994     AVOPTION_CODEC_RCOVERRIDE("rc_override", "ratecontrol override (=startframe,endframe,qscale,quality_factor)",
5995                               rc_override),
5996     AVOPTION_SUB(avoptions_common),
5997     AVOPTION_END()
5998 };
5999
6000 #ifdef CONFIG_ENCODERS
6001 #ifdef CONFIG_RISKY
/**
 * Encoder table entry for "h263" (CODEC_TYPE_VIDEO, CODEC_ID_H263).
 * The private context is an MpegEncContext and the entry uses the shared
 * MPV_encode_init / MPV_encode_picture / MPV_encode_end functions
 * (presumably the init, encode and close callbacks -- the member order is
 * declared in AVCodec, outside this chunk). No option table is attached.
 * Only compiled when CONFIG_ENCODERS and CONFIG_RISKY are defined.
 */
6002 AVCodec h263_encoder = {
6003     "h263",
6004     CODEC_TYPE_VIDEO,
6005     CODEC_ID_H263,
6006     sizeof(MpegEncContext),
6007     MPV_encode_init,
6008     MPV_encode_picture,
6009     MPV_encode_end,
6010 };
6011
/**
 * Encoder table entry for "h263p" (CODEC_TYPE_VIDEO, CODEC_ID_H263P).
 * The private context is an MpegEncContext and the entry uses the shared
 * MPV_encode_init / MPV_encode_picture / MPV_encode_end functions
 * (presumably the init, encode and close callbacks -- the member order is
 * declared in AVCodec, outside this chunk). No option table is attached.
 * Only compiled when CONFIG_ENCODERS and CONFIG_RISKY are defined.
 */
6012 AVCodec h263p_encoder = {
6013     "h263p",
6014     CODEC_TYPE_VIDEO,
6015     CODEC_ID_H263P,
6016     sizeof(MpegEncContext),
6017     MPV_encode_init,
6018     MPV_encode_picture,
6019     MPV_encode_end,
6020 };
6021
/**
 * Encoder table entry for "flv" (CODEC_TYPE_VIDEO, CODEC_ID_FLV1).
 * The private context is an MpegEncContext and the entry uses the shared
 * MPV_encode_init / MPV_encode_picture / MPV_encode_end functions
 * (presumably the init, encode and close callbacks -- the member order is
 * declared in AVCodec, outside this chunk). No option table is attached.
 * Only compiled when CONFIG_ENCODERS and CONFIG_RISKY are defined.
 */
6022 AVCodec flv_encoder = {
6023     "flv",
6024     CODEC_TYPE_VIDEO,
6025     CODEC_ID_FLV1,
6026     sizeof(MpegEncContext),
6027     MPV_encode_init,
6028     MPV_encode_picture,
6029     MPV_encode_end,
6030 };
6031
/**
 * Encoder table entry for "rv10" (CODEC_TYPE_VIDEO, CODEC_ID_RV10).
 * The private context is an MpegEncContext and the entry uses the shared
 * MPV_encode_init / MPV_encode_picture / MPV_encode_end functions
 * (presumably the init, encode and close callbacks -- the member order is
 * declared in AVCodec, outside this chunk). No option table is attached.
 * Only compiled when CONFIG_ENCODERS and CONFIG_RISKY are defined.
 */
6032 AVCodec rv10_encoder = {
6033     "rv10",
6034     CODEC_TYPE_VIDEO,
6035     CODEC_ID_RV10,
6036     sizeof(MpegEncContext),
6037     MPV_encode_init,
6038     MPV_encode_picture,
6039     MPV_encode_end,
6040 };
6041
/**
 * Encoder table entry for "mpeg4" (CODEC_TYPE_VIDEO, CODEC_ID_MPEG4).
 * The private context is an MpegEncContext and the entry uses the shared
 * MPV_encode_init / MPV_encode_picture / MPV_encode_end functions
 * (presumably the init, encode and close callbacks -- the member order is
 * declared in AVCodec, outside this chunk). It attaches the mpeg4_options
 * table and sets the CODEC_CAP_DELAY capability flag.
 * Only compiled when CONFIG_ENCODERS and CONFIG_RISKY are defined.
 */
6042 AVCodec mpeg4_encoder = {
6043     "mpeg4",
6044     CODEC_TYPE_VIDEO,
6045     CODEC_ID_MPEG4,
6046     sizeof(MpegEncContext),
6047     MPV_encode_init,
6048     MPV_encode_picture,
6049     MPV_encode_end,
6050     .options = mpeg4_options,
6051     .capabilities= CODEC_CAP_DELAY,
6052 };
6053
/**
 * Encoder table entry for "msmpeg4v1" (CODEC_TYPE_VIDEO, CODEC_ID_MSMPEG4V1).
 * The private context is an MpegEncContext and the entry uses the shared
 * MPV_encode_init / MPV_encode_picture / MPV_encode_end functions
 * (presumably the init, encode and close callbacks -- the member order is
 * declared in AVCodec, outside this chunk). It attaches the mpeg4_options
 * table. Only compiled when CONFIG_ENCODERS and CONFIG_RISKY are defined.
 */
6054 AVCodec msmpeg4v1_encoder = {
6055     "msmpeg4v1",
6056     CODEC_TYPE_VIDEO,
6057     CODEC_ID_MSMPEG4V1,
6058     sizeof(MpegEncContext),
6059     MPV_encode_init,
6060     MPV_encode_picture,
6061     MPV_encode_end,
6062     .options = mpeg4_options,
6063 };
6064
/**
 * Encoder table entry for "msmpeg4v2" (CODEC_TYPE_VIDEO, CODEC_ID_MSMPEG4V2).
 * The private context is an MpegEncContext and the entry uses the shared
 * MPV_encode_init / MPV_encode_picture / MPV_encode_end functions
 * (presumably the init, encode and close callbacks -- the member order is
 * declared in AVCodec, outside this chunk). It attaches the mpeg4_options
 * table. Only compiled when CONFIG_ENCODERS and CONFIG_RISKY are defined.
 */
6065 AVCodec msmpeg4v2_encoder = {
6066     "msmpeg4v2",
6067     CODEC_TYPE_VIDEO,
6068     CODEC_ID_MSMPEG4V2,
6069     sizeof(MpegEncContext),
6070     MPV_encode_init,
6071     MPV_encode_picture,
6072     MPV_encode_end,
6073     .options = mpeg4_options,
6074 };
6075
/**
 * Encoder table entry for CODEC_ID_MSMPEG4V3 (CODEC_TYPE_VIDEO). Note that
 * the registered name string is "msmpeg4", without a version suffix.
 * The private context is an MpegEncContext and the entry uses the shared
 * MPV_encode_init / MPV_encode_picture / MPV_encode_end functions
 * (presumably the init, encode and close callbacks -- the member order is
 * declared in AVCodec, outside this chunk). It attaches the mpeg4_options
 * table. Only compiled when CONFIG_ENCODERS and CONFIG_RISKY are defined.
 */
6076 AVCodec msmpeg4v3_encoder = {
6077     "msmpeg4",
6078     CODEC_TYPE_VIDEO,
6079     CODEC_ID_MSMPEG4V3,
6080     sizeof(MpegEncContext),
6081     MPV_encode_init,
6082     MPV_encode_picture,
6083     MPV_encode_end,
6084     .options = mpeg4_options,
6085 };
6086
/**
 * Encoder table entry for "wmv1" (CODEC_TYPE_VIDEO, CODEC_ID_WMV1).
 * The private context is an MpegEncContext and the entry uses the shared
 * MPV_encode_init / MPV_encode_picture / MPV_encode_end functions
 * (presumably the init, encode and close callbacks -- the member order is
 * declared in AVCodec, outside this chunk). It attaches the mpeg4_options
 * table. Only compiled when CONFIG_ENCODERS and CONFIG_RISKY are defined.
 */
6087 AVCodec wmv1_encoder = {
6088     "wmv1",
6089     CODEC_TYPE_VIDEO,
6090     CODEC_ID_WMV1,
6091     sizeof(MpegEncContext),
6092     MPV_encode_init,
6093     MPV_encode_picture,
6094     MPV_encode_end,
6095     .options = mpeg4_options,
6096 };
6097
6098 #endif
6099
/**
 * Encoder table entry for "mjpeg" (CODEC_TYPE_VIDEO, CODEC_ID_MJPEG).
 * The private context is an MpegEncContext and the entry uses the shared
 * MPV_encode_init / MPV_encode_picture / MPV_encode_end functions
 * (presumably the init, encode and close callbacks -- the member order is
 * declared in AVCodec, outside this chunk). No option table is attached.
 * Unlike the entries above it sits outside the CONFIG_RISKY section and
 * only requires CONFIG_ENCODERS.
 */
6100 AVCodec mjpeg_encoder = {
6101     "mjpeg",
6102     CODEC_TYPE_VIDEO,
6103     CODEC_ID_MJPEG,
6104     sizeof(MpegEncContext),
6105     MPV_encode_init,
6106     MPV_encode_picture,
6107     MPV_encode_end,
6108 };
6109
6110 #endif //CONFIG_ENCODERS