1 /*
2  * The simplest mpeg encoder (well, it was the simplest!)
3  * Copyright (c) 2000,2001 Fabrice Bellard.
4  *
5  * This library is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU Lesser General Public
7  * License as published by the Free Software Foundation; either
8  * version 2 of the License, or (at your option) any later version.
9  *
10  * This library is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * Lesser General Public License for more details.
14  *
15  * You should have received a copy of the GNU Lesser General Public
16  * License along with this library; if not, write to the Free Software
17  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
18  *
19  * 4MV & hq & b-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
20  */
21  
22 /**
23  * @file mpegvideo.c
24  * The simplest mpeg encoder (well, it was the simplest!).
25  */ 
26  
27 #include <limits.h>
28 #include "avcodec.h"
29 #include "dsputil.h"
30 #include "mpegvideo.h"
31
32 #ifdef USE_FASTMEMCPY
33 #include "fastmemcpy.h"
34 #endif
35
36 //#undef NDEBUG
37 //#include <assert.h>
38
39 #ifdef CONFIG_ENCODERS
40 static void encode_picture(MpegEncContext *s, int picture_number);
41 #endif //CONFIG_ENCODERS
42 static void dct_unquantize_mpeg1_c(MpegEncContext *s, 
43                                    DCTELEM *block, int n, int qscale);
44 static void dct_unquantize_mpeg2_c(MpegEncContext *s,
45                                    DCTELEM *block, int n, int qscale);
46 static void dct_unquantize_h263_c(MpegEncContext *s, 
47                                   DCTELEM *block, int n, int qscale);
48 static void draw_edges_c(uint8_t *buf, int wrap, int width, int height, int w);
49 #ifdef CONFIG_ENCODERS
50 static int dct_quantize_c(MpegEncContext *s, DCTELEM *block, int n, int qscale, int *overflow);
51 static int dct_quantize_trellis_c(MpegEncContext *s, DCTELEM *block, int n, int qscale, int *overflow);
52 static int sse_mb(MpegEncContext *s);
53 #endif //CONFIG_ENCODERS
54
55 #ifdef HAVE_XVMC
56 extern int  XVMC_field_start(MpegEncContext*s, AVCodecContext *avctx);
57 extern void XVMC_field_end(MpegEncContext *s);
58 extern void XVMC_decode_mb(MpegEncContext *s, DCTELEM block[6][64]);
59 #endif
60
61 void (*draw_edges)(uint8_t *buf, int wrap, int width, int height, int w)= draw_edges_c;
62
63
64 /* enable all paranoid tests for rounding, overflows, etc... */
65 //#define PARANOID
66
67 //#define DEBUG
68
69
70 /* for jpeg fast DCT */
71 #define CONST_BITS 14
72
73 static const uint16_t aanscales[64] = {
74     /* precomputed values scaled up by 14 bits */
75     16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
76     22725, 31521, 29692, 26722, 22725, 17855, 12299,  6270,
77     21407, 29692, 27969, 25172, 21407, 16819, 11585,  5906,
78     19266, 26722, 25172, 22654, 19266, 15137, 10426,  5315,
79     16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
80     12873, 17855, 16819, 15137, 12873, 10114,  6967,  3552,
81     8867 , 12299, 11585, 10426,  8867,  6967,  4799,  2446,
82     4520 ,  6270,  5906,  5315,  4520,  3552,  2446,  1247
83 };
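/* Note: these are the AAN (Arai/Agui/Nakajima) fast-DCT post-scale factors,
   aanscales[8*u+v] ~= 2^14 * aan[u] * aan[v], where aan[0]=1 and
   aan[k]=sqrt(2)*cos(k*pi/16) for k=1..7. Dividing by them in convert_matrix()
   folds the missing DCT output scaling into the quantizer when fdct_ifast is
   used. */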
84
85 static const uint8_t h263_chroma_roundtab[16] = {
86 //  0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15
87     0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2,
88 };
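/* Chroma-vector rounding table (values are in half-pel units): the low four
   bits of the scaled-down luma vector select 0 (remainders 0-2), a half-pel
   step (3-13) or a full-pel step (14-15), biasing chroma vectors towards
   half-pel positions as H.263 specifies. */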
89
90 #ifdef CONFIG_ENCODERS
91 static uint8_t (*default_mv_penalty)[MAX_MV*2+1]=NULL;
92 static uint8_t default_fcode_tab[MAX_MV*2+1];
93
94 enum PixelFormat ff_yuv420p_list[2]= {PIX_FMT_YUV420P, -1};
95
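/* convert_matrix() turns the 8x8 quantization matrix into per-qscale tables of
   fixed-point reciprocals, so the quantizer can replace a division per
   coefficient with a multiply and a shift:
       level / (qscale * quant_matrix[j])
   becomes (roughly, ignoring bias and rounding)
       (level * qmat[qscale][j]) >> QMAT_SHIFT
   For fdct_ifast the AAN post-scale factors (aanscales[]) are folded in too;
   qmat16 / qmat16_bias are reduced-precision variants for the MMX quantizer. */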
96 static void convert_matrix(MpegEncContext *s, int (*qmat)[64], uint16_t (*qmat16)[64], uint16_t (*qmat16_bias)[64],
97                            const uint16_t *quant_matrix, int bias, int qmin, int qmax)
98 {
99     int qscale;
100
101     for(qscale=qmin; qscale<=qmax; qscale++){
102         int i;
103         if (s->dsp.fdct == ff_jpeg_fdct_islow) {
104             for(i=0;i<64;i++) {
105                 const int j= s->dsp.idct_permutation[i];
106                 /* 16 <= qscale * quant_matrix[i] <= 7905 */
107                 /* so (1<<QMAT_SHIFT)/16 >= qmat[qscale][i] >= (1<<QMAT_SHIFT)/7905 */
108                 /* (the aanscales[] bounds quoted in the fdct_ifast branch below do not */
109                 /*  apply here, since this branch uses the plain reciprocal)            */
110                 
111                 qmat[qscale][i] = (int)((uint64_t_C(1) << QMAT_SHIFT) / 
112                                 (qscale * quant_matrix[j]));
113             }
114         } else if (s->dsp.fdct == fdct_ifast) {
115             for(i=0;i<64;i++) {
116                 const int j= s->dsp.idct_permutation[i];
117                 /* 16 <= qscale * quant_matrix[i] <= 7905 */
118                 /* 19952         <= aanscales[i] * qscale * quant_matrix[i]           <= 249205026 */
119                 /* (1<<36)/19952 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= (1<<36)/249205026 */
120                 /* 3444240       >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= 275 */
121                 
122                 qmat[qscale][i] = (int)((uint64_t_C(1) << (QMAT_SHIFT + 14)) / 
123                                 (aanscales[i] * qscale * quant_matrix[j]));
124             }
125         } else {
126             for(i=0;i<64;i++) {
127                 const int j= s->dsp.idct_permutation[i];
128                 /* We can safely suppose that 16 <= quant_matrix[i] <= 255
129                    So 16           <= qscale * quant_matrix[i]             <= 7905
130                    so (1<<19) / 16 >= (1<<19) / (qscale * quant_matrix[i]) >= (1<<19) / 7905
131                    so 32768        >= (1<<19) / (qscale * quant_matrix[i]) >= 67
132                 */
133                 qmat[qscale][i] = (int)((uint64_t_C(1) << QMAT_SHIFT) / (qscale * quant_matrix[j]));
134 //                qmat  [qscale][i] = (1 << QMAT_SHIFT_MMX) / (qscale * quant_matrix[i]);
135                 qmat16[qscale][i] = (1 << QMAT_SHIFT_MMX) / (qscale * quant_matrix[j]);
136
137                 if(qmat16[qscale][i]==0 || qmat16[qscale][i]==128*256) qmat16[qscale][i]=128*256-1;
138                 qmat16_bias[qscale][i]= ROUNDED_DIV(bias<<(16-QUANT_BIAS_SHIFT), qmat16[qscale][i]);
139             }
140         }
141     }
142 }
143 #endif //CONFIG_ENCODERS
144
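/* Builds a scan table with the IDCT's coefficient permutation already applied:
   permutated[i] = permutation[src_scantable[i]]. raster_end[i] is the largest
   permuted (raster) index occurring among the first i+1 scan positions, i.e.
   the last raster position that can be non-zero when only scan positions
   0..i are coded. */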
145 void ff_init_scantable(uint8_t *permutation, ScanTable *st, const uint8_t *src_scantable){
146     int i;
147     int end;
148     
149     st->scantable= src_scantable;
150
151     for(i=0; i<64; i++){
152         int j;
153         j = src_scantable[i];
154         st->permutated[i] = permutation[j];
155 #ifdef ARCH_POWERPC
156         st->inverse[j] = i;
157 #endif
158     }
159     
160     end=-1;
161     for(i=0; i<64; i++){
162         int j;
163         j = st->permutated[i];
164         if(j>end) end=j;
165         st->raster_end[i]= end;
166     }
167 }
168
169 void ff_write_quant_matrix(PutBitContext *pb, int16_t *matrix){
170     int i;
171
172     if(matrix){
173         put_bits(pb, 1, 1);
174         for(i=0;i<64;i++) {
175             put_bits(pb, 8, matrix[ ff_zigzag_direct[i] ]);
176         }
177     }else
178         put_bits(pb, 1, 0);
179 }
180
181 /* init common dct for both encoder and decoder */
182 int DCT_common_init(MpegEncContext *s)
183 {
184     s->dct_unquantize_h263 = dct_unquantize_h263_c;
185     s->dct_unquantize_mpeg1 = dct_unquantize_mpeg1_c;
186     s->dct_unquantize_mpeg2 = dct_unquantize_mpeg2_c;
187
188 #ifdef CONFIG_ENCODERS
189     s->dct_quantize= dct_quantize_c;
190 #endif
191         
192 #ifdef HAVE_MMX
193     MPV_common_init_mmx(s);
194 #endif
195 #ifdef ARCH_ALPHA
196     MPV_common_init_axp(s);
197 #endif
198 #ifdef HAVE_MLIB
199     MPV_common_init_mlib(s);
200 #endif
201 #ifdef HAVE_MMI
202     MPV_common_init_mmi(s);
203 #endif
204 #ifdef ARCH_ARMV4L
205     MPV_common_init_armv4l(s);
206 #endif
207 #ifdef ARCH_POWERPC
208     MPV_common_init_ppc(s);
209 #endif
210
211 #ifdef CONFIG_ENCODERS
212     s->fast_dct_quantize= s->dct_quantize;
213
214     if(s->flags&CODEC_FLAG_TRELLIS_QUANT){
215         s->dct_quantize= dct_quantize_trellis_c; //move before MPV_common_init_*
216     }
217
218 #endif //CONFIG_ENCODERS
219
220     /* load & permute scantables
221        note: only WMV uses different ones
222     */
223     ff_init_scantable(s->dsp.idct_permutation, &s->inter_scantable  , ff_zigzag_direct);
224     ff_init_scantable(s->dsp.idct_permutation, &s->intra_scantable  , ff_zigzag_direct);
225     ff_init_scantable(s->dsp.idct_permutation, &s->intra_h_scantable, ff_alternate_horizontal_scan);
226     ff_init_scantable(s->dsp.idct_permutation, &s->intra_v_scantable, ff_alternate_vertical_scan);
227
228     s->picture_structure= PICT_FRAME;
229     
230     return 0;
231 }
232
233 /**
234  * allocates a Picture
235  * The pixels are allocated/set by calling get_buffer() if shared=0
236  */
237 static int alloc_picture(MpegEncContext *s, Picture *pic, int shared){
238     const int big_mb_num= s->mb_stride*(s->mb_height+1) + 1; //the +1 is needed so memset(,,stride*height) doesn't sig11
239     const int mb_array_size= s->mb_stride*s->mb_height;
240     int i;
241     
242     if(shared){
243         assert(pic->data[0]);
244         assert(pic->type == 0 || pic->type == FF_BUFFER_TYPE_SHARED);
245         pic->type= FF_BUFFER_TYPE_SHARED;
246     }else{
247         int r;
248         
249         assert(!pic->data[0]);
250         
251         r= s->avctx->get_buffer(s->avctx, (AVFrame*)pic);
252         
253         if(r<0 || !pic->age || !pic->type || !pic->data[0]){
254             fprintf(stderr, "get_buffer() failed (%d %d %d %p)\n", r, pic->age, pic->type, pic->data[0]);
255             return -1;
256         }
257
258         if(s->linesize && (s->linesize != pic->linesize[0] || s->uvlinesize != pic->linesize[1])){
259             fprintf(stderr, "get_buffer() failed (stride changed)\n");
260             return -1;
261         }
262
263         if(pic->linesize[1] != pic->linesize[2]){
264             fprintf(stderr, "get_buffer() failed (uv stride mismatch)\n");
265             return -1;
266         }
267
268         s->linesize  = pic->linesize[0];
269         s->uvlinesize= pic->linesize[1];
270     }
271     
272     if(pic->qscale_table==NULL){
273         if (s->encoding) {        
274             CHECKED_ALLOCZ(pic->mb_var   , mb_array_size * sizeof(int16_t))
275             CHECKED_ALLOCZ(pic->mc_mb_var, mb_array_size * sizeof(int16_t))
276             CHECKED_ALLOCZ(pic->mb_mean  , mb_array_size * sizeof(int8_t))
277             CHECKED_ALLOCZ(pic->mb_cmp_score, mb_array_size * sizeof(int32_t))
278         }
279
280         CHECKED_ALLOCZ(pic->mbskip_table , mb_array_size * sizeof(uint8_t)+2) //the +2 is for the slice end check
281         CHECKED_ALLOCZ(pic->qscale_table , mb_array_size * sizeof(uint8_t))
282         CHECKED_ALLOCZ(pic->mb_type_base , big_mb_num    * sizeof(int))
283         pic->mb_type= pic->mb_type_base + s->mb_stride+1;
284         if(s->out_format == FMT_H264){
285             for(i=0; i<2; i++){
286                 CHECKED_ALLOCZ(pic->motion_val[i], 2 * 16 * s->mb_num * sizeof(uint16_t))
287                 CHECKED_ALLOCZ(pic->ref_index[i] , 4 * s->mb_num * sizeof(uint8_t))
288             }
289         }
290         pic->qstride= s->mb_stride;
291     }
292
293     //it might be nicer if the application would keep track of these but it would require an API change
294     memmove(s->prev_pict_types+1, s->prev_pict_types, PREV_PICT_TYPES_BUFFER_SIZE-1);
295     s->prev_pict_types[0]= s->pict_type;
296     if(pic->age < PREV_PICT_TYPES_BUFFER_SIZE && s->prev_pict_types[pic->age] == B_TYPE)
297         pic->age= INT_MAX; // skipped MBs in B-frames are quite rare in MPEG-1/2 and it's a bit tricky to skip them anyway
298     
299     return 0;
300 fail: //for the CHECKED_ALLOCZ macro
301     return -1;
302 }
303
304 /**
305  * deallocates a picture
306  */
307 static void free_picture(MpegEncContext *s, Picture *pic){
308     int i;
309
310     if(pic->data[0] && pic->type!=FF_BUFFER_TYPE_SHARED){
311         s->avctx->release_buffer(s->avctx, (AVFrame*)pic);
312     }
313
314     av_freep(&pic->mb_var);
315     av_freep(&pic->mc_mb_var);
316     av_freep(&pic->mb_mean);
317     av_freep(&pic->mb_cmp_score);
318     av_freep(&pic->mbskip_table);
319     av_freep(&pic->qscale_table);
320     av_freep(&pic->mb_type_base);
321     pic->mb_type= NULL;
322     for(i=0; i<2; i++){
323         av_freep(&pic->motion_val[i]);
324         av_freep(&pic->ref_index[i]);
325     }
326     
327     if(pic->type == FF_BUFFER_TYPE_SHARED){
328         for(i=0; i<4; i++){
329             pic->base[i]=
330             pic->data[i]= NULL;
331         }
332         pic->type= 0;        
333     }
334 }
335
336 /* init common structure for both encoder and decoder */
337 int MPV_common_init(MpegEncContext *s)
338 {
339     int y_size, c_size, yc_size, i, mb_array_size, x, y;
340
341     dsputil_init(&s->dsp, s->avctx);
342     DCT_common_init(s);
343
344     s->flags= s->avctx->flags;
345
346     s->mb_width  = (s->width  + 15) / 16;
347     s->mb_height = (s->height + 15) / 16;
348     s->mb_stride = s->mb_width + 1;
349     mb_array_size= s->mb_height * s->mb_stride;
350
351     /* set default edge pos, will be overridden in decode_header if needed */
352     s->h_edge_pos= s->mb_width*16;
353     s->v_edge_pos= s->mb_height*16;
354
355     s->mb_num = s->mb_width * s->mb_height;
356     
357     s->block_wrap[0]=
358     s->block_wrap[1]=
359     s->block_wrap[2]=
360     s->block_wrap[3]= s->mb_width*2 + 2;
361     s->block_wrap[4]=
362     s->block_wrap[5]= s->mb_width + 2;
363
364     y_size = (2 * s->mb_width + 2) * (2 * s->mb_height + 2);
365     c_size = (s->mb_width + 2) * (s->mb_height + 2);
366     yc_size = y_size + 2 * c_size;
367
368     /* convert fourcc to upper case */
369     s->avctx->codec_tag=   toupper( s->avctx->codec_tag     &0xFF)          
370                         + (toupper((s->avctx->codec_tag>>8 )&0xFF)<<8 )
371                         + (toupper((s->avctx->codec_tag>>16)&0xFF)<<16) 
372                         + (toupper((s->avctx->codec_tag>>24)&0xFF)<<24);
373
374     CHECKED_ALLOCZ(s->allocated_edge_emu_buffer, (s->width+64)*2*17*2); //(width + edge + align)*interlaced*MBsize*tolerance
375     s->edge_emu_buffer= s->allocated_edge_emu_buffer + (s->width+64)*2*17;
376
377     s->avctx->coded_frame= (AVFrame*)&s->current_picture;
378
379     CHECKED_ALLOCZ(s->mb_index2xy, (s->mb_num+1)*sizeof(int)) //error resilience code looks cleaner with this
380     for(y=0; y<s->mb_height; y++){
381         for(x=0; x<s->mb_width; x++){
382             s->mb_index2xy[ x + y*s->mb_width ] = x + y*s->mb_stride;
383         }
384     }
385     s->mb_index2xy[ s->mb_height*s->mb_width ] = (s->mb_height-1)*s->mb_stride + s->mb_width; //FIXME really needed?
386     
387     if (s->encoding) {
388         int mv_table_size= s->mb_stride * (s->mb_height+2) + 1;
389
390         /* Allocate MV tables */
391         CHECKED_ALLOCZ(s->p_mv_table_base            , mv_table_size * 2 * sizeof(int16_t))
392         CHECKED_ALLOCZ(s->b_forw_mv_table_base       , mv_table_size * 2 * sizeof(int16_t))
393         CHECKED_ALLOCZ(s->b_back_mv_table_base       , mv_table_size * 2 * sizeof(int16_t))
394         CHECKED_ALLOCZ(s->b_bidir_forw_mv_table_base , mv_table_size * 2 * sizeof(int16_t))
395         CHECKED_ALLOCZ(s->b_bidir_back_mv_table_base , mv_table_size * 2 * sizeof(int16_t))
396         CHECKED_ALLOCZ(s->b_direct_mv_table_base     , mv_table_size * 2 * sizeof(int16_t))
397         s->p_mv_table           = s->p_mv_table_base            + s->mb_stride + 1;
398         s->b_forw_mv_table      = s->b_forw_mv_table_base       + s->mb_stride + 1;
399         s->b_back_mv_table      = s->b_back_mv_table_base       + s->mb_stride + 1;
400         s->b_bidir_forw_mv_table= s->b_bidir_forw_mv_table_base + s->mb_stride + 1;
401         s->b_bidir_back_mv_table= s->b_bidir_back_mv_table_base + s->mb_stride + 1;
402         s->b_direct_mv_table    = s->b_direct_mv_table_base     + s->mb_stride + 1;
403
404         //FIXME should be linesize instead of s->width*2 but that isn't known before get_buffer()
405         CHECKED_ALLOCZ(s->me.scratchpad,  s->width*2*16*3*sizeof(uint8_t)) 
406         
407         CHECKED_ALLOCZ(s->me.map      , ME_MAP_SIZE*sizeof(uint32_t))
408         CHECKED_ALLOCZ(s->me.score_map, ME_MAP_SIZE*sizeof(uint32_t))
409
410         if(s->codec_id==CODEC_ID_MPEG4){
411             CHECKED_ALLOCZ(s->tex_pb_buffer, PB_BUFFER_SIZE);
412             CHECKED_ALLOCZ(   s->pb2_buffer, PB_BUFFER_SIZE);
413         }
414         
415         if(s->msmpeg4_version){
416             CHECKED_ALLOCZ(s->ac_stats, 2*2*(MAX_LEVEL+1)*(MAX_RUN+1)*2*sizeof(int));
417         }
418         CHECKED_ALLOCZ(s->avctx->stats_out, 256);
419
420         /* Allocate MB type table */
421         CHECKED_ALLOCZ(s->mb_type  , mb_array_size * sizeof(uint8_t)) //needed for encoding
422     }
423         
424     CHECKED_ALLOCZ(s->error_status_table, mb_array_size*sizeof(uint8_t))
425     
426     if (s->out_format == FMT_H263 || s->encoding) {
427         int size;
428
429         /* MV prediction */
430         size = (2 * s->mb_width + 2) * (2 * s->mb_height + 2);
431         CHECKED_ALLOCZ(s->motion_val, size * 2 * sizeof(int16_t));
432     }
433
434     if(s->codec_id==CODEC_ID_MPEG4){
435         /* interlaced direct mode decoding tables */
436         CHECKED_ALLOCZ(s->field_mv_table, mb_array_size*2*2 * sizeof(int16_t))
437         CHECKED_ALLOCZ(s->field_select_table, mb_array_size*2* sizeof(int8_t))
438     }
439     if (s->out_format == FMT_H263) {
440         /* ac values */
441         CHECKED_ALLOCZ(s->ac_val[0], yc_size * sizeof(int16_t) * 16);
442         s->ac_val[1] = s->ac_val[0] + y_size;
443         s->ac_val[2] = s->ac_val[1] + c_size;
444         
445         /* cbp values */
446         CHECKED_ALLOCZ(s->coded_block, y_size);
447         
448         /* divx501 bitstream reorder buffer */
449         CHECKED_ALLOCZ(s->bitstream_buffer, BITSTREAM_BUFFER_SIZE);
450
451         /* cbp, ac_pred, pred_dir */
452         CHECKED_ALLOCZ(s->cbp_table  , mb_array_size * sizeof(uint8_t))
453         CHECKED_ALLOCZ(s->pred_dir_table, mb_array_size * sizeof(uint8_t))
454     }
455     
456     if (s->h263_pred || s->h263_plus || !s->encoding) {
457         /* dc values */
458         //MN: we need these for error resilience of intra-frames
459         CHECKED_ALLOCZ(s->dc_val[0], yc_size * sizeof(int16_t));
460         s->dc_val[1] = s->dc_val[0] + y_size;
461         s->dc_val[2] = s->dc_val[1] + c_size;
462         for(i=0;i<yc_size;i++)
463             s->dc_val[0][i] = 1024;
464     }
465
466     /* which mb is an intra block */
467     CHECKED_ALLOCZ(s->mbintra_table, mb_array_size);
468     memset(s->mbintra_table, 1, mb_array_size);
469     
470     /* default structure is frame */
471     s->picture_structure = PICT_FRAME;
472     
473     /* init macroblock skip table */
474     CHECKED_ALLOCZ(s->mbskip_table, mb_array_size+2);
475     //Note the +1 is for a quicker mpeg4 slice_end detection
476     CHECKED_ALLOCZ(s->prev_pict_types, PREV_PICT_TYPES_BUFFER_SIZE);
477     
478     s->block= s->blocks[0];
479
480     s->parse_context.state= -1;
481
482     s->context_initialized = 1;
483     return 0;
484  fail:
485     MPV_common_end(s);
486     return -1;
487 }
488
489
490 //extern int sads;
491
492 /* init common structure for both encoder and decoder */
493 void MPV_common_end(MpegEncContext *s)
494 {
495     int i;
496
497     av_freep(&s->parse_context.buffer);
498     s->parse_context.buffer_size=0;
499
500     av_freep(&s->mb_type);
501     av_freep(&s->p_mv_table_base);
502     av_freep(&s->b_forw_mv_table_base);
503     av_freep(&s->b_back_mv_table_base);
504     av_freep(&s->b_bidir_forw_mv_table_base);
505     av_freep(&s->b_bidir_back_mv_table_base);
506     av_freep(&s->b_direct_mv_table_base);
507     s->p_mv_table= NULL;
508     s->b_forw_mv_table= NULL;
509     s->b_back_mv_table= NULL;
510     s->b_bidir_forw_mv_table= NULL;
511     s->b_bidir_back_mv_table= NULL;
512     s->b_direct_mv_table= NULL;
513     
514     av_freep(&s->motion_val);
515     av_freep(&s->dc_val[0]);
516     av_freep(&s->ac_val[0]);
517     av_freep(&s->coded_block);
518     av_freep(&s->mbintra_table);
519     av_freep(&s->cbp_table);
520     av_freep(&s->pred_dir_table);
521     av_freep(&s->me.scratchpad);
522     av_freep(&s->me.map);
523     av_freep(&s->me.score_map);
524     
525     av_freep(&s->mbskip_table);
526     av_freep(&s->prev_pict_types);
527     av_freep(&s->bitstream_buffer);
528     av_freep(&s->tex_pb_buffer);
529     av_freep(&s->pb2_buffer);
530     av_freep(&s->allocated_edge_emu_buffer); s->edge_emu_buffer= NULL;
531     av_freep(&s->field_mv_table);
532     av_freep(&s->field_select_table);
533     av_freep(&s->avctx->stats_out);
534     av_freep(&s->ac_stats);
535     av_freep(&s->error_status_table);
536     av_freep(&s->mb_index2xy);
537
538     for(i=0; i<MAX_PICTURE_COUNT; i++){
539         free_picture(s, &s->picture[i]);
540     }
541     avcodec_default_free_buffers(s->avctx);
542     s->context_initialized = 0;
543 }
544
545 #ifdef CONFIG_ENCODERS
546
547 /* init video encoder */
548 int MPV_encode_init(AVCodecContext *avctx)
549 {
550     MpegEncContext *s = avctx->priv_data;
551     int i, dummy;
552     int chroma_h_shift, chroma_v_shift;
553
554     avctx->pix_fmt = PIX_FMT_YUV420P; // FIXME
555
556     s->bit_rate = avctx->bit_rate;
557     s->bit_rate_tolerance = avctx->bit_rate_tolerance;
558     s->width = avctx->width;
559     s->height = avctx->height;
560     if(avctx->gop_size > 600){
561         fprintf(stderr, "Warning: keyframe interval too large! Reducing it ...\n");
562         avctx->gop_size=600;
563     }
564     s->gop_size = avctx->gop_size;
565     s->rtp_mode = avctx->rtp_mode;
566     s->rtp_payload_size = avctx->rtp_payload_size;
567     if (avctx->rtp_callback)
568         s->rtp_callback = avctx->rtp_callback;
569     s->max_qdiff= avctx->max_qdiff;
570     s->qcompress= avctx->qcompress;
571     s->qblur= avctx->qblur;
572     s->avctx = avctx;
573     s->flags= avctx->flags;
574     s->max_b_frames= avctx->max_b_frames;
575     s->b_frame_strategy= avctx->b_frame_strategy;
576     s->codec_id= avctx->codec->id;
577     s->luma_elim_threshold  = avctx->luma_elim_threshold;
578     s->chroma_elim_threshold= avctx->chroma_elim_threshold;
579     s->strict_std_compliance= avctx->strict_std_compliance;
580     s->data_partitioning= avctx->flags & CODEC_FLAG_PART;
581     s->quarter_sample= (avctx->flags & CODEC_FLAG_QPEL)!=0;
582     s->mpeg_quant= avctx->mpeg_quant;
583
584     if (s->gop_size <= 1) {
585         s->intra_only = 1;
586         s->gop_size = 12;
587     } else {
588         s->intra_only = 0;
589     }
590
591     s->me_method = avctx->me_method;
592
593     /* Fixed QSCALE */
594     s->fixed_qscale = (avctx->flags & CODEC_FLAG_QSCALE);
595     
596     s->adaptive_quant= (   s->avctx->lumi_masking
597                         || s->avctx->dark_masking
598                         || s->avctx->temporal_cplx_masking 
599                         || s->avctx->spatial_cplx_masking
600                         || s->avctx->p_masking)
601                        && !s->fixed_qscale;
602     
603     s->progressive_sequence= !(avctx->flags & CODEC_FLAG_INTERLACED_DCT);
604
605     if((s->flags & CODEC_FLAG_4MV) && s->codec_id != CODEC_ID_MPEG4){
606         fprintf(stderr, "4MV not supported by codec\n");
607         return -1;
608     }
609     
610     if(s->quarter_sample && s->codec_id != CODEC_ID_MPEG4){
611         fprintf(stderr, "qpel not supported by codec\n");
612         return -1;
613     }
614
615     if(s->data_partitioning && s->codec_id != CODEC_ID_MPEG4){
616         fprintf(stderr, "data partitioning not supported by codec\n");
617         return -1;
618     }
619     
620     if(s->max_b_frames && s->codec_id != CODEC_ID_MPEG4 && s->codec_id != CODEC_ID_MPEG1VIDEO && s->codec_id != CODEC_ID_MPEG2VIDEO){
621         fprintf(stderr, "B-frames not supported by codec\n");
622         return -1;
623     }
624     
625     if(s->mpeg_quant && s->codec_id != CODEC_ID_MPEG4){ //FIXME mpeg2 uses that too
626         fprintf(stderr, "mpeg2 style quantization not supported by codec\n");
627         return -1;
628     }
629         
630     if(s->codec_id==CODEC_ID_MJPEG){
631         s->intra_quant_bias= 1<<(QUANT_BIAS_SHIFT-1); //(a + x/2)/x
632         s->inter_quant_bias= 0;
633     }else if(s->mpeg_quant || s->codec_id==CODEC_ID_MPEG1VIDEO || s->codec_id==CODEC_ID_MPEG2VIDEO){
634         s->intra_quant_bias= 3<<(QUANT_BIAS_SHIFT-3); //(a + x*3/8)/x
635         s->inter_quant_bias= 0;
636     }else{
637         s->intra_quant_bias=0;
638         s->inter_quant_bias=-(1<<(QUANT_BIAS_SHIFT-2)); //(a - x/4)/x
639     }
640     
641     if(avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
642         s->intra_quant_bias= avctx->intra_quant_bias;
643     if(avctx->inter_quant_bias != FF_DEFAULT_QUANT_BIAS)
644         s->inter_quant_bias= avctx->inter_quant_bias;
645         
646     avcodec_get_chroma_sub_sample(avctx->pix_fmt, &chroma_h_shift, &chroma_v_shift);
647
648     av_reduce(&s->time_increment_resolution, &dummy, s->avctx->frame_rate, s->avctx->frame_rate_base, (1<<16)-1);
649     s->time_increment_bits = av_log2(s->time_increment_resolution - 1) + 1;
650
651     switch(avctx->codec->id) {
652     case CODEC_ID_MPEG1VIDEO:
653         s->out_format = FMT_MPEG1;
654         s->low_delay= 0; //s->max_b_frames ? 0 : 1;
655         avctx->delay= s->low_delay ? 0 : (s->max_b_frames + 1);
656         break;
657     case CODEC_ID_MPEG2VIDEO:
658         s->out_format = FMT_MPEG1;
659         s->low_delay= 0; //s->max_b_frames ? 0 : 1;
660         avctx->delay= s->low_delay ? 0 : (s->max_b_frames + 1);
661         s->rtp_mode= 1; // mpeg2 must have slices
662         if(s->rtp_payload_size == 0) s->rtp_payload_size= 256*256*256;
663         break;
664     case CODEC_ID_LJPEG:
665     case CODEC_ID_MJPEG:
666         s->out_format = FMT_MJPEG;
667         s->intra_only = 1; /* force intra only for jpeg */
668         s->mjpeg_write_tables = 1; /* write all tables */
669         s->mjpeg_data_only_frames = 0; /* write all the needed headers */
670         s->mjpeg_vsample[0] = 1<<chroma_v_shift;
671         s->mjpeg_vsample[1] = 1;
672         s->mjpeg_vsample[2] = 1; 
673         s->mjpeg_hsample[0] = 1<<chroma_h_shift;
674         s->mjpeg_hsample[1] = 1; 
675         s->mjpeg_hsample[2] = 1; 
676         if (mjpeg_init(s) < 0)
677             return -1;
678         avctx->delay=0;
679         s->low_delay=1;
680         break;
681 #ifdef CONFIG_RISKY
682     case CODEC_ID_H263:
683         if (h263_get_picture_format(s->width, s->height) == 7) {
684             printf("Input picture size isn't suitable for h263 codec! try h263+\n");
685             return -1;
686         }
687         s->out_format = FMT_H263;
688         avctx->delay=0;
689         s->low_delay=1;
690         break;
691     case CODEC_ID_H263P:
692         s->out_format = FMT_H263;
693         s->h263_plus = 1;
694         /* Fx */
695         s->unrestricted_mv=(avctx->flags & CODEC_FLAG_H263P_UMV) ? 1:0;
696         s->h263_aic= (avctx->flags & CODEC_FLAG_H263P_AIC) ? 1:0;
697         /* /Fx */
698         /* These are just to be sure */
699         s->umvplus = 1;
700         avctx->delay=0;
701         s->low_delay=1;
702         break;
703     case CODEC_ID_FLV1:
704         s->out_format = FMT_H263;
705         s->h263_flv = 2; /* format = 1; 11-bit codes */
706         s->unrestricted_mv = 1;
707         s->rtp_mode=0; /* don't allow GOB */
708         avctx->delay=0;
709         s->low_delay=1;
710         break;
711     case CODEC_ID_RV10:
712         s->out_format = FMT_H263;
713         s->h263_rv10 = 1;
714         avctx->delay=0;
715         s->low_delay=1;
716         break;
717     case CODEC_ID_MPEG4:
718         s->out_format = FMT_H263;
719         s->h263_pred = 1;
720         s->unrestricted_mv = 1;
721         s->low_delay= s->max_b_frames ? 0 : 1;
722         avctx->delay= s->low_delay ? 0 : (s->max_b_frames + 1);
723         break;
724     case CODEC_ID_MSMPEG4V1:
725         s->out_format = FMT_H263;
726         s->h263_msmpeg4 = 1;
727         s->h263_pred = 1;
728         s->unrestricted_mv = 1;
729         s->msmpeg4_version= 1;
730         avctx->delay=0;
731         s->low_delay=1;
732         break;
733     case CODEC_ID_MSMPEG4V2:
734         s->out_format = FMT_H263;
735         s->h263_msmpeg4 = 1;
736         s->h263_pred = 1;
737         s->unrestricted_mv = 1;
738         s->msmpeg4_version= 2;
739         avctx->delay=0;
740         s->low_delay=1;
741         break;
742     case CODEC_ID_MSMPEG4V3:
743         s->out_format = FMT_H263;
744         s->h263_msmpeg4 = 1;
745         s->h263_pred = 1;
746         s->unrestricted_mv = 1;
747         s->msmpeg4_version= 3;
748         s->flipflop_rounding=1;
749         avctx->delay=0;
750         s->low_delay=1;
751         break;
752     case CODEC_ID_WMV1:
753         s->out_format = FMT_H263;
754         s->h263_msmpeg4 = 1;
755         s->h263_pred = 1;
756         s->unrestricted_mv = 1;
757         s->msmpeg4_version= 4;
758         s->flipflop_rounding=1;
759         avctx->delay=0;
760         s->low_delay=1;
761         break;
762     case CODEC_ID_WMV2:
763         s->out_format = FMT_H263;
764         s->h263_msmpeg4 = 1;
765         s->h263_pred = 1;
766         s->unrestricted_mv = 1;
767         s->msmpeg4_version= 5;
768         s->flipflop_rounding=1;
769         avctx->delay=0;
770         s->low_delay=1;
771         break;
772 #endif
773     default:
774         return -1;
775     }
776     
777     { /* set up some safe defaults, some codecs might override them later */
778         static int done=0;
779         if(!done){
780             int i;
781             done=1;
782
783             default_mv_penalty= av_mallocz( sizeof(uint8_t)*(MAX_FCODE+1)*(2*MAX_MV+1) );
784             memset(default_mv_penalty, 0, sizeof(uint8_t)*(MAX_FCODE+1)*(2*MAX_MV+1));
785             memset(default_fcode_tab , 0, sizeof(uint8_t)*(2*MAX_MV+1));
786
787             for(i=-16; i<16; i++){
788                 default_fcode_tab[i + MAX_MV]= 1;
789             }
790         }
791     }
792     s->me.mv_penalty= default_mv_penalty;
793     s->fcode_tab= default_fcode_tab;
794     s->y_dc_scale_table=
795     s->c_dc_scale_table= ff_mpeg1_dc_scale_table;
796  
797     /* don't use the mv_penalty table for crap MV as it would be confused */
798     //FIXME remove after fixing / removing old ME
799     if (s->me_method < ME_EPZS) s->me.mv_penalty = default_mv_penalty;
800
801     s->encoding = 1;
802
803     /* init */
804     if (MPV_common_init(s) < 0)
805         return -1;
806     
807     ff_init_me(s);
808
809 #ifdef CONFIG_ENCODERS
810 #ifdef CONFIG_RISKY
811     if (s->out_format == FMT_H263)
812         h263_encode_init(s);
813     if(s->msmpeg4_version)
814         ff_msmpeg4_encode_init(s);
815 #endif
816     if (s->out_format == FMT_MPEG1)
817         ff_mpeg1_encode_init(s);
818 #endif
819
820     /* init default q matrix */
821     for(i=0;i<64;i++) {
822         int j= s->dsp.idct_permutation[i];
823 #ifdef CONFIG_RISKY
824         if(s->codec_id==CODEC_ID_MPEG4 && s->mpeg_quant){
825             s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
826             s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
827         }else if(s->out_format == FMT_H263){
828             s->intra_matrix[j] =
829             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
830         }else
831 #endif
832         { /* mpeg1/2 */
833             s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
834             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
835         }
836         if(s->avctx->intra_matrix)
837             s->intra_matrix[j] = s->avctx->intra_matrix[i];
838         if(s->avctx->inter_matrix)
839             s->inter_matrix[j] = s->avctx->inter_matrix[i];
840     }
841
842     /* precompute matrix */
843     /* for mjpeg, we do include qscale in the matrix */
844     if (s->out_format != FMT_MJPEG) {
845         convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16, s->q_intra_matrix16_bias, 
846                        s->intra_matrix, s->intra_quant_bias, 1, 31);
847         convert_matrix(s, s->q_inter_matrix, s->q_inter_matrix16, s->q_inter_matrix16_bias, 
848                        s->inter_matrix, s->inter_quant_bias, 1, 31);
849     }
850
851     if(ff_rate_control_init(s) < 0)
852         return -1;
853
854     s->picture_number = 0;
855     s->picture_in_gop_number = 0;
856     s->fake_picture_number = 0;
857     /* motion detector init */
858     s->f_code = 1;
859     s->b_code = 1;
860
861     return 0;
862 }
863
864 int MPV_encode_end(AVCodecContext *avctx)
865 {
866     MpegEncContext *s = avctx->priv_data;
867
868 #ifdef STATS
869     print_stats();
870 #endif
871
872     ff_rate_control_uninit(s);
873
874     MPV_common_end(s);
875     if (s->out_format == FMT_MJPEG)
876         mjpeg_close(s);
877         
878     av_freep(&avctx->extradata);
879       
880     return 0;
881 }
882
883 #endif //CONFIG_ENCODERS
884
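/* init_rl() precomputes, for a run-level VLC table, three small lookup arrays,
   once for "not last coefficient" entries and once for "last" entries:
     max_level[run] - largest level that has a regular code for this run
     max_run[level] - largest run that has a regular code for this level
     index_run[run] - index of the first table entry with this run
   A (run, level) pair outside these limits has to be escape-coded. */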
885 void init_rl(RLTable *rl)
886 {
887     int8_t max_level[MAX_RUN+1], max_run[MAX_LEVEL+1];
888     uint8_t index_run[MAX_RUN+1];
889     int last, run, level, start, end, i;
890
891     /* compute max_level[], max_run[] and index_run[] */
892     for(last=0;last<2;last++) {
893         if (last == 0) {
894             start = 0;
895             end = rl->last;
896         } else {
897             start = rl->last;
898             end = rl->n;
899         }
900
901         memset(max_level, 0, MAX_RUN + 1);
902         memset(max_run, 0, MAX_LEVEL + 1);
903         memset(index_run, rl->n, MAX_RUN + 1);
904         for(i=start;i<end;i++) {
905             run = rl->table_run[i];
906             level = rl->table_level[i];
907             if (index_run[run] == rl->n)
908                 index_run[run] = i;
909             if (level > max_level[run])
910                 max_level[run] = level;
911             if (run > max_run[level])
912                 max_run[level] = run;
913         }
914         rl->max_level[last] = av_malloc(MAX_RUN + 1);
915         memcpy(rl->max_level[last], max_level, MAX_RUN + 1);
916         rl->max_run[last] = av_malloc(MAX_LEVEL + 1);
917         memcpy(rl->max_run[last], max_run, MAX_LEVEL + 1);
918         rl->index_run[last] = av_malloc(MAX_RUN + 1);
919         memcpy(rl->index_run[last], index_run, MAX_RUN + 1);
920     }
921 }
922
923 /* draw the edges of width 'w' of an image of size width, height */
924 //FIXME check that this is ok for mpeg4 interlaced
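/* The border pixels are replicated outwards by 'w' pixels on every side so
   that motion compensation may reference areas slightly outside the coded
   picture (unrestricted motion vectors) without any per-pixel clipping. */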
925 static void draw_edges_c(uint8_t *buf, int wrap, int width, int height, int w)
926 {
927     uint8_t *ptr, *last_line;
928     int i;
929
930     last_line = buf + (height - 1) * wrap;
931     for(i=0;i<w;i++) {
932         /* top and bottom */
933         memcpy(buf - (i + 1) * wrap, buf, width);
934         memcpy(last_line + (i + 1) * wrap, last_line, width);
935     }
936     /* left and right */
937     ptr = buf;
938     for(i=0;i<height;i++) {
939         memset(ptr - w, ptr[0], w);
940         memset(ptr + width, ptr[width-1], w);
941         ptr += wrap;
942     }
943     /* corners */
944     for(i=0;i<w;i++) {
945         memset(buf - (i + 1) * wrap - w, buf[0], w); /* top left */
946         memset(buf - (i + 1) * wrap + width, buf[width-1], w); /* top right */
947         memset(last_line + (i + 1) * wrap - w, last_line[0], w); /* bottom left */
948         memset(last_line + (i + 1) * wrap + width, last_line[width-1], w); /* bottom right */
949     }
950 }
951
952 static int find_unused_picture(MpegEncContext *s, int shared){
953     int i;
954     
955     if(shared){
956         for(i=0; i<MAX_PICTURE_COUNT; i++){
957             if(s->picture[i].data[0]==NULL && s->picture[i].type==0) break;
958         }
959     }else{
960         for(i=0; i<MAX_PICTURE_COUNT; i++){
961             if(s->picture[i].data[0]==NULL && s->picture[i].type!=0) break; //FIXME
962         }
963         for(i=0; i<MAX_PICTURE_COUNT; i++){
964             if(s->picture[i].data[0]==NULL) break;
965         }
966     }
967
968     assert(i<MAX_PICTURE_COUNT);
969     return i;
970 }
971
972 /* generic function for encode/decode called before a frame is coded/decoded */
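/* Reference handling: on every non-B frame the reference pictures rotate, the
   previous next_picture becomes last_picture and the frame about to be coded
   becomes next_picture; B-frames predict from last/next and never become
   references themselves. */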
973 int MPV_frame_start(MpegEncContext *s, AVCodecContext *avctx)
974 {
975     int i;
976     AVFrame *pic;
977
978     s->mb_skiped = 0;
979
980     assert(s->last_picture_ptr==NULL || s->out_format != FMT_H264 || s->codec_id == CODEC_ID_SVQ3);
981
982     /* mark&release old frames */
983     if (s->pict_type != B_TYPE && s->last_picture_ptr && s->last_picture_ptr->data[0]) {
984         avctx->release_buffer(avctx, (AVFrame*)s->last_picture_ptr);
985
986         /* release forgotten pictures */
987         /* if(mpeg124/h263) */
988         if(!s->encoding){
989             for(i=0; i<MAX_PICTURE_COUNT; i++){
990                 if(s->picture[i].data[0] && &s->picture[i] != s->next_picture_ptr && s->picture[i].reference){
991                     fprintf(stderr, "releasing zombie picture\n");
992                     avctx->release_buffer(avctx, (AVFrame*)&s->picture[i]);                
993                 }
994             }
995         }
996     }
997 alloc:
998     if(!s->encoding){
999         /* release non-reference frames */
1000         for(i=0; i<MAX_PICTURE_COUNT; i++){
1001             if(s->picture[i].data[0] && !s->picture[i].reference /*&& s->picture[i].type!=FF_BUFFER_TYPE_SHARED*/){
1002                 s->avctx->release_buffer(s->avctx, (AVFrame*)&s->picture[i]);
1003             }
1004         }
1005
1006         i= find_unused_picture(s, 0);
1007     
1008         pic= (AVFrame*)&s->picture[i];
1009         pic->reference= s->pict_type != B_TYPE ? 3 : 0;
1010
1011         if(s->current_picture_ptr)
1012             pic->coded_picture_number= s->current_picture_ptr->coded_picture_number+1;
1013         
1014         if( alloc_picture(s, (Picture*)pic, 0) < 0)
1015             return -1;
1016
1017         s->current_picture_ptr= &s->picture[i];
1018     }
1019
1020     s->current_picture_ptr->pict_type= s->pict_type;
1021     s->current_picture_ptr->quality= s->qscale;
1022     s->current_picture_ptr->key_frame= s->pict_type == I_TYPE;
1023
1024     s->current_picture= *s->current_picture_ptr;
1025   
1026   if(s->out_format != FMT_H264 || s->codec_id == CODEC_ID_SVQ3){
1027     if (s->pict_type != B_TYPE) {
1028         s->last_picture_ptr= s->next_picture_ptr;
1029         s->next_picture_ptr= s->current_picture_ptr;
1030     }
1031     
1032     if(s->last_picture_ptr) s->last_picture= *s->last_picture_ptr;
1033     if(s->next_picture_ptr) s->next_picture= *s->next_picture_ptr;
1034     if(s->new_picture_ptr ) s->new_picture = *s->new_picture_ptr;
1035     
1036     if(s->pict_type != I_TYPE && s->last_picture_ptr==NULL){
1037         fprintf(stderr, "warning: first frame is not a keyframe\n");
1038         assert(s->pict_type != B_TYPE); //these should have been dropped if we don't have a reference
1039         goto alloc;
1040     }
1041
1042     assert(s->pict_type == I_TYPE || (s->last_picture_ptr && s->last_picture_ptr->data[0]));
1043
1044     if(s->picture_structure!=PICT_FRAME){
1045         int i;
1046         for(i=0; i<4; i++){
1047             if(s->picture_structure == PICT_BOTTOM_FIELD){
1048                  s->current_picture.data[i] += s->current_picture.linesize[i];
1049             } 
1050             s->current_picture.linesize[i] *= 2;
1051             s->last_picture.linesize[i] *=2;
1052             s->next_picture.linesize[i] *=2;
1053         }
1054     }
1055   }
1056    
1057     s->hurry_up= s->avctx->hurry_up;
1058     s->error_resilience= avctx->error_resilience;
1059
1060     /* set the dequantizer; we can't do it during init as it might change for MPEG-4,
1061        and we can't do it in the header decode as init isn't called for MPEG-4 there yet */
1062     if(s->mpeg_quant || s->codec_id == CODEC_ID_MPEG2VIDEO) 
1063         s->dct_unquantize = s->dct_unquantize_mpeg2;
1064     else if(s->out_format == FMT_H263)
1065         s->dct_unquantize = s->dct_unquantize_h263;
1066     else 
1067         s->dct_unquantize = s->dct_unquantize_mpeg1;
1068
1069 #ifdef HAVE_XVMC
1070     if(s->avctx->xvmc_acceleration)
1071         return XVMC_field_start(s, avctx);
1072 #endif
1073     return 0;
1074 }
1075
1076 /* generic function for encode/decode called after a frame has been coded/decoded */
1077 void MPV_frame_end(MpegEncContext *s)
1078 {
1079     int i;
1080     /* draw edge for correct motion prediction if outside */
1081 #ifdef HAVE_XVMC
1082 //just to make sure that all data is rendered.
1083     if(s->avctx->xvmc_acceleration){
1084         XVMC_field_end(s);
1085     }else
1086 #endif
1087     if(s->codec_id!=CODEC_ID_SVQ1 && s->out_format != FMT_MPEG1){
1088         if (s->pict_type != B_TYPE && !s->intra_only && !(s->flags&CODEC_FLAG_EMU_EDGE)) {
1089             draw_edges(s->current_picture.data[0], s->linesize  , s->h_edge_pos   , s->v_edge_pos   , EDGE_WIDTH  );
1090             draw_edges(s->current_picture.data[1], s->uvlinesize, s->h_edge_pos>>1, s->v_edge_pos>>1, EDGE_WIDTH/2);
1091             draw_edges(s->current_picture.data[2], s->uvlinesize, s->h_edge_pos>>1, s->v_edge_pos>>1, EDGE_WIDTH/2);
1092         }
1093     }
1094     emms_c();
1095     
1096     s->last_pict_type    = s->pict_type;
1097     if(s->pict_type!=B_TYPE){
1098         s->last_non_b_pict_type= s->pict_type;
1099     }
1100 #if 0
1101         /* copy back current_picture variables */
1102     for(i=0; i<MAX_PICTURE_COUNT; i++){
1103         if(s->picture[i].data[0] == s->current_picture.data[0]){
1104             s->picture[i]= s->current_picture;
1105             break;
1106         }    
1107     }
1108     assert(i<MAX_PICTURE_COUNT);
1109 #endif    
1110
1111     if(s->encoding){
1112         /* release non-reference frames */
1113         for(i=0; i<MAX_PICTURE_COUNT; i++){
1114             if(s->picture[i].data[0] && !s->picture[i].reference /*&& s->picture[i].type!=FF_BUFFER_TYPE_SHARED*/){
1115                 s->avctx->release_buffer(s->avctx, (AVFrame*)&s->picture[i]);
1116             }
1117         }
1118     }
1119     // clear copies, to avoid confusion
1120 #if 0
1121     memset(&s->last_picture, 0, sizeof(Picture));
1122     memset(&s->next_picture, 0, sizeof(Picture));
1123     memset(&s->current_picture, 0, sizeof(Picture));
1124 #endif
1125 }
1126
1127 /**
1128  * draws a line from (ex, ey) -> (sx, sy).
1129  * @param w width of the image
1130  * @param h height of the image
1131  * @param stride stride/linesize of the image
1132  * @param color color of the line
1133  */
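/* Simple fixed-point DDA: the line is stepped along its major axis while the
   minor coordinate advances by f = (minor_delta<<16)/major_delta per step,
   rounded to the nearest pixel by adding (1<<15) before the >>16. */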
1134 static void draw_line(uint8_t *buf, int sx, int sy, int ex, int ey, int w, int h, int stride, int color){
1135     int t, x, y, f;
1136     
1137     sx= clip(sx, 0, w-1);
1138     sy= clip(sy, 0, h-1);
1139     ex= clip(ex, 0, w-1);
1140     ey= clip(ey, 0, h-1);
1141     
1142     buf[sy*stride + sx]+= color;
1143     
1144     if(ABS(ex - sx) > ABS(ey - sy)){
1145         if(sx > ex){
1146             t=sx; sx=ex; ex=t;
1147             t=sy; sy=ey; ey=t;
1148         }
1149         buf+= sx + sy*stride;
1150         ex-= sx;
1151         f= ((ey-sy)<<16)/ex;
1152         for(x= 0; x <= ex; x++){
1153             y= ((x*f) + (1<<15))>>16;
1154             buf[y*stride + x]+= color;
1155         }
1156     }else{
1157         if(sy > ey){
1158             t=sx; sx=ex; ex=t;
1159             t=sy; sy=ey; ey=t;
1160         }
1161         buf+= sx + sy*stride;
1162         ey-= sy;
1163         if(ey) f= ((ex-sx)<<16)/ey;
1164         else   f= 0;
1165         for(y= 0; y <= ey; y++){
1166             x= ((y*f) + (1<<15))>>16;
1167             buf[y*stride + x]+= color;
1168         }
1169     }
1170 }
1171
1172 /**
1173  * draws an arrow from (ex, ey) -> (sx, sy).
1174  * @param w width of the image
1175  * @param h height of the image
1176  * @param stride stride/linesize of the image
1177  * @param color color of the arrow
1178  */
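/* The arrow head is built from the direction vector (dx, dy) rotated by +/-45
   degrees ((dx+dy, -dx+dy) and its 90-degree rotation), normalized via
   ff_sqrt() to barbs roughly 3 pixels long, after which the shaft itself is
   drawn with draw_line(). */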
1179 static void draw_arrow(uint8_t *buf, int sx, int sy, int ex, int ey, int w, int h, int stride, int color){ 
1180     int dx,dy;
1181
1182     sx= clip(sx, -100, w+100);
1183     sy= clip(sy, -100, h+100);
1184     ex= clip(ex, -100, w+100);
1185     ey= clip(ey, -100, h+100);
1186     
1187     dx= ex - sx;
1188     dy= ey - sy;
1189     
1190     if(dx*dx + dy*dy > 3*3){
1191         int rx=  dx + dy;
1192         int ry= -dx + dy;
1193         int length= ff_sqrt((rx*rx + ry*ry)<<8);
1194         
1195         //FIXME subpixel accuracy
1196         rx= ROUNDED_DIV(rx*3<<4, length);
1197         ry= ROUNDED_DIV(ry*3<<4, length);
1198         
1199         draw_line(buf, sx, sy, sx + rx, sy + ry, w, h, stride, color);
1200         draw_line(buf, sx, sy, sx - ry, sy + rx, w, h, stride, color);
1201     }
1202     draw_line(buf, sx, sy, ex, ey, w, h, stride, color);
1203 }
1204
1205 /**
1206  * prints debugging info for the given picture.
1207  */
1208 void ff_print_debug_info(MpegEncContext *s, Picture *pict){
1209
1210     if(!pict || !pict->mb_type) return;
1211
1212     if(s->avctx->debug&(FF_DEBUG_SKIP | FF_DEBUG_QP | FF_DEBUG_MB_TYPE)){
1213         int x,y;
1214
1215         for(y=0; y<s->mb_height; y++){
1216             for(x=0; x<s->mb_width; x++){
1217                 if(s->avctx->debug&FF_DEBUG_SKIP){
1218                     int count= s->mbskip_table[x + y*s->mb_stride];
1219                     if(count>9) count=9;
1220                     printf("%1d", count);
1221                 }
1222                 if(s->avctx->debug&FF_DEBUG_QP){
1223                     printf("%2d", pict->qscale_table[x + y*s->mb_stride]);
1224                 }
1225                 if(s->avctx->debug&FF_DEBUG_MB_TYPE){
1226                     int mb_type= pict->mb_type[x + y*s->mb_stride];
1227                     
1228                     //Type & MV direction
1229                     if(IS_PCM(mb_type))
1230                         printf("P");
1231                     else if(IS_INTRA(mb_type) && IS_ACPRED(mb_type))
1232                         printf("A");
1233                     else if(IS_INTRA4x4(mb_type))
1234                         printf("i");
1235                     else if(IS_INTRA16x16(mb_type))
1236                         printf("I");
1237                     else if(IS_DIRECT(mb_type) && IS_SKIP(mb_type))
1238                         printf("d");
1239                     else if(IS_DIRECT(mb_type))
1240                         printf("D");
1241                     else if(IS_GMC(mb_type) && IS_SKIP(mb_type))
1242                         printf("g");
1243                     else if(IS_GMC(mb_type))
1244                         printf("G");
1245                     else if(IS_SKIP(mb_type))
1246                         printf("S");
1247                     else if(!USES_LIST(mb_type, 1))
1248                         printf(">");
1249                     else if(!USES_LIST(mb_type, 0))
1250                         printf("<");
1251                     else{
1252                         assert(USES_LIST(mb_type, 0) && USES_LIST(mb_type, 1));
1253                         printf("X");
1254                     }
1255                     
1256                     //segmentation
1257                     if(IS_8X8(mb_type))
1258                         printf("+");
1259                     else if(IS_16X8(mb_type))
1260                         printf("-");
1261                     else if(IS_8X16(mb_type))
1262                         printf("¦");
1263                     else if(IS_INTRA(mb_type) || IS_16X16(mb_type))
1264                         printf(" ");
1265                     else
1266                         printf("?");
1267                     
1268                         
1269                     if(IS_INTERLACED(mb_type) && s->codec_id == CODEC_ID_H264)
1270                         printf("=");
1271                     else
1272                         printf(" ");
1273                 }
1274 //                printf(" ");
1275             }
1276             printf("\n");
1277         }
1278     }
1279     
1280     if((s->avctx->debug&FF_DEBUG_VIS_MV) && s->motion_val){
1281         const int shift= 1 + s->quarter_sample;
1282         int mb_y;
1283         uint8_t *ptr= pict->data[0];
1284         s->low_delay=0; //needed to see the vectors without trashing the buffers
1285
1286         for(mb_y=0; mb_y<s->mb_height; mb_y++){
1287             int mb_x;
1288             for(mb_x=0; mb_x<s->mb_width; mb_x++){
1289                 const int mb_index= mb_x + mb_y*s->mb_stride;
1290                 if(IS_8X8(s->current_picture.mb_type[mb_index])){
1291                     int i;
1292                     for(i=0; i<4; i++){
1293                         int sx= mb_x*16 + 4 + 8*(i&1);
1294                         int sy= mb_y*16 + 4 + 8*(i>>1);
1295                         int xy= 1 + mb_x*2 + (i&1) + (mb_y*2 + 1 + (i>>1))*(s->mb_width*2 + 2);
1296                         int mx= (s->motion_val[xy][0]>>shift) + sx;
1297                         int my= (s->motion_val[xy][1]>>shift) + sy;
1298                         draw_arrow(ptr, sx, sy, mx, my, s->width, s->height, s->linesize, 100);
1299                     }
1300                 }else{
1301                     int sx= mb_x*16 + 8;
1302                     int sy= mb_y*16 + 8;
1303                     int xy= 1 + mb_x*2 + (mb_y*2 + 1)*(s->mb_width*2 + 2);
1304                     int mx= (s->motion_val[xy][0]>>shift) + sx;
1305                     int my= (s->motion_val[xy][1]>>shift) + sy;
1306                     draw_arrow(ptr, sx, sy, mx, my, s->width, s->height, s->linesize, 100);
1307                 }
1308                 s->mbskip_table[mb_index]=0;
1309             }
1310         }
1311     }
1312 }
1313
1314 #ifdef CONFIG_ENCODERS
1315
1316 static int get_sae(uint8_t *src, int ref, int stride){
1317     int x,y;
1318     int acc=0;
1319     
1320     for(y=0; y<16; y++){
1321         for(x=0; x<16; x++){
1322             acc+= ABS(src[x+y*stride] - ref);
1323         }
1324     }
1325     
1326     return acc;
1327 }
1328
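/* B-frame / scene-change heuristic helpers: get_sae() sums the absolute
   difference of a 16x16 block against a constant (its mean), and
   get_intra_count() counts how many macroblocks look cheaper to code as intra
   (SAE against the block mean, plus a margin of 500) than as inter (SAD
   against the reference frame). */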
1329 static int get_intra_count(MpegEncContext *s, uint8_t *src, uint8_t *ref, int stride){
1330     int x, y, w, h;
1331     int acc=0;
1332     
1333     w= s->width &~15;
1334     h= s->height&~15;
1335     
1336     for(y=0; y<h; y+=16){
1337         for(x=0; x<w; x+=16){
1338             int offset= x + y*stride;
1339             int sad = s->dsp.pix_abs16x16(src + offset, ref + offset, stride);
1340             int mean= (s->dsp.pix_sum(src + offset, stride) + 128)>>8;
1341             int sae = get_sae(src + offset, mean, stride);
1342             
1343             acc+= sae + 500 < sad;
1344         }
1345     }
1346     return acc;
1347 }
1348
1349
1350 static int load_input_picture(MpegEncContext *s, AVFrame *pic_arg){
1351     AVFrame *pic=NULL;
1352     int i;
1353     const int encoding_delay= s->max_b_frames;
1354     int direct=1;
1355     
1356   if(pic_arg){
1357     if(encoding_delay && !(s->flags&CODEC_FLAG_INPUT_PRESERVED)) direct=0;
1358     if(pic_arg->linesize[0] != s->linesize) direct=0;
1359     if(pic_arg->linesize[1] != s->uvlinesize) direct=0;
1360     if(pic_arg->linesize[2] != s->uvlinesize) direct=0;
1361   
1362 //    printf("%d %d %d %d\n",pic_arg->linesize[0], pic_arg->linesize[1], s->linesize, s->uvlinesize);
1363     
1364     if(direct){
1365         i= find_unused_picture(s, 1);
1366
1367         pic= (AVFrame*)&s->picture[i];
1368         pic->reference= 3;
1369     
1370         for(i=0; i<4; i++){
1371             pic->data[i]= pic_arg->data[i];
1372             pic->linesize[i]= pic_arg->linesize[i];
1373         }
1374         alloc_picture(s, (Picture*)pic, 1);
1375     }else{
1376         i= find_unused_picture(s, 0);
1377
1378         pic= (AVFrame*)&s->picture[i];
1379         pic->reference= 3;
1380
1381         alloc_picture(s, (Picture*)pic, 0);
1382         for(i=0; i<4; i++){
1383             /* the input will be 16 pixels to the right relative to the actual buffer start
1384              * and the current_pic, so the buffer can be reused; yes, it's not beautiful
1385              */
1386             pic->data[i]+= 16; 
1387         }
1388
1389         if(   pic->data[0] == pic_arg->data[0] 
1390            && pic->data[1] == pic_arg->data[1]
1391            && pic->data[2] == pic_arg->data[2]){
1392        // empty
1393         }else{
1394             int h_chroma_shift, v_chroma_shift;
1395             avcodec_get_chroma_sub_sample(s->avctx->pix_fmt, &h_chroma_shift, &v_chroma_shift);
1396         
1397             for(i=0; i<3; i++){
1398                 int src_stride= pic_arg->linesize[i];
1399                 int dst_stride= i ? s->uvlinesize : s->linesize;
1400                 int h_shift= i ? h_chroma_shift : 0;
1401                 int v_shift= i ? v_chroma_shift : 0;
1402                 int w= s->width >>h_shift;
1403                 int h= s->height>>v_shift;
1404                 uint8_t *src= pic_arg->data[i];
1405                 uint8_t *dst= pic->data[i];
1406             
1407                 if(src_stride==dst_stride)
1408                     memcpy(dst, src, src_stride*h);
1409                 else{
1410                     while(h--){
1411                         memcpy(dst, src, w);
1412                         dst += dst_stride;
1413                         src += src_stride;
1414                     }
1415                 }
1416             }
1417         }
1418     }
1419     pic->quality= pic_arg->quality;
1420     pic->pict_type= pic_arg->pict_type;
1421     pic->pts = pic_arg->pts;
1422     
1423     if(s->input_picture[encoding_delay])
1424         pic->display_picture_number= s->input_picture[encoding_delay]->display_picture_number + 1;
1425     
1426   }
1427
1428     /* shift buffer entries */
1429     for(i=1; i<MAX_PICTURE_COUNT /*s->encoding_delay+1*/; i++)
1430         s->input_picture[i-1]= s->input_picture[i];
1431         
1432     s->input_picture[encoding_delay]= (Picture*)pic;
1433
1434     return 0;
1435 }
1436
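/* select_input_picture() reorders frames from display order into coding order:
   it decides how many queued input pictures become B-frames (from user-forced
   picture types, two-pass statistics or the intra-count heuristic above),
   forces an I-frame at GOP boundaries and assigns coded picture numbers
   accordingly. */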
1437 static void select_input_picture(MpegEncContext *s){
1438     int i;
1439     int coded_pic_num=0;    
1440
1441     if(s->reordered_input_picture[0])
1442         coded_pic_num= s->reordered_input_picture[0]->coded_picture_number + 1;
1443
1444     for(i=1; i<MAX_PICTURE_COUNT; i++)
1445         s->reordered_input_picture[i-1]= s->reordered_input_picture[i];
1446     s->reordered_input_picture[MAX_PICTURE_COUNT-1]= NULL;
1447
1448     /* set next picture types & ordering */
1449     if(s->reordered_input_picture[0]==NULL && s->input_picture[0]){
1450         if(/*s->picture_in_gop_number >= s->gop_size ||*/ s->next_picture_ptr==NULL || s->intra_only){
1451             s->reordered_input_picture[0]= s->input_picture[0];
1452             s->reordered_input_picture[0]->pict_type= I_TYPE;
1453             s->reordered_input_picture[0]->coded_picture_number= coded_pic_num;
1454         }else{
1455             int b_frames;
1456             
1457             if(s->flags&CODEC_FLAG_PASS2){
1458                 for(i=0; i<s->max_b_frames+1; i++){
1459                     int pict_num= s->input_picture[0]->display_picture_number + i;
1460                     int pict_type= s->rc_context.entry[pict_num].new_pict_type;
1461                     s->input_picture[i]->pict_type= pict_type;
1462                     
1463                     if(i + 1 >= s->rc_context.num_entries) break;
1464                 }
1465             }
1466
1467             if(s->input_picture[0]->pict_type){
1468                 /* user selected pict_type */
1469                 for(b_frames=0; b_frames<s->max_b_frames+1; b_frames++){
1470                     if(s->input_picture[b_frames]->pict_type!=B_TYPE) break;
1471                 }
1472             
1473                 if(b_frames > s->max_b_frames){
1474                     fprintf(stderr, "warning: too many B-frames in a row\n");
1475                     b_frames = s->max_b_frames;
1476                 }
1477             }else if(s->b_frame_strategy==0){
1478                 b_frames= s->max_b_frames;
1479                 while(b_frames && !s->input_picture[b_frames]) b_frames--;
1480             }else if(s->b_frame_strategy==1){
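                /* strategy 1 (heuristic, as implemented below): score each candidate
                   B-frame by how poorly it is predicted from the previous input frame
                   (intra-count based) and end the B-frame run at the first frame that
                   changes too much */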
1481                 for(i=1; i<s->max_b_frames+1; i++){
1482                     if(s->input_picture[i] && s->input_picture[i]->b_frame_score==0){
1483                         s->input_picture[i]->b_frame_score= 
1484                             get_intra_count(s, s->input_picture[i  ]->data[0], 
1485                                                s->input_picture[i-1]->data[0], s->linesize) + 1;
1486                     }
1487                 }
1488                 for(i=0; i<s->max_b_frames; i++){
1489                     if(s->input_picture[i]==NULL || s->input_picture[i]->b_frame_score - 1 > s->mb_num/40) break;
1490                 }
1491                                 
1492                 b_frames= FFMAX(0, i-1);
1493                 
1494                 /* reset scores */
1495                 for(i=0; i<b_frames+1; i++){
1496                     s->input_picture[i]->b_frame_score=0;
1497                 }
1498             }else{
1499                 fprintf(stderr, "illegal b frame strategy\n");
1500                 b_frames=0;
1501             }
1502
1503             emms_c();
1504 //static int b_count=0;
1505 //b_count+= b_frames;
1506 //printf("b_frames: %d\n", b_count);
1507                         
1508             s->reordered_input_picture[0]= s->input_picture[b_frames];
1509             if(   s->picture_in_gop_number + b_frames >= s->gop_size 
1510                || s->reordered_input_picture[0]->pict_type== I_TYPE)
1511                 s->reordered_input_picture[0]->pict_type= I_TYPE;
1512             else
1513                 s->reordered_input_picture[0]->pict_type= P_TYPE;
1514             s->reordered_input_picture[0]->coded_picture_number= coded_pic_num;
1515             for(i=0; i<b_frames; i++){
1516                 coded_pic_num++;
1517                 s->reordered_input_picture[i+1]= s->input_picture[i];
1518                 s->reordered_input_picture[i+1]->pict_type= B_TYPE;
1519                 s->reordered_input_picture[i+1]->coded_picture_number= coded_pic_num;
1520             }
1521         }
1522     }
1523     
1524     if(s->reordered_input_picture[0]){
1525         s->reordered_input_picture[0]->reference= s->reordered_input_picture[0]->pict_type!=B_TYPE ? 3 : 0;
1526
1527         s->new_picture= *s->reordered_input_picture[0];
1528
1529         if(s->reordered_input_picture[0]->type == FF_BUFFER_TYPE_SHARED){
1530             // input is a shared pix, so we can't modify it -> allocate a new one & ensure that the shared one stays reusable
1531         
1532             int i= find_unused_picture(s, 0);
1533             Picture *pic= &s->picture[i];
1534
1535             /* mark us unused / free shared pic */
1536             for(i=0; i<4; i++)
1537                 s->reordered_input_picture[0]->data[i]= NULL;
1538             s->reordered_input_picture[0]->type= 0;
1539             
1540             //FIXME bad, should copy everything except the buffer pointers instead of listing fields
1541             pic->pict_type = s->reordered_input_picture[0]->pict_type;
1542             pic->quality   = s->reordered_input_picture[0]->quality;
1543             pic->coded_picture_number = s->reordered_input_picture[0]->coded_picture_number;
1544             pic->reference = s->reordered_input_picture[0]->reference;
1545             pic->pts = s->reordered_input_picture[0]->pts;
1546             
1547             alloc_picture(s, pic, 0);
1548
1549             s->current_picture_ptr= pic;
1550         }else{
1551             // input is not a shared pix -> reuse buffer for current_pix
1552
1553             assert(   s->reordered_input_picture[0]->type==FF_BUFFER_TYPE_USER 
1554                    || s->reordered_input_picture[0]->type==FF_BUFFER_TYPE_INTERNAL);
1555             
1556             s->current_picture_ptr= s->reordered_input_picture[0];
1557             for(i=0; i<4; i++){
1558                 //reverse the +16 we did before storing the input
1559                 s->current_picture_ptr->data[i]-=16;
1560             }
1561         }
1562         s->current_picture= *s->current_picture_ptr;
1563     
1564         s->picture_number= s->new_picture.display_picture_number;
1565 //printf("dpn:%d\n", s->picture_number);
1566     }else{
1567        memset(&s->new_picture, 0, sizeof(Picture));
1568     }
1569 }
1570
1571 int MPV_encode_picture(AVCodecContext *avctx,
1572                        unsigned char *buf, int buf_size, void *data)
1573 {
1574     MpegEncContext *s = avctx->priv_data;
1575     AVFrame *pic_arg = data;
1576     int i;
1577
1578     if(avctx->pix_fmt != PIX_FMT_YUV420P){
1579         fprintf(stderr, "this codec supports only YUV420P\n");
1580         return -1;
1581     }
1582     
1583     init_put_bits(&s->pb, buf, buf_size, NULL, NULL);
1584
1585     s->picture_in_gop_number++;
1586
1587     load_input_picture(s, pic_arg);
1588     
1589     select_input_picture(s);
1590     
1591     /* output? */
1592     if(s->new_picture.data[0]){
1593
1594         s->pict_type= s->new_picture.pict_type;
1595         if (s->fixed_qscale){ /* the ratecontrol needs the last qscale so we don't touch it for CBR */
1596             s->qscale= (int)(s->new_picture.quality+0.5);
1597             assert(s->qscale);
1598         }
1599 //emms_c();
1600 //printf("qs:%f %f %d\n", s->new_picture.quality, s->current_picture.quality, s->qscale);
1601         MPV_frame_start(s, avctx);
1602
1603         encode_picture(s, s->picture_number);
1604         
1605         avctx->real_pict_num  = s->picture_number;
1606         avctx->header_bits = s->header_bits;
1607         avctx->mv_bits     = s->mv_bits;
1608         avctx->misc_bits   = s->misc_bits;
1609         avctx->i_tex_bits  = s->i_tex_bits;
1610         avctx->p_tex_bits  = s->p_tex_bits;
1611         avctx->i_count     = s->i_count;
1612         avctx->p_count     = s->mb_num - s->i_count - s->skip_count; //FIXME f/b_count in avctx
1613         avctx->skip_count  = s->skip_count;
1614
1615         MPV_frame_end(s);
1616
1617         if (s->out_format == FMT_MJPEG)
1618             mjpeg_picture_trailer(s);
1619         
1620         if(s->flags&CODEC_FLAG_PASS1)
1621             ff_write_pass1_stats(s);
1622
1623         for(i=0; i<4; i++){
1624             avctx->error[i] += s->current_picture_ptr->error[i];
1625         }
1626     }
1627
1628     s->input_picture_number++;
1629
1630     flush_put_bits(&s->pb);
1631     s->frame_bits  = (pbBufPtr(&s->pb) - s->pb.buf) * 8;
1632     
1633     s->total_bits += s->frame_bits;
1634     avctx->frame_bits  = s->frame_bits;
1635     
1636     return pbBufPtr(&s->pb) - s->pb.buf;
1637 }
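/*
 * Hedged usage sketch (not part of this file): applications normally reach
 * MPV_encode_picture() through the public avcodec_encode_video() wrapper
 * rather than calling it directly.  "outbuf", "frame" and "outfile" are
 * hypothetical caller-side names and the error handling is simplified.
 */
#if 0
    uint8_t outbuf[256*1024];
    int size = avcodec_encode_video(avctx, outbuf, sizeof(outbuf), frame);
    if (size > 0)
        fwrite(outbuf, 1, size, outfile);
#endif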
1638
1639 #endif //CONFIG_ENCODERS
1640
1641 static inline void gmc1_motion(MpegEncContext *s,
1642                                uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1643                                int dest_offset,
1644                                uint8_t **ref_picture, int src_offset)
1645 {
1646     uint8_t *ptr;
1647     int offset, src_x, src_y, linesize, uvlinesize;
1648     int motion_x, motion_y;
1649     int emu=0;
1650
1651     motion_x= s->sprite_offset[0][0];
1652     motion_y= s->sprite_offset[0][1];
1653     src_x = s->mb_x * 16 + (motion_x >> (s->sprite_warping_accuracy+1));
1654     src_y = s->mb_y * 16 + (motion_y >> (s->sprite_warping_accuracy+1));
1655     motion_x<<=(3-s->sprite_warping_accuracy);
1656     motion_y<<=(3-s->sprite_warping_accuracy);
1657     src_x = clip(src_x, -16, s->width);
1658     if (src_x == s->width)
1659         motion_x =0;
1660     src_y = clip(src_y, -16, s->height);
1661     if (src_y == s->height)
1662         motion_y =0;
1663
1664     linesize = s->linesize;
1665     uvlinesize = s->uvlinesize;
1666     
1667     ptr = ref_picture[0] + (src_y * linesize) + src_x + src_offset;
1668
1669     dest_y+=dest_offset;
1670     if(s->flags&CODEC_FLAG_EMU_EDGE){
1671         if(src_x<0 || src_y<0 || src_x + 17 >= s->h_edge_pos
1672                               || src_y + 17 >= s->v_edge_pos){
1673             ff_emulated_edge_mc(s->edge_emu_buffer, ptr, linesize, 17, 17, src_x, src_y, s->h_edge_pos, s->v_edge_pos);
1674             ptr= s->edge_emu_buffer;
1675         }
1676     }
1677     
1678     if((motion_x|motion_y)&7){
1679         s->dsp.gmc1(dest_y  , ptr  , linesize, 16, motion_x&15, motion_y&15, 128 - s->no_rounding);
1680         s->dsp.gmc1(dest_y+8, ptr+8, linesize, 16, motion_x&15, motion_y&15, 128 - s->no_rounding);
1681     }else{
1682         int dxy;
1683         
1684         dxy= ((motion_x>>3)&1) | ((motion_y>>2)&2);
1685         if (s->no_rounding){
1686             s->dsp.put_no_rnd_pixels_tab[0][dxy](dest_y, ptr, linesize, 16);
1687         }else{
1688             s->dsp.put_pixels_tab       [0][dxy](dest_y, ptr, linesize, 16);
1689         }
1690     }
1691     
1692     if(s->flags&CODEC_FLAG_GRAY) return;
1693
1694     motion_x= s->sprite_offset[1][0];
1695     motion_y= s->sprite_offset[1][1];
1696     src_x = s->mb_x * 8 + (motion_x >> (s->sprite_warping_accuracy+1));
1697     src_y = s->mb_y * 8 + (motion_y >> (s->sprite_warping_accuracy+1));
1698     motion_x<<=(3-s->sprite_warping_accuracy);
1699     motion_y<<=(3-s->sprite_warping_accuracy);
1700     src_x = clip(src_x, -8, s->width>>1);
1701     if (src_x == s->width>>1)
1702         motion_x =0;
1703     src_y = clip(src_y, -8, s->height>>1);
1704     if (src_y == s->height>>1)
1705         motion_y =0;
1706
1707     offset = (src_y * uvlinesize) + src_x + (src_offset>>1);
1708     ptr = ref_picture[1] + offset;
1709     if(s->flags&CODEC_FLAG_EMU_EDGE){
1710         if(src_x<0 || src_y<0 || src_x + 9 >= s->h_edge_pos>>1
1711                               || src_y + 9 >= s->v_edge_pos>>1){
1712             ff_emulated_edge_mc(s->edge_emu_buffer, ptr, uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
1713             ptr= s->edge_emu_buffer;
1714             emu=1;
1715         }
1716     }
1717     s->dsp.gmc1(dest_cb + (dest_offset>>1), ptr, uvlinesize, 8, motion_x&15, motion_y&15, 128 - s->no_rounding);
1718     
1719     ptr = ref_picture[2] + offset;
1720     if(emu){
1721         ff_emulated_edge_mc(s->edge_emu_buffer, ptr, uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
1722         ptr= s->edge_emu_buffer;
1723     }
1724     s->dsp.gmc1(dest_cr + (dest_offset>>1), ptr, uvlinesize, 8, motion_x&15, motion_y&15, 128 - s->no_rounding);
1725     
1726     return;
1727 }
1728
1729 static inline void gmc_motion(MpegEncContext *s,
1730                                uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1731                                int dest_offset,
1732                                uint8_t **ref_picture, int src_offset)
1733 {
1734     uint8_t *ptr;
1735     int linesize, uvlinesize;
1736     const int a= s->sprite_warping_accuracy;
1737     int ox, oy;
1738
1739     linesize = s->linesize;
1740     uvlinesize = s->uvlinesize;
1741
1742     ptr = ref_picture[0] + src_offset;
1743
1744     dest_y+=dest_offset;
1745     
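    /* evaluate the affine global-motion (sprite) warp at this macroblock's
       top-left luma sample; ox/oy are in the sub-pel units implied by
       sprite_warping_accuracy */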
1746     ox= s->sprite_offset[0][0] + s->sprite_delta[0][0]*s->mb_x*16 + s->sprite_delta[0][1]*s->mb_y*16;
1747     oy= s->sprite_offset[0][1] + s->sprite_delta[1][0]*s->mb_x*16 + s->sprite_delta[1][1]*s->mb_y*16;
1748
1749     s->dsp.gmc(dest_y, ptr, linesize, 16,
1750            ox, 
1751            oy, 
1752            s->sprite_delta[0][0], s->sprite_delta[0][1],
1753            s->sprite_delta[1][0], s->sprite_delta[1][1], 
1754            a+1, (1<<(2*a+1)) - s->no_rounding,
1755            s->h_edge_pos, s->v_edge_pos);
1756     s->dsp.gmc(dest_y+8, ptr, linesize, 16,
1757            ox + s->sprite_delta[0][0]*8, 
1758            oy + s->sprite_delta[1][0]*8, 
1759            s->sprite_delta[0][0], s->sprite_delta[0][1],
1760            s->sprite_delta[1][0], s->sprite_delta[1][1], 
1761            a+1, (1<<(2*a+1)) - s->no_rounding,
1762            s->h_edge_pos, s->v_edge_pos);
1763
1764     if(s->flags&CODEC_FLAG_GRAY) return;
1765
1766
1767     dest_cb+=dest_offset>>1;
1768     dest_cr+=dest_offset>>1;
1769     
1770     ox= s->sprite_offset[1][0] + s->sprite_delta[0][0]*s->mb_x*8 + s->sprite_delta[0][1]*s->mb_y*8;
1771     oy= s->sprite_offset[1][1] + s->sprite_delta[1][0]*s->mb_x*8 + s->sprite_delta[1][1]*s->mb_y*8;
1772
1773     ptr = ref_picture[1] + (src_offset>>1);
1774     s->dsp.gmc(dest_cb, ptr, uvlinesize, 8,
1775            ox, 
1776            oy, 
1777            s->sprite_delta[0][0], s->sprite_delta[0][1],
1778            s->sprite_delta[1][0], s->sprite_delta[1][1], 
1779            a+1, (1<<(2*a+1)) - s->no_rounding,
1780            s->h_edge_pos>>1, s->v_edge_pos>>1);
1781     
1782     ptr = ref_picture[2] + (src_offset>>1);
1783     s->dsp.gmc(dest_cr, ptr, uvlinesize, 8,
1784            ox, 
1785            oy, 
1786            s->sprite_delta[0][0], s->sprite_delta[0][1],
1787            s->sprite_delta[1][0], s->sprite_delta[1][1], 
1788            a+1, (1<<(2*a+1)) - s->no_rounding,
1789            s->h_edge_pos>>1, s->v_edge_pos>>1);
1790 }
1791
1792 /**
1793  * Copies a rectangular area of samples to a temporary buffer and replicates the border samples.
1794  * @param buf destination buffer
1795  * @param src source buffer
1796  * @param linesize number of bytes between 2 vertically adjacent samples in both the source and destination buffers
1797  * @param block_w width of block
1798  * @param block_h height of block
1799  * @param src_x x coordinate of the top left sample of the block in the source buffer
1800  * @param src_y y coordinate of the top left sample of the block in the source buffer
1801  * @param w width of the source buffer
1802  * @param h height of the source buffer
1803  */
1804 void ff_emulated_edge_mc(uint8_t *buf, uint8_t *src, int linesize, int block_w, int block_h, 
1805                                     int src_x, int src_y, int w, int h){
1806     int x, y;
1807     int start_y, start_x, end_y, end_x;
1808
1809     if(src_y>= h){
1810         src+= (h-1-src_y)*linesize;
1811         src_y=h-1;
1812     }else if(src_y<=-block_h){
1813         src+= (1-block_h-src_y)*linesize;
1814         src_y=1-block_h;
1815     }
1816     if(src_x>= w){
1817         src+= (w-1-src_x);
1818         src_x=w-1;
1819     }else if(src_x<=-block_w){
1820         src+= (1-block_w-src_x);
1821         src_x=1-block_w;
1822     }
1823
1824     start_y= FFMAX(0, -src_y);
1825     start_x= FFMAX(0, -src_x);
1826     end_y= FFMIN(block_h, h-src_y);
1827     end_x= FFMIN(block_w, w-src_x);
1828
1829     // copy existing part
1830     for(y=start_y; y<end_y; y++){
1831         for(x=start_x; x<end_x; x++){
1832             buf[x + y*linesize]= src[x + y*linesize];
1833         }
1834     }
1835
1836     //top
1837     for(y=0; y<start_y; y++){
1838         for(x=start_x; x<end_x; x++){
1839             buf[x + y*linesize]= buf[x + start_y*linesize];
1840         }
1841     }
1842
1843     //bottom
1844     for(y=end_y; y<block_h; y++){
1845         for(x=start_x; x<end_x; x++){
1846             buf[x + y*linesize]= buf[x + (end_y-1)*linesize];
1847         }
1848     }
1849                                     
1850     for(y=0; y<block_h; y++){
1851        //left
1852         for(x=0; x<start_x; x++){
1853             buf[x + y*linesize]= buf[start_x + y*linesize];
1854         }
1855        
1856        //right
1857         for(x=end_x; x<block_w; x++){
1858             buf[x + y*linesize]= buf[end_x - 1 + y*linesize];
1859         }
1860     }
1861 }
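/*
 * Hedged usage sketch (mirrors the callers above, not new API): motion
 * compensation falls back to ff_emulated_edge_mc() whenever a block read
 * would cross the padded picture border; 17x17 covers a 16x16 block plus the
 * extra sample needed for half-pel interpolation.  The helper name below is
 * purely illustrative.
 */
#if 0
static uint8_t *fetch_block_or_emulate_edge(MpegEncContext *s, uint8_t *ptr,
                                            int linesize, int src_x, int src_y)
{
    if (src_x < 0 || src_y < 0 || src_x + 17 >= s->h_edge_pos
                               || src_y + 17 >= s->v_edge_pos) {
        ff_emulated_edge_mc(s->edge_emu_buffer, ptr, linesize, 17, 17,
                            src_x, src_y, s->h_edge_pos, s->v_edge_pos);
        return s->edge_emu_buffer;  /* border-replicated copy of the block */
    }
    return ptr;                     /* block lies entirely inside the frame */
}
#endif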
1862
1863
1864 /* apply one mpeg motion vector to the three components */
1865 static inline void mpeg_motion(MpegEncContext *s,
1866                                uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1867                                int dest_offset,
1868                                uint8_t **ref_picture, int src_offset,
1869                                int field_based, op_pixels_func (*pix_op)[4],
1870                                int motion_x, int motion_y, int h)
1871 {
1872     uint8_t *ptr;
1873     int dxy, offset, mx, my, src_x, src_y, height, v_edge_pos, linesize, uvlinesize;
1874     int emu=0;
1875 #if 0    
1876 if(s->quarter_sample)
1877 {
1878     motion_x>>=1;
1879     motion_y>>=1;
1880 }
1881 #endif
1882     dxy = ((motion_y & 1) << 1) | (motion_x & 1);
1883     src_x = s->mb_x * 16 + (motion_x >> 1);
1884     src_y = s->mb_y * (16 >> field_based) + (motion_y >> 1);
1885                 
1886     /* WARNING: do not forget half pels */
1887     height = s->height >> field_based;
1888     v_edge_pos = s->v_edge_pos >> field_based;
1889     src_x = clip(src_x, -16, s->width);
1890     if (src_x == s->width)
1891         dxy &= ~1;
1892     src_y = clip(src_y, -16, height);
1893     if (src_y == height)
1894         dxy &= ~2;
1895     linesize   = s->current_picture.linesize[0] << field_based;
1896     uvlinesize = s->current_picture.linesize[1] << field_based;
1897     ptr = ref_picture[0] + (src_y * linesize) + (src_x) + src_offset;
1898     dest_y += dest_offset;
1899
1900     if(s->flags&CODEC_FLAG_EMU_EDGE){
1901         if(src_x<0 || src_y<0 || src_x + (motion_x&1) + 16 > s->h_edge_pos
1902                               || src_y + (motion_y&1) + h  > v_edge_pos){
1903             ff_emulated_edge_mc(s->edge_emu_buffer, ptr - src_offset, s->linesize, 17, 17+field_based,  //FIXME linesize? and uv below
1904                              src_x, src_y<<field_based, s->h_edge_pos, s->v_edge_pos);
1905             ptr= s->edge_emu_buffer + src_offset;
1906             emu=1;
1907         }
1908     }
1909     pix_op[0][dxy](dest_y, ptr, linesize, h);
1910
1911     if(s->flags&CODEC_FLAG_GRAY) return;
1912
1913     if (s->out_format == FMT_H263) {
1914         dxy = 0;
1915         if ((motion_x & 3) != 0)
1916             dxy |= 1;
1917         if ((motion_y & 3) != 0)
1918             dxy |= 2;
1919         mx = motion_x >> 2;
1920         my = motion_y >> 2;
1921     } else {
1922         mx = motion_x / 2;
1923         my = motion_y / 2;
1924         dxy = ((my & 1) << 1) | (mx & 1);
1925         mx >>= 1;
1926         my >>= 1;
1927     }
1928     
1929     src_x = s->mb_x * 8 + mx;
1930     src_y = s->mb_y * (8 >> field_based) + my;
1931     src_x = clip(src_x, -8, s->width >> 1);
1932     if (src_x == (s->width >> 1))
1933         dxy &= ~1;
1934     src_y = clip(src_y, -8, height >> 1);
1935     if (src_y == (height >> 1))
1936         dxy &= ~2;
1937     offset = (src_y * uvlinesize) + src_x + (src_offset >> 1);
1938     ptr = ref_picture[1] + offset;
1939     if(emu){
1940         ff_emulated_edge_mc(s->edge_emu_buffer, ptr - (src_offset >> 1), s->uvlinesize, 9, 9+field_based, 
1941                          src_x, src_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
1942         ptr= s->edge_emu_buffer + (src_offset >> 1);
1943     }
1944     pix_op[1][dxy](dest_cb + (dest_offset >> 1), ptr, uvlinesize, h >> 1);
1945
1946     ptr = ref_picture[2] + offset;
1947     if(emu){
1948         ff_emulated_edge_mc(s->edge_emu_buffer, ptr - (src_offset >> 1), s->uvlinesize, 9, 9+field_based, 
1949                          src_x, src_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
1950         ptr= s->edge_emu_buffer + (src_offset >> 1);
1951     }
1952     pix_op[1][dxy](dest_cr + (dest_offset >> 1), ptr, uvlinesize, h >> 1);
1953 }
1954
1955 static inline void qpel_motion(MpegEncContext *s,
1956                                uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1957                                int dest_offset,
1958                                uint8_t **ref_picture, int src_offset,
1959                                int field_based, op_pixels_func (*pix_op)[4],
1960                                qpel_mc_func (*qpix_op)[16],
1961                                int motion_x, int motion_y, int h)
1962 {
1963     uint8_t *ptr;
1964     int dxy, offset, mx, my, src_x, src_y, height, v_edge_pos, linesize, uvlinesize;
1965     int emu=0;
1966
1967     dxy = ((motion_y & 3) << 2) | (motion_x & 3);
1968     src_x = s->mb_x * 16 + (motion_x >> 2);
1969     src_y = s->mb_y * (16 >> field_based) + (motion_y >> 2);
1970
1971     height = s->height >> field_based;
1972     v_edge_pos = s->v_edge_pos >> field_based;
1973     src_x = clip(src_x, -16, s->width);
1974     if (src_x == s->width)
1975         dxy &= ~3;
1976     src_y = clip(src_y, -16, height);
1977     if (src_y == height)
1978         dxy &= ~12;
1979     linesize = s->linesize << field_based;
1980     uvlinesize = s->uvlinesize << field_based;
1981     ptr = ref_picture[0] + (src_y * linesize) + src_x + src_offset;
1982     dest_y += dest_offset;
1983 //printf("%d %d %d\n", src_x, src_y, dxy);
1984     
1985     if(s->flags&CODEC_FLAG_EMU_EDGE){
1986         if(src_x<0 || src_y<0 || src_x + (motion_x&3) + 16 > s->h_edge_pos
1987                               || src_y + (motion_y&3) + h  > v_edge_pos){
1988             ff_emulated_edge_mc(s->edge_emu_buffer, ptr - src_offset, s->linesize, 17, 17+field_based, 
1989                              src_x, src_y<<field_based, s->h_edge_pos, s->v_edge_pos);
1990             ptr= s->edge_emu_buffer + src_offset;
1991             emu=1;
1992         }
1993     }
1994     if(!field_based)
1995         qpix_op[0][dxy](dest_y, ptr, linesize);
1996     else{
1997         //damn interlaced mode
1998         //FIXME boundary mirroring is not exactly correct here
1999         qpix_op[1][dxy](dest_y  , ptr  , linesize);
2000         qpix_op[1][dxy](dest_y+8, ptr+8, linesize);
2001     }
2002
2003     if(s->flags&CODEC_FLAG_GRAY) return;
2004
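    /* derive the chroma vector from the luma qpel vector; the FF_BUG_QPEL_CHROMA*
       cases reproduce the rounding of buggy encoders so that their streams
       decode bit-exactly */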
2005     if(field_based){
2006         mx= motion_x/2;
2007         my= motion_y>>1;
2008     }else if(s->workaround_bugs&FF_BUG_QPEL_CHROMA2){
2009         static const int rtab[8]= {0,0,1,1,0,0,0,1};
2010         mx= (motion_x>>1) + rtab[motion_x&7];
2011         my= (motion_y>>1) + rtab[motion_y&7];
2012     }else if(s->workaround_bugs&FF_BUG_QPEL_CHROMA){
2013         mx= (motion_x>>1)|(motion_x&1);
2014         my= (motion_y>>1)|(motion_y&1);
2015     }else{
2016         mx= motion_x/2;
2017         my= motion_y/2;
2018     }
2019     mx= (mx>>1)|(mx&1);
2020     my= (my>>1)|(my&1);
2021
2022     dxy= (mx&1) | ((my&1)<<1);
2023     mx>>=1;
2024     my>>=1;
2025
2026     src_x = s->mb_x * 8 + mx;
2027     src_y = s->mb_y * (8 >> field_based) + my;
2028     src_x = clip(src_x, -8, s->width >> 1);
2029     if (src_x == (s->width >> 1))
2030         dxy &= ~1;
2031     src_y = clip(src_y, -8, height >> 1);
2032     if (src_y == (height >> 1))
2033         dxy &= ~2;
2034
2035     offset = (src_y * uvlinesize) + src_x + (src_offset >> 1);
2036     ptr = ref_picture[1] + offset;
2037     if(emu){
2038         ff_emulated_edge_mc(s->edge_emu_buffer, ptr - (src_offset >> 1), s->uvlinesize, 9, 9 + field_based, 
2039                          src_x, src_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
2040         ptr= s->edge_emu_buffer + (src_offset >> 1);
2041     }
2042     pix_op[1][dxy](dest_cb + (dest_offset >> 1), ptr,  uvlinesize, h >> 1);
2043     
2044     ptr = ref_picture[2] + offset;
2045     if(emu){
2046         ff_emulated_edge_mc(s->edge_emu_buffer, ptr - (src_offset >> 1), s->uvlinesize, 9, 9 + field_based, 
2047                          src_x, src_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
2048         ptr= s->edge_emu_buffer + (src_offset >> 1);
2049     }
2050     pix_op[1][dxy](dest_cr + (dest_offset >> 1), ptr,  uvlinesize, h >> 1);
2051 }
2052
2053 inline int ff_h263_round_chroma(int x){
2054     if (x >= 0)
2055         return  (h263_chroma_roundtab[x & 0xf] + ((x >> 3) & ~1));
2056     else {
2057         x = -x;
2058         return -(h263_chroma_roundtab[x & 0xf] + ((x >> 3) & ~1));
2059     }
2060 }
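/*
 * Worked example (illustrative only): for an 8x8-MV macroblock the chroma
 * vector is derived from the sum of the four half-pel luma vectors, see
 * MPV_motion() below.  With the rounding table used above:
 */
#if 0
assert(ff_h263_round_chroma(  6) ==  1);  /* h263_chroma_roundtab[6]=1,  (6>>3)&~1 = 0 */
assert(ff_h263_round_chroma( 20) ==  3);  /* h263_chroma_roundtab[4]=1, (20>>3)&~1 = 2 */
assert(ff_h263_round_chroma( -6) == -1);  /* negative input: sign is mirrored */
#endif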
2061
2062 /**
2063  * motion compensation of a single macroblock
2064  * @param s context
2065  * @param dest_y luma destination pointer
2066  * @param dest_cb chroma cb/u destination pointer
2067  * @param dest_cr chroma cr/v destination pointer
2068  * @param dir direction (0->forward, 1->backward)
2069  * @param ref_picture array[3] of pointers to the 3 planes of the reference picture
2070  * @param pix_op halfpel motion compensation function (average or put normally)
2071  * @param qpix_op qpel motion compensation function (average or put normally)
2072  * the motion vectors are taken from s->mv and the MV type from s->mv_type
2073  */
2074 static inline void MPV_motion(MpegEncContext *s, 
2075                               uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2076                               int dir, uint8_t **ref_picture, 
2077                               op_pixels_func (*pix_op)[4], qpel_mc_func (*qpix_op)[16])
2078 {
2079     int dxy, offset, mx, my, src_x, src_y, motion_x, motion_y;
2080     int mb_x, mb_y, i;
2081     uint8_t *ptr, *dest;
2082     int emu=0;
2083
2084     mb_x = s->mb_x;
2085     mb_y = s->mb_y;
2086
2087     switch(s->mv_type) {
2088     case MV_TYPE_16X16:
2089 #ifdef CONFIG_RISKY
2090         if(s->mcsel){
2091             if(s->real_sprite_warping_points==1){
2092                 gmc1_motion(s, dest_y, dest_cb, dest_cr, 0,
2093                             ref_picture, 0);
2094             }else{
2095                 gmc_motion(s, dest_y, dest_cb, dest_cr, 0,
2096                             ref_picture, 0);
2097             }
2098         }else if(s->quarter_sample){
2099             qpel_motion(s, dest_y, dest_cb, dest_cr, 0,
2100                         ref_picture, 0,
2101                         0, pix_op, qpix_op,
2102                         s->mv[dir][0][0], s->mv[dir][0][1], 16);
2103         }else if(s->mspel){
2104             ff_mspel_motion(s, dest_y, dest_cb, dest_cr,
2105                         ref_picture, pix_op,
2106                         s->mv[dir][0][0], s->mv[dir][0][1], 16);
2107         }else
2108 #endif
2109         {
2110             mpeg_motion(s, dest_y, dest_cb, dest_cr, 0,
2111                         ref_picture, 0,
2112                         0, pix_op,
2113                         s->mv[dir][0][0], s->mv[dir][0][1], 16);
2114         }           
2115         break;
2116     case MV_TYPE_8X8:
2117         mx = 0;
2118         my = 0;
2119         if(s->quarter_sample){
2120             for(i=0;i<4;i++) {
2121                 motion_x = s->mv[dir][i][0];
2122                 motion_y = s->mv[dir][i][1];
2123
2124                 dxy = ((motion_y & 3) << 2) | (motion_x & 3);
2125                 src_x = mb_x * 16 + (motion_x >> 2) + (i & 1) * 8;
2126                 src_y = mb_y * 16 + (motion_y >> 2) + (i >>1) * 8;
2127                     
2128                 /* WARNING: do not forget half pels */
2129                 src_x = clip(src_x, -16, s->width);
2130                 if (src_x == s->width)
2131                     dxy &= ~3;
2132                 src_y = clip(src_y, -16, s->height);
2133                 if (src_y == s->height)
2134                     dxy &= ~12;
2135                     
2136                 ptr = ref_picture[0] + (src_y * s->linesize) + (src_x);
2137                 if(s->flags&CODEC_FLAG_EMU_EDGE){
2138                     if(src_x<0 || src_y<0 || src_x + (motion_x&3) + 8 > s->h_edge_pos
2139                                           || src_y + (motion_y&3) + 8 > s->v_edge_pos){
2140                         ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->linesize, 9, 9, src_x, src_y, s->h_edge_pos, s->v_edge_pos);
2141                         ptr= s->edge_emu_buffer;
2142                     }
2143                 }
2144                 dest = dest_y + ((i & 1) * 8) + (i >> 1) * 8 * s->linesize;
2145                 qpix_op[1][dxy](dest, ptr, s->linesize);
2146
2147                 mx += s->mv[dir][i][0]/2;
2148                 my += s->mv[dir][i][1]/2;
2149             }
2150         }else{
2151             for(i=0;i<4;i++) {
2152                 motion_x = s->mv[dir][i][0];
2153                 motion_y = s->mv[dir][i][1];
2154
2155                 dxy = ((motion_y & 1) << 1) | (motion_x & 1);
2156                 src_x = mb_x * 16 + (motion_x >> 1) + (i & 1) * 8;
2157                 src_y = mb_y * 16 + (motion_y >> 1) + (i >>1) * 8;
2158                     
2159                 /* WARNING: do not forget half pels */
2160                 src_x = clip(src_x, -16, s->width);
2161                 if (src_x == s->width)
2162                     dxy &= ~1;
2163                 src_y = clip(src_y, -16, s->height);
2164                 if (src_y == s->height)
2165                     dxy &= ~2;
2166                     
2167                 ptr = ref_picture[0] + (src_y * s->linesize) + (src_x);
2168                 if(s->flags&CODEC_FLAG_EMU_EDGE){
2169                     if(src_x<0 || src_y<0 || src_x + (motion_x&1) + 8 > s->h_edge_pos
2170                                           || src_y + (motion_y&1) + 8 > s->v_edge_pos){
2171                         ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->linesize, 9, 9, src_x, src_y, s->h_edge_pos, s->v_edge_pos);
2172                         ptr= s->edge_emu_buffer;
2173                     }
2174                 }
2175                 dest = dest_y + ((i & 1) * 8) + (i >> 1) * 8 * s->linesize;
2176                 pix_op[1][dxy](dest, ptr, s->linesize, 8);
2177
2178                 mx += s->mv[dir][i][0];
2179                 my += s->mv[dir][i][1];
2180             }
2181         }
2182
2183         if(s->flags&CODEC_FLAG_GRAY) break;
2184         /* In case of 8X8, we construct a single chroma motion vector
2185            with a special rounding */
2186         mx= ff_h263_round_chroma(mx);
2187         my= ff_h263_round_chroma(my);
2188         dxy = ((my & 1) << 1) | (mx & 1);
2189         mx >>= 1;
2190         my >>= 1;
2191
2192         src_x = mb_x * 8 + mx;
2193         src_y = mb_y * 8 + my;
2194         src_x = clip(src_x, -8, s->width/2);
2195         if (src_x == s->width/2)
2196             dxy &= ~1;
2197         src_y = clip(src_y, -8, s->height/2);
2198         if (src_y == s->height/2)
2199             dxy &= ~2;
2200         
2201         offset = (src_y * (s->uvlinesize)) + src_x;
2202         ptr = ref_picture[1] + offset;
2203         if(s->flags&CODEC_FLAG_EMU_EDGE){
2204                 if(src_x<0 || src_y<0 || src_x + (dxy &1) + 8 > s->h_edge_pos>>1
2205                                       || src_y + (dxy>>1) + 8 > s->v_edge_pos>>1){
2206                     ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
2207                     ptr= s->edge_emu_buffer;
2208                     emu=1;
2209                 }
2210             }
2211         pix_op[1][dxy](dest_cb, ptr, s->uvlinesize, 8);
2212
2213         ptr = ref_picture[2] + offset;
2214         if(emu){
2215             ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
2216             ptr= s->edge_emu_buffer;
2217         }
2218         pix_op[1][dxy](dest_cr, ptr, s->uvlinesize, 8);
2219         break;
2220     case MV_TYPE_FIELD:
2221         if (s->picture_structure == PICT_FRAME) {
2222             if(s->quarter_sample){
2223                 /* top field */
2224                 qpel_motion(s, dest_y, dest_cb, dest_cr, 0,
2225                             ref_picture, s->field_select[dir][0] ? s->linesize : 0,
2226                             1, pix_op, qpix_op,
2227                             s->mv[dir][0][0], s->mv[dir][0][1], 8);
2228                 /* bottom field */
2229                 qpel_motion(s, dest_y, dest_cb, dest_cr, s->linesize,
2230                             ref_picture, s->field_select[dir][1] ? s->linesize : 0,
2231                             1, pix_op, qpix_op,
2232                             s->mv[dir][1][0], s->mv[dir][1][1], 8);
2233             }else{
2234                 /* top field */       
2235                 mpeg_motion(s, dest_y, dest_cb, dest_cr, 0,
2236                             ref_picture, s->field_select[dir][0] ? s->linesize : 0,
2237                             1, pix_op,
2238                             s->mv[dir][0][0], s->mv[dir][0][1], 8);
2239                 /* bottom field */
2240                 mpeg_motion(s, dest_y, dest_cb, dest_cr, s->linesize,
2241                             ref_picture, s->field_select[dir][1] ? s->linesize : 0,
2242                             1, pix_op,
2243                             s->mv[dir][1][0], s->mv[dir][1][1], 8);
2244             }
2245         } else {
2246             int offset;
2247             if(s->picture_structure == s->field_select[dir][0] + 1 || s->pict_type == B_TYPE || s->first_field){
2248                 offset= s->field_select[dir][0] ? s->linesize : 0;
2249             }else{
2250                 ref_picture= s->current_picture.data;
2251                 offset= s->field_select[dir][0] ? s->linesize : -s->linesize; 
2252             } 
2253
2254             mpeg_motion(s, dest_y, dest_cb, dest_cr, 0,
2255                         ref_picture, offset,
2256                         0, pix_op,
2257                         s->mv[dir][0][0], s->mv[dir][0][1], 16);
2258         }
2259         break;
2260     case MV_TYPE_16X8:{
2261         int offset;
2262          uint8_t ** ref2picture;
2263
2264             if(s->picture_structure == s->field_select[dir][0] + 1 || s->pict_type == B_TYPE || s->first_field){
2265                 ref2picture= ref_picture;
2266                 offset= s->field_select[dir][0] ? s->linesize : 0;
2267             }else{
2268                 ref2picture= s->current_picture.data;
2269                 offset= s->field_select[dir][0] ? s->linesize : -s->linesize; 
2270             } 
2271
2272             mpeg_motion(s, dest_y, dest_cb, dest_cr, 0,
2273                         ref2picture, offset,
2274                         0, pix_op,
2275                         s->mv[dir][0][0], s->mv[dir][0][1], 8);
2276
2277
2278             if(s->picture_structure == s->field_select[dir][1] + 1 || s->pict_type == B_TYPE || s->first_field){
2279                 ref2picture= ref_picture;
2280                 offset= s->field_select[dir][1] ? s->linesize : 0;
2281             }else{
2282                 ref2picture= s->current_picture.data;
2283                 offset= s->field_select[dir][1] ? s->linesize : -s->linesize; 
2284             } 
2285             // I know it is ugly but this is the only way to fool emu_edge without rewriting mpeg_motion
2286             mpeg_motion(s, dest_y+16*s->linesize, dest_cb+8*s->uvlinesize, dest_cr+8*s->uvlinesize,
2287                         0,
2288                         ref2picture, offset,
2289                         0, pix_op,
2290                         s->mv[dir][1][0], s->mv[dir][1][1]+16, 8);
2291         }
2292         
2293         break;
2294     case MV_TYPE_DMV:
2295     {
2296     op_pixels_func (*dmv_pix_op)[4];
2297     int offset;
2298
2299         dmv_pix_op = s->dsp.put_pixels_tab;
2300
2301         if(s->picture_structure == PICT_FRAME){
2302             //put top field from top field
2303             mpeg_motion(s, dest_y, dest_cb, dest_cr, 0,
2304                         ref_picture, 0,
2305                         1, dmv_pix_op,
2306                         s->mv[dir][0][0], s->mv[dir][0][1], 8);
2307             //put bottom field from bottom field
2308             mpeg_motion(s, dest_y, dest_cb, dest_cr, s->linesize,
2309                         ref_picture, s->linesize,
2310                         1, dmv_pix_op,
2311                         s->mv[dir][0][0], s->mv[dir][0][1], 8);
2312
2313             dmv_pix_op = s->dsp.avg_pixels_tab; 
2314         
2315             //avg top field from bottom field
2316             mpeg_motion(s, dest_y, dest_cb, dest_cr, 0,
2317                         ref_picture, s->linesize,
2318                         1, dmv_pix_op,
2319                         s->mv[dir][2][0], s->mv[dir][2][1], 8);
2320             //avg bottom field from top field
2321             mpeg_motion(s, dest_y, dest_cb, dest_cr, s->linesize,
2322                         ref_picture, 0,
2323                         1, dmv_pix_op,
2324                         s->mv[dir][3][0], s->mv[dir][3][1], 8);
2325
2326         }else{
2327             offset=(s->picture_structure == PICT_BOTTOM_FIELD)? 
2328                          s->linesize : 0;
2329
2330             //put field from the same parity
2331             //same parity is never in the same frame
2332             mpeg_motion(s, dest_y, dest_cb, dest_cr, 0,
2333                         ref_picture,offset,
2334                         0,dmv_pix_op,
2335                         s->mv[dir][0][0],s->mv[dir][0][1],16);
2336
2337             // after the put we average into the same block
2338             dmv_pix_op=s->dsp.avg_pixels_tab; 
2339
2340             //opposite parity is always in the same frame if this is the second field
2341             if(!s->first_field){
2342                 ref_picture = s->current_picture.data;    
2343                 //top field is one linesize from the frame beginning
2344                 offset=(s->picture_structure == PICT_BOTTOM_FIELD)? 
2345                         -s->linesize : s->linesize;
2346             }else 
2347                 offset=(s->picture_structure == PICT_BOTTOM_FIELD)? 
2348                         0 : s->linesize;
2349
2350             //avg field from the opposite parity
2351             mpeg_motion(s, dest_y, dest_cb, dest_cr,0,
2352                         ref_picture, offset,
2353                         0,dmv_pix_op,
2354                         s->mv[dir][2][0],s->mv[dir][2][1],16);
2355         }
2356     }
2357     break;
2358
2359     }
2360 }
2361
2362
2363 /* put block[] to dest[] */
2364 static inline void put_dct(MpegEncContext *s, 
2365                            DCTELEM *block, int i, uint8_t *dest, int line_size)
2366 {
2367     s->dct_unquantize(s, block, i, s->qscale);
2368     s->dsp.idct_put (dest, line_size, block);
2369 }
2370
2371 /* add block[] to dest[] */
2372 static inline void add_dct(MpegEncContext *s, 
2373                            DCTELEM *block, int i, uint8_t *dest, int line_size)
2374 {
2375     if (s->block_last_index[i] >= 0) {
2376         s->dsp.idct_add (dest, line_size, block);
2377     }
2378 }
2379
2380 static inline void add_dequant_dct(MpegEncContext *s, 
2381                            DCTELEM *block, int i, uint8_t *dest, int line_size)
2382 {
2383     if (s->block_last_index[i] >= 0) {
2384         s->dct_unquantize(s, block, i, s->qscale);
2385
2386         s->dsp.idct_add (dest, line_size, block);
2387     }
2388 }
2389
2390 /**
2391  * cleans the dc, ac and coded_block tables for the current non-intra MB
2392  */
2393 void ff_clean_intra_table_entries(MpegEncContext *s)
2394 {
2395     int wrap = s->block_wrap[0];
2396     int xy = s->block_index[0];
2397     
2398     s->dc_val[0][xy           ] = 
2399     s->dc_val[0][xy + 1       ] = 
2400     s->dc_val[0][xy     + wrap] =
2401     s->dc_val[0][xy + 1 + wrap] = 1024;
2402     /* ac pred */
2403     memset(s->ac_val[0][xy       ], 0, 32 * sizeof(int16_t));
2404     memset(s->ac_val[0][xy + wrap], 0, 32 * sizeof(int16_t));
2405     if (s->msmpeg4_version>=3) {
2406         s->coded_block[xy           ] =
2407         s->coded_block[xy + 1       ] =
2408         s->coded_block[xy     + wrap] =
2409         s->coded_block[xy + 1 + wrap] = 0;
2410     }
2411     /* chroma */
2412     wrap = s->block_wrap[4];
2413     xy = s->mb_x + 1 + (s->mb_y + 1) * wrap;
2414     s->dc_val[1][xy] =
2415     s->dc_val[2][xy] = 1024;
2416     /* ac pred */
2417     memset(s->ac_val[1][xy], 0, 16 * sizeof(int16_t));
2418     memset(s->ac_val[2][xy], 0, 16 * sizeof(int16_t));
2419     
2420     s->mbintra_table[s->mb_x + s->mb_y*s->mb_stride]= 0;
2421 }
2422
2423 /* generic function called after a macroblock has been parsed by the
2424    decoder or after it has been encoded by the encoder.
2425
2426    Important variables used:
2427    s->mb_intra : true if intra macroblock
2428    s->mv_dir   : motion vector direction
2429    s->mv_type  : motion vector type
2430    s->mv       : motion vector
2431    s->interlaced_dct : true if interlaced dct used (mpeg2)
2432  */
2433 void MPV_decode_mb(MpegEncContext *s, DCTELEM block[6][64])
2434 {
2435     int mb_x, mb_y;
2436     const int mb_xy = s->mb_y * s->mb_stride + s->mb_x;
2437 #ifdef HAVE_XVMC
2438     if(s->avctx->xvmc_acceleration){
2439         XVMC_decode_mb(s,block);
2440         return;
2441     }
2442 #endif
2443
2444     mb_x = s->mb_x;
2445     mb_y = s->mb_y;
2446
2447     s->current_picture.qscale_table[mb_xy]= s->qscale;
2448
2449     /* update DC predictors for P macroblocks */
2450     if (!s->mb_intra) {
2451         if (s->h263_pred || s->h263_aic) {
2452             if(s->mbintra_table[mb_xy])
2453                 ff_clean_intra_table_entries(s);
2454         } else {
2455             s->last_dc[0] =
2456             s->last_dc[1] =
2457             s->last_dc[2] = 128 << s->intra_dc_precision;
2458         }
2459     }
2460     else if (s->h263_pred || s->h263_aic)
2461         s->mbintra_table[mb_xy]=1;
2462
2463     if ((s->flags&CODEC_FLAG_PSNR) || !(s->encoding && (s->intra_only || s->pict_type==B_TYPE))) { //FIXME precalc
2464         uint8_t *dest_y, *dest_cb, *dest_cr;
2465         int dct_linesize, dct_offset;
2466         op_pixels_func (*op_pix)[4];
2467         qpel_mc_func (*op_qpix)[16];
2468         const int linesize= s->current_picture.linesize[0]; //not s->linesize as this would be wrong for field pics
2469         const int uvlinesize= s->current_picture.linesize[1];
2470
2471         /* avoid copy if macroblock skipped in last frame too */
2472         /* skip only during decoding, as during encoding we might trash the buffers a bit */
2473         if(!s->encoding){
2474             uint8_t *mbskip_ptr = &s->mbskip_table[mb_xy];
2475             const int age= s->current_picture.age;
2476
2477             assert(age);
2478
2479             if (s->mb_skiped) {
2480                 s->mb_skiped= 0;
2481                 assert(s->pict_type!=I_TYPE);
2482  
2483                 (*mbskip_ptr) ++; /* indicate that this time we skipped it */
2484                 if(*mbskip_ptr >99) *mbskip_ptr= 99;
2485
2486                 /* if previous was skipped too, then nothing to do !  */
2487                 if (*mbskip_ptr >= age && s->current_picture.reference){
2488                     return;
2489                 }
2490             } else if(!s->current_picture.reference){
2491                 (*mbskip_ptr) ++; /* increase counter so the age can be compared cleanly */
2492                 if(*mbskip_ptr >99) *mbskip_ptr= 99;
2493             } else{
2494                 *mbskip_ptr = 0; /* not skipped */
2495             }
2496         }
2497
2498         if (s->interlaced_dct) {
2499             dct_linesize = linesize * 2;
2500             dct_offset = linesize;
2501         } else {
2502             dct_linesize = linesize;
2503             dct_offset = linesize * 8;
2504         }
2505         
2506         dest_y=  s->dest[0];
2507         dest_cb= s->dest[1];
2508         dest_cr= s->dest[2];
2509
2510         if (!s->mb_intra) {
2511             /* motion handling */
2512             /* decoding or more than one mb_type (MC was already done otherwise) */
2513             if(!s->encoding){
2514                 if ((!s->no_rounding) || s->pict_type==B_TYPE){                
2515                     op_pix = s->dsp.put_pixels_tab;
2516                     op_qpix= s->dsp.put_qpel_pixels_tab;
2517                 }else{
2518                     op_pix = s->dsp.put_no_rnd_pixels_tab;
2519                     op_qpix= s->dsp.put_no_rnd_qpel_pixels_tab;
2520                 }
2521
2522                 if (s->mv_dir & MV_DIR_FORWARD) {
2523                     MPV_motion(s, dest_y, dest_cb, dest_cr, 0, s->last_picture.data, op_pix, op_qpix);
2524                     op_pix = s->dsp.avg_pixels_tab;
2525                     op_qpix= s->dsp.avg_qpel_pixels_tab;
2526                 }
2527                 if (s->mv_dir & MV_DIR_BACKWARD) {
2528                     MPV_motion(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.data, op_pix, op_qpix);
2529                 }
2530             }
2531
2532             /* skip dequant / idct if we are really late ;) */
2533             if(s->hurry_up>1) return;
2534
2535             /* add dct residue */
2536             if(s->encoding || !(   s->h263_msmpeg4 || s->codec_id==CODEC_ID_MPEG1VIDEO || s->codec_id==CODEC_ID_MPEG2VIDEO
2537                                 || (s->codec_id==CODEC_ID_MPEG4 && !s->mpeg_quant))){
2538                 add_dequant_dct(s, block[0], 0, dest_y, dct_linesize);
2539                 add_dequant_dct(s, block[1], 1, dest_y + 8, dct_linesize);
2540                 add_dequant_dct(s, block[2], 2, dest_y + dct_offset, dct_linesize);
2541                 add_dequant_dct(s, block[3], 3, dest_y + dct_offset + 8, dct_linesize);
2542
2543                 if(!(s->flags&CODEC_FLAG_GRAY)){
2544                     add_dequant_dct(s, block[4], 4, dest_cb, uvlinesize);
2545                     add_dequant_dct(s, block[5], 5, dest_cr, uvlinesize);
2546                 }
2547             } else if(s->codec_id != CODEC_ID_WMV2){
2548                 add_dct(s, block[0], 0, dest_y, dct_linesize);
2549                 add_dct(s, block[1], 1, dest_y + 8, dct_linesize);
2550                 add_dct(s, block[2], 2, dest_y + dct_offset, dct_linesize);
2551                 add_dct(s, block[3], 3, dest_y + dct_offset + 8, dct_linesize);
2552
2553                 if(!(s->flags&CODEC_FLAG_GRAY)){
2554                     add_dct(s, block[4], 4, dest_cb, uvlinesize);
2555                     add_dct(s, block[5], 5, dest_cr, uvlinesize);
2556                 }
2557             } 
2558 #ifdef CONFIG_RISKY
2559             else{
2560                 ff_wmv2_add_mb(s, block, dest_y, dest_cb, dest_cr);
2561             }
2562 #endif
2563         } else {
2564             /* dct only in intra block */
2565             if(s->encoding || !(s->codec_id==CODEC_ID_MPEG1VIDEO || s->codec_id==CODEC_ID_MPEG2VIDEO)){
2566                 put_dct(s, block[0], 0, dest_y, dct_linesize);
2567                 put_dct(s, block[1], 1, dest_y + 8, dct_linesize);
2568                 put_dct(s, block[2], 2, dest_y + dct_offset, dct_linesize);
2569                 put_dct(s, block[3], 3, dest_y + dct_offset + 8, dct_linesize);
2570
2571                 if(!(s->flags&CODEC_FLAG_GRAY)){
2572                     put_dct(s, block[4], 4, dest_cb, uvlinesize);
2573                     put_dct(s, block[5], 5, dest_cr, uvlinesize);
2574                 }
2575             }else{
2576                 s->dsp.idct_put(dest_y                 , dct_linesize, block[0]);
2577                 s->dsp.idct_put(dest_y              + 8, dct_linesize, block[1]);
2578                 s->dsp.idct_put(dest_y + dct_offset    , dct_linesize, block[2]);
2579                 s->dsp.idct_put(dest_y + dct_offset + 8, dct_linesize, block[3]);
2580
2581                 if(!(s->flags&CODEC_FLAG_GRAY)){
2582                     s->dsp.idct_put(dest_cb, uvlinesize, block[4]);
2583                     s->dsp.idct_put(dest_cr, uvlinesize, block[5]);
2584                 }
2585             }
2586         }
2587     }
2588 }
2589
2590 #ifdef CONFIG_ENCODERS
2591
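/**
 * Zeroes block n if its only nonzero quantized coefficients are scattered
 * +-1 values whose accumulated run-based score stays below the given
 * threshold (single coefficient elimination).  A negative threshold means
 * the DC coefficient may be eliminated as well.
 */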
2592 static inline void dct_single_coeff_elimination(MpegEncContext *s, int n, int threshold)
2593 {
2594     static const char tab[64]=
2595         {3,2,2,1,1,1,1,1,
2596          1,1,1,1,1,1,1,1,
2597          1,1,1,1,1,1,1,1,
2598          0,0,0,0,0,0,0,0,
2599          0,0,0,0,0,0,0,0,
2600          0,0,0,0,0,0,0,0,
2601          0,0,0,0,0,0,0,0,
2602          0,0,0,0,0,0,0,0};
2603     int score=0;
2604     int run=0;
2605     int i;
2606     DCTELEM *block= s->block[n];
2607     const int last_index= s->block_last_index[n];
2608     int skip_dc;
2609
2610     if(threshold<0){
2611         skip_dc=0;
2612         threshold= -threshold;
2613     }else
2614         skip_dc=1;
2615
2616     /* are all the coefficients which we could set to zero already zero? */
2617     if(last_index<=skip_dc - 1) return;
2618
2619     for(i=0; i<=last_index; i++){
2620         const int j = s->intra_scantable.permutated[i];
2621         const int level = ABS(block[j]);
2622         if(level==1){
2623             if(skip_dc && i==0) continue;
2624             score+= tab[run];
2625             run=0;
2626         }else if(level>1){
2627             return;
2628         }else{
2629             run++;
2630         }
2631     }
2632     if(score >= threshold) return;
2633     for(i=skip_dc; i<=last_index; i++){
2634         const int j = s->intra_scantable.permutated[i];
2635         block[j]=0;
2636     }
2637     if(block[0]) s->block_last_index[n]= 0;
2638     else         s->block_last_index[n]= -1;
2639 }
2640
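/**
 * Clamps the quantized coefficients to the range the codec can represent
 * ([min_qcoeff, max_qcoeff]); the intra DC coefficient is left untouched.
 */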
2641 static inline void clip_coeffs(MpegEncContext *s, DCTELEM *block, int last_index)
2642 {
2643     int i;
2644     const int maxlevel= s->max_qcoeff;
2645     const int minlevel= s->min_qcoeff;
2646     
2647     if(s->mb_intra){
2648         i=1; //skip clipping of intra dc
2649     }else
2650         i=0;
2651     
2652     for(;i<=last_index; i++){
2653         const int j= s->intra_scantable.permutated[i];
2654         int level = block[j];
2655        
2656         if     (level>maxlevel) level=maxlevel;
2657         else if(level<minlevel) level=minlevel;
2658
2659         block[j]= level;
2660     }
2661 }
2662
2663 #if 0
2664 static int pix_vcmp16x8(uint8_t *s, int stride){ //FIXME move to dsputil & optimize
2665     int score=0;
2666     int x,y;
2667     
2668     for(y=0; y<7; y++){
2669         for(x=0; x<16; x+=4){
2670             score+= ABS(s[x  ] - s[x  +stride]) + ABS(s[x+1] - s[x+1+stride]) 
2671                    +ABS(s[x+2] - s[x+2+stride]) + ABS(s[x+3] - s[x+3+stride]);
2672         }
2673         s+= stride;
2674     }
2675     
2676     return score;
2677 }
2678
2679 static int pix_diff_vcmp16x8(uint8_t *s1, uint8_t*s2, int stride){ //FIXME move to dsputil & optimize
2680     int score=0;
2681     int x,y;
2682     
2683     for(y=0; y<7; y++){
2684         for(x=0; x<16; x++){
2685             score+= ABS(s1[x  ] - s2[x ] - s1[x  +stride] + s2[x +stride]);
2686         }
2687         s1+= stride;
2688         s2+= stride;
2689     }
2690     
2691     return score;
2692 }
2693 #else
2694 #define SQ(a) ((a)*(a))
2695
2696 static int pix_vcmp16x8(uint8_t *s, int stride){ //FIXME move to dsputil & optimize
2697     int score=0;
2698     int x,y;
2699     
2700     for(y=0; y<7; y++){
2701         for(x=0; x<16; x+=4){
2702             score+= SQ(s[x  ] - s[x  +stride]) + SQ(s[x+1] - s[x+1+stride]) 
2703                    +SQ(s[x+2] - s[x+2+stride]) + SQ(s[x+3] - s[x+3+stride]);
2704         }
2705         s+= stride;
2706     }
2707     
2708     return score;
2709 }
2710
2711 static int pix_diff_vcmp16x8(uint8_t *s1, uint8_t*s2, int stride){ //FIXME move to dsputil & optimize
2712     int score=0;
2713     int x,y;
2714     
2715     for(y=0; y<7; y++){
2716         for(x=0; x<16; x++){
2717             score+= SQ(s1[x  ] - s2[x ] - s1[x  +stride] + s2[x +stride]);
2718         }
2719         s1+= stride;
2720         s2+= stride;
2721     }
2722     
2723     return score;
2724 }
2725
2726 #endif
2727
2728 #endif //CONFIG_ENCODERS
2729
2730 /**
2731  * Calls the draw_horiz_band() callback of the AVCodecContext, if one is set.
2732  * @param h the normal band height; it is reduced automatically if needed for the last row
2733  */
2734 void ff_draw_horiz_band(MpegEncContext *s, int y, int h){
2735     if (s->avctx->draw_horiz_band) {
2736         AVFrame *src;
2737         int offset[4];
2738         
2739         if(s->picture_structure != PICT_FRAME){
2740             h <<= 1;
2741             y <<= 1;
2742             if(s->first_field  && !(s->avctx->slice_flags&SLICE_FLAG_ALLOW_FIELD)) return;
2743         }
2744
2745         h= FFMIN(h, s->height - y);
2746
2747         if(s->pict_type==B_TYPE || s->low_delay || (s->avctx->slice_flags&SLICE_FLAG_CODED_ORDER)) 
2748             src= (AVFrame*)s->current_picture_ptr;
2749         else if(s->last_picture_ptr)
2750             src= (AVFrame*)s->last_picture_ptr;
2751         else
2752             return;
2753             
2754         if(s->pict_type==B_TYPE && s->picture_structure == PICT_FRAME && s->out_format != FMT_H264){
2755             offset[0]=
2756             offset[1]=
2757             offset[2]=
2758             offset[3]= 0;
2759         }else{
2760             offset[0]= y * s->linesize;
2761             offset[1]= 
2762             offset[2]= (y>>1) * s->uvlinesize;
2763             offset[3]= 0;
2764         }
2765
2766         emms_c();
2767
2768         s->avctx->draw_horiz_band(s->avctx, src, offset,
2769                                   y, s->picture_structure, h);
2770     }
2771 }
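/*
 * Hedged sketch (not part of this file): a minimal application callback
 * matching the draw_horiz_band() invocation above.  The authoritative
 * prototype is the one declared in avcodec.h of this tree; render_slice()
 * is a hypothetical application function.
 */
#if 0
static void my_draw_horiz_band(AVCodecContext *avctx, AVFrame *src,
                               int offset[4], int y, int type, int height)
{
    /* src->data[i] + offset[i] points at the first sample of the band in
       plane i; y is the first picture line covered by the band */
    render_slice(src->data[0] + offset[0], src->linesize[0], y, height);
}
#endif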
2772
2773 void ff_init_block_index(MpegEncContext *s){ //FIXME maybe rename
2774     const int linesize= s->current_picture.linesize[0]; //not s->linesize as this would be wrong for field pics
2775     const int uvlinesize= s->current_picture.linesize[1];
2776         
2777     s->block_index[0]= s->block_wrap[0]*(s->mb_y*2 + 1) - 1 + s->mb_x*2;
2778     s->block_index[1]= s->block_wrap[0]*(s->mb_y*2 + 1)     + s->mb_x*2;
2779     s->block_index[2]= s->block_wrap[0]*(s->mb_y*2 + 2) - 1 + s->mb_x*2;
2780     s->block_index[3]= s->block_wrap[0]*(s->mb_y*2 + 2)     + s->mb_x*2;
2781     s->block_index[4]= s->block_wrap[4]*(s->mb_y + 1)                    + s->block_wrap[0]*(s->mb_height*2 + 2) + s->mb_x;
2782     s->block_index[5]= s->block_wrap[4]*(s->mb_y + 1 + s->mb_height + 2) + s->block_wrap[0]*(s->mb_height*2 + 2) + s->mb_x;
2783     
2784     if(s->pict_type==B_TYPE && s->avctx->draw_horiz_band && s->picture_structure==PICT_FRAME){
2785         s->dest[0] = s->current_picture.data[0] + s->mb_x * 16 - 16;
2786         s->dest[1] = s->current_picture.data[1] + s->mb_x * 8 - 8;
2787         s->dest[2] = s->current_picture.data[2] + s->mb_x * 8 - 8;
2788     }else{
2789         s->dest[0] = s->current_picture.data[0] + (s->mb_y * 16* linesize  ) + s->mb_x * 16 - 16;
2790         s->dest[1] = s->current_picture.data[1] + (s->mb_y * 8 * uvlinesize) + s->mb_x * 8 - 8;
2791         s->dest[2] = s->current_picture.data[2] + (s->mb_y * 8 * uvlinesize) + s->mb_x * 8 - 8;
2792     }    
2793 }
2794
2795 #ifdef CONFIG_ENCODERS
2796
2797 static void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
2798 {
2799     const int mb_x= s->mb_x;
2800     const int mb_y= s->mb_y;
2801     int i;
2802     int skip_dct[6];
2803     int dct_offset   = s->linesize*8; //default for progressive frames
2804     
2805     for(i=0; i<6; i++) skip_dct[i]=0;
2806     
2807     if(s->adaptive_quant){
2808         s->dquant= s->current_picture.qscale_table[mb_x + mb_y*s->mb_stride] - s->qscale;
2809
2810         if(s->out_format==FMT_H263){
2811             if     (s->dquant> 2) s->dquant= 2;
2812             else if(s->dquant<-2) s->dquant=-2;
2813         }
2814             
2815         if(s->codec_id==CODEC_ID_MPEG4){        
2816             if(!s->mb_intra){
2817                 if(s->mv_dir&MV_DIRECT)
2818                     s->dquant=0;
2819
2820                 assert(s->dquant==0 || s->mv_type!=MV_TYPE_8X8);
2821             }
2822         }
2823         s->qscale+= s->dquant;
2824         s->y_dc_scale= s->y_dc_scale_table[ s->qscale ];
2825         s->c_dc_scale= s->c_dc_scale_table[ s->qscale ];
2826     }
2827
2828     if (s->mb_intra) {
2829         uint8_t *ptr;
2830         int wrap_y;
2831         int emu=0;
2832
2833         wrap_y = s->linesize;
2834         ptr = s->new_picture.data[0] + (mb_y * 16 * wrap_y) + mb_x * 16;
2835
2836         if(mb_x*16+16 > s->width || mb_y*16+16 > s->height){
2837             ff_emulated_edge_mc(s->edge_emu_buffer, ptr, wrap_y, 16, 16, mb_x*16, mb_y*16, s->width, s->height);
2838             ptr= s->edge_emu_buffer;
2839             emu=1;
2840         }
2841         
2842         if(s->flags&CODEC_FLAG_INTERLACED_DCT){
2843             int progressive_score, interlaced_score;
2844             
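            /* interlaced-DCT decision (heuristic): compare the vertical activity of
               frame-ordered rows against that of field-separated rows; if the frame
               ordering shows clearly more vertical difference, code this MB with
               field (interlaced) DCT */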
2845             progressive_score= pix_vcmp16x8(ptr, wrap_y  ) + pix_vcmp16x8(ptr + wrap_y*8, wrap_y );
2846             interlaced_score = pix_vcmp16x8(ptr, wrap_y*2) + pix_vcmp16x8(ptr + wrap_y  , wrap_y*2);
2847             
2848             if(progressive_score > interlaced_score + 100){
2849                 s->interlaced_dct=1;
2850             
2851                 dct_offset= wrap_y;
2852                 wrap_y<<=1;
2853             }else
2854                 s->interlaced_dct=0;
2855         }
2856         
2857         s->dsp.get_pixels(s->block[0], ptr                 , wrap_y);
2858         s->dsp.get_pixels(s->block[1], ptr              + 8, wrap_y);
2859         s->dsp.get_pixels(s->block[2], ptr + dct_offset    , wrap_y);
2860         s->dsp.get_pixels(s->block[3], ptr + dct_offset + 8, wrap_y);
2861
2862         if(s->flags&CODEC_FLAG_GRAY){
2863             skip_dct[4]= 1;
2864             skip_dct[5]= 1;
2865         }else{
2866             int wrap_c = s->uvlinesize;
2867             ptr = s->new_picture.data[1] + (mb_y * 8 * wrap_c) + mb_x * 8;
2868             if(emu){
2869                 ff_emulated_edge_mc(s->edge_emu_buffer, ptr, wrap_c, 8, 8, mb_x*8, mb_y*8, s->width>>1, s->height>>1);
2870                 ptr= s->edge_emu_buffer;
2871             }
2872             s->dsp.get_pixels(s->block[4], ptr, wrap_c);
2873
2874             ptr = s->new_picture.data[2] + (mb_y * 8 * wrap_c) + mb_x * 8;
2875             if(emu){
2876                 ff_emulated_edge_mc(s->edge_emu_buffer, ptr, wrap_c, 8, 8, mb_x*8, mb_y*8, s->width>>1, s->height>>1);
2877                 ptr= s->edge_emu_buffer;
2878             }
2879             s->dsp.get_pixels(s->block[5], ptr, wrap_c);
2880         }
2881     }else{
2882         op_pixels_func (*op_pix)[4];
2883         qpel_mc_func (*op_qpix)[16];
2884         uint8_t *dest_y, *dest_cb, *dest_cr;
2885         uint8_t *ptr_y, *ptr_cb, *ptr_cr;
2886         int wrap_y, wrap_c;
2887         int emu=0;
2888
2889         dest_y  = s->dest[0];
2890         dest_cb = s->dest[1];
2891         dest_cr = s->dest[2];
2892         wrap_y = s->linesize;
2893         wrap_c = s->uvlinesize;
2894         ptr_y  = s->new_picture.data[0] + (mb_y * 16 * wrap_y) + mb_x * 16;
2895         ptr_cb = s->new_picture.data[1] + (mb_y * 8 * wrap_c) + mb_x * 8;
2896         ptr_cr = s->new_picture.data[2] + (mb_y * 8 * wrap_c) + mb_x * 8;
2897
2898         if ((!s->no_rounding) || s->pict_type==B_TYPE){
2899             op_pix = s->dsp.put_pixels_tab;
2900             op_qpix= s->dsp.put_qpel_pixels_tab;
2901         }else{
2902             op_pix = s->dsp.put_no_rnd_pixels_tab;
2903             op_qpix= s->dsp.put_no_rnd_qpel_pixels_tab;
2904         }
2905
2906         if (s->mv_dir & MV_DIR_FORWARD) {
2907             MPV_motion(s, dest_y, dest_cb, dest_cr, 0, s->last_picture.data, op_pix, op_qpix);
2908             op_pix = s->dsp.avg_pixels_tab;
2909             op_qpix= s->dsp.avg_qpel_pixels_tab;
2910         }
2911         if (s->mv_dir & MV_DIR_BACKWARD) {
2912             MPV_motion(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.data, op_pix, op_qpix);
2913         }
2914
2915         if(mb_x*16+16 > s->width || mb_y*16+16 > s->height){
2916             ff_emulated_edge_mc(s->edge_emu_buffer, ptr_y, wrap_y, 16, 16, mb_x*16, mb_y*16, s->width, s->height);
2917             ptr_y= s->edge_emu_buffer;
2918             emu=1;
2919         }
2920         
2921         if(s->flags&CODEC_FLAG_INTERLACED_DCT){
2922             int progressive_score, interlaced_score;
2923             
2924             progressive_score= pix_diff_vcmp16x8(ptr_y           , dest_y           , wrap_y  ) 
2925                              + pix_diff_vcmp16x8(ptr_y + wrap_y*8, dest_y + wrap_y*8, wrap_y  );
2926             interlaced_score = pix_diff_vcmp16x8(ptr_y           , dest_y           , wrap_y*2)
2927                              + pix_diff_vcmp16x8(ptr_y + wrap_y  , dest_y + wrap_y  , wrap_y*2);
2928             
2929             if(progressive_score > interlaced_score + 600){
2930                 s->interlaced_dct=1;
2931             
2932                 dct_offset= wrap_y;
2933                 wrap_y<<=1;
2934             }else
2935                 s->interlaced_dct=0;
2936         }
2937         
2938         s->dsp.diff_pixels(s->block[0], ptr_y                 , dest_y                 , wrap_y);
2939         s->dsp.diff_pixels(s->block[1], ptr_y              + 8, dest_y              + 8, wrap_y);
2940         s->dsp.diff_pixels(s->block[2], ptr_y + dct_offset    , dest_y + dct_offset    , wrap_y);
2941         s->dsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8, dest_y + dct_offset + 8, wrap_y);
2942         
2943         if(s->flags&CODEC_FLAG_GRAY){
2944             skip_dct[4]= 1;
2945             skip_dct[5]= 1;
2946         }else{
2947             if(emu){
2948                 ff_emulated_edge_mc(s->edge_emu_buffer, ptr_cb, wrap_c, 8, 8, mb_x*8, mb_y*8, s->width>>1, s->height>>1);
2949                 ptr_cb= s->edge_emu_buffer;
2950             }
2951             s->dsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
2952             if(emu){
2953                 ff_emulated_edge_mc(s->edge_emu_buffer, ptr_cr, wrap_c, 8, 8, mb_x*8, mb_y*8, s->width>>1, s->height>>1);
2954                 ptr_cr= s->edge_emu_buffer;
2955             }
2956             s->dsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
2957         }
2958         /* pre quantization */         
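        /* if the motion compensated residual of the whole MB is small relative to
           the quantizer, skip the DCT for blocks whose SAD is below ~20*qscale */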
2959         if(s->current_picture.mc_mb_var[s->mb_stride*mb_y+ mb_x]<2*s->qscale*s->qscale){
2960             //FIXME optimize
2961             if(s->dsp.pix_abs8x8(ptr_y               , dest_y               , wrap_y) < 20*s->qscale) skip_dct[0]= 1;
2962             if(s->dsp.pix_abs8x8(ptr_y            + 8, dest_y            + 8, wrap_y) < 20*s->qscale) skip_dct[1]= 1;
2963             if(s->dsp.pix_abs8x8(ptr_y +dct_offset   , dest_y +dct_offset   , wrap_y) < 20*s->qscale) skip_dct[2]= 1;
2964             if(s->dsp.pix_abs8x8(ptr_y +dct_offset+ 8, dest_y +dct_offset+ 8, wrap_y) < 20*s->qscale) skip_dct[3]= 1;
2965             if(s->dsp.pix_abs8x8(ptr_cb              , dest_cb              , wrap_c) < 20*s->qscale) skip_dct[4]= 1;
2966             if(s->dsp.pix_abs8x8(ptr_cr              , dest_cr              , wrap_c) < 20*s->qscale) skip_dct[5]= 1;
2967 #if 0
2968 {
2969  static int stat[7];
2970  int num=0;
2971  for(i=0; i<6; i++)
2972   if(skip_dct[i]) num++;
2973  stat[num]++;
2974  
2975  if(s->mb_x==0 && s->mb_y==0){
2976   for(i=0; i<7; i++){
2977    printf("%6d %1d\n", stat[i], i);
2978   }
2979  }
2980 }
2981 #endif
2982         }
2983
2984     }
2985             
2986 #if 0
2987             {
2988                 float adap_parm;
2989                 
2990                 adap_parm = ((s->avg_mb_var << 1) + s->mb_var[s->mb_stride*mb_y+mb_x] + 1.0) /
2991                             ((s->mb_var[s->mb_stride*mb_y+mb_x] << 1) + s->avg_mb_var + 1.0);
2992             
2993                 printf("\ntype=%c qscale=%2d adap=%0.2f dquant=%4.2f var=%4d avgvar=%4d", 
2994                         (s->mb_type[s->mb_stride*mb_y+mb_x] > 0) ? 'I' : 'P', 
2995                         s->qscale, adap_parm, s->qscale*adap_parm,
2996                         s->mb_var[s->mb_stride*mb_y+mb_x], s->avg_mb_var);
2997             }
2998 #endif
2999     /* DCT & quantize */
3000     if(s->out_format==FMT_MJPEG){
3001         for(i=0;i<6;i++) {
3002             int overflow;
3003             s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, 8, &overflow);
3004             if (overflow) clip_coeffs(s, s->block[i], s->block_last_index[i]);
3005         }
3006     }else{
3007         for(i=0;i<6;i++) {
3008             if(!skip_dct[i]){
3009                 int overflow;
3010                 s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow);
3011             // FIXME we could decide to change the quantizer instead of clipping
3012             // JS: I don't think that would be a good idea, it could lower quality instead
3013             //     of improving it. Just INTRADC clipping deserves changes in the quantizer
3014                 if (overflow) clip_coeffs(s, s->block[i], s->block_last_index[i]);
3015             }else
3016                 s->block_last_index[i]= -1;
3017         }
3018         if(s->luma_elim_threshold && !s->mb_intra)
3019             for(i=0; i<4; i++)
3020                 dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
3021         if(s->chroma_elim_threshold && !s->mb_intra)
3022             for(i=4; i<6; i++)
3023                 dct_single_coeff_elimination(s, i, s->chroma_elim_threshold);
3024     }
3025
3026     if((s->flags&CODEC_FLAG_GRAY) && s->mb_intra){
3027         s->block_last_index[4]=
3028         s->block_last_index[5]= 0;
3029         s->block[4][0]=
3030         s->block[5][0]= (1024 + s->c_dc_scale/2)/ s->c_dc_scale;
3031     }
3032
3033     /* huffman encode */
3034     switch(s->codec_id){ //FIXME funct ptr could be slightly faster
3035     case CODEC_ID_MPEG1VIDEO:
3036     case CODEC_ID_MPEG2VIDEO:
3037         mpeg1_encode_mb(s, s->block, motion_x, motion_y); break;
3038 #ifdef CONFIG_RISKY
3039     case CODEC_ID_MPEG4:
3040         mpeg4_encode_mb(s, s->block, motion_x, motion_y); break;
3041     case CODEC_ID_MSMPEG4V2:
3042     case CODEC_ID_MSMPEG4V3:
3043     case CODEC_ID_WMV1:
3044         msmpeg4_encode_mb(s, s->block, motion_x, motion_y); break;
3045     case CODEC_ID_WMV2:
3046          ff_wmv2_encode_mb(s, s->block, motion_x, motion_y); break;
3047     case CODEC_ID_H263:
3048     case CODEC_ID_H263P:
3049     case CODEC_ID_FLV1:
3050     case CODEC_ID_RV10:
3051         h263_encode_mb(s, s->block, motion_x, motion_y); break;
3052 #endif
3053     case CODEC_ID_MJPEG:
3054         mjpeg_encode_mb(s, s->block); break;
3055     default:
3056         assert(0);
3057     }
3058 }
3059
3060 #endif //CONFIG_ENCODERS
3061
3062 /**
3063  * combines the (truncated) bitstream into a complete frame
3064  * @returns -1 if no complete frame could be created
3065  */
3066 int ff_combine_frame( MpegEncContext *s, int next, uint8_t **buf, int *buf_size){
3067     ParseContext *pc= &s->parse_context;
3068
3069 #if 0
3070     if(pc->overread){
3071         printf("overread %d, state:%X next:%d index:%d o_index:%d\n", pc->overread, pc->state, next, pc->index, pc->overread_index);
3072         printf("%X %X %X %X\n", (*buf)[0], (*buf)[1],(*buf)[2],(*buf)[3]);
3073     }
3074 #endif
3075
3076     /* copy overread bytes from the last frame into the buffer */
3077     for(; pc->overread>0; pc->overread--){
3078         pc->buffer[pc->index++]= pc->buffer[pc->overread_index++];
3079     }
3080     
3081     pc->last_index= pc->index;
3082
3083     /* copy into buffer and return */
3084     if(next == END_NOT_FOUND){
3085         pc->buffer= av_fast_realloc(pc->buffer, &pc->buffer_size, (*buf_size) + pc->index + FF_INPUT_BUFFER_PADDING_SIZE);
3086
3087         memcpy(&pc->buffer[pc->index], *buf, *buf_size);
3088         pc->index += *buf_size;
3089         return -1;
3090     }
3091
3092     *buf_size=
3093     pc->overread_index= pc->index + next;
3094     
3095     /* append to buffer */
3096     if(pc->index){
3097         pc->buffer= av_fast_realloc(pc->buffer, &pc->buffer_size, next + pc->index + FF_INPUT_BUFFER_PADDING_SIZE);
3098
3099         memcpy(&pc->buffer[pc->index], *buf, next + FF_INPUT_BUFFER_PADDING_SIZE );
3100         pc->index = 0;
3101         *buf= pc->buffer;
3102     }
3103
3104     /* store overread bytes */
3105     for(;next < 0; next++){
3106         pc->state = (pc->state<<8) | pc->buffer[pc->last_index + next];
3107         pc->overread++;
3108     }
3109
3110 #if 0
3111     if(pc->overread){
3112         printf("overread %d, state:%X next:%d index:%d o_index:%d\n", pc->overread, pc->state, next, pc->index, pc->overread_index);
3113         printf("%X %X %X %X\n", (*buf)[0], (*buf)[1],(*buf)[2],(*buf)[3]);
3114     }
3115 #endif
3116
3117     return 0;
3118 }
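/* Typical use, a minimal sketch (find_frame_end() is a hypothetical stand-in
 * for the codec specific start code search):
 *
 *     next= find_frame_end(&s->parse_context, buf, buf_size);
 *     if(ff_combine_frame(s, next, &buf, &buf_size) < 0)
 *         return buf_size; // no complete frame yet, feed more input
 *     // buf/buf_size now describe exactly one complete frame
 */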
3119
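/**
 * Releases all buffered pictures and resets the parse context,
 * e.g. when the stream is flushed after a seek.
 */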
3120 void ff_mpeg_flush(AVCodecContext *avctx){
3121     int i;
3122     MpegEncContext *s = avctx->priv_data;
3123     
3124     for(i=0; i<MAX_PICTURE_COUNT; i++){
3125        if(s->picture[i].data[0] && (   s->picture[i].type == FF_BUFFER_TYPE_INTERNAL
3126                                     || s->picture[i].type == FF_BUFFER_TYPE_USER))
3127         avctx->release_buffer(avctx, (AVFrame*)&s->picture[i]);
3128     }
3129     s->last_picture_ptr = s->next_picture_ptr = NULL;
3130     
3131     s->parse_context.state= -1;
3132     s->parse_context.frame_start_found= 0;
3133     s->parse_context.overread= 0;
3134     s->parse_context.overread_index= 0;
3135     s->parse_context.index= 0;
3136     s->parse_context.last_index= 0;
3137 }
3138
3139 #ifdef CONFIG_ENCODERS
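/**
 * Appends the first length bits of src to pb; src is consumed as big endian
 * 16 bit words (be2me_16() undoes the byteswap of the uint16_t read) and the
 * remaining length&15 bits are written from the top of the last word.
 */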
3140 void ff_copy_bits(PutBitContext *pb, uint8_t *src, int length)
3141 {
3142     int bytes= length>>4;
3143     int bits= length&15;
3144     int i;
3145
3146     if(length==0) return;
3147
3148     for(i=0; i<bytes; i++) put_bits(pb, 16, be2me_16(((uint16_t*)src)[i]));
3149     put_bits(pb, bits, be2me_16(((uint16_t*)src)[i])>>(16-bits));
3150 }
3151
3152 static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int type){
3153     int i;
3154
3155     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
3156
3157     /* mpeg1 */
3158     d->mb_skip_run= s->mb_skip_run;
3159     for(i=0; i<3; i++)
3160         d->last_dc[i]= s->last_dc[i];
3161     
3162     /* statistics */
3163     d->mv_bits= s->mv_bits;
3164     d->i_tex_bits= s->i_tex_bits;
3165     d->p_tex_bits= s->p_tex_bits;
3166     d->i_count= s->i_count;
3167     d->f_count= s->f_count;
3168     d->b_count= s->b_count;
3169     d->skip_count= s->skip_count;
3170     d->misc_bits= s->misc_bits;
3171     d->last_bits= 0;
3172
3173     d->mb_skiped= 0;
3174     d->qscale= s->qscale;
3175 }
3176
3177 static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){
3178     int i;
3179
3180     memcpy(d->mv, s->mv, 2*4*2*sizeof(int)); 
3181     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
3182     
3183     /* mpeg1 */
3184     d->mb_skip_run= s->mb_skip_run;
3185     for(i=0; i<3; i++)
3186         d->last_dc[i]= s->last_dc[i];
3187     
3188     /* statistics */
3189     d->mv_bits= s->mv_bits;
3190     d->i_tex_bits= s->i_tex_bits;
3191     d->p_tex_bits= s->p_tex_bits;
3192     d->i_count= s->i_count;
3193     d->f_count= s->f_count;
3194     d->b_count= s->b_count;
3195     d->skip_count= s->skip_count;
3196     d->misc_bits= s->misc_bits;
3197
3198     d->mb_intra= s->mb_intra;
3199     d->mb_skiped= s->mb_skiped;
3200     d->mv_type= s->mv_type;
3201     d->mv_dir= s->mv_dir;
3202     d->pb= s->pb;
3203     if(s->data_partitioning){
3204         d->pb2= s->pb2;
3205         d->tex_pb= s->tex_pb;
3206     }
3207     d->block= s->block;
3208     for(i=0; i<6; i++)
3209         d->block_last_index[i]= s->block_last_index[i];
3210     d->interlaced_dct= s->interlaced_dct;
3211     d->qscale= s->qscale;
3212 }
3213
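/**
 * Encodes the current macroblock as the given type into one of the two
 * candidate bit buffers and, if the resulting cost (bits, or a rate
 * distortion score for FF_MB_DECISION_RD) beats *dmin, keeps it as the
 * best choice so far.
 */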
3214 static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type, 
3215                            PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2],
3216                            int *dmin, int *next_block, int motion_x, int motion_y)
3217 {
3218     int score;
3219     uint8_t *dest_backup[3];
3220     
3221     copy_context_before_encode(s, backup, type);
3222
3223     s->block= s->blocks[*next_block];
3224     s->pb= pb[*next_block];
3225     if(s->data_partitioning){
3226         s->pb2   = pb2   [*next_block];
3227         s->tex_pb= tex_pb[*next_block];
3228     }
3229     
3230     if(*next_block){
3231         memcpy(dest_backup, s->dest, sizeof(s->dest));
3232         s->dest[0] = s->me.scratchpad;
3233         s->dest[1] = s->me.scratchpad + 16;
3234         s->dest[2] = s->me.scratchpad + 16 + 8;
3235         assert(2*s->uvlinesize == s->linesize); //should be no problem for encoding
3236         assert(s->linesize >= 64); //FIXME
3237     }
3238
3239     encode_mb(s, motion_x, motion_y);
3240     
3241     score= get_bit_count(&s->pb);
3242     if(s->data_partitioning){
3243         score+= get_bit_count(&s->pb2);
3244         score+= get_bit_count(&s->tex_pb);
3245     }
3246    
3247     if(s->avctx->mb_decision == FF_MB_DECISION_RD){
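        /* rate distortion cost: bits*qscale*qscale*109 + SSE*2^7,
           i.e. lambda is roughly qscale*qscale*109/128 */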
3248         MPV_decode_mb(s, s->block);
3249
3250         score *= s->qscale * s->qscale * 109;
3251         score += sse_mb(s) << 7;
3252     }
3253     
3254     if(*next_block){
3255         memcpy(s->dest, dest_backup, sizeof(s->dest));
3256     }
3257
3258     if(score<*dmin){
3259         *dmin= score;
3260         *next_block^=1;
3261
3262         copy_context_after_encode(best, s, type);
3263     }
3264 }
3265                 
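/**
 * Sum of squared differences between two w x h blocks; uses the optimized
 * dsputil 16x16/8x8 routines where possible and a plain loop otherwise.
 */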
3266 static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride){
3267     uint32_t *sq = squareTbl + 256;
3268     int acc=0;
3269     int x,y;
3270     
3271     if(w==16 && h==16) 
3272         return s->dsp.sse[0](NULL, src1, src2, stride);
3273     else if(w==8 && h==8)
3274         return s->dsp.sse[1](NULL, src1, src2, stride);
3275     
3276     for(y=0; y<h; y++){
3277         for(x=0; x<w; x++){
3278             acc+= sq[src1[x + y*stride] - src2[x + y*stride]];
3279         } 
3280     }
3281     
3282     assert(acc>=0);
3283     
3284     return acc;
3285 }
3286
3287 static int sse_mb(MpegEncContext *s){
3288     int w= 16;
3289     int h= 16;
3290
3291     if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
3292     if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
3293
3294     if(w==16 && h==16)
3295         return  s->dsp.sse[0](NULL, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize)
3296                +s->dsp.sse[1](NULL, s->new_picture.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize)
3297                +s->dsp.sse[1](NULL, s->new_picture.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize);
3298     else
3299         return  sse(s, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], w, h, s->linesize)
3300                +sse(s, s->new_picture.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize)
3301                +sse(s, s->new_picture.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize);
3302 }
3303
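/**
 * Encodes one complete picture: motion estimation over all macroblocks,
 * qscale/rate control, the picture header and the per macroblock mode
 * decision and coding loop.
 */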
3304 static void encode_picture(MpegEncContext *s, int picture_number)
3305 {
3306     int mb_x, mb_y, pdif = 0;
3307     int i;
3308     int bits;
3309     MpegEncContext best_s, backup_s;
3310     uint8_t bit_buf[2][3000];
3311     uint8_t bit_buf2[2][3000];
3312     uint8_t bit_buf_tex[2][3000];
3313     PutBitContext pb[2], pb2[2], tex_pb[2];
3314
3315     for(i=0; i<2; i++){
3316         init_put_bits(&pb    [i], bit_buf    [i], 3000, NULL, NULL);
3317         init_put_bits(&pb2   [i], bit_buf2   [i], 3000, NULL, NULL);
3318         init_put_bits(&tex_pb[i], bit_buf_tex[i], 3000, NULL, NULL);
3319     }
3320
3321     s->picture_number = picture_number;
3322     
3323     /* Reset the average MB variance */
3324     s->current_picture.mb_var_sum = 0;
3325     s->current_picture.mc_mb_var_sum = 0;
3326
3327 #ifdef CONFIG_RISKY
3328     /* we need to initialize some time vars before we can encode b-frames */
3329     // RAL: Condition added for MPEG1VIDEO
3330     //FIXME figure out why mpeg1/2 need this !!!
3331     if (s->codec_id == CODEC_ID_MPEG1VIDEO || s->codec_id == CODEC_ID_MPEG2VIDEO || (s->h263_pred && !s->h263_msmpeg4))
3332         ff_set_mpeg4_time(s, s->picture_number); 
3333 #endif
3334         
3335     s->scene_change_score=0;
3336     
3337     s->qscale= (int)(s->frame_qscale + 0.5); //FIXME qscale / ... stuff for ME rate distortion
3338     
3339     if(s->pict_type==I_TYPE){
3340         if(s->msmpeg4_version >= 3) s->no_rounding=1;
3341         else                        s->no_rounding=0;
3342     }else if(s->pict_type!=B_TYPE){
3343         if(s->flipflop_rounding || s->codec_id == CODEC_ID_H263P || s->codec_id == CODEC_ID_MPEG4)
3344             s->no_rounding ^= 1;          
3345     }
3346     
3347     /* Estimate motion for every MB */
3348     s->mb_intra=0; //for the rate distortion & bit compare functions
3349     if(s->pict_type != I_TYPE){
3350         if(s->pict_type != B_TYPE){
3351             if((s->avctx->pre_me && s->last_non_b_pict_type==I_TYPE) || s->avctx->pre_me==2){
3352                 s->me.pre_pass=1;
3353                 s->me.dia_size= s->avctx->pre_dia_size;
3354
3355                 for(mb_y=s->mb_height-1; mb_y >=0 ; mb_y--) {
3356                     for(mb_x=s->mb_width-1; mb_x >=0 ; mb_x--) {
3357                         s->mb_x = mb_x;
3358                         s->mb_y = mb_y;
3359                         ff_pre_estimate_p_frame_motion(s, mb_x, mb_y);
3360                     }
3361                 }
3362                 s->me.pre_pass=0;
3363             }
3364         }
3365
3366         s->me.dia_size= s->avctx->dia_size;
3367         for(mb_y=0; mb_y < s->mb_height; mb_y++) {
3368             s->block_index[0]= s->block_wrap[0]*(mb_y*2 + 1) - 1;
3369             s->block_index[1]= s->block_wrap[0]*(mb_y*2 + 1);
3370             s->block_index[2]= s->block_wrap[0]*(mb_y*2 + 2) - 1;
3371             s->block_index[3]= s->block_wrap[0]*(mb_y*2 + 2);
3372             for(mb_x=0; mb_x < s->mb_width; mb_x++) {
3373                 s->mb_x = mb_x;
3374                 s->mb_y = mb_y;
3375                 s->block_index[0]+=2;
3376                 s->block_index[1]+=2;
3377                 s->block_index[2]+=2;
3378                 s->block_index[3]+=2;
3379                 
3380                 /* compute motion vector & mb_type and store in context */
3381                 if(s->pict_type==B_TYPE)
3382                     ff_estimate_b_frame_motion(s, mb_x, mb_y);
3383                 else
3384                     ff_estimate_p_frame_motion(s, mb_x, mb_y);
3385             }
3386         }
3387     }else /* if(s->pict_type == I_TYPE) */{
3388         /* I-Frame */
3389         //FIXME do we need to zero them?
3390         memset(s->motion_val[0], 0, sizeof(int16_t)*(s->mb_width*2 + 2)*(s->mb_height*2 + 2)*2);
3391         memset(s->p_mv_table   , 0, sizeof(int16_t)*(s->mb_stride)*s->mb_height*2);
3392         memset(s->mb_type      , MB_TYPE_INTRA, sizeof(uint8_t)*s->mb_stride*s->mb_height);
3393         
3394         if(!s->fixed_qscale){
3395             /* finding spatial complexity for I-frame rate control */
3396             for(mb_y=0; mb_y < s->mb_height; mb_y++) {
3397                 for(mb_x=0; mb_x < s->mb_width; mb_x++) {
3398                     int xx = mb_x * 16;
3399                     int yy = mb_y * 16;
3400                     uint8_t *pix = s->new_picture.data[0] + (yy * s->linesize) + xx;
3401                     int varc;
3402                     int sum = s->dsp.pix_sum(pix, s->linesize);
3403     
3404                     varc = (s->dsp.pix_norm1(pix, s->linesize) - (((unsigned)(sum*sum))>>8) + 500 + 128)>>8;
3405
3406                     s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc;
3407                     s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
3408                     s->current_picture.mb_var_sum    += varc;
3409                 }
3410             }
3411         }
3412     }
3413     emms_c();
3414
3415     if(s->scene_change_score > 0 && s->pict_type == P_TYPE){
3416         s->pict_type= I_TYPE;
3417         memset(s->mb_type   , MB_TYPE_INTRA, sizeof(uint8_t)*s->mb_stride*s->mb_height);
3418 //printf("Scene change detected, encoding as I Frame %d %d\n", s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
3419     }
3420
3421     if(!s->umvplus){
3422         if(s->pict_type==P_TYPE || s->pict_type==S_TYPE) {
3423             s->f_code= ff_get_best_fcode(s, s->p_mv_table, MB_TYPE_INTER);
3424         
3425             ff_fix_long_p_mvs(s);
3426         }
3427
3428         if(s->pict_type==B_TYPE){
3429             int a, b;
3430
3431             a = ff_get_best_fcode(s, s->b_forw_mv_table, MB_TYPE_FORWARD);
3432             b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, MB_TYPE_BIDIR);
3433             s->f_code = FFMAX(a, b);
3434
3435             a = ff_get_best_fcode(s, s->b_back_mv_table, MB_TYPE_BACKWARD);
3436             b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, MB_TYPE_BIDIR);
3437             s->b_code = FFMAX(a, b);
3438
3439             ff_fix_long_b_mvs(s, s->b_forw_mv_table, s->f_code, MB_TYPE_FORWARD);
3440             ff_fix_long_b_mvs(s, s->b_back_mv_table, s->b_code, MB_TYPE_BACKWARD);
3441             ff_fix_long_b_mvs(s, s->b_bidir_forw_mv_table, s->f_code, MB_TYPE_BIDIR);
3442             ff_fix_long_b_mvs(s, s->b_bidir_back_mv_table, s->b_code, MB_TYPE_BIDIR);
3443         }
3444     }
3445     
3446     if (s->fixed_qscale) 
3447         s->frame_qscale = s->current_picture.quality;
3448     else
3449         s->frame_qscale = ff_rate_estimate_qscale(s);
3450
3451     if(s->adaptive_quant){
3452 #ifdef CONFIG_RISKY
3453         switch(s->codec_id){
3454         case CODEC_ID_MPEG4:
3455             ff_clean_mpeg4_qscales(s);
3456             break;
3457         case CODEC_ID_H263:
3458         case CODEC_ID_H263P:
3459         case CODEC_ID_FLV1:
3460             ff_clean_h263_qscales(s);
3461             break;
3462         }
3463 #endif
3464
3465         s->qscale= s->current_picture.qscale_table[0];
3466     }else
3467         s->qscale= (int)(s->frame_qscale + 0.5);
3468         
3469     if (s->out_format == FMT_MJPEG) {
3470         /* for mjpeg, we do include qscale in the matrix */
3471         s->intra_matrix[0] = ff_mpeg1_default_intra_matrix[0];
3472         for(i=1;i<64;i++){
3473             int j= s->dsp.idct_permutation[i];
3474
3475             s->intra_matrix[j] = CLAMP_TO_8BIT((ff_mpeg1_default_intra_matrix[i] * s->qscale) >> 3);
3476         }
3477         convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16, 
3478                        s->q_intra_matrix16_bias, s->intra_matrix, s->intra_quant_bias, 8, 8);
3479     }
3480     
3481     //FIXME var duplication
3482     s->current_picture.key_frame= s->pict_type == I_TYPE;
3483     s->current_picture.pict_type= s->pict_type;
3484
3485     if(s->current_picture.key_frame)
3486         s->picture_in_gop_number=0;
3487
3488     s->last_bits= get_bit_count(&s->pb);
3489     switch(s->out_format) {
3490     case FMT_MJPEG:
3491         mjpeg_picture_header(s);
3492         break;
3493 #ifdef CONFIG_RISKY
3494     case FMT_H263:
3495         if (s->codec_id == CODEC_ID_WMV2) 
3496             ff_wmv2_encode_picture_header(s, picture_number);
3497         else if (s->h263_msmpeg4) 
3498             msmpeg4_encode_picture_header(s, picture_number);
3499         else if (s->h263_pred)
3500             mpeg4_encode_picture_header(s, picture_number);
3501         else if (s->h263_rv10) 
3502             rv10_encode_picture_header(s, picture_number);
3503         else if (s->codec_id == CODEC_ID_FLV1)
3504             ff_flv_encode_picture_header(s, picture_number);
3505         else
3506             h263_encode_picture_header(s, picture_number);
3507         break;
3508 #endif
3509     case FMT_MPEG1:
3510         mpeg1_encode_picture_header(s, picture_number);
3511         break;
3512     case FMT_H264:
3513         break;
3514     }
3515     bits= get_bit_count(&s->pb);
3516     s->header_bits= bits - s->last_bits;
3517     s->last_bits= bits;
3518     s->mv_bits=0;
3519     s->misc_bits=0;
3520     s->i_tex_bits=0;
3521     s->p_tex_bits=0;
3522     s->i_count=0;
3523     s->f_count=0;
3524     s->b_count=0;
3525     s->skip_count=0;
3526
3527     for(i=0; i<3; i++){
3528         /* init last dc values */
3529         /* note: quant matrix value (8) is implied here */
3530         s->last_dc[i] = 128;
3531         
3532         s->current_picture_ptr->error[i] = 0;
3533     }
3534     s->mb_skip_run = 0;
3535     s->last_mv[0][0][0] = 0;
3536     s->last_mv[0][0][1] = 0;
3537     s->last_mv[1][0][0] = 0;
3538     s->last_mv[1][0][1] = 0;
3539      
3540     s->last_mv_dir = 0;
3541
3542 #ifdef CONFIG_RISKY
3543     switch(s->codec_id){
3544     case CODEC_ID_H263:
3545     case CODEC_ID_H263P:
3546     case CODEC_ID_FLV1:
3547         s->gob_index = ff_h263_get_gob_height(s);
3548         break;
3549     case CODEC_ID_MPEG4:
3550         if(s->partitioned_frame)
3551             ff_mpeg4_init_partitions(s);
3552         break;
3553     }
3554 #endif
3555
3556     s->resync_mb_x=0;
3557     s->resync_mb_y=0;
3558     s->first_slice_line = 1;
3559     s->ptr_lastgob = s->pb.buf;
3560     for(mb_y=0; mb_y < s->mb_height; mb_y++) {
3561         s->mb_x=0;
3562         s->mb_y= mb_y;
3563
3564         s->y_dc_scale= s->y_dc_scale_table[ s->qscale ];
3565         s->c_dc_scale= s->c_dc_scale_table[ s->qscale ];
3566         ff_init_block_index(s);
3567         
3568         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
3569             const int xy= mb_y*s->mb_stride + mb_x;
3570             int mb_type= s->mb_type[xy];
3571 //            int d;
3572             int dmin= INT_MAX;
3573
3574             s->mb_x = mb_x;
3575             ff_update_block_index(s);
3576
3577             /* write gob / video packet header  */
3578 #ifdef CONFIG_RISKY
3579             if(s->rtp_mode && mb_y + mb_x>0){
3580                 int current_packet_size, is_gob_start;
3581                 
3582                 current_packet_size= pbBufPtr(&s->pb) - s->ptr_lastgob;
3583                 is_gob_start=0;
3584                 
3585                 if(s->codec_id==CODEC_ID_MPEG4){
3586                     if(current_packet_size >= s->rtp_payload_size){
3587
3588                         if(s->partitioned_frame){
3589                             ff_mpeg4_merge_partitions(s);
3590                             ff_mpeg4_init_partitions(s);
3591                         }
3592                         ff_mpeg4_encode_video_packet_header(s);
3593
3594                         if(s->flags&CODEC_FLAG_PASS1){
3595                             int bits= get_bit_count(&s->pb);
3596                             s->misc_bits+= bits - s->last_bits;
3597                             s->last_bits= bits;
3598                         }
3599                         ff_mpeg4_clean_buffers(s);
3600                         is_gob_start=1;
3601                     }
3602                 }else if(s->codec_id==CODEC_ID_MPEG1VIDEO){
3603                     if(   current_packet_size >= s->rtp_payload_size 
3604                        && s->mb_skip_run==0){
3605                         ff_mpeg1_encode_slice_header(s);
3606                         ff_mpeg1_clean_buffers(s);
3607                         is_gob_start=1;
3608                     }
3609                 }else if(s->codec_id==CODEC_ID_MPEG2VIDEO){
3610                     if(   (   current_packet_size >= s->rtp_payload_size || mb_x==0)
3611                        && s->mb_skip_run==0){
3612                         ff_mpeg1_encode_slice_header(s);
3613                         ff_mpeg1_clean_buffers(s);
3614                         is_gob_start=1;
3615                     }
3616                 }else{
3617                     if(current_packet_size >= s->rtp_payload_size
3618                        && s->mb_x==0 && s->mb_y%s->gob_index==0){
3619                        
3620                         h263_encode_gob_header(s, mb_y);                       
3621                         is_gob_start=1;
3622                     }
3623                 }
3624
3625                 if(is_gob_start){
3626                     s->ptr_lastgob = pbBufPtr(&s->pb);
3627                     s->first_slice_line=1;
3628                     s->resync_mb_x=mb_x;
3629                     s->resync_mb_y=mb_y;
3630                 }
3631             }
3632 #endif
3633
3634             if(  (s->resync_mb_x   == s->mb_x)
3635                && s->resync_mb_y+1 == s->mb_y){
3636                 s->first_slice_line=0; 
3637             }
3638
3639             s->mb_skiped=0;
3640
3641             if(mb_type & (mb_type-1)){ // more than 1 MB type possible
3642                 int next_block=0;
3643                 int pb_bits_count, pb2_bits_count, tex_pb_bits_count;
3644
3645                 copy_context_before_encode(&backup_s, s, -1);
3646                 backup_s.pb= s->pb;
3647                 best_s.data_partitioning= s->data_partitioning;
3648                 best_s.partitioned_frame= s->partitioned_frame;
3649                 if(s->data_partitioning){
3650                     backup_s.pb2= s->pb2;
3651                     backup_s.tex_pb= s->tex_pb;
3652                 }
3653
3654                 if(mb_type&MB_TYPE_INTER){
3655                     s->mv_dir = MV_DIR_FORWARD;
3656                     s->mv_type = MV_TYPE_16X16;
3657                     s->mb_intra= 0;
3658                     s->mv[0][0][0] = s->p_mv_table[xy][0];
3659                     s->mv[0][0][1] = s->p_mv_table[xy][1];
3660                     encode_mb_hq(s, &backup_s, &best_s, MB_TYPE_INTER, pb, pb2, tex_pb, 
3661                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
3662                 }
3663                 if(mb_type&MB_TYPE_INTER4V){                 
3664                     s->mv_dir = MV_DIR_FORWARD;
3665                     s->mv_type = MV_TYPE_8X8;
3666                     s->mb_intra= 0;
3667                     for(i=0; i<4; i++){
3668                         s->mv[0][i][0] = s->motion_val[s->block_index[i]][0];
3669                         s->mv[0][i][1] = s->motion_val[s->block_index[i]][1];
3670                     }
3671                     encode_mb_hq(s, &backup_s, &best_s, MB_TYPE_INTER4V, pb, pb2, tex_pb, 
3672                                  &dmin, &next_block, 0, 0);
3673                 }
3674                 if(mb_type&MB_TYPE_FORWARD){
3675                     s->mv_dir = MV_DIR_FORWARD;
3676                     s->mv_type = MV_TYPE_16X16;
3677                     s->mb_intra= 0;
3678                     s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
3679                     s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
3680                     encode_mb_hq(s, &backup_s, &best_s, MB_TYPE_FORWARD, pb, pb2, tex_pb, 
3681                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
3682                 }
3683                 if(mb_type&MB_TYPE_BACKWARD){
3684                     s->mv_dir = MV_DIR_BACKWARD;
3685                     s->mv_type = MV_TYPE_16X16;
3686                     s->mb_intra= 0;
3687                     s->mv[1][0][0] = s->b_back_mv_table[xy][0];
3688                     s->mv[1][0][1] = s->b_back_mv_table[xy][1];
3689                     encode_mb_hq(s, &backup_s, &best_s, MB_TYPE_BACKWARD, pb, pb2, tex_pb, 
3690                                  &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]);
3691                 }
3692                 if(mb_type&MB_TYPE_BIDIR){
3693                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3694                     s->mv_type = MV_TYPE_16X16;
3695                     s->mb_intra= 0;
3696                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
3697                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
3698                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
3699                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
3700                     encode_mb_hq(s, &backup_s, &best_s, MB_TYPE_BIDIR, pb, pb2, tex_pb, 
3701                                  &dmin, &next_block, 0, 0);
3702                 }
3703                 if(mb_type&MB_TYPE_DIRECT){
3704                     int mx= s->b_direct_mv_table[xy][0];
3705                     int my= s->b_direct_mv_table[xy][1];
3706                     
3707                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
3708                     s->mb_intra= 0;
3709 #ifdef CONFIG_RISKY
3710                     ff_mpeg4_set_direct_mv(s, mx, my);
3711 #endif
3712                     encode_mb_hq(s, &backup_s, &best_s, MB_TYPE_DIRECT, pb, pb2, tex_pb, 
3713                                  &dmin, &next_block, mx, my);
3714                 }
3715                 if(mb_type&MB_TYPE_INTRA){
3716                     s->mv_dir = 0;
3717                     s->mv_type = MV_TYPE_16X16;
3718                     s->mb_intra= 1;
3719                     s->mv[0][0][0] = 0;
3720                     s->mv[0][0][1] = 0;
3721                     encode_mb_hq(s, &backup_s, &best_s, MB_TYPE_INTRA, pb, pb2, tex_pb, 
3722                                  &dmin, &next_block, 0, 0);
3723                     if(s->h263_pred || s->h263_aic){
3724                         if(best_s.mb_intra)
3725                             s->mbintra_table[mb_x + mb_y*s->mb_stride]=1;
3726                         else
3727                             ff_clean_intra_table_entries(s); //old mode?
3728                     }
3729                 }
3730                 copy_context_after_encode(s, &best_s, -1);
3731                 
3732                 pb_bits_count= get_bit_count(&s->pb);
3733                 flush_put_bits(&s->pb);
3734                 ff_copy_bits(&backup_s.pb, bit_buf[next_block^1], pb_bits_count);
3735                 s->pb= backup_s.pb;
3736                 
3737                 if(s->data_partitioning){
3738                     pb2_bits_count= get_bit_count(&s->pb2);
3739                     flush_put_bits(&s->pb2);
3740                     ff_copy_bits(&backup_s.pb2, bit_buf2[next_block^1], pb2_bits_count);
3741                     s->pb2= backup_s.pb2;
3742                     
3743                     tex_pb_bits_count= get_bit_count(&s->tex_pb);
3744                     flush_put_bits(&s->tex_pb);
3745                     ff_copy_bits(&backup_s.tex_pb, bit_buf_tex[next_block^1], tex_pb_bits_count);
3746                     s->tex_pb= backup_s.tex_pb;
3747                 }
3748                 s->last_bits= get_bit_count(&s->pb);
3749                 
3750                 if (s->out_format == FMT_H263 && s->pict_type!=B_TYPE)
3751                     ff_h263_update_motion_val(s);
3752         
3753                 if(next_block==0){
3754                     s->dsp.put_pixels_tab[0][0](s->dest[0], s->me.scratchpad     , s->linesize  ,16);
3755                     s->dsp.put_pixels_tab[1][0](s->dest[1], s->me.scratchpad + 16, s->uvlinesize, 8);
3756                     s->dsp.put_pixels_tab[1][0](s->dest[2], s->me.scratchpad + 24, s->uvlinesize, 8);
3757                 }
3758
3759                 if(s->avctx->mb_decision == FF_MB_DECISION_BITS)
3760                     MPV_decode_mb(s, s->block);
3761             } else {
3762                 int motion_x, motion_y;
3763                 int intra_score;
3764                 int inter_score= s->current_picture.mb_cmp_score[mb_x + mb_y*s->mb_stride];
3765                 
3766               if(s->avctx->mb_decision==FF_MB_DECISION_SIMPLE && s->pict_type==P_TYPE){ //FIXME check if the mess is useful at all
3767                 /* get luma score */
3768                 if((s->avctx->mb_cmp&0xFF)==FF_CMP_SSE){
3769                     intra_score= (s->current_picture.mb_var[mb_x + mb_y*s->mb_stride]<<8) - 500; //FIXME don't scale it down so we don't have to fix it
3770                 }else{
3771                     uint8_t *dest_y;
3772
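                    /* approximate the intra cost by comparing the macroblock
                       against a flat block filled with its mean (a crude model
                       of DC only intra coding) */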
3773                     int mean= s->current_picture.mb_mean[mb_x + mb_y*s->mb_stride]; //FIXME
3774                     mean*= 0x01010101;
3775                     
3776                     dest_y  = s->new_picture.data[0] + (mb_y * 16 * s->linesize    ) + mb_x * 16;
3777                 
3778                     for(i=0; i<16; i++){
3779                         *(uint32_t*)(&s->me.scratchpad[i*s->linesize+ 0]) = mean;
3780                         *(uint32_t*)(&s->me.scratchpad[i*s->linesize+ 4]) = mean;
3781                         *(uint32_t*)(&s->me.scratchpad[i*s->linesize+ 8]) = mean;
3782                         *(uint32_t*)(&s->me.scratchpad[i*s->linesize+12]) = mean;
3783                     }
3784
3785                     s->mb_intra=1;
3786                     intra_score= s->dsp.mb_cmp[0](s, s->me.scratchpad, dest_y, s->linesize);
3787                                         
3788 /*                    printf("intra:%7d inter:%7d var:%7d mc_var.%7d\n", intra_score>>8, inter_score>>8, 
3789                         s->current_picture.mb_var[mb_x + mb_y*s->mb_stride],
3790                         s->current_picture.mc_mb_var[mb_x + mb_y*s->mb_stride]);*/
3791                 }
3792                 
3793                 /* get chroma score */
3794                 if(s->avctx->mb_cmp&FF_CMP_CHROMA){
3795                     int i;
3796                     
3797                     s->mb_intra=1;
3798                     for(i=1; i<3; i++){
3799                         uint8_t *dest_c;
3800                         int mean;
3801                         
3802                         if(s->out_format == FMT_H263){
3803                             mean= (s->dc_val[i][mb_x + (mb_y+1)*(s->mb_width+2)] + 4)>>3; //FIXME not exact but simple ;)
3804                         }else{
3805                             mean= (s->last_dc[i] + 4)>>3;
3806                         }
3807                         dest_c = s->new_picture.data[i] + (mb_y * 8  * (s->uvlinesize)) + mb_x * 8;
3808                         
3809                         mean*= 0x01010101;
3810                         for(i=0; i<8; i++){
3811                             *(uint32_t*)(&s->me.scratchpad[i*s->uvlinesize+ 0]) = mean;
3812                             *(uint32_t*)(&s->me.scratchpad[i*s->uvlinesize+ 4]) = mean;
3813                         }
3814                         
3815                         intra_score+= s->dsp.mb_cmp[1](s, s->me.scratchpad, dest_c, s->uvlinesize);
3816                     }                
3817                 }
3818
3819                 /* bias */
3820                 switch(s->avctx->mb_cmp&0xFF){
3821                 default:
3822                 case FF_CMP_SAD:
3823                     intra_score+= 32*s->qscale;
3824                     break;
3825                 case FF_CMP_SSE:
3826                     intra_score+= 24*s->qscale*s->qscale;
3827                     break;
3828                 case FF_CMP_SATD:
3829                     intra_score+= 96*s->qscale;
3830                     break;
3831                 case FF_CMP_DCT:
3832                     intra_score+= 48*s->qscale;
3833                     break;
3834                 case FF_CMP_BIT:
3835                     intra_score+= 16;
3836                     break;
3837                 case FF_CMP_PSNR:
3838                 case FF_CMP_RD:
3839                     intra_score+= (s->qscale*s->qscale*109*8 + 64)>>7;
3840                     break;
3841                 }
3842
3843                 if(intra_score < inter_score)
3844                     mb_type= MB_TYPE_INTRA;
3845               }  
3846                 
3847                 s->mv_type=MV_TYPE_16X16;
3848                 // only one MB-Type possible
3849                 
3850                 switch(mb_type){
3851                 case MB_TYPE_INTRA:
3852                     s->mv_dir = 0;
3853                     s->mb_intra= 1;
3854                     motion_x= s->mv[0][0][0] = 0;
3855                     motion_y= s->mv[0][0][1] = 0;
3856                     break;
3857                 case MB_TYPE_INTER:
3858                     s->mv_dir = MV_DIR_FORWARD;
3859                     s->mb_intra= 0;
3860                     motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
3861                     motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
3862                     break;
3863                 case MB_TYPE_INTER4V:
3864                     s->mv_dir = MV_DIR_FORWARD;
3865                     s->mv_type = MV_TYPE_8X8;
3866                     s->mb_intra= 0;
3867                     for(i=0; i<4; i++){
3868                         s->mv[0][i][0] = s->motion_val[s->block_index[i]][0];
3869                         s->mv[0][i][1] = s->motion_val[s->block_index[i]][1];
3870                     }
3871                     motion_x= motion_y= 0;
3872                     break;
3873                 case MB_TYPE_DIRECT:
3874                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
3875                     s->mb_intra= 0;
3876                     motion_x=s->b_direct_mv_table[xy][0];
3877                     motion_y=s->b_direct_mv_table[xy][1];
3878 #ifdef CONFIG_RISKY
3879                     ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
3880 #endif
3881                     break;
3882                 case MB_TYPE_BIDIR:
3883                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3884                     s->mb_intra= 0;
3885                     motion_x=0;
3886                     motion_y=0;
3887                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
3888                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
3889                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
3890                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
3891                     break;
3892                 case MB_TYPE_BACKWARD:
3893                     s->mv_dir = MV_DIR_BACKWARD;
3894                     s->mb_intra= 0;
3895                     motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
3896                     motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
3897                     break;
3898                 case MB_TYPE_FORWARD:
3899                     s->mv_dir = MV_DIR_FORWARD;
3900                     s->mb_intra= 0;
3901                     motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
3902                     motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
3903 //                    printf(" %d %d ", motion_x, motion_y);
3904                     break;
3905                 default:
3906                     motion_x=motion_y=0; //gcc warning fix
3907                     printf("illegal MB type\n");
3908                 }
3909
3910                 encode_mb(s, motion_x, motion_y);
3911
3912                 // RAL: Update last macroblock type
3913                 s->last_mv_dir = s->mv_dir;
3914             
3915                 if (s->out_format == FMT_H263 && s->pict_type!=B_TYPE)
3916                     ff_h263_update_motion_val(s);
3917
3918                 MPV_decode_mb(s, s->block);
3919             }
3920
3921             /* clean the MV table in I/P/S frames for direct mode in B frames */
3922             if(s->mb_intra /* && I,P,S_TYPE */){
3923                 s->p_mv_table[xy][0]=0;
3924                 s->p_mv_table[xy][1]=0;
3925             }
3926             
3927             if(s->flags&CODEC_FLAG_PSNR){
3928                 int w= 16;
3929                 int h= 16;
3930
3931                 if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
3932                 if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
3933
3934                 s->current_picture_ptr->error[0] += sse(
3935                     s, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
3936                     s->dest[0], w, h, s->linesize);
3937                 s->current_picture_ptr->error[1] += sse(
3938                     s, s->new_picture.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,
3939                     s->dest[1], w>>1, h>>1, s->uvlinesize);
3940                 s->current_picture_ptr->error[2] += sse(
3941                     s, s->new_picture    .data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,
3942                     s->dest[2], w>>1, h>>1, s->uvlinesize);
3943             }
3944 //printf("MB %d %d bits\n", s->mb_x+s->mb_y*s->mb_stride, get_bit_count(&s->pb));
3945         }
3946     }
3947     emms_c();
3948
3949 #ifdef CONFIG_RISKY
3950     if(s->codec_id==CODEC_ID_MPEG4 && s->partitioned_frame)
3951         ff_mpeg4_merge_partitions(s);
3952
3953     if (s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type == I_TYPE)
3954         msmpeg4_encode_ext_header(s);
3955
3956     if(s->codec_id==CODEC_ID_MPEG4) 
3957         ff_mpeg4_stuffing(&s->pb);
3958 #endif
3959
3960     //if (s->gob_number)
3961     //    fprintf(stderr,"\nNumber of GOB: %d", s->gob_number);
3962     
3963     /* Send the last GOB if RTP */    
3964     if (s->rtp_mode) {
3965         flush_put_bits(&s->pb);
3966         pdif = pbBufPtr(&s->pb) - s->ptr_lastgob;
3967         /* Call the RTP callback to send the last GOB */
3968         if (s->rtp_callback)
3969             s->rtp_callback(s->ptr_lastgob, pdif, s->gob_number);
3970         s->ptr_lastgob = pbBufPtr(&s->pb);
3971         //fprintf(stderr,"\nGOB: %2d size: %d (last)", s->gob_number, pdif);
3972     }
3973 }
3974
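/**
 * Rate distortion optimal quantization: after the FDCT, the best (run, level)
 * decisions are found by dynamic programming over score_tab[], trading the
 * coefficient distortion against the VLC bit cost weighted by lambda.
 */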
3975 static int dct_quantize_trellis_c(MpegEncContext *s, 
3976                         DCTELEM *block, int n,
3977                         int qscale, int *overflow){
3978     const int *qmat;
3979     const uint8_t *scantable= s->intra_scantable.scantable;
3980     int max=0;
3981     unsigned int threshold1, threshold2;
3982     int bias=0;
3983     int run_tab[65];
3984     int level_tab[65];
3985     int score_tab[65];
3986     int last_run=0;
3987     int last_level=0;
3988     int last_score= 0;
3989     int last_i= 0;
3990     int coeff[3][64];
3991     int coeff_count[64];
3992     int lambda, qmul, qadd, start_i, last_non_zero, i;
3993     const int esc_length= s->ac_esc_length;
3994     uint8_t * length;
3995     uint8_t * last_length;
3996     int score_limit=0;
3997     int left_limit= 0;
3998         
3999     s->dsp.fdct (block);
4000
4001     qmul= qscale*16;
4002     qadd= ((qscale-1)|1)*8;
4003
4004     if (s->mb_intra) {
4005         int q;
4006         if (!s->h263_aic) {
4007             if (n < 4)
4008                 q = s->y_dc_scale;
4009             else
4010                 q = s->c_dc_scale;
4011             q = q << 3;
4012         } else{
4013             /* For AIC we skip quant/dequant of INTRADC */
4014             q = 1 << 3;
4015             qadd=0;
4016         }
4017             
4018         /* note: block[0] is assumed to be positive */
4019         block[0] = (block[0] + (q >> 1)) / q;
4020         start_i = 1;
4021         last_non_zero = 0;
4022         qmat = s->q_intra_matrix[qscale];
4023         if(s->mpeg_quant || s->out_format == FMT_MPEG1)
4024             bias= 1<<(QMAT_SHIFT-1);
4025         length     = s->intra_ac_vlc_length;
4026         last_length= s->intra_ac_vlc_last_length;
4027     } else {
4028         start_i = 0;
4029         last_non_zero = -1;
4030         qmat = s->q_inter_matrix[qscale];
4031         length     = s->inter_ac_vlc_length;
4032         last_length= s->inter_ac_vlc_last_length;
4033     }
4034
4035     threshold1= (1<<QMAT_SHIFT) - bias - 1;
4036     threshold2= (threshold1<<1);
4037
4038     for(i=start_i; i<64; i++) {
4039         const int j = scantable[i];
4040         const int k= i-start_i;
4041         int level = block[j];
4042         level = level * qmat[j];
4043
4044 //        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
4045 //           || bias-level >= (1<<(QMAT_SHIFT - 3))){
4046         if(((unsigned)(level+threshold1))>threshold2){
4047             if(level>0){
4048                 level= (bias + level)>>QMAT_SHIFT;
4049                 coeff[0][k]= level;
4050                 coeff[1][k]= level-1;
4051 //                coeff[2][k]= level-2;
4052             }else{
4053                 level= (bias - level)>>QMAT_SHIFT;
4054                 coeff[0][k]= -level;
4055                 coeff[1][k]= -level+1;
4056 //                coeff[2][k]= -level+2;
4057             }
4058             coeff_count[k]= FFMIN(level, 2);
4059             max |=level;
4060             last_non_zero = i;
4061         }else{
4062             coeff[0][k]= (level>>31)|1;
4063             coeff_count[k]= 1;
4064         }
4065     }
4066     
4067     *overflow= s->max_qcoeff < max; //overflow might have happened
4068     
4069     if(last_non_zero < start_i){
4070         memset(block + start_i, 0, (64-start_i)*sizeof(DCTELEM));
4071         return last_non_zero;
4072     }
4073
4074     lambda= (qscale*qscale*64*105 + 64)>>7; //FIXME finetune
4075         
4076     score_tab[0]= 0;
4077     for(i=0; i<=last_non_zero - start_i; i++){
4078         int level_index, run, j;
4079         const int dct_coeff= block[ scantable[i + start_i] ];
4080         const int zero_distoration= dct_coeff*dct_coeff;
4081         int best_score=256*256*256*120;
4082
4083         last_score += zero_distoration;
4084         for(level_index=0; level_index < coeff_count[i]; level_index++){
4085             int distoration;
4086             int level= coeff[level_index][i];
4087             int unquant_coeff;
4088             
4089             assert(level);
4090
4091             if(s->out_format == FMT_H263){
4092                 if(level>0){
4093                     unquant_coeff= level*qmul + qadd;
4094                 }else{
4095                     unquant_coeff= level*qmul - qadd;
4096                 }
4097             }else{ //MPEG1
4098                 j= s->dsp.idct_permutation[ scantable[i + start_i] ]; //FIXME optimize
4099                 if(s->mb_intra){
4100                     if (level < 0) {
4101                         unquant_coeff = (int)((-level) * qscale * s->intra_matrix[j]) >> 3;
4102                         unquant_coeff = -((unquant_coeff - 1) | 1);
4103                     } else {
4104                         unquant_coeff = (int)(  level  * qscale * s->intra_matrix[j]) >> 3;
4105                         unquant_coeff =   (unquant_coeff - 1) | 1;
4106                     }
4107                 }else{
4108                     if (level < 0) {
4109                         unquant_coeff = ((((-level) << 1) + 1) * qscale * ((int) s->inter_matrix[j])) >> 4;
4110                         unquant_coeff = -((unquant_coeff - 1) | 1);
4111                     } else {
4112                         unquant_coeff = (((  level  << 1) + 1) * qscale * ((int) s->inter_matrix[j])) >> 4;
4113                         unquant_coeff =   (unquant_coeff - 1) | 1;
4114                     }
4115                 }
4116                 unquant_coeff<<= 3;
4117             }
4118
4119             distoration= (unquant_coeff - dct_coeff) * (unquant_coeff - dct_coeff);
4120             level+=64;
4121             if((level&(~127)) == 0){
4122                 for(run=0; run<=i - left_limit; run++){
4123                     int score= distoration + length[UNI_AC_ENC_INDEX(run, level)]*lambda;
4124                     score += score_tab[i-run];
4125                     
4126                     if(score < best_score){
4127                         best_score= 
4128                         score_tab[i+1]= score;
4129                         run_tab[i+1]= run;
4130                         level_tab[i+1]= level-64;
4131                     }
4132                 }
4133
4134                 if(s->out_format == FMT_H263){
4135                     for(run=0; run<=i - left_limit; run++){
4136                         int score= distoration + last_length[UNI_AC_ENC_INDEX(run, level)]*lambda;
4137                         score += score_tab[i-run];
4138                         if(score < last_score){
4139                             last_score= score;
4140                             last_run= run;
4141                             last_level= level-64;
4142                             last_i= i+1;
4143                         }
4144                     }
4145                 }
4146             }else{
4147                 distoration += esc_length*lambda;
4148                 for(run=0; run<=i - left_limit; run++){
4149                     int score= distoration + score_tab[i-run];
4150                     
4151                     if(score < best_score){
4152                         best_score= 
4153                         score_tab[i+1]= score;
4154                         run_tab[i+1]= run;
4155                         level_tab[i+1]= level-64;
4156                     }
4157                 }
4158
4159                 if(s->out_format == FMT_H263){
4160                     for(run=0; run<=i - left_limit; run++){
4161                         int score= distoration + score_tab[i-run];
4162                         if(score < last_score){
4163                             last_score= score;
4164                             last_run= run;
4165                             last_level= level-64;
4166                             last_i= i+1;
4167                         }
4168                     }
4169                 }
4170             }
4171         }
4172
4173         for(j=left_limit; j<=i; j++){
4174             score_tab[j] += zero_distoration;
4175         }
4176         score_limit+= zero_distoration;
4177         if(score_tab[i+1] < score_limit)
4178             score_limit= score_tab[i+1];
4179         
4180         //Note: there is a vlc code in mpeg4 which is 1 bit shorter than another one with a shorter run and the same level
4181         while(score_tab[ left_limit ] > score_limit + lambda) left_limit++;
4182     }
4183
4184     //FIXME add some cbp penalty
4185
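    /* For formats without an H.263-style "last" flag in the AC VLC, pick the
     * truncation point here: compare the accumulated score of stopping after
     * each coefficient, charging roughly 2*lambda as an approximate
     * end-of-block cost whenever at least one coefficient is coded. */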
4186     if(s->out_format != FMT_H263){
4187         last_score= 256*256*256*120;
4188         for(i= left_limit; i<=last_non_zero - start_i + 1; i++){
4189             int score= score_tab[i];
4190             if(i) score += lambda*2; //FIXME could be more exact
4191
4192             if(score < last_score){
4193                 last_score= score;
4194                 last_i= i;
4195                 last_level= level_tab[i];
4196                 last_run= run_tab[i];
4197             }
4198         }
4199     }
4200     
4201     last_non_zero= last_i - 1 + start_i;
4202     memset(block + start_i, 0, (64-start_i)*sizeof(DCTELEM));
4203     
4204     if(last_non_zero < start_i)
4205         return last_non_zero;
4206     
4207     i= last_i;
4208     assert(last_level);
4209 //FIXME use permutated scantable
4210     block[ s->dsp.idct_permutation[ scantable[last_non_zero] ] ]= last_level;
4211     i -= last_run + 1;
4212     
4213     for(;i>0 ; i -= run_tab[i] + 1){
4214         const int j= s->dsp.idct_permutation[ scantable[i - 1 + start_i] ];
4215     
4216         block[j]= level_tab[i];
4217         assert(block[j]);
4218     }
4219
4220     return last_non_zero;
4221 }
4222
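/**
 * C reference version of the block quantizer.
 * Runs the forward DCT on the block, then quantizes the coefficients in scan
 * order using the precomputed intra/inter matrix with a rounding bias;
 * coefficients that fall inside the dead zone are cleared.  *overflow is set
 * when a quantized level may exceed s->max_qcoeff.
 * @return index (in scan order) of the last non-zero coefficient
 */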
4223 static int dct_quantize_c(MpegEncContext *s, 
4224                         DCTELEM *block, int n,
4225                         int qscale, int *overflow)
4226 {
4227     int i, j, level, last_non_zero, q;
4228     const int *qmat;
4229     const uint8_t *scantable= s->intra_scantable.scantable;
4230     int bias;
4231     int max=0;
4232     unsigned int threshold1, threshold2;
4233
4234     s->dsp.fdct (block);
4235
4236     if (s->mb_intra) {
4237         if (!s->h263_aic) {
4238             if (n < 4)
4239                 q = s->y_dc_scale;
4240             else
4241                 q = s->c_dc_scale;
4242             q = q << 3;
4243         } else
4244             /* For AIC we skip quant/dequant of INTRADC */
4245             q = 1 << 3;
4246             
4247         /* note: block[0] is assumed to be positive */
4248         block[0] = (block[0] + (q >> 1)) / q;
4249         i = 1;
4250         last_non_zero = 0;
4251         qmat = s->q_intra_matrix[qscale];
4252         bias= s->intra_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4253     } else {
4254         i = 0;
4255         last_non_zero = -1;
4256         qmat = s->q_inter_matrix[qscale];
4257         bias= s->inter_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4258     }
4259     threshold1= (1<<QMAT_SHIFT) - bias - 1;
4260     threshold2= (threshold1<<1);
4261
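    /* Quantize the remaining coefficients with a dead zone:
     *   level  = block[j] * qmat[j]              (QMAT_SHIFT bit fixed point)
     *   output = sign(level) * ((bias + |level|) >> QMAT_SHIFT)
     * The single unsigned comparison below is equivalent to
     * |level| > threshold1 and skips coefficients that would quantize to 0. */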
4262     for(;i<64;i++) {
4263         j = scantable[i];
4264         level = block[j];
4265         level = level * qmat[j];
4266
4267 //        if(   bias+level >= (1<<QMAT_SHIFT)
4268 //           || bias-level >= (1<<QMAT_SHIFT)){
4269         if(((unsigned)(level+threshold1))>threshold2){
4270             if(level>0){
4271                 level= (bias + level)>>QMAT_SHIFT;
4272                 block[j]= level;
4273             }else{
4274                 level= (bias - level)>>QMAT_SHIFT;
4275                 block[j]= -level;
4276             }
4277             max |=level;
4278             last_non_zero = i;
4279         }else{
4280             block[j]=0;
4281         }
4282     }
4283     *overflow= s->max_qcoeff < max; //overflow might have happened
4284     
4285     /* we need this permutation so that the coefficients are in the order the IDCT expects; only the !=0 elements are permuted */
4286     if (s->dsp.idct_permutation_type != FF_NO_IDCT_PERM)
4287         ff_block_permute(block, s->dsp.idct_permutation, scantable, last_non_zero);
4288
4289     return last_non_zero;
4290 }
4291
4292 #endif //CONFIG_ENCODERS
4293
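/**
 * MPEG-1 inverse quantization (C reference version).
 * Intra blocks: the DC coefficient is rescaled by y_dc_scale/c_dc_scale and
 * the AC coefficients by (level * qscale * intra_matrix[j]) >> 3.
 * Inter blocks: ((2*level + 1) * qscale * inter_matrix[j]) >> 4.
 * Every reconstructed value is forced to be odd via (x - 1) | 1, which is
 * MPEG-1's oddification / mismatch control.
 */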
4294 static void dct_unquantize_mpeg1_c(MpegEncContext *s, 
4295                                    DCTELEM *block, int n, int qscale)
4296 {
4297     int i, level, nCoeffs;
4298     const uint16_t *quant_matrix;
4299
4300     nCoeffs= s->block_last_index[n];
4301     
4302     if (s->mb_intra) {
4303         if (n < 4) 
4304             block[0] = block[0] * s->y_dc_scale;
4305         else
4306             block[0] = block[0] * s->c_dc_scale;
4307         /* XXX: only mpeg1 */
4308         quant_matrix = s->intra_matrix;
4309         for(i=1;i<=nCoeffs;i++) {
4310             int j= s->intra_scantable.permutated[i];
4311             level = block[j];
4312             if (level) {
4313                 if (level < 0) {
4314                     level = -level;
4315                     level = (int)(level * qscale * quant_matrix[j]) >> 3;
4316                     level = (level - 1) | 1;
4317                     level = -level;
4318                 } else {
4319                     level = (int)(level * qscale * quant_matrix[j]) >> 3;
4320                     level = (level - 1) | 1;
4321                 }
4322 #ifdef PARANOID
4323                 if (level < -2048 || level > 2047)
4324                     fprintf(stderr, "unquant error %d %d\n", i, level);
4325 #endif
4326                 block[j] = level;
4327             }
4328         }
4329     } else {
4330         i = 0;
4331         quant_matrix = s->inter_matrix;
4332         for(;i<=nCoeffs;i++) {
4333             int j= s->intra_scantable.permutated[i];
4334             level = block[j];
4335             if (level) {
4336                 if (level < 0) {
4337                     level = -level;
4338                     level = (((level << 1) + 1) * qscale *
4339                              ((int) (quant_matrix[j]))) >> 4;
4340                     level = (level - 1) | 1;
4341                     level = -level;
4342                 } else {
4343                     level = (((level << 1) + 1) * qscale *
4344                              ((int) (quant_matrix[j]))) >> 4;
4345                     level = (level - 1) | 1;
4346                 }
4347 #ifdef PARANOID
4348                 if (level < -2048 || level > 2047)
4349                     fprintf(stderr, "unquant error %d %d\n", i, level);
4350 #endif
4351                 block[j] = level;
4352             }
4353         }
4354     }
4355 }
4356
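/**
 * MPEG-2 inverse quantization (C reference version).
 * Uses the same scaling as the MPEG-1 version but without forcing each value
 * odd; in the inter path, mismatch control is done by toggling the LSB of the
 * last coefficient (block[63]) according to the parity of the coefficient
 * sum.  With alternate_scan all 64 positions are processed regardless of
 * block_last_index.
 */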
4357 static void dct_unquantize_mpeg2_c(MpegEncContext *s, 
4358                                    DCTELEM *block, int n, int qscale)
4359 {
4360     int i, level, nCoeffs;
4361     const uint16_t *quant_matrix;
4362
4363     if(s->alternate_scan) nCoeffs= 63;
4364     else nCoeffs= s->block_last_index[n];
4365     
4366     if (s->mb_intra) {
4367         if (n < 4) 
4368             block[0] = block[0] * s->y_dc_scale;
4369         else
4370             block[0] = block[0] * s->c_dc_scale;
4371         quant_matrix = s->intra_matrix;
4372         for(i=1;i<=nCoeffs;i++) {
4373             int j= s->intra_scantable.permutated[i];
4374             level = block[j];
4375             if (level) {
4376                 if (level < 0) {
4377                     level = -level;
4378                     level = (int)(level * qscale * quant_matrix[j]) >> 3;
4379                     level = -level;
4380                 } else {
4381                     level = (int)(level * qscale * quant_matrix[j]) >> 3;
4382                 }
4383 #ifdef PARANOID
4384                 if (level < -2048 || level > 2047)
4385                     fprintf(stderr, "unquant error %d %d\n", i, level);
4386 #endif
4387                 block[j] = level;
4388             }
4389         }
4390     } else {
4391         int sum=-1;
4392         i = 0;
4393         quant_matrix = s->inter_matrix;
4394         for(;i<=nCoeffs;i++) {
4395             int j= s->intra_scantable.permutated[i];
4396             level = block[j];
4397             if (level) {
4398                 if (level < 0) {
4399                     level = -level;
4400                     level = (((level << 1) + 1) * qscale *
4401                              ((int) (quant_matrix[j]))) >> 4;
4402                     level = -level;
4403                 } else {
4404                     level = (((level << 1) + 1) * qscale *
4405                              ((int) (quant_matrix[j]))) >> 4;
4406                 }
4407 #ifdef PARANOID
4408                 if (level < -2048 || level > 2047)
4409                     fprintf(stderr, "unquant error %d %d\n", i, level);
4410 #endif
4411                 block[j] = level;
4412                 sum+=level;
4413             }
4414         }
4415         block[63]^=sum&1;
4416     }
4417 }
4418
4419
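/**
 * H.263-style inverse quantization (C reference version).
 * Reconstruction is level * (2*qscale) +/- qadd with qadd = (qscale - 1) | 1;
 * qadd is 0 (and the intra DC is left untouched) when advanced intra coding
 * is enabled.
 */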
4420 static void dct_unquantize_h263_c(MpegEncContext *s, 
4421                                   DCTELEM *block, int n, int qscale)
4422 {
4423     int i, level, qmul, qadd;
4424     int nCoeffs;
4425     
4426     assert(s->block_last_index[n]>=0);
4427     
4428     qadd = (qscale - 1) | 1;
4429     qmul = qscale << 1;
4430     
4431     if (s->mb_intra) {
4432         if (!s->h263_aic) {
4433             if (n < 4) 
4434                 block[0] = block[0] * s->y_dc_scale;
4435             else
4436                 block[0] = block[0] * s->c_dc_scale;
4437         }else
4438             qadd = 0;
4439         i = 1;
4440         nCoeffs= 63; //does not always use the zigzag table
4441     } else {
4442         i = 0;
4443         nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
4444     }
4445
4446     for(;i<=nCoeffs;i++) {
4447         level = block[i];
4448         if (level) {
4449             if (level < 0) {
4450                 level = level * qmul - qadd;
4451             } else {
4452                 level = level * qmul + qadd;
4453             }
4454 #ifdef PARANOID
4455             if (level < -2048 || level > 2047)
4456                 fprintf(stderr, "unquant error %d %d\n", i, level);
4457 #endif
4458             block[i] = level;
4459         }
4460     }
4461 }
4462
4463
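/* Option table shared by the MPEG-4 family of encoders declared below
 * (mpeg4, msmpeg4v1-v3, wmv1); it exposes rate control, masking and motion
 * estimation parameters of the encoding context through the AVOption API. */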
4464 static const AVOption mpeg4_options[] =
4465 {
4466     AVOPTION_CODEC_INT("bitrate", "desired video bitrate", bit_rate, 4, 240000000, 800000),
4467     AVOPTION_CODEC_INT("ratetol", "number of bits the bitstream is allowed to diverge from the reference; "
4468                        "the reference can be CBR (for CBR pass1) or VBR (for pass2)",
4469                        bit_rate_tolerance, 4, 240000000, 8000),
4470     AVOPTION_CODEC_INT("qmin", "minimum quantizer", qmin, 1, 31, 2),
4471     AVOPTION_CODEC_INT("qmax", "maximum quantizer", qmax, 1, 31, 31),
4472     AVOPTION_CODEC_STRING("rc_eq", "rate control equation",
4473                           rc_eq, "tex^qComp,option1,options2", 0),
4474     AVOPTION_CODEC_INT("rc_minrate", "rate control minimum bitrate",
4475                        rc_min_rate, 4, 24000000, 0),
4476     AVOPTION_CODEC_INT("rc_maxrate", "rate control maximum bitrate",
4477                        rc_max_rate, 4, 24000000, 0),
4478     AVOPTION_CODEC_DOUBLE("rc_buf_aggresivity", "rate control buffer aggressivity",
4479                           rc_buffer_aggressivity, 4, 24000000, 0),
4480     AVOPTION_CODEC_DOUBLE("rc_initial_cplx", "initial complexity for pass1 ratecontrol",
4481                           rc_initial_cplx, 0., 9999999., 0),
4482     AVOPTION_CODEC_DOUBLE("i_quant_factor", "qscale factor between p and i frames",
4483                           i_quant_factor, 0., 0., 0),
4484     AVOPTION_CODEC_DOUBLE("i_quant_offset", "qscale offset between p and i frames",
4485                           i_quant_offset, -999999., 999999., 0),
4486     AVOPTION_CODEC_INT("dct_algo", "dct algorithm",
4487                        dct_algo, 0, 5, 0), // fixme - "Auto,FastInt,Int,MMX,MLib,Altivec"
4488     AVOPTION_CODEC_DOUBLE("lumi_masking", "luminance masking",
4489                           lumi_masking, 0., 999999., 0),
4490     AVOPTION_CODEC_DOUBLE("temporal_cplx_masking", "temporal complexity masking",
4491                           temporal_cplx_masking, 0., 999999., 0),
4492     AVOPTION_CODEC_DOUBLE("spatial_cplx_masking", "spatial complexity masking",
4493                           spatial_cplx_masking, 0., 999999., 0),
4494     AVOPTION_CODEC_DOUBLE("p_masking", "p block masking",
4495                           p_masking, 0., 999999., 0),
4496     AVOPTION_CODEC_DOUBLE("dark_masking", "darkness masking",
4497                           dark_masking, 0., 999999., 0),
4498     AVOPTION_CODEC_INT("idct_algo", "idct algorithm",
4499                        idct_algo, 0, 8, 0), // fixme - "Auto,Int,Simple,SimpleMMX,LibMPEG2MMX,PS2,MLib,ARM,Altivec"
4500
4501     AVOPTION_CODEC_INT("mb_qmin", "minimum MB quantizer",
4502                        mb_qmin, 0, 8, 0),
4503     AVOPTION_CODEC_INT("mb_qmax", "maximum MB quantizer",
4504                        mb_qmax, 0, 8, 0),
4505
4506     AVOPTION_CODEC_INT("me_cmp", "ME compare function",
4507                        me_cmp, 0, 24000000, 0),
4508     AVOPTION_CODEC_INT("me_sub_cmp", "subpixel ME compare function",
4509                        me_sub_cmp, 0, 24000000, 0),
4510
4511
4512     AVOPTION_CODEC_INT("dia_size", "ME diamond size & shape",
4513                        dia_size, 0, 24000000, 0),
4514     AVOPTION_CODEC_INT("last_predictor_count", "number of previous MV predictors",
4515                        last_predictor_count, 0, 24000000, 0),
4516
4517     AVOPTION_CODEC_INT("pre_me", "pre pass for ME",
4518                        pre_me, 0, 24000000, 0),
4519     AVOPTION_CODEC_INT("me_pre_cmp", "ME pre pass compare function",
4520                        me_pre_cmp, 0, 24000000, 0),
4521
4522     AVOPTION_CODEC_INT("me_range", "maximum ME search range",
4523                        me_range, 0, 24000000, 0),
4524     AVOPTION_CODEC_INT("pre_dia_size", "ME pre pass diamond size & shape",
4525                        pre_dia_size, 0, 24000000, 0),
4526     AVOPTION_CODEC_INT("me_subpel_quality", "subpel ME quality",
4527                        me_subpel_quality, 0, 24000000, 0),
4530     AVOPTION_CODEC_FLAG("psnr", "calculate PSNR of compressed frames",
4531                         flags, CODEC_FLAG_PSNR, 0),
4532     AVOPTION_CODEC_RCOVERRIDE("rc_override", "ratecontrol override (=startframe,endframe,qscale,quality_factor)",
4533                               rc_override),
4534     AVOPTION_SUB(avoptions_common),
4535     AVOPTION_END()
4536 };
4537
4538 #ifdef CONFIG_ENCODERS
4539
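/* Encoder registry entries: every codec in this family reuses the shared
 * MPV_encode_init/MPV_encode_picture/MPV_encode_end entry points and differs
 * only in its CODEC_ID (plus, for the MPEG-4 variants, the option table
 * above).  The entries guarded by CONFIG_RISKY are built only when that
 * configuration option is enabled. */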
4540 AVCodec mpeg1video_encoder = {
4541     "mpeg1video",
4542     CODEC_TYPE_VIDEO,
4543     CODEC_ID_MPEG1VIDEO,
4544     sizeof(MpegEncContext),
4545     MPV_encode_init,
4546     MPV_encode_picture,
4547     MPV_encode_end,
4548 };
4549
4550 #ifdef CONFIG_RISKY
4551
4552 AVCodec mpeg2video_encoder = {
4553     "mpeg2video",
4554     CODEC_TYPE_VIDEO,
4555     CODEC_ID_MPEG2VIDEO,
4556     sizeof(MpegEncContext),
4557     MPV_encode_init,
4558     MPV_encode_picture,
4559     MPV_encode_end,
4560 };
4561
4562 AVCodec h263_encoder = {
4563     "h263",
4564     CODEC_TYPE_VIDEO,
4565     CODEC_ID_H263,
4566     sizeof(MpegEncContext),
4567     MPV_encode_init,
4568     MPV_encode_picture,
4569     MPV_encode_end,
4570 };
4571
4572 AVCodec h263p_encoder = {
4573     "h263p",
4574     CODEC_TYPE_VIDEO,
4575     CODEC_ID_H263P,
4576     sizeof(MpegEncContext),
4577     MPV_encode_init,
4578     MPV_encode_picture,
4579     MPV_encode_end,
4580 };
4581
4582 AVCodec flv_encoder = {
4583     "flv",
4584     CODEC_TYPE_VIDEO,
4585     CODEC_ID_FLV1,
4586     sizeof(MpegEncContext),
4587     MPV_encode_init,
4588     MPV_encode_picture,
4589     MPV_encode_end,
4590 };
4591
4592 AVCodec rv10_encoder = {
4593     "rv10",
4594     CODEC_TYPE_VIDEO,
4595     CODEC_ID_RV10,
4596     sizeof(MpegEncContext),
4597     MPV_encode_init,
4598     MPV_encode_picture,
4599     MPV_encode_end,
4600 };
4601
4602 AVCodec mpeg4_encoder = {
4603     "mpeg4",
4604     CODEC_TYPE_VIDEO,
4605     CODEC_ID_MPEG4,
4606     sizeof(MpegEncContext),
4607     MPV_encode_init,
4608     MPV_encode_picture,
4609     MPV_encode_end,
4610     .options = mpeg4_options,
4611 };
4612
4613 AVCodec msmpeg4v1_encoder = {
4614     "msmpeg4v1",
4615     CODEC_TYPE_VIDEO,
4616     CODEC_ID_MSMPEG4V1,
4617     sizeof(MpegEncContext),
4618     MPV_encode_init,
4619     MPV_encode_picture,
4620     MPV_encode_end,
4621     .options = mpeg4_options,
4622 };
4623
4624 AVCodec msmpeg4v2_encoder = {
4625     "msmpeg4v2",
4626     CODEC_TYPE_VIDEO,
4627     CODEC_ID_MSMPEG4V2,
4628     sizeof(MpegEncContext),
4629     MPV_encode_init,
4630     MPV_encode_picture,
4631     MPV_encode_end,
4632     .options = mpeg4_options,
4633 };
4634
4635 AVCodec msmpeg4v3_encoder = {
4636     "msmpeg4",
4637     CODEC_TYPE_VIDEO,
4638     CODEC_ID_MSMPEG4V3,
4639     sizeof(MpegEncContext),
4640     MPV_encode_init,
4641     MPV_encode_picture,
4642     MPV_encode_end,
4643     .options = mpeg4_options,
4644 };
4645
4646 AVCodec wmv1_encoder = {
4647     "wmv1",
4648     CODEC_TYPE_VIDEO,
4649     CODEC_ID_WMV1,
4650     sizeof(MpegEncContext),
4651     MPV_encode_init,
4652     MPV_encode_picture,
4653     MPV_encode_end,
4654     .options = mpeg4_options,
4655 };
4656
4657 #endif
4658
4659 AVCodec mjpeg_encoder = {
4660     "mjpeg",
4661     CODEC_TYPE_VIDEO,
4662     CODEC_ID_MJPEG,
4663     sizeof(MpegEncContext),
4664     MPV_encode_init,
4665     MPV_encode_picture,
4666     MPV_encode_end,
4667 };
4668
4669 #endif //CONFIG_ENCODERS
4670