1 /*
2  * The simplest mpeg encoder (well, it was the simplest!)
3  * Copyright (c) 2000,2001 Fabrice Bellard.
4  *
5  * This library is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU Lesser General Public
7  * License as published by the Free Software Foundation; either
8  * version 2 of the License, or (at your option) any later version.
9  *
10  * This library is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * Lesser General Public License for more details.
14  *
15  * You should have received a copy of the GNU Lesser General Public
16  * License along with this library; if not, write to the Free Software
17  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
18  *
19  * 4MV & hq & b-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
20  */
21  
22 /**
23  * @file mpegvideo.c
24  * The simplest mpeg encoder (well, it was the simplest!).
25  */ 
26  
27 #include <limits.h>
28 #include "avcodec.h"
29 #include "dsputil.h"
30 #include "mpegvideo.h"
31 #include "faandct.h"
32
33 #ifdef USE_FASTMEMCPY
34 #include "fastmemcpy.h"
35 #endif
36
37 //#undef NDEBUG
38 //#include <assert.h>
39
40 #ifdef CONFIG_ENCODERS
41 static void encode_picture(MpegEncContext *s, int picture_number);
42 #endif //CONFIG_ENCODERS
43 static void dct_unquantize_mpeg1_intra_c(MpegEncContext *s, 
44                                    DCTELEM *block, int n, int qscale);
45 static void dct_unquantize_mpeg1_inter_c(MpegEncContext *s, 
46                                    DCTELEM *block, int n, int qscale);
47 static void dct_unquantize_mpeg2_intra_c(MpegEncContext *s,
48                                    DCTELEM *block, int n, int qscale);
49 static void dct_unquantize_mpeg2_inter_c(MpegEncContext *s,
50                                    DCTELEM *block, int n, int qscale);
51 static void dct_unquantize_h263_intra_c(MpegEncContext *s, 
52                                   DCTELEM *block, int n, int qscale);
53 static void dct_unquantize_h263_inter_c(MpegEncContext *s, 
54                                   DCTELEM *block, int n, int qscale);
55 static void draw_edges_c(uint8_t *buf, int wrap, int width, int height, int w);
56 #ifdef CONFIG_ENCODERS
57 static int dct_quantize_c(MpegEncContext *s, DCTELEM *block, int n, int qscale, int *overflow);
58 static int dct_quantize_trellis_c(MpegEncContext *s, DCTELEM *block, int n, int qscale, int *overflow);
59 static int sse_mb(MpegEncContext *s);
60 #endif //CONFIG_ENCODERS
61
62 #ifdef HAVE_XVMC
63 extern int  XVMC_field_start(MpegEncContext*s, AVCodecContext *avctx);
64 extern void XVMC_field_end(MpegEncContext *s);
65 extern void XVMC_decode_mb(MpegEncContext *s);
66 #endif
67
68 void (*draw_edges)(uint8_t *buf, int wrap, int width, int height, int w)= draw_edges_c;
69
70
71 /* enable all paranoid tests for rounding, overflows, etc... */
72 //#define PARANOID
73
74 //#define DEBUG
75
76
77 /* for jpeg fast DCT */
78 #define CONST_BITS 14
79
80 static const uint16_t aanscales[64] = {
81     /* precomputed values scaled up by 14 bits */
82     16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
83     22725, 31521, 29692, 26722, 22725, 17855, 12299,  6270,
84     21407, 29692, 27969, 25172, 21407, 16819, 11585,  5906,
85     19266, 26722, 25172, 22654, 19266, 15137, 10426,  5315,
86     16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
87     12873, 17855, 16819, 15137, 12873, 10114,  6967,  3552,
88      8867, 12299, 11585, 10426,  8867,  6967,  4799,  2446,
89      4520,  6270,  5906,  5315,  4520,  3552,  2446,  1247
90 };
91
92 static const uint8_t h263_chroma_roundtab[16] = {
93 //  0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15
94     0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2,
95 };
96
97 static const uint8_t ff_default_chroma_qscale_table[32]={
98 //  0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
99     0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
100 };
101
102 #ifdef CONFIG_ENCODERS
103 static uint8_t (*default_mv_penalty)[MAX_MV*2+1]=NULL;
104 static uint8_t default_fcode_tab[MAX_MV*2+1];
105
106 enum PixelFormat ff_yuv420p_list[2]= {PIX_FMT_YUV420P, -1};
107
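/**
 * Precompute the quantization multiplier tables for qscale = qmin..qmax so the
 * quantizer can replace the division by (qscale * quant_matrix[i]) with a
 * multiply and shift.  For the unscaled FDCTs (jpeg_fdct_islow, post-scaled FAAN)
 * the factor is (1<<QMAT_SHIFT) / (qscale * quant_matrix[i]); for the AAN
 * fdct_ifast the aanscales[] pre-scaling is folded in as well.  The remaining
 * branch additionally fills the 16-bit qmat16 multiplier and rounding-bias
 * tables used by the 16-bit (MMX) quantizer.
 */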
108 static void convert_matrix(DSPContext *dsp, int (*qmat)[64], uint16_t (*qmat16)[2][64],
109                            const uint16_t *quant_matrix, int bias, int qmin, int qmax)
110 {
111     int qscale;
112
113     for(qscale=qmin; qscale<=qmax; qscale++){
114         int i;
115         if (dsp->fdct == ff_jpeg_fdct_islow 
116 #ifdef FAAN_POSTSCALE
117             || dsp->fdct == ff_faandct
118 #endif
119             ) {
120             for(i=0;i<64;i++) {
121                 const int j= dsp->idct_permutation[i];
122                 /* 16 <= qscale * quant_matrix[i] <= 7905 */
123                 /* 19952         <= aanscales[i] * qscale * quant_matrix[i]           <= 249205026 */
124                 /* (1<<36)/19952 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= (1<<36)/249205026 */
125                 /* 3444240       >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= 275 */
126                 
127                 qmat[qscale][i] = (int)((uint64_t_C(1) << QMAT_SHIFT) / 
128                                 (qscale * quant_matrix[j]));
129             }
130         } else if (dsp->fdct == fdct_ifast
131 #ifndef FAAN_POSTSCALE
132                    || dsp->fdct == ff_faandct
133 #endif
134                    ) {
135             for(i=0;i<64;i++) {
136                 const int j= dsp->idct_permutation[i];
137                 /* 16 <= qscale * quant_matrix[i] <= 7905 */
138                 /* 19952         <= aanscales[i] * qscale * quant_matrix[i]           <= 249205026 */
139                 /* (1<<36)/19952 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= (1<<36)/249205026 */
140                 /* 3444240       >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= 275 */
141                 
142                 qmat[qscale][i] = (int)((uint64_t_C(1) << (QMAT_SHIFT + 14)) / 
143                                 (aanscales[i] * qscale * quant_matrix[j]));
144             }
145         } else {
146             for(i=0;i<64;i++) {
147                 const int j= dsp->idct_permutation[i];
148                 /* We can safely suppose that 16 <= quant_matrix[i] <= 255
149                    So 16           <= qscale * quant_matrix[i]             <= 7905
150                    so (1<<19) / 16 >= (1<<19) / (qscale * quant_matrix[i]) >= (1<<19) / 7905
151                    so 32768        >= (1<<19) / (qscale * quant_matrix[i]) >= 67
152                 */
153                 qmat[qscale][i] = (int)((uint64_t_C(1) << QMAT_SHIFT) / (qscale * quant_matrix[j]));
154 //                qmat  [qscale][i] = (1 << QMAT_SHIFT_MMX) / (qscale * quant_matrix[i]);
155                 qmat16[qscale][0][i] = (1 << QMAT_SHIFT_MMX) / (qscale * quant_matrix[j]);
156
157                 if(qmat16[qscale][0][i]==0 || qmat16[qscale][0][i]==128*256) qmat16[qscale][0][i]=128*256-1;
158                 qmat16[qscale][1][i]= ROUNDED_DIV(bias<<(16-QUANT_BIAS_SHIFT), qmat16[qscale][0][i]);
159             }
160         }
161     }
162 }
163
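/* derive qscale from the rate-distortion lambda: roughly
   qscale = lambda * 139 / (128 * FF_LAMBDA_SCALE), clipped to [qmin, qmax],
   and keep lambda2 = lambda^2 / FF_LAMBDA_SCALE in sync */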
164 static inline void update_qscale(MpegEncContext *s){
165     s->qscale= (s->lambda*139 + FF_LAMBDA_SCALE*64) >> (FF_LAMBDA_SHIFT + 7);
166     s->qscale= clip(s->qscale, s->avctx->qmin, s->avctx->qmax);
167     
168     s->lambda2= (s->lambda*s->lambda + FF_LAMBDA_SCALE/2) >> FF_LAMBDA_SHIFT;
169 }
170 #endif //CONFIG_ENCODERS
171
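/**
 * Build the scan tables for the given zigzag/alternate scan:
 * permutated[] holds the scan already remapped through the IDCT permutation,
 * inverse[] the reverse mapping (only needed on PowerPC), and raster_end[i]
 * the highest permutated index among the first i+1 scan positions.
 */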
172 void ff_init_scantable(uint8_t *permutation, ScanTable *st, const uint8_t *src_scantable){
173     int i;
174     int end;
175     
176     st->scantable= src_scantable;
177
178     for(i=0; i<64; i++){
179         int j;
180         j = src_scantable[i];
181         st->permutated[i] = permutation[j];
182 #ifdef ARCH_POWERPC
183         st->inverse[j] = i;
184 #endif
185     }
186     
187     end=-1;
188     for(i=0; i<64; i++){
189         int j;
190         j = st->permutated[i];
191         if(j>end) end=j;
192         st->raster_end[i]= end;
193     }
194 }
195
196 #ifdef CONFIG_ENCODERS
197 void ff_write_quant_matrix(PutBitContext *pb, int16_t *matrix){
198     int i;
199
200     if(matrix){
201         put_bits(pb, 1, 1);
202         for(i=0;i<64;i++) {
203             put_bits(pb, 8, matrix[ ff_zigzag_direct[i] ]);
204         }
205     }else
206         put_bits(pb, 1, 0);
207 }
208 #endif //CONFIG_ENCODERS
209
210 /* init common dct for both encoder and decoder */
211 int DCT_common_init(MpegEncContext *s)
212 {
213     s->dct_unquantize_h263_intra = dct_unquantize_h263_intra_c;
214     s->dct_unquantize_h263_inter = dct_unquantize_h263_inter_c;
215     s->dct_unquantize_mpeg1_intra = dct_unquantize_mpeg1_intra_c;
216     s->dct_unquantize_mpeg1_inter = dct_unquantize_mpeg1_inter_c;
217     s->dct_unquantize_mpeg2_intra = dct_unquantize_mpeg2_intra_c;
218     s->dct_unquantize_mpeg2_inter = dct_unquantize_mpeg2_inter_c;
219
220 #ifdef CONFIG_ENCODERS
221     s->dct_quantize= dct_quantize_c;
222 #endif
223         
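    /* let architecture-specific code override the C (un)quantizers where available */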
224 #ifdef HAVE_MMX
225     MPV_common_init_mmx(s);
226 #endif
227 #ifdef ARCH_ALPHA
228     MPV_common_init_axp(s);
229 #endif
230 #ifdef HAVE_MLIB
231     MPV_common_init_mlib(s);
232 #endif
233 #ifdef HAVE_MMI
234     MPV_common_init_mmi(s);
235 #endif
236 #ifdef ARCH_ARMV4L
237     MPV_common_init_armv4l(s);
238 #endif
239 #ifdef ARCH_POWERPC
240     MPV_common_init_ppc(s);
241 #endif
242
243 #ifdef CONFIG_ENCODERS
244     s->fast_dct_quantize= s->dct_quantize;
245
246     if(s->flags&CODEC_FLAG_TRELLIS_QUANT){
247         s->dct_quantize= dct_quantize_trellis_c; //move before MPV_common_init_*
248     }
249
250 #endif //CONFIG_ENCODERS
251
252     /* load & permutate scantables
253        note: only wmv uses different ones 
254     */
255     ff_init_scantable(s->dsp.idct_permutation, &s->inter_scantable  , ff_zigzag_direct);
256     ff_init_scantable(s->dsp.idct_permutation, &s->intra_scantable  , ff_zigzag_direct);
257     ff_init_scantable(s->dsp.idct_permutation, &s->intra_h_scantable, ff_alternate_horizontal_scan);
258     ff_init_scantable(s->dsp.idct_permutation, &s->intra_v_scantable, ff_alternate_vertical_scan);
259
260     s->picture_structure= PICT_FRAME;
261     
262     return 0;
263 }
264
265 static void copy_picture(Picture *dst, Picture *src){
266     *dst = *src;
267     dst->type= FF_BUFFER_TYPE_COPY;
268 }
269
270 /**
271  * allocates a Picture
272  * The pixels are allocated/set by calling get_buffer() if shared=0
273  */
274 static int alloc_picture(MpegEncContext *s, Picture *pic, int shared){
275     const int big_mb_num= s->mb_stride*(s->mb_height+1) + 1; //the +1 is needed so memset(,,stride*height) doesn't sig11
276     const int mb_array_size= s->mb_stride*s->mb_height;
277     const int b8_array_size= s->b8_stride*s->mb_height*2;
278     const int b4_array_size= s->b4_stride*s->mb_height*4;
279     int i;
280     
281     if(shared){
282         assert(pic->data[0]);
283         assert(pic->type == 0 || pic->type == FF_BUFFER_TYPE_SHARED);
284         pic->type= FF_BUFFER_TYPE_SHARED;
285     }else{
286         int r;
287         
288         assert(!pic->data[0]);
289         
290         r= s->avctx->get_buffer(s->avctx, (AVFrame*)pic);
291         
292         if(r<0 || !pic->age || !pic->type || !pic->data[0]){
293             av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed (%d %d %d %p)\n", r, pic->age, pic->type, pic->data[0]);
294             return -1;
295         }
296
297         if(s->linesize && (s->linesize != pic->linesize[0] || s->uvlinesize != pic->linesize[1])){
298             av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed (stride changed)\n");
299             return -1;
300         }
301
302         if(pic->linesize[1] != pic->linesize[2]){
303             av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed (uv stride mismatch)\n");
304             return -1;
305         }
306
307         s->linesize  = pic->linesize[0];
308         s->uvlinesize= pic->linesize[1];
309     }
310     
311     if(pic->qscale_table==NULL){
312         if (s->encoding) {        
313             CHECKED_ALLOCZ(pic->mb_var   , mb_array_size * sizeof(int16_t))
314             CHECKED_ALLOCZ(pic->mc_mb_var, mb_array_size * sizeof(int16_t))
315             CHECKED_ALLOCZ(pic->mb_mean  , mb_array_size * sizeof(int8_t))
316         }
317
318         CHECKED_ALLOCZ(pic->mbskip_table , mb_array_size * sizeof(uint8_t)+2) //the +2 is for the slice end check
319         CHECKED_ALLOCZ(pic->qscale_table , mb_array_size * sizeof(uint8_t))
320         CHECKED_ALLOCZ(pic->mb_type_base , big_mb_num    * sizeof(int))
321         pic->mb_type= pic->mb_type_base + s->mb_stride+1;
322         if(s->out_format == FMT_H264){
323             for(i=0; i<2; i++){
324                 CHECKED_ALLOCZ(pic->motion_val_base[i], 2 * (b4_array_size+1)  * sizeof(uint16_t))
325                 pic->motion_val[i]= pic->motion_val_base[i]+1;
326                 CHECKED_ALLOCZ(pic->ref_index[i] , b8_array_size * sizeof(uint8_t))
327             }
328             pic->motion_subsample_log2= 2;
329         }else if(s->out_format == FMT_H263 || s->encoding || (s->avctx->debug&(FF_DEBUG_VIS_MV|FF_DEBUG_MV))){
330             for(i=0; i<2; i++){
331                 CHECKED_ALLOCZ(pic->motion_val_base[i], 2 * (b8_array_size+1) * sizeof(uint16_t)*2) //FIXME
332                 pic->motion_val[i]= pic->motion_val_base[i]+1;
333             }
334             pic->motion_subsample_log2= 3;
335         }
336         pic->qstride= s->mb_stride;
337         CHECKED_ALLOCZ(pic->pan_scan , 1 * sizeof(AVPanScan))
338     }
339
340     //it might be nicer if the application would keep track of these but it would require an API change
341     memmove(s->prev_pict_types+1, s->prev_pict_types, PREV_PICT_TYPES_BUFFER_SIZE-1);
342     s->prev_pict_types[0]= s->pict_type;
343     if(pic->age < PREV_PICT_TYPES_BUFFER_SIZE && s->prev_pict_types[pic->age] == B_TYPE)
344         pic->age= INT_MAX; // skipped MBs in b frames are quite rare in mpeg1/2 and it's a bit tricky to skip them anyway
345     
346     return 0;
347 fail: //for the CHECKED_ALLOCZ macro
348     return -1;
349 }
350
351 /**
352  * deallocates a picture
353  */
354 static void free_picture(MpegEncContext *s, Picture *pic){
355     int i;
356
357     if(pic->data[0] && pic->type!=FF_BUFFER_TYPE_SHARED){
358         s->avctx->release_buffer(s->avctx, (AVFrame*)pic);
359     }
360
361     av_freep(&pic->mb_var);
362     av_freep(&pic->mc_mb_var);
363     av_freep(&pic->mb_mean);
364     av_freep(&pic->mbskip_table);
365     av_freep(&pic->qscale_table);
366     av_freep(&pic->mb_type_base);
367     av_freep(&pic->pan_scan);
368     pic->mb_type= NULL;
369     for(i=0; i<2; i++){
370         av_freep(&pic->motion_val_base[i]);
371         av_freep(&pic->ref_index[i]);
372     }
373     
374     if(pic->type == FF_BUFFER_TYPE_SHARED){
375         for(i=0; i<4; i++){
376             pic->base[i]=
377             pic->data[i]= NULL;
378         }
379         pic->type= 0;        
380     }
381 }
382
383 /* init common structure for both encoder and decoder */
384 int MPV_common_init(MpegEncContext *s)
385 {
386     int y_size, c_size, yc_size, i, mb_array_size, x, y;
387
388     dsputil_init(&s->dsp, s->avctx);
389     DCT_common_init(s);
390
391     s->flags= s->avctx->flags;
392
393     s->mb_width  = (s->width  + 15) / 16;
394     s->mb_height = (s->height + 15) / 16;
395     s->mb_stride = s->mb_width + 1;
396     s->b8_stride = s->mb_width*2 + 1;
397     s->b4_stride = s->mb_width*4 + 1;
398     mb_array_size= s->mb_height * s->mb_stride;
399
400     /* set default edge pos, will be overridden in decode_header if needed */
401     s->h_edge_pos= s->mb_width*16;
402     s->v_edge_pos= s->mb_height*16;
403
404     s->mb_num = s->mb_width * s->mb_height;
405     
406     s->block_wrap[0]=
407     s->block_wrap[1]=
408     s->block_wrap[2]=
409     s->block_wrap[3]= s->mb_width*2 + 2;
410     s->block_wrap[4]=
411     s->block_wrap[5]= s->mb_width + 2;
412
413     s->y_dc_scale_table=
414     s->c_dc_scale_table= ff_mpeg1_dc_scale_table;
415     s->chroma_qscale_table= ff_default_chroma_qscale_table;
416     if (!s->encoding)
417         s->progressive_sequence= 1;
418     s->progressive_frame= 1;
419
420     y_size = (2 * s->mb_width + 2) * (2 * s->mb_height + 2);
421     c_size = (s->mb_width + 2) * (s->mb_height + 2);
422     yc_size = y_size + 2 * c_size;
423
424     /* convert fourcc to upper case */
425     s->avctx->codec_tag=   toupper( s->avctx->codec_tag     &0xFF)          
426                         + (toupper((s->avctx->codec_tag>>8 )&0xFF)<<8 )
427                         + (toupper((s->avctx->codec_tag>>16)&0xFF)<<16) 
428                         + (toupper((s->avctx->codec_tag>>24)&0xFF)<<24);
429
430     s->avctx->stream_codec_tag=   toupper( s->avctx->stream_codec_tag     &0xFF)          
431                                + (toupper((s->avctx->stream_codec_tag>>8 )&0xFF)<<8 )
432                                + (toupper((s->avctx->stream_codec_tag>>16)&0xFF)<<16) 
433                                + (toupper((s->avctx->stream_codec_tag>>24)&0xFF)<<24);
434
435     CHECKED_ALLOCZ(s->allocated_edge_emu_buffer, (s->width+64)*2*17*2); //(width + edge + align)*interlaced*MBsize*tolerance
436     s->edge_emu_buffer= s->allocated_edge_emu_buffer + (s->width+64)*2*17;
437
438     s->avctx->coded_frame= (AVFrame*)&s->current_picture;
439
440     CHECKED_ALLOCZ(s->mb_index2xy, (s->mb_num+1)*sizeof(int)) //error resilience code looks cleaner with this
441     for(y=0; y<s->mb_height; y++){
442         for(x=0; x<s->mb_width; x++){
443             s->mb_index2xy[ x + y*s->mb_width ] = x + y*s->mb_stride;
444         }
445     }
446     s->mb_index2xy[ s->mb_height*s->mb_width ] = (s->mb_height-1)*s->mb_stride + s->mb_width; //FIXME really needed?
447     
448     if (s->encoding) {
449         int mv_table_size= s->mb_stride * (s->mb_height+2) + 1;
450
451         /* Allocate MV tables */
452         CHECKED_ALLOCZ(s->p_mv_table_base            , mv_table_size * 2 * sizeof(int16_t))
453         CHECKED_ALLOCZ(s->b_forw_mv_table_base       , mv_table_size * 2 * sizeof(int16_t))
454         CHECKED_ALLOCZ(s->b_back_mv_table_base       , mv_table_size * 2 * sizeof(int16_t))
455         CHECKED_ALLOCZ(s->b_bidir_forw_mv_table_base , mv_table_size * 2 * sizeof(int16_t))
456         CHECKED_ALLOCZ(s->b_bidir_back_mv_table_base , mv_table_size * 2 * sizeof(int16_t))
457         CHECKED_ALLOCZ(s->b_direct_mv_table_base     , mv_table_size * 2 * sizeof(int16_t))
458         s->p_mv_table           = s->p_mv_table_base            + s->mb_stride + 1;
459         s->b_forw_mv_table      = s->b_forw_mv_table_base       + s->mb_stride + 1;
460         s->b_back_mv_table      = s->b_back_mv_table_base       + s->mb_stride + 1;
461         s->b_bidir_forw_mv_table= s->b_bidir_forw_mv_table_base + s->mb_stride + 1;
462         s->b_bidir_back_mv_table= s->b_bidir_back_mv_table_base + s->mb_stride + 1;
463         s->b_direct_mv_table    = s->b_direct_mv_table_base     + s->mb_stride + 1;
464
465         //FIXME should be linesize instead of s->width*2 but that isn't known before get_buffer()
466         CHECKED_ALLOCZ(s->me.scratchpad,  s->width*2*16*3*sizeof(uint8_t)) 
467         
468         CHECKED_ALLOCZ(s->me.map      , ME_MAP_SIZE*sizeof(uint32_t))
469         CHECKED_ALLOCZ(s->me.score_map, ME_MAP_SIZE*sizeof(uint32_t))
470
471         if(s->codec_id==CODEC_ID_MPEG4){
472             CHECKED_ALLOCZ(s->tex_pb_buffer, PB_BUFFER_SIZE);
473             CHECKED_ALLOCZ(   s->pb2_buffer, PB_BUFFER_SIZE);
474         }
475         
476         if(s->msmpeg4_version){
477             CHECKED_ALLOCZ(s->ac_stats, 2*2*(MAX_LEVEL+1)*(MAX_RUN+1)*2*sizeof(int));
478         }
479         CHECKED_ALLOCZ(s->avctx->stats_out, 256);
480
481         /* Allocate MB type table */
482         CHECKED_ALLOCZ(s->mb_type  , mb_array_size * sizeof(uint8_t)) //needed for encoding
483         
484         CHECKED_ALLOCZ(s->lambda_table, mb_array_size * sizeof(int))
485         
486         CHECKED_ALLOCZ(s->q_intra_matrix, 64*32 * sizeof(int))
487         CHECKED_ALLOCZ(s->q_inter_matrix, 64*32 * sizeof(int))
488         CHECKED_ALLOCZ(s->q_intra_matrix16, 64*32*2 * sizeof(uint16_t))
489         CHECKED_ALLOCZ(s->q_inter_matrix16, 64*32*2 * sizeof(uint16_t))
490         CHECKED_ALLOCZ(s->input_picture, MAX_PICTURE_COUNT * sizeof(Picture*))
491         CHECKED_ALLOCZ(s->reordered_input_picture, MAX_PICTURE_COUNT * sizeof(Picture*))
492         
493         if(s->avctx->noise_reduction){
494             CHECKED_ALLOCZ(s->dct_error_sum, 2 * 64 * sizeof(int))
495             CHECKED_ALLOCZ(s->dct_offset, 2 * 64 * sizeof(uint16_t))
496         }
497     }
498     CHECKED_ALLOCZ(s->blocks, 64*6*2 * sizeof(DCTELEM))
499         
500     CHECKED_ALLOCZ(s->picture, MAX_PICTURE_COUNT * sizeof(Picture))
501
502     CHECKED_ALLOCZ(s->error_status_table, mb_array_size*sizeof(uint8_t))
503     
504     if(s->codec_id==CODEC_ID_MPEG4){
505         /* interlaced direct mode decoding tables */
506         CHECKED_ALLOCZ(s->field_mv_table, mb_array_size*2*2 * sizeof(int16_t))
507         CHECKED_ALLOCZ(s->field_select_table, mb_array_size*2* sizeof(int8_t))
508     }
509     if (s->out_format == FMT_H263) {
510         /* ac values */
511         CHECKED_ALLOCZ(s->ac_val[0], yc_size * sizeof(int16_t) * 16);
512         s->ac_val[1] = s->ac_val[0] + y_size;
513         s->ac_val[2] = s->ac_val[1] + c_size;
514         
515         /* cbp values */
516         CHECKED_ALLOCZ(s->coded_block, y_size);
517         
518         /* divx501 bitstream reorder buffer */
519         CHECKED_ALLOCZ(s->bitstream_buffer, BITSTREAM_BUFFER_SIZE);
520
521         /* cbp, ac_pred, pred_dir */
522         CHECKED_ALLOCZ(s->cbp_table  , mb_array_size * sizeof(uint8_t))
523         CHECKED_ALLOCZ(s->pred_dir_table, mb_array_size * sizeof(uint8_t))
524     }
525     
526     if (s->h263_pred || s->h263_plus || !s->encoding) {
527         /* dc values */
528         //MN: we need these for error resilience of intra-frames
529         CHECKED_ALLOCZ(s->dc_val[0], yc_size * sizeof(int16_t));
530         s->dc_val[1] = s->dc_val[0] + y_size;
531         s->dc_val[2] = s->dc_val[1] + c_size;
532         for(i=0;i<yc_size;i++)
533             s->dc_val[0][i] = 1024;
534     }
535
536     /* which mb is an intra block */
537     CHECKED_ALLOCZ(s->mbintra_table, mb_array_size);
538     memset(s->mbintra_table, 1, mb_array_size);
539     
540     /* default structure is frame */
541     s->picture_structure = PICT_FRAME;
542     
543     /* init macroblock skip table */
544     CHECKED_ALLOCZ(s->mbskip_table, mb_array_size+2);
545     //Note the +1 is for a quicker mpeg4 slice_end detection
546     CHECKED_ALLOCZ(s->prev_pict_types, PREV_PICT_TYPES_BUFFER_SIZE);
547     
548     s->block= s->blocks[0];
549
550     for(i=0;i<12;i++){
551         s->pblocks[i] = (short *)(&s->block[i]);
552     }
553
554     s->parse_context.state= -1;
555
556     s->context_initialized = 1;
557     return 0;
558  fail:
559     MPV_common_end(s);
560     return -1;
561 }
562
563
564 //extern int sads;
565
566 /* free common structure for both encoder and decoder */
567 void MPV_common_end(MpegEncContext *s)
568 {
569     int i;
570
571     av_freep(&s->parse_context.buffer);
572     s->parse_context.buffer_size=0;
573
574     av_freep(&s->mb_type);
575     av_freep(&s->p_mv_table_base);
576     av_freep(&s->b_forw_mv_table_base);
577     av_freep(&s->b_back_mv_table_base);
578     av_freep(&s->b_bidir_forw_mv_table_base);
579     av_freep(&s->b_bidir_back_mv_table_base);
580     av_freep(&s->b_direct_mv_table_base);
581     s->p_mv_table= NULL;
582     s->b_forw_mv_table= NULL;
583     s->b_back_mv_table= NULL;
584     s->b_bidir_forw_mv_table= NULL;
585     s->b_bidir_back_mv_table= NULL;
586     s->b_direct_mv_table= NULL;
587     
588     av_freep(&s->dc_val[0]);
589     av_freep(&s->ac_val[0]);
590     av_freep(&s->coded_block);
591     av_freep(&s->mbintra_table);
592     av_freep(&s->cbp_table);
593     av_freep(&s->pred_dir_table);
594     av_freep(&s->me.scratchpad);
595     av_freep(&s->me.map);
596     av_freep(&s->me.score_map);
597     
598     av_freep(&s->mbskip_table);
599     av_freep(&s->prev_pict_types);
600     av_freep(&s->bitstream_buffer);
601     av_freep(&s->tex_pb_buffer);
602     av_freep(&s->pb2_buffer);
603     av_freep(&s->allocated_edge_emu_buffer); s->edge_emu_buffer= NULL;
604     av_freep(&s->field_mv_table);
605     av_freep(&s->field_select_table);
606     av_freep(&s->avctx->stats_out);
607     av_freep(&s->ac_stats);
608     av_freep(&s->error_status_table);
609     av_freep(&s->mb_index2xy);
610     av_freep(&s->lambda_table);
611     av_freep(&s->q_intra_matrix);
612     av_freep(&s->q_inter_matrix);
613     av_freep(&s->q_intra_matrix16);
614     av_freep(&s->q_inter_matrix16);
615     av_freep(&s->blocks);
616     av_freep(&s->input_picture);
617     av_freep(&s->reordered_input_picture);
618     av_freep(&s->dct_error_sum);
619     av_freep(&s->dct_offset);
620
621     if(s->picture){
622         for(i=0; i<MAX_PICTURE_COUNT; i++){
623             free_picture(s, &s->picture[i]);
624         }
625     }
626     av_freep(&s->picture);
627     avcodec_default_free_buffers(s->avctx);
628     s->context_initialized = 0;
629     s->last_picture_ptr=
630     s->next_picture_ptr=
631     s->current_picture_ptr= NULL;
632 }
633
634 #ifdef CONFIG_ENCODERS
635
636 /* init video encoder */
637 int MPV_encode_init(AVCodecContext *avctx)
638 {
639     MpegEncContext *s = avctx->priv_data;
640     int i, dummy;
641     int chroma_h_shift, chroma_v_shift;
642
643     avctx->pix_fmt = PIX_FMT_YUV420P; // FIXME
644
645     s->bit_rate = avctx->bit_rate;
646     s->width = avctx->width;
647     s->height = avctx->height;
648     if(avctx->gop_size > 600){
649         av_log(avctx, AV_LOG_ERROR, "Warning keyframe interval too large! reducing it ...\n");
650         avctx->gop_size=600;
651     }
652     s->gop_size = avctx->gop_size;
653     s->avctx = avctx;
654     s->flags= avctx->flags;
655     s->max_b_frames= avctx->max_b_frames;
656     s->codec_id= avctx->codec->id;
657     s->luma_elim_threshold  = avctx->luma_elim_threshold;
658     s->chroma_elim_threshold= avctx->chroma_elim_threshold;
659     s->strict_std_compliance= avctx->strict_std_compliance;
660     s->data_partitioning= avctx->flags & CODEC_FLAG_PART;
661     s->quarter_sample= (avctx->flags & CODEC_FLAG_QPEL)!=0;
662     s->mpeg_quant= avctx->mpeg_quant;
663     s->rtp_mode= !!avctx->rtp_payload_size;
664
665     if (s->gop_size <= 1) {
666         s->intra_only = 1;
667         s->gop_size = 12;
668     } else {
669         s->intra_only = 0;
670     }
671
672     s->me_method = avctx->me_method;
673
674     /* Fixed QSCALE */
675     s->fixed_qscale = (avctx->flags & CODEC_FLAG_QSCALE);
676     
677     s->adaptive_quant= (   s->avctx->lumi_masking
678                         || s->avctx->dark_masking
679                         || s->avctx->temporal_cplx_masking 
680                         || s->avctx->spatial_cplx_masking
681                         || s->avctx->p_masking
682                         || (s->flags&CODEC_FLAG_QP_RD))
683                        && !s->fixed_qscale;
684     
685     s->obmc= (s->flags & CODEC_FLAG_OBMC);
686     s->loop_filter= (s->flags & CODEC_FLAG_LOOP_FILTER);
687
688     if((s->flags & CODEC_FLAG_4MV) && s->codec_id != CODEC_ID_MPEG4 
689        && s->codec_id != CODEC_ID_H263 && s->codec_id != CODEC_ID_H263P){
690         av_log(avctx, AV_LOG_ERROR, "4MV not supported by codec\n");
691         return -1;
692     }
693     
694     if(s->obmc && s->avctx->mb_decision != FF_MB_DECISION_SIMPLE){
695         av_log(avctx, AV_LOG_ERROR, "OBMC is only supported with simple mb decision\n");
696         return -1;
697     }
698     
699     if(s->obmc && s->codec_id != CODEC_ID_H263 && s->codec_id != CODEC_ID_H263P){
700         av_log(avctx, AV_LOG_ERROR, "OBMC is only supported with H263(+)\n");
701         return -1;
702     }
703     
704     if(s->quarter_sample && s->codec_id != CODEC_ID_MPEG4){
705         av_log(avctx, AV_LOG_ERROR, "qpel not supported by codec\n");
706         return -1;
707     }
708
709     if(s->data_partitioning && s->codec_id != CODEC_ID_MPEG4){
710         av_log(avctx, AV_LOG_ERROR, "data partitioning not supported by codec\n");
711         return -1;
712     }
713     
714     if(s->max_b_frames && s->codec_id != CODEC_ID_MPEG4 && s->codec_id != CODEC_ID_MPEG1VIDEO && s->codec_id != CODEC_ID_MPEG2VIDEO){
715         av_log(avctx, AV_LOG_ERROR, "b frames not supported by codec\n");
716         return -1;
717     }
718     
719     if(s->mpeg_quant && s->codec_id != CODEC_ID_MPEG4){ //FIXME mpeg2 uses that too
720         av_log(avctx, AV_LOG_ERROR, "mpeg2 style quantization not supported by codec\n");
721         return -1;
722     }
723         
724     if((s->flags & CODEC_FLAG_CBP_RD) && !(s->flags & CODEC_FLAG_TRELLIS_QUANT)){
725         av_log(avctx, AV_LOG_ERROR, "CBP RD needs trellis quant\n");
726         return -1;
727     }
728
729     if((s->flags & CODEC_FLAG_QP_RD) && s->avctx->mb_decision != FF_MB_DECISION_RD){
730         av_log(avctx, AV_LOG_ERROR, "QP RD needs mbd=2\n");
731         return -1;
732     }
733     
734     if(s->codec_id==CODEC_ID_MJPEG){
735         s->intra_quant_bias= 1<<(QUANT_BIAS_SHIFT-1); //(a + x/2)/x
736         s->inter_quant_bias= 0;
737     }else if(s->mpeg_quant || s->codec_id==CODEC_ID_MPEG1VIDEO || s->codec_id==CODEC_ID_MPEG2VIDEO){
738         s->intra_quant_bias= 3<<(QUANT_BIAS_SHIFT-3); //(a + x*3/8)/x
739         s->inter_quant_bias= 0;
740     }else{
741         s->intra_quant_bias=0;
742         s->inter_quant_bias=-(1<<(QUANT_BIAS_SHIFT-2)); //(a - x/4)/x
743     }
744     
745     if(avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
746         s->intra_quant_bias= avctx->intra_quant_bias;
747     if(avctx->inter_quant_bias != FF_DEFAULT_QUANT_BIAS)
748         s->inter_quant_bias= avctx->inter_quant_bias;
749         
750     avcodec_get_chroma_sub_sample(avctx->pix_fmt, &chroma_h_shift, &chroma_v_shift);
751
752     av_reduce(&s->time_increment_resolution, &dummy, s->avctx->frame_rate, s->avctx->frame_rate_base, (1<<16)-1);
753     s->time_increment_bits = av_log2(s->time_increment_resolution - 1) + 1;
754
755     switch(avctx->codec->id) {
756     case CODEC_ID_MPEG1VIDEO:
757         s->out_format = FMT_MPEG1;
758         s->low_delay= 0; //s->max_b_frames ? 0 : 1;
759         avctx->delay= s->low_delay ? 0 : (s->max_b_frames + 1);
760         break;
761     case CODEC_ID_MPEG2VIDEO:
762         s->out_format = FMT_MPEG1;
763         s->low_delay= 0; //s->max_b_frames ? 0 : 1;
764         avctx->delay= s->low_delay ? 0 : (s->max_b_frames + 1);
765         s->rtp_mode= 1;
766         break;
767     case CODEC_ID_LJPEG:
768     case CODEC_ID_MJPEG:
769         s->out_format = FMT_MJPEG;
770         s->intra_only = 1; /* force intra only for jpeg */
771         s->mjpeg_write_tables = 1; /* write all tables */
772         s->mjpeg_data_only_frames = 0; /* write all the needed headers */
773         s->mjpeg_vsample[0] = 1<<chroma_v_shift;
774         s->mjpeg_vsample[1] = 1;
775         s->mjpeg_vsample[2] = 1; 
776         s->mjpeg_hsample[0] = 1<<chroma_h_shift;
777         s->mjpeg_hsample[1] = 1; 
778         s->mjpeg_hsample[2] = 1; 
779         if (mjpeg_init(s) < 0)
780             return -1;
781         avctx->delay=0;
782         s->low_delay=1;
783         break;
784 #ifdef CONFIG_RISKY
785     case CODEC_ID_H263:
786         if (h263_get_picture_format(s->width, s->height) == 7) {
787             av_log(avctx, AV_LOG_INFO, "Input picture size isn't suitable for h263 codec! try h263+\n");
788             return -1;
789         }
790         s->out_format = FMT_H263;
791         s->obmc= (avctx->flags & CODEC_FLAG_OBMC) ? 1:0;
792         avctx->delay=0;
793         s->low_delay=1;
794         break;
795     case CODEC_ID_H263P:
796         s->out_format = FMT_H263;
797         s->h263_plus = 1;
798         /* Fx */
799         s->umvplus = (avctx->flags & CODEC_FLAG_H263P_UMV) ? 1:0;
800         s->h263_aic= (avctx->flags & CODEC_FLAG_H263P_AIC) ? 1:0;
801         s->modified_quant= s->h263_aic;
802         s->alt_inter_vlc= (avctx->flags & CODEC_FLAG_H263P_AIV) ? 1:0;
803         s->obmc= (avctx->flags & CODEC_FLAG_OBMC) ? 1:0;
804         s->loop_filter= (avctx->flags & CODEC_FLAG_LOOP_FILTER) ? 1:0;
805         s->unrestricted_mv= s->obmc || s->loop_filter || s->umvplus;
806         s->h263_slice_structured= (s->flags & CODEC_FLAG_H263P_SLICE_STRUCT) ? 1:0;
807
808         /* /Fx */
809         /* These are just to be sure */
810         avctx->delay=0;
811         s->low_delay=1;
812         break;
813     case CODEC_ID_FLV1:
814         s->out_format = FMT_H263;
815         s->h263_flv = 2; /* format = 1; 11-bit codes */
816         s->unrestricted_mv = 1;
817         s->rtp_mode=0; /* don't allow GOB */
818         avctx->delay=0;
819         s->low_delay=1;
820         break;
821     case CODEC_ID_RV10:
822         s->out_format = FMT_H263;
823         avctx->delay=0;
824         s->low_delay=1;
825         break;
826     case CODEC_ID_MPEG4:
827         s->out_format = FMT_H263;
828         s->h263_pred = 1;
829         s->unrestricted_mv = 1;
830         s->low_delay= s->max_b_frames ? 0 : 1;
831         avctx->delay= s->low_delay ? 0 : (s->max_b_frames + 1);
832         break;
833     case CODEC_ID_MSMPEG4V1:
834         s->out_format = FMT_H263;
835         s->h263_msmpeg4 = 1;
836         s->h263_pred = 1;
837         s->unrestricted_mv = 1;
838         s->msmpeg4_version= 1;
839         avctx->delay=0;
840         s->low_delay=1;
841         break;
842     case CODEC_ID_MSMPEG4V2:
843         s->out_format = FMT_H263;
844         s->h263_msmpeg4 = 1;
845         s->h263_pred = 1;
846         s->unrestricted_mv = 1;
847         s->msmpeg4_version= 2;
848         avctx->delay=0;
849         s->low_delay=1;
850         break;
851     case CODEC_ID_MSMPEG4V3:
852         s->out_format = FMT_H263;
853         s->h263_msmpeg4 = 1;
854         s->h263_pred = 1;
855         s->unrestricted_mv = 1;
856         s->msmpeg4_version= 3;
857         s->flipflop_rounding=1;
858         avctx->delay=0;
859         s->low_delay=1;
860         break;
861     case CODEC_ID_WMV1:
862         s->out_format = FMT_H263;
863         s->h263_msmpeg4 = 1;
864         s->h263_pred = 1;
865         s->unrestricted_mv = 1;
866         s->msmpeg4_version= 4;
867         s->flipflop_rounding=1;
868         avctx->delay=0;
869         s->low_delay=1;
870         break;
871     case CODEC_ID_WMV2:
872         s->out_format = FMT_H263;
873         s->h263_msmpeg4 = 1;
874         s->h263_pred = 1;
875         s->unrestricted_mv = 1;
876         s->msmpeg4_version= 5;
877         s->flipflop_rounding=1;
878         avctx->delay=0;
879         s->low_delay=1;
880         break;
881 #endif
882     default:
883         return -1;
884     }
885     
886     { /* set up some safe defaults, some codecs might override them later */
887         static int done=0;
888         if(!done){
889             int i;
890             done=1;
891
892             default_mv_penalty= av_mallocz( sizeof(uint8_t)*(MAX_FCODE+1)*(2*MAX_MV+1) );
893             memset(default_mv_penalty, 0, sizeof(uint8_t)*(MAX_FCODE+1)*(2*MAX_MV+1));
894             memset(default_fcode_tab , 0, sizeof(uint8_t)*(2*MAX_MV+1));
895
896             for(i=-16; i<16; i++){
897                 default_fcode_tab[i + MAX_MV]= 1;
898             }
899         }
900     }
901     s->me.mv_penalty= default_mv_penalty;
902     s->fcode_tab= default_fcode_tab;
903  
904     /* don't use the mv_penalty table for crap MVs as it would be confused */
905     //FIXME remove after fixing / removing old ME
906     if (s->me_method < ME_EPZS) s->me.mv_penalty = default_mv_penalty;
907
908     s->encoding = 1;
909
910     /* init */
911     if (MPV_common_init(s) < 0)
912         return -1;
913
914     if(s->modified_quant)
915         s->chroma_qscale_table= ff_h263_chroma_qscale_table;
916     s->progressive_frame= 
917     s->progressive_sequence= !(avctx->flags & CODEC_FLAG_INTERLACED_DCT);
918     
919     ff_init_me(s);
920
921 #ifdef CONFIG_ENCODERS
922 #ifdef CONFIG_RISKY
923     if (s->out_format == FMT_H263)
924         h263_encode_init(s);
925     if(s->msmpeg4_version)
926         ff_msmpeg4_encode_init(s);
927 #endif
928     if (s->out_format == FMT_MPEG1)
929         ff_mpeg1_encode_init(s);
930 #endif
931
932     /* init default q matrix */
933     for(i=0;i<64;i++) {
934         int j= s->dsp.idct_permutation[i];
935 #ifdef CONFIG_RISKY
936         if(s->codec_id==CODEC_ID_MPEG4 && s->mpeg_quant){
937             s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
938             s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
939         }else if(s->out_format == FMT_H263){
940             s->intra_matrix[j] =
941             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
942         }else
943 #endif
944         { /* mpeg1/2 */
945             s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
946             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
947         }
948         if(s->avctx->intra_matrix)
949             s->intra_matrix[j] = s->avctx->intra_matrix[i];
950         if(s->avctx->inter_matrix)
951             s->inter_matrix[j] = s->avctx->inter_matrix[i];
952     }
953
954     /* precompute matrix */
955     /* for mjpeg, we do include qscale in the matrix */
956     if (s->out_format != FMT_MJPEG) {
957         convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16, 
958                        s->intra_matrix, s->intra_quant_bias, 1, 31);
959         convert_matrix(&s->dsp, s->q_inter_matrix, s->q_inter_matrix16, 
960                        s->inter_matrix, s->inter_quant_bias, 1, 31);
961     }
962
963     if(ff_rate_control_init(s) < 0)
964         return -1;
965
966     s->picture_number = 0;
967     s->picture_in_gop_number = 0;
968     s->fake_picture_number = 0;
969     /* motion detector init */
970     s->f_code = 1;
971     s->b_code = 1;
972
973     return 0;
974 }
975
976 int MPV_encode_end(AVCodecContext *avctx)
977 {
978     MpegEncContext *s = avctx->priv_data;
979
980 #ifdef STATS
981     print_stats();
982 #endif
983
984     ff_rate_control_uninit(s);
985
986     MPV_common_end(s);
987     if (s->out_format == FMT_MJPEG)
988         mjpeg_close(s);
989
990     av_freep(&avctx->extradata);
991       
992     return 0;
993 }
994
995 #endif //CONFIG_ENCODERS
996
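/* fill the derived run/level tables of an RLTable: for both the "not last" and
   "last" halves of the code table, compute max_level[run], max_run[level] and
   index_run[run] (the first table index for a given run) */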
997 void init_rl(RLTable *rl)
998 {
999     int8_t max_level[MAX_RUN+1], max_run[MAX_LEVEL+1];
1000     uint8_t index_run[MAX_RUN+1];
1001     int last, run, level, start, end, i;
1002
1003     /* compute max_level[], max_run[] and index_run[] */
1004     for(last=0;last<2;last++) {
1005         if (last == 0) {
1006             start = 0;
1007             end = rl->last;
1008         } else {
1009             start = rl->last;
1010             end = rl->n;
1011         }
1012
1013         memset(max_level, 0, MAX_RUN + 1);
1014         memset(max_run, 0, MAX_LEVEL + 1);
1015         memset(index_run, rl->n, MAX_RUN + 1);
1016         for(i=start;i<end;i++) {
1017             run = rl->table_run[i];
1018             level = rl->table_level[i];
1019             if (index_run[run] == rl->n)
1020                 index_run[run] = i;
1021             if (level > max_level[run])
1022                 max_level[run] = level;
1023             if (run > max_run[level])
1024                 max_run[level] = run;
1025         }
1026         rl->max_level[last] = av_malloc(MAX_RUN + 1);
1027         memcpy(rl->max_level[last], max_level, MAX_RUN + 1);
1028         rl->max_run[last] = av_malloc(MAX_LEVEL + 1);
1029         memcpy(rl->max_run[last], max_run, MAX_LEVEL + 1);
1030         rl->index_run[last] = av_malloc(MAX_RUN + 1);
1031         memcpy(rl->index_run[last], index_run, MAX_RUN + 1);
1032     }
1033 }
1034
1035 /* draw the edges of width 'w' of an image of size width, height */
1036 //FIXME check that this is ok for mpeg4 interlaced
1037 static void draw_edges_c(uint8_t *buf, int wrap, int width, int height, int w)
1038 {
1039     uint8_t *ptr, *last_line;
1040     int i;
1041
1042     last_line = buf + (height - 1) * wrap;
1043     for(i=0;i<w;i++) {
1044         /* top and bottom */
1045         memcpy(buf - (i + 1) * wrap, buf, width);
1046         memcpy(last_line + (i + 1) * wrap, last_line, width);
1047     }
1048     /* left and right */
1049     ptr = buf;
1050     for(i=0;i<height;i++) {
1051         memset(ptr - w, ptr[0], w);
1052         memset(ptr + width, ptr[width-1], w);
1053         ptr += wrap;
1054     }
1055     /* corners */
1056     for(i=0;i<w;i++) {
1057         memset(buf - (i + 1) * wrap - w, buf[0], w); /* top left */
1058         memset(buf - (i + 1) * wrap + width, buf[width-1], w); /* top right */
1059         memset(last_line + (i + 1) * wrap - w, last_line[0], w); /* bottom left */
1060         memset(last_line + (i + 1) * wrap + width, last_line[width-1], w); /* bottom right */
1061     }
1062 }
1063
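/* return the index of an unused slot in s->picture[]; shared pictures may only
   reuse completely untouched entries, otherwise entries without data are taken */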
1064 int ff_find_unused_picture(MpegEncContext *s, int shared){
1065     int i;
1066     
1067     if(shared){
1068         for(i=0; i<MAX_PICTURE_COUNT; i++){
1069             if(s->picture[i].data[0]==NULL && s->picture[i].type==0) return i;
1070         }
1071     }else{
1072         for(i=0; i<MAX_PICTURE_COUNT; i++){
1073             if(s->picture[i].data[0]==NULL && s->picture[i].type!=0) return i; //FIXME
1074         }
1075         for(i=0; i<MAX_PICTURE_COUNT; i++){
1076             if(s->picture[i].data[0]==NULL) return i;
1077         }
1078     }
1079
1080     assert(0);
1081     return -1;
1082 }
1083
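/* rescale the accumulated DCT error statistics before they can overflow and
   recompute the per-coefficient offsets, roughly
   offset = noise_reduction * dct_count / dct_error_sum */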
1084 static void update_noise_reduction(MpegEncContext *s){
1085     int intra, i;
1086
1087     for(intra=0; intra<2; intra++){
1088         if(s->dct_count[intra] > (1<<16)){
1089             for(i=0; i<64; i++){
1090                 s->dct_error_sum[intra][i] >>=1;
1091             }
1092             s->dct_count[intra] >>= 1;
1093         }
1094         
1095         for(i=0; i<64; i++){
1096             s->dct_offset[intra][i]= (s->avctx->noise_reduction * s->dct_count[intra] + s->dct_error_sum[intra][i]/2) / (s->dct_error_sum[intra][i]+1);
1097         }
1098     }
1099 }
1100
1101 /**
1102  * generic function for encode/decode called after coding/decoding the header and before a frame is coded/decoded
1103  */
1104 int MPV_frame_start(MpegEncContext *s, AVCodecContext *avctx)
1105 {
1106     int i;
1107     AVFrame *pic;
1108     s->mb_skiped = 0;
1109
1110     assert(s->last_picture_ptr==NULL || s->out_format != FMT_H264 || s->codec_id == CODEC_ID_SVQ3);
1111
1112     /* mark&release old frames */
1113     if (s->pict_type != B_TYPE && s->last_picture_ptr && s->last_picture_ptr->data[0]) {
1114         avctx->release_buffer(avctx, (AVFrame*)s->last_picture_ptr);
1115
1116         /* release forgotten pictures */
1117         /* if(mpeg124/h263) */
1118         if(!s->encoding){
1119             for(i=0; i<MAX_PICTURE_COUNT; i++){
1120                 if(s->picture[i].data[0] && &s->picture[i] != s->next_picture_ptr && s->picture[i].reference){
1121                     av_log(avctx, AV_LOG_ERROR, "releasing zombie picture\n");
1122                     avctx->release_buffer(avctx, (AVFrame*)&s->picture[i]);                
1123                 }
1124             }
1125         }
1126     }
1127 alloc:
1128     if(!s->encoding){
1129         /* release non-reference frames */
1130         for(i=0; i<MAX_PICTURE_COUNT; i++){
1131             if(s->picture[i].data[0] && !s->picture[i].reference /*&& s->picture[i].type!=FF_BUFFER_TYPE_SHARED*/){
1132                 s->avctx->release_buffer(s->avctx, (AVFrame*)&s->picture[i]);
1133             }
1134         }
1135
1136         if(s->current_picture_ptr && s->current_picture_ptr->data[0]==NULL)
1137             pic= (AVFrame*)s->current_picture_ptr; //we already have an unused image (maybe it was set before reading the header)
1138         else{
1139             i= ff_find_unused_picture(s, 0);
1140             pic= (AVFrame*)&s->picture[i];
1141         }
1142
1143         pic->reference= s->pict_type != B_TYPE ? 3 : 0;
1144
1145         if(s->current_picture_ptr) //FIXME broken, we need a coded_picture_number in MpegEncContext
1146             pic->coded_picture_number= s->current_picture_ptr->coded_picture_number+1;
1147         
1148         if( alloc_picture(s, (Picture*)pic, 0) < 0)
1149             return -1;
1150
1151         s->current_picture_ptr= (Picture*)pic;
1152         s->current_picture_ptr->top_field_first= s->top_field_first; //FIXME use only the vars from current_pic
1153         s->current_picture_ptr->interlaced_frame= !s->progressive_frame && !s->progressive_sequence;
1154     }
1155
1156     s->current_picture_ptr->pict_type= s->pict_type;
1157 //    if(s->flags && CODEC_FLAG_QSCALE) 
1158   //      s->current_picture_ptr->quality= s->new_picture_ptr->quality;
1159     s->current_picture_ptr->key_frame= s->pict_type == I_TYPE;
1160
1161     copy_picture(&s->current_picture, s->current_picture_ptr);
1162   
1163   if(s->out_format != FMT_H264 || s->codec_id == CODEC_ID_SVQ3){
1164     if (s->pict_type != B_TYPE) {
1165         s->last_picture_ptr= s->next_picture_ptr;
1166         s->next_picture_ptr= s->current_picture_ptr;
1167     }
1168     
1169     if(s->last_picture_ptr) copy_picture(&s->last_picture, s->last_picture_ptr);
1170     if(s->next_picture_ptr) copy_picture(&s->next_picture, s->next_picture_ptr);
1171     
1172     if(s->pict_type != I_TYPE && (s->last_picture_ptr==NULL || s->last_picture_ptr->data[0]==NULL)){
1173         av_log(avctx, AV_LOG_ERROR, "warning: first frame is not a keyframe\n");
1174         assert(s->pict_type != B_TYPE); //these should have been dropped if we don't have a reference
1175         goto alloc;
1176     }
1177
1178     assert(s->pict_type == I_TYPE || (s->last_picture_ptr && s->last_picture_ptr->data[0]));
1179
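    /* for field pictures, offset data[] to the bottom field if needed and double
       all strides so one field can be addressed like a frame */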
1180     if(s->picture_structure!=PICT_FRAME){
1181         int i;
1182         for(i=0; i<4; i++){
1183             if(s->picture_structure == PICT_BOTTOM_FIELD){
1184                  s->current_picture.data[i] += s->current_picture.linesize[i];
1185             } 
1186             s->current_picture.linesize[i] *= 2;
1187             s->last_picture.linesize[i] *=2;
1188             s->next_picture.linesize[i] *=2;
1189         }
1190     }
1191   }
1192    
1193     s->hurry_up= s->avctx->hurry_up;
1194     s->error_resilience= avctx->error_resilience;
1195
1196     /* set dequantizer, we can't do it during init as it might change for mpeg4
1197        and we can't do it in the header decode as init isn't called for mpeg4 there yet */
1198     if(s->mpeg_quant || s->codec_id == CODEC_ID_MPEG2VIDEO){
1199         s->dct_unquantize_intra = s->dct_unquantize_mpeg2_intra;
1200         s->dct_unquantize_inter = s->dct_unquantize_mpeg2_inter;
1201     }else if(s->out_format == FMT_H263){
1202         s->dct_unquantize_intra = s->dct_unquantize_h263_intra;
1203         s->dct_unquantize_inter = s->dct_unquantize_h263_inter;
1204     }else{
1205         s->dct_unquantize_intra = s->dct_unquantize_mpeg1_intra;
1206         s->dct_unquantize_inter = s->dct_unquantize_mpeg1_inter;
1207     }
1208
1209     if(s->dct_error_sum){
1210         assert(s->avctx->noise_reduction && s->encoding);
1211
1212         update_noise_reduction(s);
1213     }
1214         
1215 #ifdef HAVE_XVMC
1216     if(s->avctx->xvmc_acceleration)
1217         return XVMC_field_start(s, avctx);
1218 #endif
1219     return 0;
1220 }
1221
1222 /* generic function for encode/decode called after a frame has been coded/decoded */
1223 void MPV_frame_end(MpegEncContext *s)
1224 {
1225     int i;
1226     /* draw padding edges for correct motion prediction when MVs point outside the picture */
1227 #ifdef HAVE_XVMC
1228 //just to make sure that all data is rendered.
1229     if(s->avctx->xvmc_acceleration){
1230         XVMC_field_end(s);
1231     }else
1232 #endif
1233     if(s->unrestricted_mv && s->pict_type != B_TYPE && !s->intra_only && !(s->flags&CODEC_FLAG_EMU_EDGE)) {
1234             draw_edges(s->current_picture.data[0], s->linesize  , s->h_edge_pos   , s->v_edge_pos   , EDGE_WIDTH  );
1235             draw_edges(s->current_picture.data[1], s->uvlinesize, s->h_edge_pos>>1, s->v_edge_pos>>1, EDGE_WIDTH/2);
1236             draw_edges(s->current_picture.data[2], s->uvlinesize, s->h_edge_pos>>1, s->v_edge_pos>>1, EDGE_WIDTH/2);
1237     }
1238     emms_c();
1239     
1240     s->last_pict_type    = s->pict_type;
1241     if(s->pict_type!=B_TYPE){
1242         s->last_non_b_pict_type= s->pict_type;
1243     }
1244 #if 0
1245         /* copy back current_picture variables */
1246     for(i=0; i<MAX_PICTURE_COUNT; i++){
1247         if(s->picture[i].data[0] == s->current_picture.data[0]){
1248             s->picture[i]= s->current_picture;
1249             break;
1250         }    
1251     }
1252     assert(i<MAX_PICTURE_COUNT);
1253 #endif    
1254
1255     if(s->encoding){
1256         /* release non-reference frames */
1257         for(i=0; i<MAX_PICTURE_COUNT; i++){
1258             if(s->picture[i].data[0] && !s->picture[i].reference /*&& s->picture[i].type!=FF_BUFFER_TYPE_SHARED*/){
1259                 s->avctx->release_buffer(s->avctx, (AVFrame*)&s->picture[i]);
1260             }
1261         }
1262     }
1263     // clear copies, to avoid confusion
1264 #if 0
1265     memset(&s->last_picture, 0, sizeof(Picture));
1266     memset(&s->next_picture, 0, sizeof(Picture));
1267     memset(&s->current_picture, 0, sizeof(Picture));
1268 #endif
1269 }
1270
1271 /**
1272  * draws a line from (ex, ey) -> (sx, sy).
1273  * @param w width of the image
1274  * @param h height of the image
1275  * @param stride stride/linesize of the image
1276  * @param color color of the line
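 * note: uses a simple 16.16 fixed-point DDA along the dominant axis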
1277  */
1278 static void draw_line(uint8_t *buf, int sx, int sy, int ex, int ey, int w, int h, int stride, int color){
1279     int t, x, y, f;
1280     
1281     sx= clip(sx, 0, w-1);
1282     sy= clip(sy, 0, h-1);
1283     ex= clip(ex, 0, w-1);
1284     ey= clip(ey, 0, h-1);
1285     
1286     buf[sy*stride + sx]+= color;
1287     
1288     if(ABS(ex - sx) > ABS(ey - sy)){
1289         if(sx > ex){
1290             t=sx; sx=ex; ex=t;
1291             t=sy; sy=ey; ey=t;
1292         }
1293         buf+= sx + sy*stride;
1294         ex-= sx;
1295         f= ((ey-sy)<<16)/ex;
1296         for(x= 0; x <= ex; x++){
1297             y= ((x*f) + (1<<15))>>16;
1298             buf[y*stride + x]+= color;
1299         }
1300     }else{
1301         if(sy > ey){
1302             t=sx; sx=ex; ex=t;
1303             t=sy; sy=ey; ey=t;
1304         }
1305         buf+= sx + sy*stride;
1306         ey-= sy;
1307         if(ey) f= ((ex-sx)<<16)/ey;
1308         else   f= 0;
1309         for(y= 0; y <= ey; y++){
1310             x= ((y*f) + (1<<15))>>16;
1311             buf[y*stride + x]+= color;
1312         }
1313     }
1314 }
1315
1316 /**
1317  * draws an arrow from (ex, ey) -> (sx, sy).
1318  * @param w width of the image
1319  * @param h height of the image
1320  * @param stride stride/linesize of the image
1321  * @param color color of the arrow
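 * note: the arrow head is drawn as two short strokes at roughly ±45° to the shaft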
1322  */
1323 static void draw_arrow(uint8_t *buf, int sx, int sy, int ex, int ey, int w, int h, int stride, int color){ 
1324     int dx,dy;
1325
1326     sx= clip(sx, -100, w+100);
1327     sy= clip(sy, -100, h+100);
1328     ex= clip(ex, -100, w+100);
1329     ey= clip(ey, -100, h+100);
1330     
1331     dx= ex - sx;
1332     dy= ey - sy;
1333     
1334     if(dx*dx + dy*dy > 3*3){
1335         int rx=  dx + dy;
1336         int ry= -dx + dy;
1337         int length= ff_sqrt((rx*rx + ry*ry)<<8);
1338         
1339         //FIXME subpixel accuracy
1340         rx= ROUNDED_DIV(rx*3<<4, length);
1341         ry= ROUNDED_DIV(ry*3<<4, length);
1342         
1343         draw_line(buf, sx, sy, sx + rx, sy + ry, w, h, stride, color);
1344         draw_line(buf, sx, sy, sx - ry, sy + rx, w, h, stride, color);
1345     }
1346     draw_line(buf, sx, sy, ex, ey, w, h, stride, color);
1347 }
1348
1349 /**
1350  * prints debugging info for the given picture.
1351  */
1352 void ff_print_debug_info(MpegEncContext *s, Picture *pict){
1353
1354     if(!pict || !pict->mb_type) return;
1355
1356     if(s->avctx->debug&(FF_DEBUG_SKIP | FF_DEBUG_QP | FF_DEBUG_MB_TYPE)){
1357         int x,y;
1358
1359         for(y=0; y<s->mb_height; y++){
1360             for(x=0; x<s->mb_width; x++){
1361                 if(s->avctx->debug&FF_DEBUG_SKIP){
1362                     int count= s->mbskip_table[x + y*s->mb_stride];
1363                     if(count>9) count=9;
1364                     av_log(s->avctx, AV_LOG_DEBUG, "%1d", count);
1365                 }
1366                 if(s->avctx->debug&FF_DEBUG_QP){
1367                     av_log(s->avctx, AV_LOG_DEBUG, "%2d", pict->qscale_table[x + y*s->mb_stride]);
1368                 }
1369                 if(s->avctx->debug&FF_DEBUG_MB_TYPE){
1370                     int mb_type= pict->mb_type[x + y*s->mb_stride];
1371                     
1372                     //Type & MV direction
1373                     if(IS_PCM(mb_type))
1374                         av_log(s->avctx, AV_LOG_DEBUG, "P");
1375                     else if(IS_INTRA(mb_type) && IS_ACPRED(mb_type))
1376                         av_log(s->avctx, AV_LOG_DEBUG, "A");
1377                     else if(IS_INTRA4x4(mb_type))
1378                         av_log(s->avctx, AV_LOG_DEBUG, "i");
1379                     else if(IS_INTRA16x16(mb_type))
1380                         av_log(s->avctx, AV_LOG_DEBUG, "I");
1381                     else if(IS_DIRECT(mb_type) && IS_SKIP(mb_type))
1382                         av_log(s->avctx, AV_LOG_DEBUG, "d");
1383                     else if(IS_DIRECT(mb_type))
1384                         av_log(s->avctx, AV_LOG_DEBUG, "D");
1385                     else if(IS_GMC(mb_type) && IS_SKIP(mb_type))
1386                         av_log(s->avctx, AV_LOG_DEBUG, "g");
1387                     else if(IS_GMC(mb_type))
1388                         av_log(s->avctx, AV_LOG_DEBUG, "G");
1389                     else if(IS_SKIP(mb_type))
1390                         av_log(s->avctx, AV_LOG_DEBUG, "S");
1391                     else if(!USES_LIST(mb_type, 1))
1392                         av_log(s->avctx, AV_LOG_DEBUG, ">");
1393                     else if(!USES_LIST(mb_type, 0))
1394                         av_log(s->avctx, AV_LOG_DEBUG, "<");
1395                     else{
1396                         assert(USES_LIST(mb_type, 0) && USES_LIST(mb_type, 1));
1397                         av_log(s->avctx, AV_LOG_DEBUG, "X");
1398                     }
1399                     
1400                     //segmentation
1401                     if(IS_8X8(mb_type))
1402                         av_log(s->avctx, AV_LOG_DEBUG, "+");
1403                     else if(IS_16X8(mb_type))
1404                         av_log(s->avctx, AV_LOG_DEBUG, "-");
1405                     else if(IS_8X16(mb_type))
1406                         av_log(s->avctx, AV_LOG_DEBUG, "¦");
1407                     else if(IS_INTRA(mb_type) || IS_16X16(mb_type))
1408                         av_log(s->avctx, AV_LOG_DEBUG, " ");
1409                     else
1410                         av_log(s->avctx, AV_LOG_DEBUG, "?");
1411                     
1412                         
1413                     if(IS_INTERLACED(mb_type) && s->codec_id == CODEC_ID_H264)
1414                         av_log(s->avctx, AV_LOG_DEBUG, "=");
1415                     else
1416                         av_log(s->avctx, AV_LOG_DEBUG, " ");
1417                 }
1418 //                av_log(s->avctx, AV_LOG_DEBUG, " ");
1419             }
1420             av_log(s->avctx, AV_LOG_DEBUG, "\n");
1421         }
1422     }
1423
1424     if(s->avctx->debug&(FF_DEBUG_VIS_MV|FF_DEBUG_VIS_QP|FF_DEBUG_VIS_MB_TYPE)){
1425         const int shift= 1 + s->quarter_sample;
1426         int mb_y;
1427         uint8_t *ptr= pict->data[0];
1428         s->low_delay=0; //needed to see the vectors without trashing the buffers
1429
1430         for(mb_y=0; mb_y<s->mb_height; mb_y++){
1431             int mb_x;
1432             for(mb_x=0; mb_x<s->mb_width; mb_x++){
1433                 const int mb_index= mb_x + mb_y*s->mb_stride;
1434                 if((s->avctx->debug&FF_DEBUG_VIS_MV) && pict->motion_val){
1435                   if(IS_8X8(pict->mb_type[mb_index])){
1436                     int i;
1437                     for(i=0; i<4; i++){
1438                         int sx= mb_x*16 + 4 + 8*(i&1);
1439                         int sy= mb_y*16 + 4 + 8*(i>>1);
1440                         int xy= 1 + mb_x*2 + (i&1) + (mb_y*2 + 1 + (i>>1))*(s->mb_width*2 + 2);
1441                         int mx= (pict->motion_val[0][xy][0]>>shift) + sx;
1442                         int my= (pict->motion_val[0][xy][1]>>shift) + sy;
1443                         draw_arrow(ptr, sx, sy, mx, my, s->width, s->height, s->linesize, 100);
1444                     }
1445                   }else if(IS_16X8(pict->mb_type[mb_index])){
1446                     int i;
1447                     for(i=0; i<2; i++){
1448                         int sx=mb_x*16 + 8;
1449                         int sy=mb_y*16 + 4 + 8*i;
1450                         int xy=1 + mb_x*2 + (mb_y*2 + 1 + i)*(s->mb_width*2 + 2);
1451                         int mx=(pict->motion_val[0][xy][0]>>shift) + sx;
1452                         int my=(pict->motion_val[0][xy][1]>>shift) + sy;
1453                         draw_arrow(ptr, sx, sy, mx, my, s->width, s->height, s->linesize, 100);
1454                     }
1455                   }else{
1456                     int sx= mb_x*16 + 8;
1457                     int sy= mb_y*16 + 8;
1458                     int xy= 1 + mb_x*2 + (mb_y*2 + 1)*(s->mb_width*2 + 2);
1459                     int mx= (pict->motion_val[0][xy][0]>>shift) + sx;
1460                     int my= (pict->motion_val[0][xy][1]>>shift) + sy;
1461                     draw_arrow(ptr, sx, sy, mx, my, s->width, s->height, s->linesize, 100);
1462                   }
1463                 }
1464                 if((s->avctx->debug&FF_DEBUG_VIS_QP) && pict->motion_val){
1465                     uint64_t c= (pict->qscale_table[mb_index]*128/31) * 0x0101010101010101ULL;
1466                     int y;
1467                     for(y=0; y<8; y++){
1468                         *(uint64_t*)(pict->data[1] + 8*mb_x + (8*mb_y + y)*pict->linesize[1])= c;
1469                         *(uint64_t*)(pict->data[2] + 8*mb_x + (8*mb_y + y)*pict->linesize[2])= c;
1470                     }
1471                 }
1472                 if((s->avctx->debug&FF_DEBUG_VIS_MB_TYPE) && pict->motion_val){
1473                     int mb_type= pict->mb_type[mb_index];
1474                     uint64_t u,v;
1475                     int y;
1476 #define COLOR(theta, r)\
1477 u= (int)(128 + r*cos(theta*3.141592/180));\
1478 v= (int)(128 + r*sin(theta*3.141592/180));
1479
1480                     
1481                     u=v=128;
1482                     if(IS_PCM(mb_type)){
1483                         COLOR(120,48)
1484                     }else if((IS_INTRA(mb_type) && IS_ACPRED(mb_type)) || IS_INTRA16x16(mb_type)){
1485                         COLOR(30,48)
1486                     }else if(IS_INTRA4x4(mb_type)){
1487                         COLOR(90,48)
1488                     }else if(IS_DIRECT(mb_type) && IS_SKIP(mb_type)){
1489 //                        COLOR(120,48)
1490                     }else if(IS_DIRECT(mb_type)){
1491                         COLOR(150,48)
1492                     }else if(IS_GMC(mb_type) && IS_SKIP(mb_type)){
1493                         COLOR(170,48)
1494                     }else if(IS_GMC(mb_type)){
1495                         COLOR(190,48)
1496                     }else if(IS_SKIP(mb_type)){
1497 //                        COLOR(180,48)
1498                     }else if(!USES_LIST(mb_type, 1)){
1499                         COLOR(240,48)
1500                     }else if(!USES_LIST(mb_type, 0)){
1501                         COLOR(0,48)
1502                     }else{
1503                         assert(USES_LIST(mb_type, 0) && USES_LIST(mb_type, 1));
1504                         COLOR(300,48)
1505                     }
1506
1507                     u*= 0x0101010101010101ULL;
1508                     v*= 0x0101010101010101ULL;
1509                     for(y=0; y<8; y++){
1510                         *(uint64_t*)(pict->data[1] + 8*mb_x + (8*mb_y + y)*pict->linesize[1])= u;
1511                         *(uint64_t*)(pict->data[2] + 8*mb_x + (8*mb_y + y)*pict->linesize[2])= v;
1512                     }
1513
1514                     //segmentation
1515                     if(IS_8X8(mb_type) || IS_16X8(mb_type)){
1516                         *(uint64_t*)(pict->data[0] + 16*mb_x + 0 + (16*mb_y + 8)*pict->linesize[0])^= 0x8080808080808080ULL;
1517                         *(uint64_t*)(pict->data[0] + 16*mb_x + 8 + (16*mb_y + 8)*pict->linesize[0])^= 0x8080808080808080ULL;
1518                     }
1519                     if(IS_8X8(mb_type) || IS_8X16(mb_type)){
1520                         for(y=0; y<16; y++)
1521                             pict->data[0][16*mb_x + 8 + (16*mb_y + y)*pict->linesize[0]]^= 0x80;
1522                     }
1523                         
1524                     if(IS_INTERLACED(mb_type) && s->codec_id == CODEC_ID_H264){
1525                         // hmm
1526                     }
1527                 }
1528                 s->mbskip_table[mb_index]=0;
1529             }
1530         }
1531     }
1532 }
1533
1534 #ifdef CONFIG_ENCODERS
1535
1536 static int get_sae(uint8_t *src, int ref, int stride){
1537     int x,y;
1538     int acc=0;
1539     
1540     for(y=0; y<16; y++){
1541         for(x=0; x<16; x++){
1542             acc+= ABS(src[x+y*stride] - ref);
1543         }
1544     }
1545     
1546     return acc;
1547 }
1548
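/**
 * Rough intra / scene-change heuristic (used by b_frame_strategy==1 below):
 * for every 16x16 block, get_sae() measures the absolute deviation from the
 * block mean; the block is counted when this flatness score plus a fixed bias
 * (500) is still smaller than the inter SAD against the previous picture.
 * select_input_picture() stops inserting B frames once this count grows too
 * large relative to the number of macroblocks.
 */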
1549 static int get_intra_count(MpegEncContext *s, uint8_t *src, uint8_t *ref, int stride){
1550     int x, y, w, h;
1551     int acc=0;
1552     
1553     w= s->width &~15;
1554     h= s->height&~15;
1555     
1556     for(y=0; y<h; y+=16){
1557         for(x=0; x<w; x+=16){
1558             int offset= x + y*stride;
1559             int sad = s->dsp.pix_abs16x16(src + offset, ref + offset, stride);
1560             int mean= (s->dsp.pix_sum(src + offset, stride) + 128)>>8;
1561             int sae = get_sae(src + offset, mean, stride);
1562             
1563             acc+= sae + 500 < sad;
1564         }
1565     }
1566     return acc;
1567 }
1568
1569
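/**
 * Queues one user supplied frame for encoding. If the input buffer may be
 * referenced directly (no B-frame reordering delay or CODEC_FLAG_INPUT_PRESERVED
 * set, and the strides match) it is wrapped as-is; otherwise the planes are
 * copied into an internal picture at a 16 byte offset.
 */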
1570 static int load_input_picture(MpegEncContext *s, AVFrame *pic_arg){
1571     AVFrame *pic=NULL;
1572     int i;
1573     const int encoding_delay= s->max_b_frames;
1574     int direct=1;
1575     
1576   if(pic_arg){
1577     if(encoding_delay && !(s->flags&CODEC_FLAG_INPUT_PRESERVED)) direct=0;
1578     if(pic_arg->linesize[0] != s->linesize) direct=0;
1579     if(pic_arg->linesize[1] != s->uvlinesize) direct=0;
1580     if(pic_arg->linesize[2] != s->uvlinesize) direct=0;
1581   
1582 //    av_log(AV_LOG_DEBUG, "%d %d %d %d\n",pic_arg->linesize[0], pic_arg->linesize[1], s->linesize, s->uvlinesize);
1583     
1584     if(direct){
1585         i= ff_find_unused_picture(s, 1);
1586
1587         pic= (AVFrame*)&s->picture[i];
1588         pic->reference= 3;
1589     
1590         for(i=0; i<4; i++){
1591             pic->data[i]= pic_arg->data[i];
1592             pic->linesize[i]= pic_arg->linesize[i];
1593         }
1594         alloc_picture(s, (Picture*)pic, 1);
1595     }else{
1596         int offset= 16;
1597         i= ff_find_unused_picture(s, 0);
1598
1599         pic= (AVFrame*)&s->picture[i];
1600         pic->reference= 3;
1601
1602         alloc_picture(s, (Picture*)pic, 0);
1603
1604         if(   pic->data[0] + offset == pic_arg->data[0] 
1605            && pic->data[1] + offset == pic_arg->data[1]
1606            && pic->data[2] + offset == pic_arg->data[2]){
1607        // empty
1608         }else{
1609             int h_chroma_shift, v_chroma_shift;
1610             avcodec_get_chroma_sub_sample(s->avctx->pix_fmt, &h_chroma_shift, &v_chroma_shift);
1611         
1612             for(i=0; i<3; i++){
1613                 int src_stride= pic_arg->linesize[i];
1614                 int dst_stride= i ? s->uvlinesize : s->linesize;
1615                 int h_shift= i ? h_chroma_shift : 0;
1616                 int v_shift= i ? v_chroma_shift : 0;
1617                 int w= s->width >>h_shift;
1618                 int h= s->height>>v_shift;
1619                 uint8_t *src= pic_arg->data[i];
1620                 uint8_t *dst= pic->data[i] + offset;
1621             
1622                 if(src_stride==dst_stride)
1623                     memcpy(dst, src, src_stride*h);
1624                 else{
1625                     while(h--){
1626                         memcpy(dst, src, w);
1627                         dst += dst_stride;
1628                         src += src_stride;
1629                     }
1630                 }
1631             }
1632         }
1633     }
1634     pic->quality= pic_arg->quality;
1635     pic->pict_type= pic_arg->pict_type;
1636     pic->pts = pic_arg->pts;
1637     pic->interlaced_frame = pic_arg->interlaced_frame;
1638     pic->top_field_first = pic_arg->top_field_first;
1639
1640     if(s->input_picture[encoding_delay])
1641         pic->display_picture_number= s->input_picture[encoding_delay]->display_picture_number + 1;
1642     
1643   }
1644
1645     /* shift buffer entries */
1646     for(i=1; i<MAX_PICTURE_COUNT /*s->encoding_delay+1*/; i++)
1647         s->input_picture[i-1]= s->input_picture[i];
1648         
1649     s->input_picture[encoding_delay]= (Picture*)pic;
1650
1651     return 0;
1652 }
1653
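/**
 * Picks the next picture to be coded from the input queue: decides whether it
 * becomes an I, P or B picture (honouring two-pass data, a user selected
 * pict_type and the chosen b_frame_strategy), assigns coded_picture_number and
 * sets up s->new_picture / s->current_picture accordingly.
 */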
1654 static void select_input_picture(MpegEncContext *s){
1655     int i;
1656     int coded_pic_num=0;    
1657
1658     if(s->reordered_input_picture[0])
1659         coded_pic_num= s->reordered_input_picture[0]->coded_picture_number + 1;
1660
1661     for(i=1; i<MAX_PICTURE_COUNT; i++)
1662         s->reordered_input_picture[i-1]= s->reordered_input_picture[i];
1663     s->reordered_input_picture[MAX_PICTURE_COUNT-1]= NULL;
1664
1665     /* set next picture types & ordering */
1666     if(s->reordered_input_picture[0]==NULL && s->input_picture[0]){
1667         if(/*s->picture_in_gop_number >= s->gop_size ||*/ s->next_picture_ptr==NULL || s->intra_only){
1668             s->reordered_input_picture[0]= s->input_picture[0];
1669             s->reordered_input_picture[0]->pict_type= I_TYPE;
1670             s->reordered_input_picture[0]->coded_picture_number= coded_pic_num;
1671         }else{
1672             int b_frames;
1673             
1674             if(s->flags&CODEC_FLAG_PASS2){
1675                 for(i=0; i<s->max_b_frames+1; i++){
1676                     int pict_num= s->input_picture[0]->display_picture_number + i;
1677                     int pict_type= s->rc_context.entry[pict_num].new_pict_type;
1678                     s->input_picture[i]->pict_type= pict_type;
1679                     
1680                     if(i + 1 >= s->rc_context.num_entries) break;
1681                 }
1682             }
1683
1684             if(s->input_picture[0]->pict_type){
1685                 /* user selected pict_type */
1686                 for(b_frames=0; b_frames<s->max_b_frames+1; b_frames++){
1687                     if(s->input_picture[b_frames]->pict_type!=B_TYPE) break;
1688                 }
1689             
1690                 if(b_frames > s->max_b_frames){
1691                     av_log(s->avctx, AV_LOG_ERROR, "warning, too many bframes in a row\n");
1692                     b_frames = s->max_b_frames;
1693                 }
1694             }else if(s->avctx->b_frame_strategy==0){
1695                 b_frames= s->max_b_frames;
1696                 while(b_frames && !s->input_picture[b_frames]) b_frames--;
1697             }else if(s->avctx->b_frame_strategy==1){
1698                 for(i=1; i<s->max_b_frames+1; i++){
1699                     if(s->input_picture[i] && s->input_picture[i]->b_frame_score==0){
1700                         s->input_picture[i]->b_frame_score= 
1701                             get_intra_count(s, s->input_picture[i  ]->data[0], 
1702                                                s->input_picture[i-1]->data[0], s->linesize) + 1;
1703                     }
1704                 }
1705                 for(i=0; i<s->max_b_frames; i++){
1706                     if(s->input_picture[i]==NULL || s->input_picture[i]->b_frame_score - 1 > s->mb_num/40) break;
1707                 }
1708                                 
1709                 b_frames= FFMAX(0, i-1);
1710                 
1711                 /* reset scores */
1712                 for(i=0; i<b_frames+1; i++){
1713                     s->input_picture[i]->b_frame_score=0;
1714                 }
1715             }else{
1716                 av_log(s->avctx, AV_LOG_ERROR, "illegal b frame strategy\n");
1717                 b_frames=0;
1718             }
1719
1720             emms_c();
1721 //static int b_count=0;
1722 //b_count+= b_frames;
1723 //av_log(s->avctx, AV_LOG_DEBUG, "b_frames: %d\n", b_count);
1724                         
1725             s->reordered_input_picture[0]= s->input_picture[b_frames];
1726             if(   s->picture_in_gop_number + b_frames >= s->gop_size 
1727                || s->reordered_input_picture[0]->pict_type== I_TYPE)
1728                 s->reordered_input_picture[0]->pict_type= I_TYPE;
1729             else
1730                 s->reordered_input_picture[0]->pict_type= P_TYPE;
1731             s->reordered_input_picture[0]->coded_picture_number= coded_pic_num;
1732             for(i=0; i<b_frames; i++){
1733                 coded_pic_num++;
1734                 s->reordered_input_picture[i+1]= s->input_picture[i];
1735                 s->reordered_input_picture[i+1]->pict_type= B_TYPE;
1736                 s->reordered_input_picture[i+1]->coded_picture_number= coded_pic_num;
1737             }
1738         }
1739     }
1740     
1741     if(s->reordered_input_picture[0]){
1742         s->reordered_input_picture[0]->reference= s->reordered_input_picture[0]->pict_type!=B_TYPE ? 3 : 0;
1743
1744         copy_picture(&s->new_picture, s->reordered_input_picture[0]);
1745
1746         if(s->reordered_input_picture[0]->type == FF_BUFFER_TYPE_SHARED){
1747             // input is a shared pix, so we can't modify it -> alloc a new one & ensure that the shared one is reusable
1748         
1749             int i= ff_find_unused_picture(s, 0);
1750             Picture *pic= &s->picture[i];
1751
1752             /* mark us unused / free shared pic */
1753             for(i=0; i<4; i++)
1754                 s->reordered_input_picture[0]->data[i]= NULL;
1755             s->reordered_input_picture[0]->type= 0;
1756             
1757             //FIXME bad, copy * except
1758             pic->pict_type = s->reordered_input_picture[0]->pict_type;
1759             pic->quality   = s->reordered_input_picture[0]->quality;
1760             pic->coded_picture_number = s->reordered_input_picture[0]->coded_picture_number;
1761             pic->reference = s->reordered_input_picture[0]->reference;
1762             pic->pts = s->reordered_input_picture[0]->pts;
1763             
1764             alloc_picture(s, pic, 0);
1765
1766             s->current_picture_ptr= pic;
1767         }else{
1768             // input is not a shared pix -> reuse buffer for current_pix
1769
1770             assert(   s->reordered_input_picture[0]->type==FF_BUFFER_TYPE_USER 
1771                    || s->reordered_input_picture[0]->type==FF_BUFFER_TYPE_INTERNAL);
1772             
1773             s->current_picture_ptr= s->reordered_input_picture[0];
1774             for(i=0; i<4; i++){
1775                 s->new_picture.data[i]+=16;
1776             }
1777         }
1778         copy_picture(&s->current_picture, s->current_picture_ptr);
1779     
1780         s->picture_number= s->new_picture.display_picture_number;
1781 //printf("dpn:%d\n", s->picture_number);
1782     }else{
1783        memset(&s->new_picture, 0, sizeof(Picture));
1784     }
1785 }
1786
1787 int MPV_encode_picture(AVCodecContext *avctx,
1788                        unsigned char *buf, int buf_size, void *data)
1789 {
1790     MpegEncContext *s = avctx->priv_data;
1791     AVFrame *pic_arg = data;
1792     int i, stuffing_count;
1793
1794     if(avctx->pix_fmt != PIX_FMT_YUV420P){
1795         av_log(avctx, AV_LOG_ERROR, "this codec supports only YUV420P\n");
1796         return -1;
1797     }
1798     
1799     init_put_bits(&s->pb, buf, buf_size);
1800
1801     s->picture_in_gop_number++;
1802
1803     load_input_picture(s, pic_arg);
1804     
1805     select_input_picture(s);
1806     
1807     /* output? */
1808     if(s->new_picture.data[0]){
1809
1810         s->pict_type= s->new_picture.pict_type;
1811 //emms_c();
1812 //printf("qs:%f %f %d\n", s->new_picture.quality, s->current_picture.quality, s->qscale);
1813         MPV_frame_start(s, avctx);
1814
1815         encode_picture(s, s->picture_number);
1816         
1817         avctx->real_pict_num  = s->picture_number;
1818         avctx->header_bits = s->header_bits;
1819         avctx->mv_bits     = s->mv_bits;
1820         avctx->misc_bits   = s->misc_bits;
1821         avctx->i_tex_bits  = s->i_tex_bits;
1822         avctx->p_tex_bits  = s->p_tex_bits;
1823         avctx->i_count     = s->i_count;
1824         avctx->p_count     = s->mb_num - s->i_count - s->skip_count; //FIXME f/b_count in avctx
1825         avctx->skip_count  = s->skip_count;
1826
1827         MPV_frame_end(s);
1828
1829         if (s->out_format == FMT_MJPEG)
1830             mjpeg_picture_trailer(s);
1831         
1832         if(s->flags&CODEC_FLAG_PASS1)
1833             ff_write_pass1_stats(s);
1834
1835         for(i=0; i<4; i++){
1836             avctx->error[i] += s->current_picture_ptr->error[i];
1837         }
1838     }
1839
1840     s->input_picture_number++;
1841
1842     flush_put_bits(&s->pb);
1843     s->frame_bits  = (pbBufPtr(&s->pb) - s->pb.buf) * 8;
1844
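    /* ff_vbv_update() returns the number of stuffing bytes needed to keep the
       VBV buffer model in check; they are appended below in a codec specific way. */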
1845     stuffing_count= ff_vbv_update(s, s->frame_bits);
1846     if(stuffing_count){
1847         switch(s->codec_id){
1848         case CODEC_ID_MPEG1VIDEO:
1849         case CODEC_ID_MPEG2VIDEO:
1850             while(stuffing_count--){
1851                 put_bits(&s->pb, 8, 0);
1852             }
1853         break;
1854         case CODEC_ID_MPEG4:
1855             put_bits(&s->pb, 16, 0);
1856             put_bits(&s->pb, 16, 0x1C3);
1857             stuffing_count -= 4;
1858             while(stuffing_count--){
1859                 put_bits(&s->pb, 8, 0xFF);
1860             }
1861         break;
1862         default:
1863             av_log(s->avctx, AV_LOG_ERROR, "vbv buffer overflow\n");
1864         }
1865         flush_put_bits(&s->pb);
1866         s->frame_bits  = (pbBufPtr(&s->pb) - s->pb.buf) * 8;
1867     }
1868     
1869     s->total_bits += s->frame_bits;
1870     avctx->frame_bits  = s->frame_bits;
1871     
1872     return s->frame_bits/8;
1873 }
1874
1875 #endif //CONFIG_ENCODERS
1876
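/* global motion compensation for the single warp point case
   (s->real_sprite_warping_points==1): a pure translation given by
   s->sprite_offset, applied with the dsp.gmc1() primitive. */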
1877 static inline void gmc1_motion(MpegEncContext *s,
1878                                uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1879                                int dest_offset,
1880                                uint8_t **ref_picture, int src_offset)
1881 {
1882     uint8_t *ptr;
1883     int offset, src_x, src_y, linesize, uvlinesize;
1884     int motion_x, motion_y;
1885     int emu=0;
1886
1887     motion_x= s->sprite_offset[0][0];
1888     motion_y= s->sprite_offset[0][1];
1889     src_x = s->mb_x * 16 + (motion_x >> (s->sprite_warping_accuracy+1));
1890     src_y = s->mb_y * 16 + (motion_y >> (s->sprite_warping_accuracy+1));
1891     motion_x<<=(3-s->sprite_warping_accuracy);
1892     motion_y<<=(3-s->sprite_warping_accuracy);
1893     src_x = clip(src_x, -16, s->width);
1894     if (src_x == s->width)
1895         motion_x =0;
1896     src_y = clip(src_y, -16, s->height);
1897     if (src_y == s->height)
1898         motion_y =0;
1899
1900     linesize = s->linesize;
1901     uvlinesize = s->uvlinesize;
1902     
1903     ptr = ref_picture[0] + (src_y * linesize) + src_x + src_offset;
1904
1905     dest_y+=dest_offset;
1906     if(s->flags&CODEC_FLAG_EMU_EDGE){
1907         if(   (unsigned)src_x >= s->h_edge_pos - 17
1908            || (unsigned)src_y >= s->v_edge_pos - 17){
1909             ff_emulated_edge_mc(s->edge_emu_buffer, ptr, linesize, 17, 17, src_x, src_y, s->h_edge_pos, s->v_edge_pos);
1910             ptr= s->edge_emu_buffer;
1911         }
1912     }
1913     
1914     if((motion_x|motion_y)&7){
1915         s->dsp.gmc1(dest_y  , ptr  , linesize, 16, motion_x&15, motion_y&15, 128 - s->no_rounding);
1916         s->dsp.gmc1(dest_y+8, ptr+8, linesize, 16, motion_x&15, motion_y&15, 128 - s->no_rounding);
1917     }else{
1918         int dxy;
1919         
1920         dxy= ((motion_x>>3)&1) | ((motion_y>>2)&2);
1921         if (s->no_rounding){
1922             s->dsp.put_no_rnd_pixels_tab[0][dxy](dest_y, ptr, linesize, 16);
1923         }else{
1924             s->dsp.put_pixels_tab       [0][dxy](dest_y, ptr, linesize, 16);
1925         }
1926     }
1927     
1928     if(s->flags&CODEC_FLAG_GRAY) return;
1929
1930     motion_x= s->sprite_offset[1][0];
1931     motion_y= s->sprite_offset[1][1];
1932     src_x = s->mb_x * 8 + (motion_x >> (s->sprite_warping_accuracy+1));
1933     src_y = s->mb_y * 8 + (motion_y >> (s->sprite_warping_accuracy+1));
1934     motion_x<<=(3-s->sprite_warping_accuracy);
1935     motion_y<<=(3-s->sprite_warping_accuracy);
1936     src_x = clip(src_x, -8, s->width>>1);
1937     if (src_x == s->width>>1)
1938         motion_x =0;
1939     src_y = clip(src_y, -8, s->height>>1);
1940     if (src_y == s->height>>1)
1941         motion_y =0;
1942
1943     offset = (src_y * uvlinesize) + src_x + (src_offset>>1);
1944     ptr = ref_picture[1] + offset;
1945     if(s->flags&CODEC_FLAG_EMU_EDGE){
1946         if(   (unsigned)src_x >= (s->h_edge_pos>>1) - 9
1947            || (unsigned)src_y >= (s->v_edge_pos>>1) - 9){
1948             ff_emulated_edge_mc(s->edge_emu_buffer, ptr, uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
1949             ptr= s->edge_emu_buffer;
1950             emu=1;
1951         }
1952     }
1953     s->dsp.gmc1(dest_cb + (dest_offset>>1), ptr, uvlinesize, 8, motion_x&15, motion_y&15, 128 - s->no_rounding);
1954     
1955     ptr = ref_picture[2] + offset;
1956     if(emu){
1957         ff_emulated_edge_mc(s->edge_emu_buffer, ptr, uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
1958         ptr= s->edge_emu_buffer;
1959     }
1960     s->dsp.gmc1(dest_cr + (dest_offset>>1), ptr, uvlinesize, 8, motion_x&15, motion_y&15, 128 - s->no_rounding);
1961     
1962     return;
1963 }
1964
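/* general global motion compensation: the displacement is an affine function
   of the pixel position, described by s->sprite_offset and s->sprite_delta,
   and evaluated by the dsp.gmc() primitive. */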
1965 static inline void gmc_motion(MpegEncContext *s,
1966                                uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1967                                int dest_offset,
1968                                uint8_t **ref_picture, int src_offset)
1969 {
1970     uint8_t *ptr;
1971     int linesize, uvlinesize;
1972     const int a= s->sprite_warping_accuracy;
1973     int ox, oy;
1974
1975     linesize = s->linesize;
1976     uvlinesize = s->uvlinesize;
1977
1978     ptr = ref_picture[0] + src_offset;
1979
1980     dest_y+=dest_offset;
1981     
1982     ox= s->sprite_offset[0][0] + s->sprite_delta[0][0]*s->mb_x*16 + s->sprite_delta[0][1]*s->mb_y*16;
1983     oy= s->sprite_offset[0][1] + s->sprite_delta[1][0]*s->mb_x*16 + s->sprite_delta[1][1]*s->mb_y*16;
1984
1985     s->dsp.gmc(dest_y, ptr, linesize, 16,
1986            ox, 
1987            oy, 
1988            s->sprite_delta[0][0], s->sprite_delta[0][1],
1989            s->sprite_delta[1][0], s->sprite_delta[1][1], 
1990            a+1, (1<<(2*a+1)) - s->no_rounding,
1991            s->h_edge_pos, s->v_edge_pos);
1992     s->dsp.gmc(dest_y+8, ptr, linesize, 16,
1993            ox + s->sprite_delta[0][0]*8, 
1994            oy + s->sprite_delta[1][0]*8, 
1995            s->sprite_delta[0][0], s->sprite_delta[0][1],
1996            s->sprite_delta[1][0], s->sprite_delta[1][1], 
1997            a+1, (1<<(2*a+1)) - s->no_rounding,
1998            s->h_edge_pos, s->v_edge_pos);
1999
2000     if(s->flags&CODEC_FLAG_GRAY) return;
2001
2002
2003     dest_cb+=dest_offset>>1;
2004     dest_cr+=dest_offset>>1;
2005     
2006     ox= s->sprite_offset[1][0] + s->sprite_delta[0][0]*s->mb_x*8 + s->sprite_delta[0][1]*s->mb_y*8;
2007     oy= s->sprite_offset[1][1] + s->sprite_delta[1][0]*s->mb_x*8 + s->sprite_delta[1][1]*s->mb_y*8;
2008
2009     ptr = ref_picture[1] + (src_offset>>1);
2010     s->dsp.gmc(dest_cb, ptr, uvlinesize, 8,
2011            ox, 
2012            oy, 
2013            s->sprite_delta[0][0], s->sprite_delta[0][1],
2014            s->sprite_delta[1][0], s->sprite_delta[1][1], 
2015            a+1, (1<<(2*a+1)) - s->no_rounding,
2016            s->h_edge_pos>>1, s->v_edge_pos>>1);
2017     
2018     ptr = ref_picture[2] + (src_offset>>1);
2019     s->dsp.gmc(dest_cr, ptr, uvlinesize, 8,
2020            ox, 
2021            oy, 
2022            s->sprite_delta[0][0], s->sprite_delta[0][1],
2023            s->sprite_delta[1][0], s->sprite_delta[1][1], 
2024            a+1, (1<<(2*a+1)) - s->no_rounding,
2025            s->h_edge_pos>>1, s->v_edge_pos>>1);
2026 }
2027
2028 /**
2029  * Copies a rectangular area of samples to a temporary buffer and replicates the border samples.
2030  * @param buf destination buffer
2031  * @param src source buffer
2032  * @param linesize number of bytes between 2 vertically adjacent samples in both the source and destination buffers
2033  * @param block_w width of block
2034  * @param block_h height of block
2035  * @param src_x x coordinate of the top left sample of the block in the source buffer
2036  * @param src_y y coordinate of the top left sample of the block in the source buffer
2037  * @param w width of the source buffer
2038  * @param h height of the source buffer
2039  */
2040 void ff_emulated_edge_mc(uint8_t *buf, uint8_t *src, int linesize, int block_w, int block_h, 
2041                                     int src_x, int src_y, int w, int h){
2042     int x, y;
2043     int start_y, start_x, end_y, end_x;
2044
2045     if(src_y>= h){
2046         src+= (h-1-src_y)*linesize;
2047         src_y=h-1;
2048     }else if(src_y<=-block_h){
2049         src+= (1-block_h-src_y)*linesize;
2050         src_y=1-block_h;
2051     }
2052     if(src_x>= w){
2053         src+= (w-1-src_x);
2054         src_x=w-1;
2055     }else if(src_x<=-block_w){
2056         src+= (1-block_w-src_x);
2057         src_x=1-block_w;
2058     }
2059
2060     start_y= FFMAX(0, -src_y);
2061     start_x= FFMAX(0, -src_x);
2062     end_y= FFMIN(block_h, h-src_y);
2063     end_x= FFMIN(block_w, w-src_x);
2064
2065     // copy existing part
2066     for(y=start_y; y<end_y; y++){
2067         for(x=start_x; x<end_x; x++){
2068             buf[x + y*linesize]= src[x + y*linesize];
2069         }
2070     }
2071
2072     //top
2073     for(y=0; y<start_y; y++){
2074         for(x=start_x; x<end_x; x++){
2075             buf[x + y*linesize]= buf[x + start_y*linesize];
2076         }
2077     }
2078
2079     //bottom
2080     for(y=end_y; y<block_h; y++){
2081         for(x=start_x; x<end_x; x++){
2082             buf[x + y*linesize]= buf[x + (end_y-1)*linesize];
2083         }
2084     }
2085                                     
2086     for(y=0; y<block_h; y++){
2087        //left
2088         for(x=0; x<start_x; x++){
2089             buf[x + y*linesize]= buf[start_x + y*linesize];
2090         }
2091        
2092        //right
2093         for(x=end_x; x<block_w; x++){
2094             buf[x + y*linesize]= buf[end_x - 1 + y*linesize];
2095         }
2096     }
2097 }
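/* Illustrative use (not part of the original code): a motion compensation
   routine that may read outside the picture first copies the required area
   into s->edge_emu_buffer and then reads from there, e.g. roughly:
       ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->linesize, 17, 17,
                           src_x, src_y, s->h_edge_pos, s->v_edge_pos);
       ptr= s->edge_emu_buffer;
   as done by the callers above and below. */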
2098
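/* half-pel motion compensation of one block: dxy packs the two half-pel flags
   ((y&1)<<1 | (x&1)) and selects one of the four variants in pix_op; returns 1
   if the edge emulation buffer was used, so callers can reuse that information
   for the chroma planes. */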
2099 static inline int hpel_motion(MpegEncContext *s, 
2100                                   uint8_t *dest, uint8_t *src, 
2101                                   int src_x, int src_y,
2102                                   int width, int height, int stride,
2103                                   int h_edge_pos, int v_edge_pos,
2104                                   int w, int h, op_pixels_func *pix_op,
2105                                   int motion_x, int motion_y)
2106 {
2107     int dxy;
2108     int emu=0;
2109
2110     dxy = ((motion_y & 1) << 1) | (motion_x & 1);
2111     src_x += motion_x >> 1;
2112     src_y += motion_y >> 1;
2113                 
2114     /* WARNING: do not forget half pels */
2115     src_x = clip(src_x, -16, width); //FIXME unneeded for emu?
2116     if (src_x == width)
2117         dxy &= ~1;
2118     src_y = clip(src_y, -16, height);
2119     if (src_y == height)
2120         dxy &= ~2;
2121     src += src_y * stride + src_x;
2122
2123     if(s->unrestricted_mv && (s->flags&CODEC_FLAG_EMU_EDGE)){
2124         if(   (unsigned)src_x > h_edge_pos - (motion_x&1) - w
2125            || (unsigned)src_y > v_edge_pos - (motion_y&1) - h){
2126             ff_emulated_edge_mc(s->edge_emu_buffer, src, stride, w+1, h+1,
2127                              src_x, src_y, h_edge_pos, v_edge_pos);
2128             src= s->edge_emu_buffer;
2129             emu=1;
2130         }
2131     }
2132     pix_op[dxy](dest, src, stride, h);
2133     return emu;
2134 }
2135
2136 /* apply one mpeg motion vector to the three components */
2137 static inline void mpeg_motion(MpegEncContext *s,
2138                                uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2139                                int dest_offset,
2140                                uint8_t **ref_picture, int src_offset,
2141                                int field_based, op_pixels_func (*pix_op)[4],
2142                                int motion_x, int motion_y, int h)
2143 {
2144     uint8_t *ptr;
2145     int dxy, offset, mx, my, src_x, src_y, height, v_edge_pos, uvlinesize;
2146     int emu=0;
2147 #if 0    
2148 if(s->quarter_sample)
2149 {
2150     motion_x>>=1;
2151     motion_y>>=1;
2152 }
2153 #endif
2154
2155     height = s->height >> field_based;
2156     v_edge_pos = s->v_edge_pos >> field_based;
2157     uvlinesize = s->current_picture.linesize[1] << field_based;
2158
2159     emu= hpel_motion(s, 
2160                 dest_y + dest_offset, ref_picture[0] + src_offset,
2161                 s->mb_x * 16, s->mb_y * (16 >> field_based),
2162                 s->width, height, s->current_picture.linesize[0] << field_based,
2163                 s->h_edge_pos, v_edge_pos,
2164                 16, h, pix_op[0],
2165                 motion_x, motion_y);
2166
2167
2168     if(s->flags&CODEC_FLAG_GRAY) return;
2169
2170     if (s->out_format == FMT_H263) {
2171         dxy = 0;
2172         if ((motion_x & 3) != 0)
2173             dxy |= 1;
2174         if ((motion_y & 3) != 0)
2175             dxy |= 2;
2176         mx = motion_x >> 2;
2177         my = motion_y >> 2;
2178     } else {
2179         mx = motion_x / 2;
2180         my = motion_y / 2;
2181         dxy = ((my & 1) << 1) | (mx & 1);
2182         mx >>= 1;
2183         my >>= 1;
2184     }
2185     
2186     src_x = s->mb_x * 8 + mx;
2187     src_y = s->mb_y * (8 >> field_based) + my;
2188     src_x = clip(src_x, -8, s->width >> 1);
2189     if (src_x == (s->width >> 1))
2190         dxy &= ~1;
2191     src_y = clip(src_y, -8, height >> 1);
2192     if (src_y == (height >> 1))
2193         dxy &= ~2;
2194     offset = (src_y * uvlinesize) + src_x + (src_offset >> 1);
2195     ptr = ref_picture[1] + offset;
2196     if(emu){
2197         ff_emulated_edge_mc(s->edge_emu_buffer, ptr - (src_offset >> 1), s->uvlinesize, 9, 9+field_based, 
2198                          src_x, src_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
2199         ptr= s->edge_emu_buffer + (src_offset >> 1);
2200     }
2201     pix_op[1][dxy](dest_cb + (dest_offset >> 1), ptr, uvlinesize, h >> 1);
2202
2203     ptr = ref_picture[2] + offset;
2204     if(emu){
2205         ff_emulated_edge_mc(s->edge_emu_buffer, ptr - (src_offset >> 1), s->uvlinesize, 9, 9+field_based, 
2206                          src_x, src_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
2207         ptr= s->edge_emu_buffer + (src_offset >> 1);
2208     }
2209     pix_op[1][dxy](dest_cr + (dest_offset >> 1), ptr, uvlinesize, h >> 1);
2210 }
2211 //FIXME move to dsputil, avg variant, 16x16 version
2212 static inline void put_obmc(uint8_t *dst, uint8_t *src[5], int stride){
2213     int x;
2214     uint8_t * const top   = src[1];
2215     uint8_t * const left  = src[2];
2216     uint8_t * const mid   = src[0];
2217     uint8_t * const right = src[3];
2218     uint8_t * const bottom= src[4];
2219 #define OBMC_FILTER(x, t, l, m, r, b)\
2220     dst[x]= (t*top[x] + l*left[x] + m*mid[x] + r*right[x] + b*bottom[x] + 4)>>3
2221 #define OBMC_FILTER4(x, t, l, m, r, b)\
2222     OBMC_FILTER(x         , t, l, m, r, b);\
2223     OBMC_FILTER(x+1       , t, l, m, r, b);\
2224     OBMC_FILTER(x  +stride, t, l, m, r, b);\
2225     OBMC_FILTER(x+1+stride, t, l, m, r, b);
2226     
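    /* the five weights of every OBMC_FILTER() call sum to 8, so the >>3 together
       with the +4 bias yields a rounded weighted average of the overlapping
       mid/top/left/right/bottom predictions */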
2227     x=0;
2228     OBMC_FILTER (x  , 2, 2, 4, 0, 0);
2229     OBMC_FILTER (x+1, 2, 1, 5, 0, 0);
2230     OBMC_FILTER4(x+2, 2, 1, 5, 0, 0);
2231     OBMC_FILTER4(x+4, 2, 0, 5, 1, 0);
2232     OBMC_FILTER (x+6, 2, 0, 5, 1, 0);
2233     OBMC_FILTER (x+7, 2, 0, 4, 2, 0);
2234     x+= stride;
2235     OBMC_FILTER (x  , 1, 2, 5, 0, 0);
2236     OBMC_FILTER (x+1, 1, 2, 5, 0, 0);
2237     OBMC_FILTER (x+6, 1, 0, 5, 2, 0);
2238     OBMC_FILTER (x+7, 1, 0, 5, 2, 0);
2239     x+= stride;
2240     OBMC_FILTER4(x  , 1, 2, 5, 0, 0);
2241     OBMC_FILTER4(x+2, 1, 1, 6, 0, 0);
2242     OBMC_FILTER4(x+4, 1, 0, 6, 1, 0);
2243     OBMC_FILTER4(x+6, 1, 0, 5, 2, 0);
2244     x+= 2*stride;
2245     OBMC_FILTER4(x  , 0, 2, 5, 0, 1);
2246     OBMC_FILTER4(x+2, 0, 1, 6, 0, 1);
2247     OBMC_FILTER4(x+4, 0, 0, 6, 1, 1);
2248     OBMC_FILTER4(x+6, 0, 0, 5, 2, 1);
2249     x+= 2*stride;
2250     OBMC_FILTER (x  , 0, 2, 5, 0, 1);
2251     OBMC_FILTER (x+1, 0, 2, 5, 0, 1);
2252     OBMC_FILTER4(x+2, 0, 1, 5, 0, 2);
2253     OBMC_FILTER4(x+4, 0, 0, 5, 1, 2);
2254     OBMC_FILTER (x+6, 0, 0, 5, 2, 1);
2255     OBMC_FILTER (x+7, 0, 0, 5, 2, 1);
2256     x+= stride;
2257     OBMC_FILTER (x  , 0, 2, 4, 0, 2);
2258     OBMC_FILTER (x+1, 0, 1, 5, 0, 2);
2259     OBMC_FILTER (x+6, 0, 0, 5, 1, 2);
2260     OBMC_FILTER (x+7, 0, 0, 4, 2, 2);
2261 }
2262
2263 /* obmc for 1 8x8 luma block */
2264 static inline void obmc_motion(MpegEncContext *s,
2265                                uint8_t *dest, uint8_t *src,
2266                                int src_x, int src_y,
2267                                op_pixels_func *pix_op,
2268                                int16_t mv[5][2]/* mid top left right bottom*/)
2269 #define MID    0
2270 {
2271     int i;
2272     uint8_t *ptr[5];
2273     
2274     assert(s->quarter_sample==0);
2275     
2276     for(i=0; i<5; i++){
2277         if(i && mv[i][0]==mv[MID][0] && mv[i][1]==mv[MID][1]){
2278             ptr[i]= ptr[MID];
2279         }else{
2280             ptr[i]= s->edge_emu_buffer + 16 + 8*(i&1) + s->linesize*8*(i>>1);
2281             hpel_motion(s, ptr[i], src,
2282                         src_x, src_y,
2283                         s->width, s->height, s->linesize,
2284                         s->h_edge_pos, s->v_edge_pos,
2285                         8, 8, pix_op,
2286                         mv[i][0], mv[i][1]);
2287         }
2288     }
2289
2290     put_obmc(dest, ptr, s->linesize);                
2291 }
2292
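/* quarter-pel motion compensation of one macroblock: dxy packs the two 2 bit
   sub-pel phases ((y&3)<<2 | (x&3)) and selects one of the 16 qpel functions;
   chroma is still done with half-pel accuracy, with several workarounds for
   buggy encoders when deriving the chroma vector. */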
2293 static inline void qpel_motion(MpegEncContext *s,
2294                                uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2295                                int dest_offset,
2296                                uint8_t **ref_picture, int src_offset,
2297                                int field_based, op_pixels_func (*pix_op)[4],
2298                                qpel_mc_func (*qpix_op)[16],
2299                                int motion_x, int motion_y, int h)
2300 {
2301     uint8_t *ptr;
2302     int dxy, offset, mx, my, src_x, src_y, height, v_edge_pos, linesize, uvlinesize;
2303     int emu=0;
2304
2305     dxy = ((motion_y & 3) << 2) | (motion_x & 3);
2306     src_x = s->mb_x * 16 + (motion_x >> 2);
2307     src_y = s->mb_y * (16 >> field_based) + (motion_y >> 2);
2308
2309     height = s->height >> field_based;
2310     v_edge_pos = s->v_edge_pos >> field_based;
2311     src_x = clip(src_x, -16, s->width);
2312     if (src_x == s->width)
2313         dxy &= ~3;
2314     src_y = clip(src_y, -16, height);
2315     if (src_y == height)
2316         dxy &= ~12;
2317     linesize = s->linesize << field_based;
2318     uvlinesize = s->uvlinesize << field_based;
2319     ptr = ref_picture[0] + (src_y * linesize) + src_x + src_offset;
2320     dest_y += dest_offset;
2321 //printf("%d %d %d\n", src_x, src_y, dxy);
2322     
2323     if(s->flags&CODEC_FLAG_EMU_EDGE){
2324         if(   (unsigned)src_x > s->h_edge_pos - (motion_x&3) - 16 
2325            || (unsigned)src_y >    v_edge_pos - (motion_y&3) - h  ){
2326             ff_emulated_edge_mc(s->edge_emu_buffer, ptr - src_offset, s->linesize, 17, 17+field_based, 
2327                              src_x, src_y<<field_based, s->h_edge_pos, s->v_edge_pos);
2328             ptr= s->edge_emu_buffer + src_offset;
2329             emu=1;
2330         }
2331     }
2332     if(!field_based)
2333         qpix_op[0][dxy](dest_y, ptr, linesize);
2334     else{
2335         //damn interlaced mode
2336         //FIXME boundary mirroring is not exactly correct here
2337         qpix_op[1][dxy](dest_y  , ptr  , linesize);
2338         qpix_op[1][dxy](dest_y+8, ptr+8, linesize);
2339     }
2340
2341     if(s->flags&CODEC_FLAG_GRAY) return;
2342
2343     if(field_based){
2344         mx= motion_x/2;
2345         my= motion_y>>1;
2346     }else if(s->workaround_bugs&FF_BUG_QPEL_CHROMA2){
2347         static const int rtab[8]= {0,0,1,1,0,0,0,1};
2348         mx= (motion_x>>1) + rtab[motion_x&7];
2349         my= (motion_y>>1) + rtab[motion_y&7];
2350     }else if(s->workaround_bugs&FF_BUG_QPEL_CHROMA){
2351         mx= (motion_x>>1)|(motion_x&1);
2352         my= (motion_y>>1)|(motion_y&1);
2353     }else{
2354         mx= motion_x/2;
2355         my= motion_y/2;
2356     }
2357     mx= (mx>>1)|(mx&1);
2358     my= (my>>1)|(my&1);
2359
2360     dxy= (mx&1) | ((my&1)<<1);
2361     mx>>=1;
2362     my>>=1;
2363
2364     src_x = s->mb_x * 8 + mx;
2365     src_y = s->mb_y * (8 >> field_based) + my;
2366     src_x = clip(src_x, -8, s->width >> 1);
2367     if (src_x == (s->width >> 1))
2368         dxy &= ~1;
2369     src_y = clip(src_y, -8, height >> 1);
2370     if (src_y == (height >> 1))
2371         dxy &= ~2;
2372
2373     offset = (src_y * uvlinesize) + src_x + (src_offset >> 1);
2374     ptr = ref_picture[1] + offset;
2375     if(emu){
2376         ff_emulated_edge_mc(s->edge_emu_buffer, ptr - (src_offset >> 1), s->uvlinesize, 9, 9 + field_based, 
2377                          src_x, src_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
2378         ptr= s->edge_emu_buffer + (src_offset >> 1);
2379     }
2380     pix_op[1][dxy](dest_cb + (dest_offset >> 1), ptr,  uvlinesize, h >> 1);
2381     
2382     ptr = ref_picture[2] + offset;
2383     if(emu){
2384         ff_emulated_edge_mc(s->edge_emu_buffer, ptr - (src_offset >> 1), s->uvlinesize, 9, 9 + field_based, 
2385                          src_x, src_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
2386         ptr= s->edge_emu_buffer + (src_offset >> 1);
2387     }
2388     pix_op[1][dxy](dest_cr + (dest_offset >> 1), ptr,  uvlinesize, h >> 1);
2389 }
2390
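/* derives a chroma motion vector component via h263_chroma_roundtab:
   chroma_4mv_motion() below passes in the sum of the four luma components and
   gets back a single half-pel chroma component with the special H.263 rounding. */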
2391 inline int ff_h263_round_chroma(int x){
2392     if (x >= 0)
2393         return  (h263_chroma_roundtab[x & 0xf] + ((x >> 3) & ~1));
2394     else {
2395         x = -x;
2396         return -(h263_chroma_roundtab[x & 0xf] + ((x >> 3) & ~1));
2397     }
2398 }
2399
2400 /**
2401  * h263 chroma 4mv motion compensation.
2402  */
2403 static inline void chroma_4mv_motion(MpegEncContext *s,
2404                                      uint8_t *dest_cb, uint8_t *dest_cr,
2405                                      uint8_t **ref_picture,
2406                                      op_pixels_func *pix_op,
2407                                      int mx, int my){
2408     int dxy, emu=0, src_x, src_y, offset;
2409     uint8_t *ptr;
2410     
2411     /* In case of 8X8, we construct a single chroma motion vector
2412        with a special rounding */
2413     mx= ff_h263_round_chroma(mx);
2414     my= ff_h263_round_chroma(my);
2415     
2416     dxy = ((my & 1) << 1) | (mx & 1);
2417     mx >>= 1;
2418     my >>= 1;
2419
2420     src_x = s->mb_x * 8 + mx;
2421     src_y = s->mb_y * 8 + my;
2422     src_x = clip(src_x, -8, s->width/2);
2423     if (src_x == s->width/2)
2424         dxy &= ~1;
2425     src_y = clip(src_y, -8, s->height/2);
2426     if (src_y == s->height/2)
2427         dxy &= ~2;
2428     
2429     offset = (src_y * (s->uvlinesize)) + src_x;
2430     ptr = ref_picture[1] + offset;
2431     if(s->flags&CODEC_FLAG_EMU_EDGE){
2432         if(   (unsigned)src_x > (s->h_edge_pos>>1) - (dxy &1) - 8
2433            || (unsigned)src_y > (s->v_edge_pos>>1) - (dxy>>1) - 8){
2434             ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
2435             ptr= s->edge_emu_buffer;
2436             emu=1;
2437         }
2438     }
2439     pix_op[dxy](dest_cb, ptr, s->uvlinesize, 8);
2440
2441     ptr = ref_picture[2] + offset;
2442     if(emu){
2443         ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
2444         ptr= s->edge_emu_buffer;
2445     }
2446     pix_op[dxy](dest_cr, ptr, s->uvlinesize, 8);
2447 }
2448
2449 /**
2450  * motion compensation of a single macroblock
2451  * @param s context
2452  * @param dest_y luma destination pointer
2453  * @param dest_cb chroma cb/u destination pointer
2454  * @param dest_cr chroma cr/v destination pointer
2455  * @param dir direction (0->forward, 1->backward)
2456  * @param ref_picture array[3] of pointers to the 3 planes of the reference picture
2457  * @param pix_op halfpel motion compensation function (average or put normally)
2458  * @param qpix_op qpel motion compensation function (average or put normally)
2459  * the motion vectors are taken from s->mv and the MV type from s->mv_type
2460  */
2461 static inline void MPV_motion(MpegEncContext *s, 
2462                               uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2463                               int dir, uint8_t **ref_picture, 
2464                               op_pixels_func (*pix_op)[4], qpel_mc_func (*qpix_op)[16])
2465 {
2466     int dxy, mx, my, src_x, src_y, motion_x, motion_y;
2467     int mb_x, mb_y, i;
2468     uint8_t *ptr, *dest;
2469
2470     mb_x = s->mb_x;
2471     mb_y = s->mb_y;
2472
2473     if(s->obmc && s->pict_type != B_TYPE){
2474         int16_t mv_cache[4][4][2];
2475         const int xy= s->mb_x + s->mb_y*s->mb_stride;
2476         const int mot_stride= s->mb_width*2 + 2;
2477         const int mot_xy= 1 + mb_x*2 + (mb_y*2 + 1)*mot_stride;
2478
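        /* mv_cache is a 4x4 grid of vectors: the centre 2x2 holds the four 8x8
           vectors of the current MB, the top/left/right border comes from the
           neighbouring MBs where available (otherwise duplicated from the centre),
           and the bottom row is duplicated from the current MB's lower vectors. */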
2479         assert(!s->mb_skiped);
2480                 
2481         memcpy(mv_cache[1][1], s->current_picture.motion_val[0][mot_xy           ], sizeof(int16_t)*4);
2482         memcpy(mv_cache[2][1], s->current_picture.motion_val[0][mot_xy+mot_stride], sizeof(int16_t)*4);
2483         memcpy(mv_cache[3][1], s->current_picture.motion_val[0][mot_xy+mot_stride], sizeof(int16_t)*4);
2484
2485         if(mb_y==0 || IS_INTRA(s->current_picture.mb_type[xy-s->mb_stride])){
2486             memcpy(mv_cache[0][1], mv_cache[1][1], sizeof(int16_t)*4);
2487         }else{
2488             memcpy(mv_cache[0][1], s->current_picture.motion_val[0][mot_xy-mot_stride], sizeof(int16_t)*4);
2489         }
2490
2491         if(mb_x==0 || IS_INTRA(s->current_picture.mb_type[xy-1])){
2492             *(int32_t*)mv_cache[1][0]= *(int32_t*)mv_cache[1][1];
2493             *(int32_t*)mv_cache[2][0]= *(int32_t*)mv_cache[2][1];
2494         }else{
2495             *(int32_t*)mv_cache[1][0]= *(int32_t*)s->current_picture.motion_val[0][mot_xy-1];
2496             *(int32_t*)mv_cache[2][0]= *(int32_t*)s->current_picture.motion_val[0][mot_xy-1+mot_stride];
2497         }
2498
2499         if(mb_x+1>=s->mb_width || IS_INTRA(s->current_picture.mb_type[xy+1])){
2500             *(int32_t*)mv_cache[1][3]= *(int32_t*)mv_cache[1][2];
2501             *(int32_t*)mv_cache[2][3]= *(int32_t*)mv_cache[2][2];
2502         }else{
2503             *(int32_t*)mv_cache[1][3]= *(int32_t*)s->current_picture.motion_val[0][mot_xy+2];
2504             *(int32_t*)mv_cache[2][3]= *(int32_t*)s->current_picture.motion_val[0][mot_xy+2+mot_stride];
2505         }
2506         
2507         mx = 0;
2508         my = 0;
2509         for(i=0;i<4;i++) {
2510             const int x= (i&1)+1;
2511             const int y= (i>>1)+1;
2512             int16_t mv[5][2]= {
2513                 {mv_cache[y][x  ][0], mv_cache[y][x  ][1]},
2514                 {mv_cache[y-1][x][0], mv_cache[y-1][x][1]},
2515                 {mv_cache[y][x-1][0], mv_cache[y][x-1][1]},
2516                 {mv_cache[y][x+1][0], mv_cache[y][x+1][1]},
2517                 {mv_cache[y+1][x][0], mv_cache[y+1][x][1]}};
2518             //FIXME cleanup
2519             obmc_motion(s, dest_y + ((i & 1) * 8) + (i >> 1) * 8 * s->linesize,
2520                         ref_picture[0],
2521                         mb_x * 16 + (i & 1) * 8, mb_y * 16 + (i >>1) * 8,
2522                         pix_op[1],
2523                         mv);
2524
2525             mx += mv[0][0];
2526             my += mv[0][1];
2527         }
2528         if(!(s->flags&CODEC_FLAG_GRAY))
2529             chroma_4mv_motion(s, dest_cb, dest_cr, ref_picture, pix_op[1], mx, my);
2530
2531         return;
2532     }
2533    
2534     switch(s->mv_type) {
2535     case MV_TYPE_16X16:
2536 #ifdef CONFIG_RISKY
2537         if(s->mcsel){
2538             if(s->real_sprite_warping_points==1){
2539                 gmc1_motion(s, dest_y, dest_cb, dest_cr, 0,
2540                             ref_picture, 0);
2541             }else{
2542                 gmc_motion(s, dest_y, dest_cb, dest_cr, 0,
2543                             ref_picture, 0);
2544             }
2545         }else if(s->quarter_sample){
2546             qpel_motion(s, dest_y, dest_cb, dest_cr, 0,
2547                         ref_picture, 0,
2548                         0, pix_op, qpix_op,
2549                         s->mv[dir][0][0], s->mv[dir][0][1], 16);
2550         }else if(s->mspel){
2551             ff_mspel_motion(s, dest_y, dest_cb, dest_cr,
2552                         ref_picture, pix_op,
2553                         s->mv[dir][0][0], s->mv[dir][0][1], 16);
2554         }else
2555 #endif
2556         {
2557             mpeg_motion(s, dest_y, dest_cb, dest_cr, 0,
2558                         ref_picture, 0,
2559                         0, pix_op,
2560                         s->mv[dir][0][0], s->mv[dir][0][1], 16);
2561         }           
2562         break;
2563     case MV_TYPE_8X8:
2564         mx = 0;
2565         my = 0;
2566         if(s->quarter_sample){
2567             for(i=0;i<4;i++) {
2568                 motion_x = s->mv[dir][i][0];
2569                 motion_y = s->mv[dir][i][1];
2570
2571                 dxy = ((motion_y & 3) << 2) | (motion_x & 3);
2572                 src_x = mb_x * 16 + (motion_x >> 2) + (i & 1) * 8;
2573                 src_y = mb_y * 16 + (motion_y >> 2) + (i >>1) * 8;
2574                     
2575                 /* WARNING: do not forget half pels */
2576                 src_x = clip(src_x, -16, s->width);
2577                 if (src_x == s->width)
2578                     dxy &= ~3;
2579                 src_y = clip(src_y, -16, s->height);
2580                 if (src_y == s->height)
2581                     dxy &= ~12;
2582                     
2583                 ptr = ref_picture[0] + (src_y * s->linesize) + (src_x);
2584                 if(s->flags&CODEC_FLAG_EMU_EDGE){
2585                     if(   (unsigned)src_x > s->h_edge_pos - (motion_x&3) - 8 
2586                        || (unsigned)src_y > s->v_edge_pos - (motion_y&3) - 8 ){
2587                         ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->linesize, 9, 9, src_x, src_y, s->h_edge_pos, s->v_edge_pos);
2588                         ptr= s->edge_emu_buffer;
2589                     }
2590                 }
2591                 dest = dest_y + ((i & 1) * 8) + (i >> 1) * 8 * s->linesize;
2592                 qpix_op[1][dxy](dest, ptr, s->linesize);
2593
2594                 mx += s->mv[dir][i][0]/2;
2595                 my += s->mv[dir][i][1]/2;
2596             }
2597         }else{
2598             for(i=0;i<4;i++) {
2599                 hpel_motion(s, dest_y + ((i & 1) * 8) + (i >> 1) * 8 * s->linesize,
2600                             ref_picture[0],
2601                             mb_x * 16 + (i & 1) * 8, mb_y * 16 + (i >>1) * 8,
2602                             s->width, s->height, s->linesize,
2603                             s->h_edge_pos, s->v_edge_pos,
2604                             8, 8, pix_op[1],
2605                             s->mv[dir][i][0], s->mv[dir][i][1]);
2606
2607                 mx += s->mv[dir][i][0];
2608                 my += s->mv[dir][i][1];
2609             }
2610         }
2611
2612         if(!(s->flags&CODEC_FLAG_GRAY))
2613             chroma_4mv_motion(s, dest_cb, dest_cr, ref_picture, pix_op[1], mx, my);
2614         break;
2615     case MV_TYPE_FIELD:
2616         if (s->picture_structure == PICT_FRAME) {
2617             if(s->quarter_sample){
2618                 /* top field */
2619                 qpel_motion(s, dest_y, dest_cb, dest_cr, 0,
2620                             ref_picture, s->field_select[dir][0] ? s->linesize : 0,
2621                             1, pix_op, qpix_op,
2622                             s->mv[dir][0][0], s->mv[dir][0][1], 8);
2623                 /* bottom field */
2624                 qpel_motion(s, dest_y, dest_cb, dest_cr, s->linesize,
2625                             ref_picture, s->field_select[dir][1] ? s->linesize : 0,
2626                             1, pix_op, qpix_op,
2627                             s->mv[dir][1][0], s->mv[dir][1][1], 8);
2628             }else{
2629                 /* top field */       
2630                 mpeg_motion(s, dest_y, dest_cb, dest_cr, 0,
2631                             ref_picture, s->field_select[dir][0] ? s->linesize : 0,
2632                             1, pix_op,
2633                             s->mv[dir][0][0], s->mv[dir][0][1], 8);
2634                 /* bottom field */
2635                 mpeg_motion(s, dest_y, dest_cb, dest_cr, s->linesize,
2636                             ref_picture, s->field_select[dir][1] ? s->linesize : 0,
2637                             1, pix_op,
2638                             s->mv[dir][1][0], s->mv[dir][1][1], 8);
2639             }
2640         } else {
2641             int offset;
2642             if(s->picture_structure == s->field_select[dir][0] + 1 || s->pict_type == B_TYPE || s->first_field){
2643                 offset= s->field_select[dir][0] ? s->linesize : 0;
2644             }else{
2645                 ref_picture= s->current_picture.data;
2646                 offset= s->field_select[dir][0] ? s->linesize : -s->linesize; 
2647             } 
2648
2649             mpeg_motion(s, dest_y, dest_cb, dest_cr, 0,
2650                         ref_picture, offset,
2651                         0, pix_op,
2652                         s->mv[dir][0][0], s->mv[dir][0][1], 16);
2653         }
2654         break;
2655     case MV_TYPE_16X8:{
2656         int offset;
2657          uint8_t ** ref2picture;
2658
2659             if(s->picture_structure == s->field_select[dir][0] + 1 || s->pict_type == B_TYPE || s->first_field){
2660                 ref2picture= ref_picture;
2661                 offset= s->field_select[dir][0] ? s->linesize : 0;
2662             }else{
2663                 ref2picture= s->current_picture.data;
2664                 offset= s->field_select[dir][0] ? s->linesize : -s->linesize; 
2665             } 
2666
2667             mpeg_motion(s, dest_y, dest_cb, dest_cr, 0,
2668                         ref2picture, offset,
2669                         0, pix_op,
2670                         s->mv[dir][0][0], s->mv[dir][0][1], 8);
2671
2672
2673             if(s->picture_structure == s->field_select[dir][1] + 1 || s->pict_type == B_TYPE || s->first_field){
2674                 ref2picture= ref_picture;
2675                 offset= s->field_select[dir][1] ? s->linesize : 0;
2676             }else{
2677                 ref2picture= s->current_picture.data;
2678                 offset= s->field_select[dir][1] ? s->linesize : -s->linesize; 
2679             } 
2680             // I know it is ugly, but this is the only way to fool emu_edge without rewriting mpeg_motion
2681             mpeg_motion(s, dest_y+16*s->linesize, dest_cb+8*s->uvlinesize, dest_cr+8*s->uvlinesize,
2682                         0,
2683                         ref2picture, offset,
2684                         0, pix_op,
2685                         s->mv[dir][1][0], s->mv[dir][1][1]+16, 8);
2686         }
2687         
2688         break;
2689     case MV_TYPE_DMV:
2690     {
2691     op_pixels_func (*dmv_pix_op)[4];
2692     int offset;
2693
2694         dmv_pix_op = s->dsp.put_pixels_tab;
2695
2696         if(s->picture_structure == PICT_FRAME){
2697             //put top field from top field
2698             mpeg_motion(s, dest_y, dest_cb, dest_cr, 0,
2699                         ref_picture, 0,
2700                         1, dmv_pix_op,
2701                         s->mv[dir][0][0], s->mv[dir][0][1], 8);
2702             //put bottom field from bottom field
2703             mpeg_motion(s, dest_y, dest_cb, dest_cr, s->linesize,
2704                         ref_picture, s->linesize,
2705                         1, dmv_pix_op,
2706                         s->mv[dir][0][0], s->mv[dir][0][1], 8);
2707
2708             dmv_pix_op = s->dsp.avg_pixels_tab; 
2709         
2710             //avg top field from bottom field
2711             mpeg_motion(s, dest_y, dest_cb, dest_cr, 0,
2712                         ref_picture, s->linesize,
2713                         1, dmv_pix_op,
2714                         s->mv[dir][2][0], s->mv[dir][2][1], 8);
2715             //avg bottom field from top field
2716             mpeg_motion(s, dest_y, dest_cb, dest_cr, s->linesize,
2717                         ref_picture, 0,
2718                         1, dmv_pix_op,
2719                         s->mv[dir][3][0], s->mv[dir][3][1], 8);
2720
2721         }else{
2722             offset=(s->picture_structure == PICT_BOTTOM_FIELD)? 
2723                          s->linesize : 0;
2724
2725             //put field from the same parity
2726             //same parity is never in the same frame
2727             mpeg_motion(s, dest_y, dest_cb, dest_cr, 0,
2728                         ref_picture,offset,
2729                         0,dmv_pix_op,
2730                         s->mv[dir][0][0],s->mv[dir][0][1],16);
2731
2732             // after put we make avg of the same block
2733             dmv_pix_op=s->dsp.avg_pixels_tab; 
2734
2735             //opposite parity is always in the same frame if this is second field
2736             if(!s->first_field){
2737                 ref_picture = s->current_picture.data;    
2738                 //top field is one linesize from the frame beginning
2739                 offset=(s->picture_structure == PICT_BOTTOM_FIELD)? 
2740                         -s->linesize : s->linesize;
2741             }else 
2742                 offset=(s->picture_structure == PICT_BOTTOM_FIELD)? 
2743                         0 : s->linesize;
2744
2745             //avg field from the opposite parity
2746             mpeg_motion(s, dest_y, dest_cb, dest_cr,0,
2747                         ref_picture, offset,
2748                         0,dmv_pix_op,
2749                         s->mv[dir][2][0],s->mv[dir][2][1],16);
2750         }
2751     }
2752     break;
2753     default: assert(0);
2754     }
2755 }
2756
2757
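/* Dequantization + IDCT helpers: intra blocks go through dct_unquantize_intra
   and idct_put (they replace the destination), while inter blocks with coded
   coefficients go through dct_unquantize_inter and idct_add (they are added on
   top of the motion compensated prediction). */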
2758 /* put block[] to dest[] */
2759 static inline void put_dct(MpegEncContext *s, 
2760                            DCTELEM *block, int i, uint8_t *dest, int line_size, int qscale)
2761 {
2762     s->dct_unquantize_intra(s, block, i, qscale);
2763     s->dsp.idct_put (dest, line_size, block);
2764 }
2765
2766 /* add block[] to dest[] */
2767 static inline void add_dct(MpegEncContext *s, 
2768                            DCTELEM *block, int i, uint8_t *dest, int line_size)
2769 {
2770     if (s->block_last_index[i] >= 0) {
2771         s->dsp.idct_add (dest, line_size, block);
2772     }
2773 }
2774
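/* dequantize an inter block and add the IDCT result to dest[]
   (skipped entirely if the block has no coded coefficients) */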
2775 static inline void add_dequant_dct(MpegEncContext *s, 
2776                            DCTELEM *block, int i, uint8_t *dest, int line_size, int qscale)
2777 {
2778     if (s->block_last_index[i] >= 0) {
2779         s->dct_unquantize_inter(s, block, i, qscale);
2780
2781         s->dsp.idct_add (dest, line_size, block);
2782     }
2783 }
2784
2785 /**
2786  * cleans dc, ac, coded_block for the current non intra MB
2787  */
2788 void ff_clean_intra_table_entries(MpegEncContext *s)
2789 {
2790     int wrap = s->block_wrap[0];
2791     int xy = s->block_index[0];
2792     
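    /* reset the 4 luma DC predictors to 1024, the standard DC prediction reset value */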
2793     s->dc_val[0][xy           ] = 
2794     s->dc_val[0][xy + 1       ] = 
2795     s->dc_val[0][xy     + wrap] =
2796     s->dc_val[0][xy + 1 + wrap] = 1024;
2797     /* ac pred */
2798     memset(s->ac_val[0][xy       ], 0, 32 * sizeof(int16_t));
2799     memset(s->ac_val[0][xy + wrap], 0, 32 * sizeof(int16_t));
2800     if (s->msmpeg4_version>=3) {
2801         s->coded_block[xy           ] =
2802         s->coded_block[xy + 1       ] =
2803         s->coded_block[xy     + wrap] =
2804         s->coded_block[xy + 1 + wrap] = 0;
2805     }
2806     /* chroma */
2807     wrap = s->block_wrap[4];
2808     xy = s->mb_x + 1 + (s->mb_y + 1) * wrap;
2809     s->dc_val[1][xy] =
2810     s->dc_val[2][xy] = 1024;
2811     /* ac pred */
2812     memset(s->ac_val[1][xy], 0, 16 * sizeof(int16_t));
2813     memset(s->ac_val[2][xy], 0, 16 * sizeof(int16_t));
2814     
2815     s->mbintra_table[s->mb_x + s->mb_y*s->mb_stride]= 0;
2816 }
2817
2818 /* generic function called after a macroblock has been parsed by the
2819    decoder or after it has been encoded by the encoder.
2820
2821    Important variables used:
2822    s->mb_intra : true if intra macroblock
2823    s->mv_dir   : motion vector direction
2824    s->mv_type  : motion vector type
2825    s->mv       : motion vector
2826    s->interlaced_dct : true if interlaced dct used (mpeg2)
2827  */
2828 void MPV_decode_mb(MpegEncContext *s, DCTELEM block[6][64])
2829 {
2830     int mb_x, mb_y;
2831     const int mb_xy = s->mb_y * s->mb_stride + s->mb_x;
2832 #ifdef HAVE_XVMC
2833     if(s->avctx->xvmc_acceleration){
2834         XVMC_decode_mb(s);//xvmc uses pblocks
2835         return;
2836     }
2837 #endif
2838
2839     mb_x = s->mb_x;
2840     mb_y = s->mb_y;
2841
2842     s->current_picture.qscale_table[mb_xy]= s->qscale;
2843
2844     /* update DC predictors for P macroblocks */
2845     if (!s->mb_intra) {
2846         if (s->h263_pred || s->h263_aic) {
2847             if(s->mbintra_table[mb_xy])
2848                 ff_clean_intra_table_entries(s);
2849         } else {
2850             s->last_dc[0] =
2851             s->last_dc[1] =
2852             s->last_dc[2] = 128 << s->intra_dc_precision;
2853         }
2854     }
2855     else if (s->h263_pred || s->h263_aic)
2856         s->mbintra_table[mb_xy]=1;
2857
2858     if ((s->flags&CODEC_FLAG_PSNR) || !(s->encoding && (s->intra_only || s->pict_type==B_TYPE))) { //FIXME precalc
2859         uint8_t *dest_y, *dest_cb, *dest_cr;
2860         int dct_linesize, dct_offset;
2861         op_pixels_func (*op_pix)[4];
2862         qpel_mc_func (*op_qpix)[16];
2863         const int linesize= s->current_picture.linesize[0]; //not s->linesize as this would be wrong for field pics
2864         const int uvlinesize= s->current_picture.linesize[1];
2865         const int readable= s->pict_type != B_TYPE || s->encoding || s->avctx->draw_horiz_band;
2866
2867         /* avoid copy if macroblock skipped in last frame too */
2868         /* skip only during decoding as we might trash the buffers a bit during encoding */
2869         if(!s->encoding){
2870             uint8_t *mbskip_ptr = &s->mbskip_table[mb_xy];
2871             const int age= s->current_picture.age;
2872
2873             assert(age);
2874
2875             if (s->mb_skiped) {
2876                 s->mb_skiped= 0;
2877                 assert(s->pict_type!=I_TYPE);
2878  
2879                 (*mbskip_ptr) ++; /* indicate that this time we skipped it */
2880                 if(*mbskip_ptr >99) *mbskip_ptr= 99;
2881
2882                 /* if this MB was skipped in the previous frame too, then there is nothing to do */
2883                 if (*mbskip_ptr >= age && s->current_picture.reference){
2884                     return;
2885                 }
2886             } else if(!s->current_picture.reference){
2887                 (*mbskip_ptr) ++; /* increase counter so the age can be compared cleanly */
2888                 if(*mbskip_ptr >99) *mbskip_ptr= 99;
2889             } else{
2890                 *mbskip_ptr = 0; /* not skipped */
2891             }
2892         }
2893
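        /* with interlaced DCT the blocks cover alternating field lines, so the
           IDCT output is written with twice the line stride and the lower two
           luma blocks start only one line below the upper ones */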
2894         if (s->interlaced_dct) {
2895             dct_linesize = linesize * 2;
2896             dct_offset = linesize;
2897         } else {
2898             dct_linesize = linesize;
2899             dct_offset = linesize * 8;
2900         }
2901         if(readable){
2902             dest_y=  s->dest[0];
2903             dest_cb= s->dest[1];
2904             dest_cr= s->dest[2];
2905         }else{
2906             dest_y = s->edge_emu_buffer+32; //FIXME cleanup scratchpad pointers
2907             dest_cb= s->edge_emu_buffer+48;
2908             dest_cr= s->edge_emu_buffer+56;
2909         }
2910         if (!s->mb_intra) {
2911             /* motion handling */
2912             /* decoding or more than one mb_type (MC was already done otherwise) */
2913             if(!s->encoding){
2914                 if ((!s->no_rounding) || s->pict_type==B_TYPE){                
2915                     op_pix = s->dsp.put_pixels_tab;
2916                     op_qpix= s->dsp.put_qpel_pixels_tab;
2917                 }else{
2918                     op_pix = s->dsp.put_no_rnd_pixels_tab;
2919                     op_qpix= s->dsp.put_no_rnd_qpel_pixels_tab;
2920                 }
2921
2922                 if (s->mv_dir & MV_DIR_FORWARD) {
2923                     MPV_motion(s, dest_y, dest_cb, dest_cr, 0, s->last_picture.data, op_pix, op_qpix);
2924                     op_pix = s->dsp.avg_pixels_tab;
2925                     op_qpix= s->dsp.avg_qpel_pixels_tab;
2926                 }
2927                 if (s->mv_dir & MV_DIR_BACKWARD) {
2928                     MPV_motion(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.data, op_pix, op_qpix);
2929                 }
2930             }
2931
2932             /* skip dequant / idct if we are really late ;) */
2933             if(s->hurry_up>1) return;
2934
2935             /* add dct residue */
2936             if(s->encoding || !(   s->h263_msmpeg4 || s->codec_id==CODEC_ID_MPEG1VIDEO || s->codec_id==CODEC_ID_MPEG2VIDEO
2937                                 || (s->codec_id==CODEC_ID_MPEG4 && !s->mpeg_quant))){
2938                 add_dequant_dct(s, block[0], 0, dest_y, dct_linesize, s->qscale);
2939                 add_dequant_dct(s, block[1], 1, dest_y + 8, dct_linesize, s->qscale);
2940                 add_dequant_dct(s, block[2], 2, dest_y + dct_offset, dct_linesize, s->qscale);
2941                 add_dequant_dct(s, block[3], 3, dest_y + dct_offset + 8, dct_linesize, s->qscale);
2942
2943                 if(!(s->flags&CODEC_FLAG_GRAY)){
2944                     add_dequant_dct(s, block[4], 4, dest_cb, uvlinesize, s->chroma_qscale);
2945                     add_dequant_dct(s, block[5], 5, dest_cr, uvlinesize, s->chroma_qscale);
2946                 }
2947             } else if(s->codec_id != CODEC_ID_WMV2){
2948                 add_dct(s, block[0], 0, dest_y, dct_linesize);
2949                 add_dct(s, block[1], 1, dest_y + 8, dct_linesize);
2950                 add_dct(s, block[2], 2, dest_y + dct_offset, dct_linesize);
2951                 add_dct(s, block[3], 3, dest_y + dct_offset + 8, dct_linesize);
2952
2953                 if(!(s->flags&CODEC_FLAG_GRAY)){
2954                     add_dct(s, block[4], 4, dest_cb, uvlinesize);
2955                     add_dct(s, block[5], 5, dest_cr, uvlinesize);
2956                 }
2957             } 
2958 #ifdef CONFIG_RISKY
2959             else{
2960                 ff_wmv2_add_mb(s, block, dest_y, dest_cb, dest_cr);
2961             }
2962 #endif
2963         } else {
2964             /* dct only in intra block */
2965             if(s->encoding || !(s->codec_id==CODEC_ID_MPEG1VIDEO || s->codec_id==CODEC_ID_MPEG2VIDEO)){
2966                 put_dct(s, block[0], 0, dest_y, dct_linesize, s->qscale);
2967                 put_dct(s, block[1], 1, dest_y + 8, dct_linesize, s->qscale);
2968                 put_dct(s, block[2], 2, dest_y + dct_offset, dct_linesize, s->qscale);
2969                 put_dct(s, block[3], 3, dest_y + dct_offset + 8, dct_linesize, s->qscale);
2970
2971                 if(!(s->flags&CODEC_FLAG_GRAY)){
2972                     put_dct(s, block[4], 4, dest_cb, uvlinesize, s->chroma_qscale);
2973                     put_dct(s, block[5], 5, dest_cr, uvlinesize, s->chroma_qscale);
2974                 }
2975             }else{
2976                 s->dsp.idct_put(dest_y                 , dct_linesize, block[0]);
2977                 s->dsp.idct_put(dest_y              + 8, dct_linesize, block[1]);
2978                 s->dsp.idct_put(dest_y + dct_offset    , dct_linesize, block[2]);
2979                 s->dsp.idct_put(dest_y + dct_offset + 8, dct_linesize, block[3]);
2980
2981                 if(!(s->flags&CODEC_FLAG_GRAY)){
2982                     s->dsp.idct_put(dest_cb, uvlinesize, block[4]);
2983                     s->dsp.idct_put(dest_cr, uvlinesize, block[5]);
2984                 }
2985             }
2986         }
2987         if(!readable){
2988             s->dsp.put_pixels_tab[0][0](s->dest[0], dest_y ,   linesize,16);
2989             s->dsp.put_pixels_tab[1][0](s->dest[1], dest_cb, uvlinesize, 8);
2990             s->dsp.put_pixels_tab[1][0](s->dest[2], dest_cr, uvlinesize, 8);
2991         }
2992     }
2993 }
2994
2995 #ifdef CONFIG_ENCODERS
2996
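/* zero the whole block if it only contains a few scattered +-1 coefficients
   whose position-weighted score (tab[]) stays below the threshold; a negative
   threshold additionally allows the DC coefficient to be eliminated */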
2997 static inline void dct_single_coeff_elimination(MpegEncContext *s, int n, int threshold)
2998 {
2999     static const char tab[64]=
3000         {3,2,2,1,1,1,1,1,
3001          1,1,1,1,1,1,1,1,
3002          1,1,1,1,1,1,1,1,
3003          0,0,0,0,0,0,0,0,
3004          0,0,0,0,0,0,0,0,
3005          0,0,0,0,0,0,0,0,
3006          0,0,0,0,0,0,0,0,
3007          0,0,0,0,0,0,0,0};
3008     int score=0;
3009     int run=0;
3010     int i;
3011     DCTELEM *block= s->block[n];
3012     const int last_index= s->block_last_index[n];
3013     int skip_dc;
3014
3015     if(threshold<0){
3016         skip_dc=0;
3017         threshold= -threshold;
3018     }else
3019         skip_dc=1;
3020
3021     /* are all coefficients which we could set to zero already zero? */
3022     if(last_index<=skip_dc - 1) return;
3023
3024     for(i=0; i<=last_index; i++){
3025         const int j = s->intra_scantable.permutated[i];
3026         const int level = ABS(block[j]);
3027         if(level==1){
3028             if(skip_dc && i==0) continue;
3029             score+= tab[run];
3030             run=0;
3031         }else if(level>1){
3032             return;
3033         }else{
3034             run++;
3035         }
3036     }
3037     if(score >= threshold) return;
3038     for(i=skip_dc; i<=last_index; i++){
3039         const int j = s->intra_scantable.permutated[i];
3040         block[j]=0;
3041     }
3042     if(block[0]) s->block_last_index[n]= 0;
3043     else         s->block_last_index[n]= -1;
3044 }
3045
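/* clamp quantized coefficients to the [min_qcoeff, max_qcoeff] range that the
   entropy coder can represent; the intra DC coefficient is never clipped */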
3046 static inline void clip_coeffs(MpegEncContext *s, DCTELEM *block, int last_index)
3047 {
3048     int i;
3049     const int maxlevel= s->max_qcoeff;
3050     const int minlevel= s->min_qcoeff;
3051     
3052     if(s->mb_intra){
3053         i=1; //skip clipping of intra dc
3054     }else
3055         i=0;
3056     
3057     for(;i<=last_index; i++){
3058         const int j= s->intra_scantable.permutated[i];
3059         int level = block[j];
3060        
3061         if     (level>maxlevel) level=maxlevel;
3062         else if(level<minlevel) level=minlevel;
3063
3064         block[j]= level;
3065     }
3066 }
3067
3068 #if 0
3069 static int pix_vcmp16x8(uint8_t *s, int stride){ //FIXME move to dsputil & optimize
3070     int score=0;
3071     int x,y;
3072     
3073     for(y=0; y<7; y++){
3074         for(x=0; x<16; x+=4){
3075             score+= ABS(s[x  ] - s[x  +stride]) + ABS(s[x+1] - s[x+1+stride]) 
3076                    +ABS(s[x+2] - s[x+2+stride]) + ABS(s[x+3] - s[x+3+stride]);
3077         }
3078         s+= stride;
3079     }
3080     
3081     return score;
3082 }
3083
3084 static int pix_diff_vcmp16x8(uint8_t *s1, uint8_t*s2, int stride){ //FIXME move to dsputil & optimize
3085     int score=0;
3086     int x,y;
3087     
3088     for(y=0; y<7; y++){
3089         for(x=0; x<16; x++){
3090             score+= ABS(s1[x  ] - s2[x ] - s1[x  +stride] + s2[x +stride]);
3091         }
3092         s1+= stride;
3093         s2+= stride;
3094     }
3095     
3096     return score;
3097 }
3098 #else
3099 #define SQ(a) ((a)*(a))
3100
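/* sum of squared differences between vertically adjacent lines of a 16x8 area;
   used below to decide between progressive and interlaced DCT */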
3101 static int pix_vcmp16x8(uint8_t *s, int stride){ //FIXME move to dsputil & optimize
3102     int score=0;
3103     int x,y;
3104     
3105     for(y=0; y<7; y++){
3106         for(x=0; x<16; x+=4){
3107             score+= SQ(s[x  ] - s[x  +stride]) + SQ(s[x+1] - s[x+1+stride]) 
3108                    +SQ(s[x+2] - s[x+2+stride]) + SQ(s[x+3] - s[x+3+stride]);
3109         }
3110         s+= stride;
3111     }
3112     
3113     return score;
3114 }
3115
3116 static int pix_diff_vcmp16x8(uint8_t *s1, uint8_t*s2, int stride){ //FIXME move to dsputil & optimize
3117     int score=0;
3118     int x,y;
3119     
3120     for(y=0; y<7; y++){
3121         for(x=0; x<16; x++){
3122             score+= SQ(s1[x  ] - s2[x ] - s1[x  +stride] + s2[x +stride]);
3123         }
3124         s1+= stride;
3125         s2+= stride;
3126     }
3127     
3128     return score;
3129 }
3130
3131 #endif
3132
3133 #endif //CONFIG_ENCODERS
3134
3135 /**
3136  *
3137  * @param h is the normal height; it will be reduced automatically if needed for the last row
3138  */
3139 void ff_draw_horiz_band(MpegEncContext *s, int y, int h){
3140     if (s->avctx->draw_horiz_band) {
3141         AVFrame *src;
3142         int offset[4];
3143         
3144         if(s->picture_structure != PICT_FRAME){
3145             h <<= 1;
3146             y <<= 1;
3147             if(s->first_field  && !(s->avctx->slice_flags&SLICE_FLAG_ALLOW_FIELD)) return;
3148         }
3149
3150         h= FFMIN(h, s->height - y);
3151
3152         if(s->pict_type==B_TYPE || s->low_delay || (s->avctx->slice_flags&SLICE_FLAG_CODED_ORDER)) 
3153             src= (AVFrame*)s->current_picture_ptr;
3154         else if(s->last_picture_ptr)
3155             src= (AVFrame*)s->last_picture_ptr;
3156         else
3157             return;
3158             
3159         if(s->pict_type==B_TYPE && s->picture_structure == PICT_FRAME && s->out_format != FMT_H264){
3160             offset[0]=
3161             offset[1]=
3162             offset[2]=
3163             offset[3]= 0;
3164         }else{
3165             offset[0]= y * s->linesize;
3166             offset[1]= 
3167             offset[2]= (y>>1) * s->uvlinesize;
3168             offset[3]= 0;
3169         }
3170
3171         emms_c();
3172
3173         s->avctx->draw_horiz_band(s->avctx, src, offset,
3174                                   y, s->picture_structure, h);
3175     }
3176 }
3177
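/* compute block_index[] (indices into the dc/ac/mv prediction arrays) and the
   dest[] picture pointers for the current macroblock; dest[] is set one
   macroblock to the left because ff_update_block_index() advances it before use */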
3178 void ff_init_block_index(MpegEncContext *s){ //FIXME maybe rename
3179     const int linesize= s->current_picture.linesize[0]; //not s->linesize as this would be wrong for field pics
3180     const int uvlinesize= s->current_picture.linesize[1];
3181         
3182     s->block_index[0]= s->block_wrap[0]*(s->mb_y*2 + 1) - 1 + s->mb_x*2;
3183     s->block_index[1]= s->block_wrap[0]*(s->mb_y*2 + 1)     + s->mb_x*2;
3184     s->block_index[2]= s->block_wrap[0]*(s->mb_y*2 + 2) - 1 + s->mb_x*2;
3185     s->block_index[3]= s->block_wrap[0]*(s->mb_y*2 + 2)     + s->mb_x*2;
3186     s->block_index[4]= s->block_wrap[4]*(s->mb_y + 1)                    + s->block_wrap[0]*(s->mb_height*2 + 2) + s->mb_x;
3187     s->block_index[5]= s->block_wrap[4]*(s->mb_y + 1 + s->mb_height + 2) + s->block_wrap[0]*(s->mb_height*2 + 2) + s->mb_x;
3188     
3189     if(s->pict_type==B_TYPE && s->avctx->draw_horiz_band && s->picture_structure==PICT_FRAME){
3190         s->dest[0] = s->current_picture.data[0] + s->mb_x * 16 - 16;
3191         s->dest[1] = s->current_picture.data[1] + s->mb_x * 8 - 8;
3192         s->dest[2] = s->current_picture.data[2] + s->mb_x * 8 - 8;
3193     }else{
3194         s->dest[0] = s->current_picture.data[0] + (s->mb_y * 16* linesize  ) + s->mb_x * 16 - 16;
3195         s->dest[1] = s->current_picture.data[1] + (s->mb_y * 8 * uvlinesize) + s->mb_x * 8 - 8;
3196         s->dest[2] = s->current_picture.data[2] + (s->mb_y * 8 * uvlinesize) + s->mb_x * 8 - 8;
3197     }    
3198 }
3199
3200 #ifdef CONFIG_ENCODERS
3201
3202 static void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
3203 {
3204     const int mb_x= s->mb_x;
3205     const int mb_y= s->mb_y;
3206     int i;
3207     int skip_dct[6];
3208     int dct_offset   = s->linesize*8; //default for progressive frames
3209     
3210     for(i=0; i<6; i++) skip_dct[i]=0;
3211     
3212     if(s->adaptive_quant){
3213         const int last_qp= s->qscale;
3214         const int mb_xy= mb_x + mb_y*s->mb_stride;
3215
3216         s->lambda= s->lambda_table[mb_xy];
3217         update_qscale(s);
3218     
3219         if(!(s->flags&CODEC_FLAG_QP_RD)){
3220             s->dquant= s->qscale - last_qp;
3221
3222             if(s->out_format==FMT_H263)
3223                 s->dquant= clip(s->dquant, -2, 2); //FIXME RD
3224             
3225             if(s->codec_id==CODEC_ID_MPEG4){        
3226                 if(!s->mb_intra){
3227                     if((s->mv_dir&MV_DIRECT) || s->mv_type==MV_TYPE_8X8)
3228                         s->dquant=0;
3229                 }
3230             }
3231         }
3232         ff_set_qscale(s, last_qp + s->dquant);
3233     }
3234
3235     if (s->mb_intra) {
3236         uint8_t *ptr;
3237         int wrap_y;
3238         int emu=0;
3239
3240         wrap_y = s->linesize;
3241         ptr = s->new_picture.data[0] + (mb_y * 16 * wrap_y) + mb_x * 16;
3242
3243         if(mb_x*16+16 > s->width || mb_y*16+16 > s->height){
3244             ff_emulated_edge_mc(s->edge_emu_buffer, ptr, wrap_y, 16, 16, mb_x*16, mb_y*16, s->width, s->height);
3245             ptr= s->edge_emu_buffer;
3246             emu=1;
3247         }
3248         
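        /* choose between a progressive and an interlaced (field) DCT by comparing
           vertical activity across frame lines with activity across field lines;
           interlaced DCT is only selected if it is clearly better (margin 100) */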
3249         if(s->flags&CODEC_FLAG_INTERLACED_DCT){
3250             int progressive_score, interlaced_score;
3251             
3252             progressive_score= pix_vcmp16x8(ptr, wrap_y  ) + pix_vcmp16x8(ptr + wrap_y*8, wrap_y );
3253             interlaced_score = pix_vcmp16x8(ptr, wrap_y*2) + pix_vcmp16x8(ptr + wrap_y  , wrap_y*2);
3254             
3255             if(progressive_score > interlaced_score + 100){
3256                 s->interlaced_dct=1;
3257             
3258                 dct_offset= wrap_y;
3259                 wrap_y<<=1;
3260             }else
3261                 s->interlaced_dct=0;
3262         }
3263         
3264         s->dsp.get_pixels(s->block[0], ptr                 , wrap_y);
3265         s->dsp.get_pixels(s->block[1], ptr              + 8, wrap_y);
3266         s->dsp.get_pixels(s->block[2], ptr + dct_offset    , wrap_y);
3267         s->dsp.get_pixels(s->block[3], ptr + dct_offset + 8, wrap_y);
3268
3269         if(s->flags&CODEC_FLAG_GRAY){
3270             skip_dct[4]= 1;
3271             skip_dct[5]= 1;
3272         }else{
3273             int wrap_c = s->uvlinesize;
3274             ptr = s->new_picture.data[1] + (mb_y * 8 * wrap_c) + mb_x * 8;
3275             if(emu){
3276                 ff_emulated_edge_mc(s->edge_emu_buffer, ptr, wrap_c, 8, 8, mb_x*8, mb_y*8, s->width>>1, s->height>>1);
3277                 ptr= s->edge_emu_buffer;
3278             }
3279             s->dsp.get_pixels(s->block[4], ptr, wrap_c);
3280
3281             ptr = s->new_picture.data[2] + (mb_y * 8 * wrap_c) + mb_x * 8;
3282             if(emu){
3283                 ff_emulated_edge_mc(s->edge_emu_buffer, ptr, wrap_c, 8, 8, mb_x*8, mb_y*8, s->width>>1, s->height>>1);
3284                 ptr= s->edge_emu_buffer;
3285             }
3286             s->dsp.get_pixels(s->block[5], ptr, wrap_c);
3287         }
3288     }else{
3289         op_pixels_func (*op_pix)[4];
3290         qpel_mc_func (*op_qpix)[16];
3291         uint8_t *dest_y, *dest_cb, *dest_cr;
3292         uint8_t *ptr_y, *ptr_cb, *ptr_cr;
3293         int wrap_y, wrap_c;
3294         int emu=0;
3295
3296         dest_y  = s->dest[0];
3297         dest_cb = s->dest[1];
3298         dest_cr = s->dest[2];
3299         wrap_y = s->linesize;
3300         wrap_c = s->uvlinesize;
3301         ptr_y  = s->new_picture.data[0] + (mb_y * 16 * wrap_y) + mb_x * 16;
3302         ptr_cb = s->new_picture.data[1] + (mb_y * 8 * wrap_c) + mb_x * 8;
3303         ptr_cr = s->new_picture.data[2] + (mb_y * 8 * wrap_c) + mb_x * 8;
3304
3305         if ((!s->no_rounding) || s->pict_type==B_TYPE){
3306             op_pix = s->dsp.put_pixels_tab;
3307             op_qpix= s->dsp.put_qpel_pixels_tab;
3308         }else{
3309             op_pix = s->dsp.put_no_rnd_pixels_tab;
3310             op_qpix= s->dsp.put_no_rnd_qpel_pixels_tab;
3311         }
3312
3313         if (s->mv_dir & MV_DIR_FORWARD) {
3314             MPV_motion(s, dest_y, dest_cb, dest_cr, 0, s->last_picture.data, op_pix, op_qpix);
3315             op_pix = s->dsp.avg_pixels_tab;
3316             op_qpix= s->dsp.avg_qpel_pixels_tab;
3317         }
3318         if (s->mv_dir & MV_DIR_BACKWARD) {
3319             MPV_motion(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.data, op_pix, op_qpix);
3320         }
3321
3322         if(mb_x*16+16 > s->width || mb_y*16+16 > s->height){
3323             ff_emulated_edge_mc(s->edge_emu_buffer, ptr_y, wrap_y, 16, 16, mb_x*16, mb_y*16, s->width, s->height);
3324             ptr_y= s->edge_emu_buffer;
3325             emu=1;
3326         }
3327         
3328         if(s->flags&CODEC_FLAG_INTERLACED_DCT){
3329             int progressive_score, interlaced_score;
3330             
3331             progressive_score= pix_diff_vcmp16x8(ptr_y           , dest_y           , wrap_y  ) 
3332                              + pix_diff_vcmp16x8(ptr_y + wrap_y*8, dest_y + wrap_y*8, wrap_y  );
3333             interlaced_score = pix_diff_vcmp16x8(ptr_y           , dest_y           , wrap_y*2)
3334                              + pix_diff_vcmp16x8(ptr_y + wrap_y  , dest_y + wrap_y  , wrap_y*2);
3335             
3336             if(progressive_score > interlaced_score + 600){
3337                 s->interlaced_dct=1;
3338             
3339                 dct_offset= wrap_y;
3340                 wrap_y<<=1;
3341             }else
3342                 s->interlaced_dct=0;
3343         }
3344         
3345         s->dsp.diff_pixels(s->block[0], ptr_y                 , dest_y                 , wrap_y);
3346         s->dsp.diff_pixels(s->block[1], ptr_y              + 8, dest_y              + 8, wrap_y);
3347         s->dsp.diff_pixels(s->block[2], ptr_y + dct_offset    , dest_y + dct_offset    , wrap_y);
3348         s->dsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8, dest_y + dct_offset + 8, wrap_y);
3349         
3350         if(s->flags&CODEC_FLAG_GRAY){
3351             skip_dct[4]= 1;
3352             skip_dct[5]= 1;
3353         }else{
3354             if(emu){
3355                 ff_emulated_edge_mc(s->edge_emu_buffer, ptr_cb, wrap_c, 8, 8, mb_x*8, mb_y*8, s->width>>1, s->height>>1);
3356                 ptr_cb= s->edge_emu_buffer;
3357             }
3358             s->dsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
3359             if(emu){
3360                 ff_emulated_edge_mc(s->edge_emu_buffer, ptr_cr, wrap_c, 8, 8, mb_x*8, mb_y*8, s->width>>1, s->height>>1);
3361                 ptr_cr= s->edge_emu_buffer;
3362             }
3363             s->dsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
3364         }
3365         /* pre quantization */         
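        /* if the motion compensated variance of the whole MB is already small,
           individual blocks whose SAD against the prediction is below ~20*qscale
           are marked in skip_dct[] and coded as having no residual at all */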
3366         if(s->current_picture.mc_mb_var[s->mb_stride*mb_y+ mb_x]<2*s->qscale*s->qscale){
3367             //FIXME optimize
3368             if(s->dsp.pix_abs8x8(ptr_y               , dest_y               , wrap_y) < 20*s->qscale) skip_dct[0]= 1;
3369             if(s->dsp.pix_abs8x8(ptr_y            + 8, dest_y            + 8, wrap_y) < 20*s->qscale) skip_dct[1]= 1;
3370             if(s->dsp.pix_abs8x8(ptr_y +dct_offset   , dest_y +dct_offset   , wrap_y) < 20*s->qscale) skip_dct[2]= 1;
3371             if(s->dsp.pix_abs8x8(ptr_y +dct_offset+ 8, dest_y +dct_offset+ 8, wrap_y) < 20*s->qscale) skip_dct[3]= 1;
3372             if(s->dsp.pix_abs8x8(ptr_cb              , dest_cb              , wrap_c) < 20*s->qscale) skip_dct[4]= 1;
3373             if(s->dsp.pix_abs8x8(ptr_cr              , dest_cr              , wrap_c) < 20*s->qscale) skip_dct[5]= 1;
3374 #if 0
3375 {
3376  static int stat[7];
3377  int num=0;
3378  for(i=0; i<6; i++)
3379   if(skip_dct[i]) num++;
3380  stat[num]++;
3381  
3382  if(s->mb_x==0 && s->mb_y==0){
3383   for(i=0; i<7; i++){
3384    printf("%6d %1d\n", stat[i], i);
3385   }
3386  }
3387 }
3388 #endif
3389         }
3390
3391     }
3392             
3393     /* DCT & quantize */
3394     if(s->out_format==FMT_MJPEG){
3395         for(i=0;i<6;i++) {
3396             int overflow;
3397             s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, 8, &overflow);
3398             if (overflow) clip_coeffs(s, s->block[i], s->block_last_index[i]);
3399         }
3400     }else{
3401         for(i=0;i<6;i++) {
3402             if(!skip_dct[i]){
3403                 int overflow;
3404                 s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow);
3405             // FIXME we could decide to change the quantizer instead of clipping
3406             // JS: I don't think that would be a good idea, it could lower quality instead
3407             //     of improving it. Just INTRADC clipping deserves changes in the quantizer
3408                 if (overflow) clip_coeffs(s, s->block[i], s->block_last_index[i]);
3409             }else
3410                 s->block_last_index[i]= -1;
3411         }
3412         
3413         if(s->luma_elim_threshold && !s->mb_intra)
3414             for(i=0; i<4; i++)
3415                 dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
3416         if(s->chroma_elim_threshold && !s->mb_intra)
3417             for(i=4; i<6; i++)
3418                 dct_single_coeff_elimination(s, i, s->chroma_elim_threshold);
3419
3420         if(s->flags & CODEC_FLAG_CBP_RD){
3421             for(i=0;i<6;i++) {
3422                 if(s->block_last_index[i] == -1)
3423                     s->coded_score[i]= INT_MAX/256;
3424             }
3425         }
3426     }
3427
3428     if((s->flags&CODEC_FLAG_GRAY) && s->mb_intra){
3429         s->block_last_index[4]=
3430         s->block_last_index[5]= 0;
3431         s->block[4][0]=
3432         s->block[5][0]= (1024 + s->c_dc_scale/2)/ s->c_dc_scale;
3433     }
3434
3435     /* huffman encode */
3436     switch(s->codec_id){ //FIXME funct ptr could be slightly faster
3437     case CODEC_ID_MPEG1VIDEO:
3438     case CODEC_ID_MPEG2VIDEO:
3439         mpeg1_encode_mb(s, s->block, motion_x, motion_y); break;
3440 #ifdef CONFIG_RISKY
3441     case CODEC_ID_MPEG4:
3442         mpeg4_encode_mb(s, s->block, motion_x, motion_y); break;
3443     case CODEC_ID_MSMPEG4V2:
3444     case CODEC_ID_MSMPEG4V3:
3445     case CODEC_ID_WMV1:
3446         msmpeg4_encode_mb(s, s->block, motion_x, motion_y); break;
3447     case CODEC_ID_WMV2:
3448          ff_wmv2_encode_mb(s, s->block, motion_x, motion_y); break;
3449     case CODEC_ID_H263:
3450     case CODEC_ID_H263P:
3451     case CODEC_ID_FLV1:
3452     case CODEC_ID_RV10:
3453         h263_encode_mb(s, s->block, motion_x, motion_y); break;
3454 #endif
3455     case CODEC_ID_MJPEG:
3456         mjpeg_encode_mb(s, s->block); break;
3457     default:
3458         assert(0);
3459     }
3460 }
3461
3462 #endif //CONFIG_ENCODERS
3463
3464 /**
3465  * combines the (truncated) bitstream to a complete frame
3466  * @returns -1 if no complete frame could be created
3467  */
3468 int ff_combine_frame( MpegEncContext *s, int next, uint8_t **buf, int *buf_size){
3469     ParseContext *pc= &s->parse_context;
3470
3471 #if 0
3472     if(pc->overread){
3473         printf("overread %d, state:%X next:%d index:%d o_index:%d\n", pc->overread, pc->state, next, pc->index, pc->overread_index);
3474         printf("%X %X %X %X\n", (*buf)[0], (*buf)[1],(*buf)[2],(*buf)[3]);
3475     }
3476 #endif
3477
3478     /* copy overread bytes from the last frame into the buffer */
3479     for(; pc->overread>0; pc->overread--){
3480         pc->buffer[pc->index++]= pc->buffer[pc->overread_index++];
3481     }
3482     
3483     pc->last_index= pc->index;
3484
3485     /* copy into buffer and return */
3486     if(next == END_NOT_FOUND){
3487         pc->buffer= av_fast_realloc(pc->buffer, &pc->buffer_size, (*buf_size) + pc->index + FF_INPUT_BUFFER_PADDING_SIZE);
3488
3489         memcpy(&pc->buffer[pc->index], *buf, *buf_size);
3490         pc->index += *buf_size;
3491         return -1;
3492     }
3493
3494     *buf_size=
3495     pc->overread_index= pc->index + next;
3496     
3497     /* append to buffer */
3498     if(pc->index){
3499         pc->buffer= av_fast_realloc(pc->buffer, &pc->buffer_size, next + pc->index + FF_INPUT_BUFFER_PADDING_SIZE);
3500
3501         memcpy(&pc->buffer[pc->index], *buf, next + FF_INPUT_BUFFER_PADDING_SIZE );
3502         pc->index = 0;
3503         *buf= pc->buffer;
3504     }
3505
3506     /* store overread bytes */
3507     for(;next < 0; next++){
3508         pc->state = (pc->state<<8) | pc->buffer[pc->last_index + next];
3509         pc->overread++;
3510     }
3511
3512 #if 0
3513     if(pc->overread){
3514         printf("overread %d, state:%X next:%d index:%d o_index:%d\n", pc->overread, pc->state, next, pc->index, pc->overread_index);
3515         printf("%X %X %X %X\n", (*buf)[0], (*buf)[1],(*buf)[2],(*buf)[3]);
3516     }
3517 #endif
3518
3519     return 0;
3520 }
3521
3522 void ff_mpeg_flush(AVCodecContext *avctx){
3523     int i;
3524     MpegEncContext *s = avctx->priv_data;
3525     
3526     for(i=0; i<MAX_PICTURE_COUNT; i++){
3527        if(s->picture[i].data[0] && (   s->picture[i].type == FF_BUFFER_TYPE_INTERNAL
3528                                     || s->picture[i].type == FF_BUFFER_TYPE_USER))
3529         avctx->release_buffer(avctx, (AVFrame*)&s->picture[i]);
3530     }
3531     s->current_picture_ptr = s->last_picture_ptr = s->next_picture_ptr = NULL;
3532     
3533     s->parse_context.state= -1;
3534     s->parse_context.frame_start_found= 0;
3535     s->parse_context.overread= 0;
3536     s->parse_context.overread_index= 0;
3537     s->parse_context.index= 0;
3538     s->parse_context.last_index= 0;
3539 }
3540
3541 #ifdef CONFIG_ENCODERS
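/* append 'length' bits from src to the bitstream, writing 16 bits at a time;
   src is interpreted as a big-endian bit string */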
3542 void ff_copy_bits(PutBitContext *pb, uint8_t *src, int length)
3543 {
3544     int bytes= length>>4;
3545     int bits= length&15;
3546     int i;
3547
3548     if(length==0) return;
3549
3550     for(i=0; i<bytes; i++) put_bits(pb, 16, be2me_16(((uint16_t*)src)[i]));
3551     put_bits(pb, bits, be2me_16(((uint16_t*)src)[i])>>(16-bits));
3552 }
3553
3554 static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int type){
3555     int i;
3556
3557     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
3558
3559     /* mpeg1 */
3560     d->mb_skip_run= s->mb_skip_run;
3561     for(i=0; i<3; i++)
3562         d->last_dc[i]= s->last_dc[i];
3563     
3564     /* statistics */
3565     d->mv_bits= s->mv_bits;
3566     d->i_tex_bits= s->i_tex_bits;
3567     d->p_tex_bits= s->p_tex_bits;
3568     d->i_count= s->i_count;
3569     d->f_count= s->f_count;
3570     d->b_count= s->b_count;
3571     d->skip_count= s->skip_count;
3572     d->misc_bits= s->misc_bits;
3573     d->last_bits= 0;
3574
3575     d->mb_skiped= 0;
3576     d->qscale= s->qscale;
3577     d->dquant= s->dquant;
3578 }
3579
3580 static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){
3581     int i;
3582
3583     memcpy(d->mv, s->mv, 2*4*2*sizeof(int)); 
3584     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
3585     
3586     /* mpeg1 */
3587     d->mb_skip_run= s->mb_skip_run;
3588     for(i=0; i<3; i++)
3589         d->last_dc[i]= s->last_dc[i];
3590     
3591     /* statistics */
3592     d->mv_bits= s->mv_bits;
3593     d->i_tex_bits= s->i_tex_bits;
3594     d->p_tex_bits= s->p_tex_bits;
3595     d->i_count= s->i_count;
3596     d->f_count= s->f_count;
3597     d->b_count= s->b_count;
3598     d->skip_count= s->skip_count;
3599     d->misc_bits= s->misc_bits;
3600
3601     d->mb_intra= s->mb_intra;
3602     d->mb_skiped= s->mb_skiped;
3603     d->mv_type= s->mv_type;
3604     d->mv_dir= s->mv_dir;
3605     d->pb= s->pb;
3606     if(s->data_partitioning){
3607         d->pb2= s->pb2;
3608         d->tex_pb= s->tex_pb;
3609     }
3610     d->block= s->block;
3611     for(i=0; i<6; i++)
3612         d->block_last_index[i]= s->block_last_index[i];
3613     d->interlaced_dct= s->interlaced_dct;
3614     d->qscale= s->qscale;
3615 }
3616
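/* encode the current macroblock with the given type into scratch bit buffers
   (and scratch pixels for the reconstruction), compute its cost in bits, or a
   full rate-distortion cost under FF_MB_DECISION_RD, and keep the result as the
   new best candidate if it beats *dmin */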
3617 static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type, 
3618                            PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2],
3619                            int *dmin, int *next_block, int motion_x, int motion_y)
3620 {
3621     int score;
3622     uint8_t *dest_backup[3];
3623     
3624     copy_context_before_encode(s, backup, type);
3625
3626     s->block= s->blocks[*next_block];
3627     s->pb= pb[*next_block];
3628     if(s->data_partitioning){
3629         s->pb2   = pb2   [*next_block];
3630         s->tex_pb= tex_pb[*next_block];
3631     }
3632     
3633     if(*next_block){
3634         memcpy(dest_backup, s->dest, sizeof(s->dest));
3635         s->dest[0] = s->me.scratchpad;
3636         s->dest[1] = s->me.scratchpad + 16;
3637         s->dest[2] = s->me.scratchpad + 16 + 8;
3638         assert(2*s->uvlinesize == s->linesize); //should be no prob for encoding
3639         assert(s->linesize >= 64); //FIXME
3640     }
3641
3642     encode_mb(s, motion_x, motion_y);
3643     
3644     score= get_bit_count(&s->pb);
3645     if(s->data_partitioning){
3646         score+= get_bit_count(&s->pb2);
3647         score+= get_bit_count(&s->tex_pb);
3648     }
3649    
3650     if(s->avctx->mb_decision == FF_MB_DECISION_RD){
3651         MPV_decode_mb(s, s->block);
3652
3653         score *= s->lambda2;
3654         score += sse_mb(s) << FF_LAMBDA_SHIFT;
3655     }
3656     
3657     if(*next_block){
3658         memcpy(s->dest, dest_backup, sizeof(s->dest));
3659     }
3660
3661     if(score<*dmin){
3662         *dmin= score;
3663         *next_block^=1;
3664
3665         copy_context_after_encode(best, s, type);
3666     }
3667 }
3668                 
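/* sum of squared errors between two w x h blocks; the common 16x16 and 8x8
   cases are dispatched to the optimized dsputil routines */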
3669 static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride){
3670     uint32_t *sq = squareTbl + 256;
3671     int acc=0;
3672     int x,y;
3673     
3674     if(w==16 && h==16) 
3675         return s->dsp.sse[0](NULL, src1, src2, stride);
3676     else if(w==8 && h==8)
3677         return s->dsp.sse[1](NULL, src1, src2, stride);
3678     
3679     for(y=0; y<h; y++){
3680         for(x=0; x<w; x++){
3681             acc+= sq[src1[x + y*stride] - src2[x + y*stride]];
3682         } 
3683     }
3684     
3685     assert(acc>=0);
3686     
3687     return acc;
3688 }
3689
3690 static int sse_mb(MpegEncContext *s){
3691     int w= 16;
3692     int h= 16;
3693
3694     if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
3695     if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
3696
3697     if(w==16 && h==16)
3698         return  s->dsp.sse[0](NULL, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize)
3699                +s->dsp.sse[1](NULL, s->new_picture.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize)
3700                +s->dsp.sse[1](NULL, s->new_picture.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize);
3701     else
3702         return  sse(s, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], w, h, s->linesize)
3703                +sse(s, s->new_picture.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize)
3704                +sse(s, s->new_picture.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize);
3705 }
3706
3707 static void encode_picture(MpegEncContext *s, int picture_number)
3708 {
3709     int mb_x, mb_y, pdif = 0;
3710     int i;
3711     int bits;
3712     MpegEncContext best_s, backup_s;
3713     uint8_t bit_buf[2][3000];
3714     uint8_t bit_buf2[2][3000];
3715     uint8_t bit_buf_tex[2][3000];
3716     PutBitContext pb[2], pb2[2], tex_pb[2];
3717
3718     for(i=0; i<2; i++){
3719         init_put_bits(&pb    [i], bit_buf    [i], 3000);
3720         init_put_bits(&pb2   [i], bit_buf2   [i], 3000);
3721         init_put_bits(&tex_pb[i], bit_buf_tex[i], 3000);
3722     }
3723
3724     s->picture_number = picture_number;
3725     
3726     /* Reset the average MB variance */
3727     s->current_picture.mb_var_sum = 0;
3728     s->current_picture.mc_mb_var_sum = 0;
3729
3730 #ifdef CONFIG_RISKY
3731     /* we need to initialize some time vars before we can encode b-frames */
3732     // RAL: Condition added for MPEG1VIDEO
3733     if (s->codec_id == CODEC_ID_MPEG1VIDEO || s->codec_id == CODEC_ID_MPEG2VIDEO || (s->h263_pred && !s->h263_msmpeg4))
3734         ff_set_mpeg4_time(s, s->picture_number); 
3735 #endif
3736         
3737     s->scene_change_score=0;
3738     
3739     s->lambda= s->current_picture_ptr->quality; //FIXME qscale / ... stuff for ME rate distortion
3740     
3741     if(s->pict_type==I_TYPE){
3742         if(s->msmpeg4_version >= 3) s->no_rounding=1;
3743         else                        s->no_rounding=0;
3744     }else if(s->pict_type!=B_TYPE){
3745         if(s->flipflop_rounding || s->codec_id == CODEC_ID_H263P || s->codec_id == CODEC_ID_MPEG4)
3746             s->no_rounding ^= 1;          
3747     }
3748     
3749     /* Estimate motion for every MB */
3750     s->mb_intra=0; //for the rate distortion & bit compare functions
3751     if(s->pict_type != I_TYPE){
3752         if(s->pict_type != B_TYPE){
3753             if((s->avctx->pre_me && s->last_non_b_pict_type==I_TYPE) || s->avctx->pre_me==2){
3754                 s->me.pre_pass=1;
3755                 s->me.dia_size= s->avctx->pre_dia_size;
3756
3757                 for(mb_y=s->mb_height-1; mb_y >=0 ; mb_y--) {
3758                     for(mb_x=s->mb_width-1; mb_x >=0 ; mb_x--) {
3759                         s->mb_x = mb_x;
3760                         s->mb_y = mb_y;
3761                         ff_pre_estimate_p_frame_motion(s, mb_x, mb_y);
3762                     }
3763                 }
3764                 s->me.pre_pass=0;
3765             }
3766         }
3767
3768         s->me.dia_size= s->avctx->dia_size;
3769         for(mb_y=0; mb_y < s->mb_height; mb_y++) {
3770             s->block_index[0]= s->block_wrap[0]*(mb_y*2 + 1) - 1;
3771             s->block_index[1]= s->block_wrap[0]*(mb_y*2 + 1);
3772             s->block_index[2]= s->block_wrap[0]*(mb_y*2 + 2) - 1;
3773             s->block_index[3]= s->block_wrap[0]*(mb_y*2 + 2);
3774             for(mb_x=0; mb_x < s->mb_width; mb_x++) {
3775                 s->mb_x = mb_x;
3776                 s->mb_y = mb_y;
3777                 s->block_index[0]+=2;
3778                 s->block_index[1]+=2;
3779                 s->block_index[2]+=2;
3780                 s->block_index[3]+=2;
3781                 
3782                 /* compute motion vector & mb_type and store in context */
3783                 if(s->pict_type==B_TYPE)
3784                     ff_estimate_b_frame_motion(s, mb_x, mb_y);
3785                 else
3786                     ff_estimate_p_frame_motion(s, mb_x, mb_y);
3787             }
3788         }
3789     }else /* if(s->pict_type == I_TYPE) */{
3790         /* I-Frame */
3791         //FIXME do we need to zero them?
3792         memset(s->current_picture.motion_val[0][0], 0, sizeof(int16_t)*(s->mb_width*2 + 2)*(s->mb_height*2 + 2)*2);
3793         memset(s->p_mv_table   , 0, sizeof(int16_t)*(s->mb_stride)*s->mb_height*2);
3794         memset(s->mb_type      , MB_TYPE_INTRA, sizeof(uint8_t)*s->mb_stride*s->mb_height);
3795         
3796         if(!s->fixed_qscale){
3797             /* finding spatial complexity for I-frame rate control */
3798             for(mb_y=0; mb_y < s->mb_height; mb_y++) {
3799                 for(mb_x=0; mb_x < s->mb_width; mb_x++) {
3800                     int xx = mb_x * 16;
3801                     int yy = mb_y * 16;
3802                     uint8_t *pix = s->new_picture.data[0] + (yy * s->linesize) + xx;
3803                     int varc;
3804                     int sum = s->dsp.pix_sum(pix, s->linesize);
3805     
3806                     varc = (s->dsp.pix_norm1(pix, s->linesize) - (((unsigned)(sum*sum))>>8) + 500 + 128)>>8;
3807
3808                     s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc;
3809                     s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
3810                     s->current_picture.mb_var_sum    += varc;
3811                 }
3812             }
3813         }
3814     }
3815     emms_c();
3816
3817     if(s->scene_change_score > s->avctx->scenechange_threshold && s->pict_type == P_TYPE){
3818         s->pict_type= I_TYPE;
3819         memset(s->mb_type   , MB_TYPE_INTRA, sizeof(uint8_t)*s->mb_stride*s->mb_height);
3820 //printf("Scene change detected, encoding as I Frame %d %d\n", s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
3821     }
3822
3823     if(!s->umvplus){
3824         if(s->pict_type==P_TYPE || s->pict_type==S_TYPE) {
3825             s->f_code= ff_get_best_fcode(s, s->p_mv_table, MB_TYPE_INTER);
3826         
3827             ff_fix_long_p_mvs(s);
3828         }
3829
3830         if(s->pict_type==B_TYPE){
3831             int a, b;
3832
3833             a = ff_get_best_fcode(s, s->b_forw_mv_table, MB_TYPE_FORWARD);
3834             b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, MB_TYPE_BIDIR);
3835             s->f_code = FFMAX(a, b);
3836
3837             a = ff_get_best_fcode(s, s->b_back_mv_table, MB_TYPE_BACKWARD);
3838             b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, MB_TYPE_BIDIR);
3839             s->b_code = FFMAX(a, b);
3840
3841             ff_fix_long_b_mvs(s, s->b_forw_mv_table, s->f_code, MB_TYPE_FORWARD);
3842             ff_fix_long_b_mvs(s, s->b_back_mv_table, s->b_code, MB_TYPE_BACKWARD);
3843             ff_fix_long_b_mvs(s, s->b_bidir_forw_mv_table, s->f_code, MB_TYPE_BIDIR);
3844             ff_fix_long_b_mvs(s, s->b_bidir_back_mv_table, s->b_code, MB_TYPE_BIDIR);
3845         }
3846     }
3847     
3848     if (!s->fixed_qscale) 
3849         s->current_picture.quality = ff_rate_estimate_qscale(s);
3850
3851     if(s->adaptive_quant){
3852 #ifdef CONFIG_RISKY
3853         switch(s->codec_id){
3854         case CODEC_ID_MPEG4:
3855             ff_clean_mpeg4_qscales(s);
3856             break;
3857         case CODEC_ID_H263:
3858         case CODEC_ID_H263P:
3859         case CODEC_ID_FLV1:
3860             ff_clean_h263_qscales(s);
3861             break;
3862         }
3863 #endif
3864
3865         s->lambda= s->lambda_table[0];
3866         //FIXME broken
3867     }else
3868         s->lambda= s->current_picture.quality;
3869 //printf("%d %d\n", s->avctx->global_quality, s->current_picture.quality);
3870     update_qscale(s);
3871     
3872     if(s->qscale < 3 && s->max_qcoeff<=128 && s->pict_type==I_TYPE && !(s->flags & CODEC_FLAG_QSCALE)) 
3873         s->qscale= 3; //reduce clipping problems
3874         
3875     if (s->out_format == FMT_MJPEG) {
3876         /* for mjpeg, we do include qscale in the matrix */
3877         s->intra_matrix[0] = ff_mpeg1_default_intra_matrix[0];
3878         for(i=1;i<64;i++){
3879             int j= s->dsp.idct_permutation[i];
3880
3881             s->intra_matrix[j] = CLAMP_TO_8BIT((ff_mpeg1_default_intra_matrix[i] * s->qscale) >> 3);
3882         }
3883         convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16, 
3884                        s->intra_matrix, s->intra_quant_bias, 8, 8);
3885     }
3886     
3887     //FIXME var duplication
3888     s->current_picture.key_frame= s->pict_type == I_TYPE;
3889     s->current_picture.pict_type= s->pict_type;
3890
3891     if(s->current_picture.key_frame)
3892         s->picture_in_gop_number=0;
3893
3894     s->last_bits= get_bit_count(&s->pb);
3895     switch(s->out_format) {
3896     case FMT_MJPEG:
3897         mjpeg_picture_header(s);
3898         break;
3899 #ifdef CONFIG_RISKY
3900     case FMT_H263:
3901         if (s->codec_id == CODEC_ID_WMV2) 
3902             ff_wmv2_encode_picture_header(s, picture_number);
3903         else if (s->h263_msmpeg4) 
3904             msmpeg4_encode_picture_header(s, picture_number);
3905         else if (s->h263_pred)
3906             mpeg4_encode_picture_header(s, picture_number);
3907         else if (s->codec_id == CODEC_ID_RV10) 
3908             rv10_encode_picture_header(s, picture_number);
3909         else if (s->codec_id == CODEC_ID_FLV1)
3910             ff_flv_encode_picture_header(s, picture_number);
3911         else
3912             h263_encode_picture_header(s, picture_number);
3913         break;
3914 #endif
3915     case FMT_MPEG1:
3916         mpeg1_encode_picture_header(s, picture_number);
3917         break;
3918     case FMT_H264:
3919         break;
3920     }
3921     bits= get_bit_count(&s->pb);
3922     s->header_bits= bits - s->last_bits;
3923     s->last_bits= bits;
3924     s->mv_bits=0;
3925     s->misc_bits=0;
3926     s->i_tex_bits=0;
3927     s->p_tex_bits=0;
3928     s->i_count=0;
3929     s->f_count=0;
3930     s->b_count=0;
3931     s->skip_count=0;
3932
3933     for(i=0; i<3; i++){
3934         /* init last dc values */
3935         /* note: quant matrix value (8) is implied here */
3936         s->last_dc[i] = 128;
3937         
3938         s->current_picture_ptr->error[i] = 0;
3939     }
3940     s->mb_skip_run = 0;
3941     s->last_mv[0][0][0] = 0;
3942     s->last_mv[0][0][1] = 0;
3943     s->last_mv[1][0][0] = 0;
3944     s->last_mv[1][0][1] = 0;
3945      
3946     s->last_mv_dir = 0;
3947
3948 #ifdef CONFIG_RISKY
3949     switch(s->codec_id){
3950     case CODEC_ID_H263:
3951     case CODEC_ID_H263P:
3952     case CODEC_ID_FLV1:
3953         s->gob_index = ff_h263_get_gob_height(s);
3954         break;
3955     case CODEC_ID_MPEG4:
3956         if(s->partitioned_frame)
3957             ff_mpeg4_init_partitions(s);
3958         break;
3959     }
3960 #endif
3961
3962     s->resync_mb_x=0;
3963     s->resync_mb_y=0;
3964     s->first_slice_line = 1;
3965     s->ptr_lastgob = s->pb.buf;
3966     for(mb_y=0; mb_y < s->mb_height; mb_y++) {
3967         s->mb_x=0;
3968         s->mb_y= mb_y;
3969
3970         ff_set_qscale(s, s->qscale);
3971         ff_init_block_index(s);
3972         
3973         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
3974             const int xy= mb_y*s->mb_stride + mb_x;
3975             int mb_type= s->mb_type[xy];
3976 //            int d;
3977             int dmin= INT_MAX;
3978
3979             s->mb_x = mb_x;
3980             ff_update_block_index(s);
3981
3982             /* write gob / video packet header  */
3983 #ifdef CONFIG_RISKY
3984             if(s->rtp_mode){
3985                 int current_packet_size, is_gob_start;
3986                 
3987                 current_packet_size= pbBufPtr(&s->pb) - s->ptr_lastgob;
3988                 
3989                 is_gob_start= s->avctx->rtp_payload_size && current_packet_size >= s->avctx->rtp_payload_size && mb_y + mb_x>0; 
3990                 
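                /* apply the codec specific rules on where a new GOB / slice /
                   video packet may (or must) start */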
3991                 switch(s->codec_id){
3992                 case CODEC_ID_H263:
3993                 case CODEC_ID_H263P:
3994                     if(!s->h263_slice_structured)
3995                         if(s->mb_x || s->mb_y%s->gob_index) is_gob_start=0;
3996                     break;
3997                 case CODEC_ID_MPEG2VIDEO:
3998                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
3999                 case CODEC_ID_MPEG1VIDEO:
4000                     if(s->mb_skip_run) is_gob_start=0;
4001                     break;
4002                 }
4003                 
4004                 if(is_gob_start){
4005                     if(s->codec_id==CODEC_ID_MPEG4 && s->partitioned_frame){
4006                         ff_mpeg4_merge_partitions(s);
4007                         ff_mpeg4_init_partitions(s);
4008                     }
4009                 
4010                     if(s->codec_id==CODEC_ID_MPEG4) 
4011                         ff_mpeg4_stuffing(&s->pb);
4012
4013                     align_put_bits(&s->pb);
4014                     flush_put_bits(&s->pb);
4015
4016                     assert((get_bit_count(&s->pb)&7) == 0);
4017                     current_packet_size= pbBufPtr(&s->pb) - s->ptr_lastgob;
4018                     
4019                     if(s->avctx->error_rate && s->resync_mb_x + s->resync_mb_y > 0){
4020                         int r= get_bit_count(&s->pb)/8 + s->picture_number + s->codec_id + s->mb_x + s->mb_y;
4021                         int d= 100 / s->avctx->error_rate;
4022                         if(r % d == 0){
4023                             current_packet_size=0;
4024 #ifndef ALT_BITSTREAM_WRITER
4025                             s->pb.buf_ptr= s->ptr_lastgob;
4026 #endif
4027                             assert(pbBufPtr(&s->pb) == s->ptr_lastgob);
4028                         }
4029                     }
4030         
4031                     if (s->avctx->rtp_callback)
4032                         s->avctx->rtp_callback(s->ptr_lastgob, current_packet_size, 0);
4033                     
4034                     switch(s->codec_id){
4035                     case CODEC_ID_MPEG4:
4036                         ff_mpeg4_encode_video_packet_header(s);
4037                         ff_mpeg4_clean_buffers(s);
4038                     break;
4039                     case CODEC_ID_MPEG1VIDEO:
4040                     case CODEC_ID_MPEG2VIDEO:
4041                         ff_mpeg1_encode_slice_header(s);
4042                         ff_mpeg1_clean_buffers(s);
4043                     break;
4044                     case CODEC_ID_H263:
4045                     case CODEC_ID_H263P:
4046                         h263_encode_gob_header(s, mb_y);                       
4047                     break;
4048                     }
4049
4050                     if(s->flags&CODEC_FLAG_PASS1){
4051                         int bits= get_bit_count(&s->pb);
4052                         s->misc_bits+= bits - s->last_bits;
4053                         s->last_bits= bits;
4054                     }
4055     
4056                     s->ptr_lastgob += current_packet_size;
4057                     s->first_slice_line=1;
4058                     s->resync_mb_x=mb_x;
4059                     s->resync_mb_y=mb_y;
4060                 }
4061             }
4062 #endif
4063
4064             if(  (s->resync_mb_x   == s->mb_x)
4065                && s->resync_mb_y+1 == s->mb_y){
4066                 s->first_slice_line=0; 
4067             }
4068
4069             s->mb_skiped=0;
4070             s->dquant=0; //only for QP_RD
4071
4072             if(mb_type & (mb_type-1) || (s->flags & CODEC_FLAG_QP_RD)){ // more than 1 MB type possible
4073                 int next_block=0;
4074                 int pb_bits_count, pb2_bits_count, tex_pb_bits_count;
4075
4076                 copy_context_before_encode(&backup_s, s, -1);
4077                 backup_s.pb= s->pb;
4078                 best_s.data_partitioning= s->data_partitioning;
4079                 best_s.partitioned_frame= s->partitioned_frame;
4080                 if(s->data_partitioning){
4081                     backup_s.pb2= s->pb2;
4082                     backup_s.tex_pb= s->tex_pb;
4083                 }
4084
4085                 if(mb_type&MB_TYPE_INTER){
4086                     s->mv_dir = MV_DIR_FORWARD;
4087                     s->mv_type = MV_TYPE_16X16;
4088                     s->mb_intra= 0;
4089                     s->mv[0][0][0] = s->p_mv_table[xy][0];
4090                     s->mv[0][0][1] = s->p_mv_table[xy][1];
4091                     encode_mb_hq(s, &backup_s, &best_s, MB_TYPE_INTER, pb, pb2, tex_pb, 
4092                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
4093                 }
4094                 if(mb_type&MB_TYPE_SKIPED){
4095                     s->mv_dir = MV_DIR_FORWARD;
4096                     s->mv_type = MV_TYPE_16X16;
4097                     s->mb_intra= 0;
4098                     s->mv[0][0][0] = 0;
4099                     s->mv[0][0][1] = 0;
4100                     encode_mb_hq(s, &backup_s, &best_s, MB_TYPE_SKIPED, pb, pb2, tex_pb, 
4101                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
4102                 }
4103                 if(mb_type&MB_TYPE_INTER4V){                 
4104                     s->mv_dir = MV_DIR_FORWARD;
4105                     s->mv_type = MV_TYPE_8X8;
4106                     s->mb_intra= 0;
4107                     for(i=0; i<4; i++){
4108                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
4109                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
4110                     }
4111                     encode_mb_hq(s, &backup_s, &best_s, MB_TYPE_INTER4V, pb, pb2, tex_pb, 
4112                                  &dmin, &next_block, 0, 0);
4113                 }
4114                 if(mb_type&MB_TYPE_FORWARD){
4115                     s->mv_dir = MV_DIR_FORWARD;
4116                     s->mv_type = MV_TYPE_16X16;
4117                     s->mb_intra= 0;
4118                     s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
4119                     s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
4120                     encode_mb_hq(s, &backup_s, &best_s, MB_TYPE_FORWARD, pb, pb2, tex_pb, 
4121                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
4122                 }
4123                 if(mb_type&MB_TYPE_BACKWARD){
4124                     s->mv_dir = MV_DIR_BACKWARD;
4125                     s->mv_type = MV_TYPE_16X16;
4126                     s->mb_intra= 0;
4127                     s->mv[1][0][0] = s->b_back_mv_table[xy][0];
4128                     s->mv[1][0][1] = s->b_back_mv_table[xy][1];
4129                     encode_mb_hq(s, &backup_s, &best_s, MB_TYPE_BACKWARD, pb, pb2, tex_pb, 
4130                                  &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]);
4131                 }
4132                 if(mb_type&MB_TYPE_BIDIR){
4133                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
4134                     s->mv_type = MV_TYPE_16X16;
4135                     s->mb_intra= 0;
4136                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
4137                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
4138                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
4139                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
4140                     encode_mb_hq(s, &backup_s, &best_s, MB_TYPE_BIDIR, pb, pb2, tex_pb, 
4141                                  &dmin, &next_block, 0, 0);
4142                 }
4143                 if(mb_type&MB_TYPE_DIRECT){
4144                     int mx= s->b_direct_mv_table[xy][0];
4145                     int my= s->b_direct_mv_table[xy][1];
4146                     
4147                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
4148                     s->mb_intra= 0;
4149 #ifdef CONFIG_RISKY
4150                     ff_mpeg4_set_direct_mv(s, mx, my);
4151 #endif
4152                     encode_mb_hq(s, &backup_s, &best_s, MB_TYPE_DIRECT, pb, pb2, tex_pb, 
4153                                  &dmin, &next_block, mx, my);
4154                 }
4155                 if(mb_type&MB_TYPE_INTRA){
4156                     s->mv_dir = 0;
4157                     s->mv_type = MV_TYPE_16X16;
4158                     s->mb_intra= 1;
4159                     s->mv[0][0][0] = 0;
4160                     s->mv[0][0][1] = 0;
4161                     encode_mb_hq(s, &backup_s, &best_s, MB_TYPE_INTRA, pb, pb2, tex_pb, 
4162                                  &dmin, &next_block, 0, 0);
4163                     if(s->h263_pred || s->h263_aic){
4164                         if(best_s.mb_intra)
4165                             s->mbintra_table[mb_x + mb_y*s->mb_stride]=1;
4166                         else
4167                             ff_clean_intra_table_entries(s); //old mode?
4168                     }
4169                 }
4170
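                /* Quantizer rate-distortion search: if the best candidate so far is a
                 * plain 16x16 (non-direct) macroblock, re-encode it with neighbouring
                 * quantizers (dquant up to +-2, clipped to qmin..qmax) and keep the
                 * qscale with the lowest rate-distortion cost; for intra MBs the DC/AC
                 * prediction state is saved and restored around each trial. */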
4171                 if(s->flags & CODEC_FLAG_QP_RD){
4172                     if(best_s.mv_type==MV_TYPE_16X16 && !(best_s.mv_dir&MV_DIRECT)){
4173                         const int last_qp= backup_s.qscale;
4174                         int dquant, dir, qp, dc[6];
4175                         DCTELEM ac[6][16];
4176                         const int mvdir= (best_s.mv_dir&MV_DIR_BACKWARD) ? 1 : 0;
4177                         
4178                         assert(backup_s.dquant == 0);
4179
4180                         //FIXME intra
4181                         s->mv_dir= best_s.mv_dir;
4182                         s->mv_type = MV_TYPE_16X16;
4183                         s->mb_intra= best_s.mb_intra;
4184                         s->mv[0][0][0] = best_s.mv[0][0][0];
4185                         s->mv[0][0][1] = best_s.mv[0][0][1];
4186                         s->mv[1][0][0] = best_s.mv[1][0][0];
4187                         s->mv[1][0][1] = best_s.mv[1][0][1];
4188                         
4189                         dir= s->pict_type == B_TYPE ? 2 : 1;
4190                         if(last_qp + dir > s->avctx->qmax) dir= -dir;
4191                         for(dquant= dir; dquant<=2 && dquant>=-2; dquant += dir){
4192                             qp= last_qp + dquant;
4193                             if(qp < s->avctx->qmin || qp > s->avctx->qmax)
4194                                 break;
4195                             backup_s.dquant= dquant;
4196                             if(s->mb_intra){
4197                                 for(i=0; i<6; i++){
4198                                     dc[i]= s->dc_val[0][ s->block_index[i] ];
4199                                     memcpy(ac[i], s->ac_val[0][s->block_index[i]], sizeof(DCTELEM)*16);
4200                                 }
4201                             }
4202
4203                             encode_mb_hq(s, &backup_s, &best_s, MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb, 
4204                                          &dmin, &next_block, s->mv[mvdir][0][0], s->mv[mvdir][0][1]);
4205                             if(best_s.qscale != qp){
4206                                 if(s->mb_intra){
4207                                     for(i=0; i<6; i++){
4208                                         s->dc_val[0][ s->block_index[i] ]= dc[i];
4209                                         memcpy(s->ac_val[0][s->block_index[i]], ac[i], sizeof(DCTELEM)*16);
4210                                     }
4211                                 }
4212                                 if(dir > 0 && dquant==dir){
4213                                     dquant= 0;
4214                                     dir= -dir;
4215                                 }else
4216                                     break;
4217                             }
4218                         }
4219                         qp= best_s.qscale;
4220                         s->current_picture.qscale_table[xy]= qp;
4221                     }
4222                 }
4223
4224                 copy_context_after_encode(s, &best_s, -1);
4225                 
4226                 pb_bits_count= get_bit_count(&s->pb);
4227                 flush_put_bits(&s->pb);
4228                 ff_copy_bits(&backup_s.pb, bit_buf[next_block^1], pb_bits_count);
4229                 s->pb= backup_s.pb;
4230                 
4231                 if(s->data_partitioning){
4232                     pb2_bits_count= get_bit_count(&s->pb2);
4233                     flush_put_bits(&s->pb2);
4234                     ff_copy_bits(&backup_s.pb2, bit_buf2[next_block^1], pb2_bits_count);
4235                     s->pb2= backup_s.pb2;
4236                     
4237                     tex_pb_bits_count= get_bit_count(&s->tex_pb);
4238                     flush_put_bits(&s->tex_pb);
4239                     ff_copy_bits(&backup_s.tex_pb, bit_buf_tex[next_block^1], tex_pb_bits_count);
4240                     s->tex_pb= backup_s.tex_pb;
4241                 }
4242                 s->last_bits= get_bit_count(&s->pb);
4243                
4244 #ifdef CONFIG_RISKY
4245                 if (s->out_format == FMT_H263 && s->pict_type!=B_TYPE)
4246                     ff_h263_update_motion_val(s);
4247 #endif
4248         
4249                 if(next_block==0){
4250                     s->dsp.put_pixels_tab[0][0](s->dest[0], s->me.scratchpad     , s->linesize  ,16);
4251                     s->dsp.put_pixels_tab[1][0](s->dest[1], s->me.scratchpad + 16, s->uvlinesize, 8);
4252                     s->dsp.put_pixels_tab[1][0](s->dest[2], s->me.scratchpad + 24, s->uvlinesize, 8);
4253                 }
4254
4255                 if(s->avctx->mb_decision == FF_MB_DECISION_BITS)
4256                     MPV_decode_mb(s, s->block);
4257             } else {
4258                 int motion_x, motion_y;
4259                 s->mv_type=MV_TYPE_16X16;
4260                 // only one MB-Type possible
4261                 
4262                 switch(mb_type){
4263                 case MB_TYPE_INTRA:
4264                     s->mv_dir = 0;
4265                     s->mb_intra= 1;
4266                     motion_x= s->mv[0][0][0] = 0;
4267                     motion_y= s->mv[0][0][1] = 0;
4268                     break;
4269                 case MB_TYPE_INTER:
4270                     s->mv_dir = MV_DIR_FORWARD;
4271                     s->mb_intra= 0;
4272                     motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
4273                     motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
4274                     break;
4275                 case MB_TYPE_INTER4V:
4276                     s->mv_dir = MV_DIR_FORWARD;
4277                     s->mv_type = MV_TYPE_8X8;
4278                     s->mb_intra= 0;
4279                     for(i=0; i<4; i++){
4280                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
4281                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
4282                     }
4283                     motion_x= motion_y= 0;
4284                     break;
4285                 case MB_TYPE_DIRECT:
4286                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
4287                     s->mb_intra= 0;
4288                     motion_x=s->b_direct_mv_table[xy][0];
4289                     motion_y=s->b_direct_mv_table[xy][1];
4290 #ifdef CONFIG_RISKY
4291                     ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
4292 #endif
4293                     break;
4294                 case MB_TYPE_BIDIR:
4295                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
4296                     s->mb_intra= 0;
4297                     motion_x=0;
4298                     motion_y=0;
4299                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
4300                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
4301                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
4302                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
4303                     break;
4304                 case MB_TYPE_BACKWARD:
4305                     s->mv_dir = MV_DIR_BACKWARD;
4306                     s->mb_intra= 0;
4307                     motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
4308                     motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
4309                     break;
4310                 case MB_TYPE_FORWARD:
4311                     s->mv_dir = MV_DIR_FORWARD;
4312                     s->mb_intra= 0;
4313                     motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
4314                     motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
4315 //                    printf(" %d %d ", motion_x, motion_y);
4316                     break;
4317                 default:
4318                     motion_x=motion_y=0; //gcc warning fix
4319                     av_log(s->avctx, AV_LOG_ERROR, "illegal MB type\n");
4320                 }
4321
4322                 encode_mb(s, motion_x, motion_y);
4323
4324                 // RAL: Update last macroblock type
4325                 s->last_mv_dir = s->mv_dir;
4326             
4327 #ifdef CONFIG_RISKY
4328                 if (s->out_format == FMT_H263 && s->pict_type!=B_TYPE)
4329                     ff_h263_update_motion_val(s);
4330 #endif
4331                 
4332                 MPV_decode_mb(s, s->block);
4333             }
4334
4335             /* clean the MV table in I/P/S frames for direct mode in B frames */
4336             if(s->mb_intra /* && I,P,S_TYPE */){
4337                 s->p_mv_table[xy][0]=0;
4338                 s->p_mv_table[xy][1]=0;
4339             }
4340             
4341             if(s->flags&CODEC_FLAG_PSNR){
4342                 int w= 16;
4343                 int h= 16;
4344
4345                 if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
4346                 if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
4347
4348                 s->current_picture_ptr->error[0] += sse(
4349                     s, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
4350                     s->dest[0], w, h, s->linesize);
4351                 s->current_picture_ptr->error[1] += sse(
4352                     s, s->new_picture.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,
4353                     s->dest[1], w>>1, h>>1, s->uvlinesize);
4354                 s->current_picture_ptr->error[2] += sse(
4355                     s, s->new_picture.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,
4356                     s->dest[2], w>>1, h>>1, s->uvlinesize);
4357             }
4358             if(s->loop_filter)
4359                 ff_h263_loop_filter(s);
4360 //printf("MB %d %d bits\n", s->mb_x+s->mb_y*s->mb_stride, get_bit_count(&s->pb));
4361         }
4362     }
4363     emms_c();
4364
4365 #ifdef CONFIG_RISKY
4366     if(s->codec_id==CODEC_ID_MPEG4 && s->partitioned_frame)
4367         ff_mpeg4_merge_partitions(s);
4368
4369     if (s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type == I_TYPE)
4370         msmpeg4_encode_ext_header(s);
4371
4372     if(s->codec_id==CODEC_ID_MPEG4) 
4373         ff_mpeg4_stuffing(&s->pb);
4374 #endif
4375
4376     /* Send the last GOB if RTP */    
4377     if (s->avctx->rtp_callback) {
4378         flush_put_bits(&s->pb);
4379         pdif = pbBufPtr(&s->pb) - s->ptr_lastgob;
4380         /* Call the RTP callback to send the last GOB */
4381         s->avctx->rtp_callback(s->ptr_lastgob, pdif, 0);
4382     }
4383 }
4384
4385 #endif //CONFIG_ENCODERS
4386
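/**
 * Adaptive DCT-domain denoising used by the encoder's noise reduction.
 * For every non-zero coefficient the absolute value is accumulated in
 * dct_error_sum[intra][i] (so the offsets can adapt to the source), and the
 * current dct_offset[intra][i] is subtracted from the coefficient's magnitude,
 * clamping towards zero so small, noise-like coefficients are removed before
 * quantization.
 */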
4387 void ff_denoise_dct(MpegEncContext *s, DCTELEM *block){
4388     const int intra= s->mb_intra;
4389     int i;
4390
4391     s->dct_count[intra]++;
4392
4393     for(i=0; i<64; i++){
4394         int level= block[i];
4395
4396         if(level){
4397             if(level>0){
4398                 s->dct_error_sum[intra][i] += level;
4399                 level -= s->dct_offset[intra][i];
4400                 if(level<0) level=0;
4401             }else{
4402                 s->dct_error_sum[intra][i] -= level;
4403                 level += s->dct_offset[intra][i];
4404                 if(level>0) level=0;
4405             }
4406             block[i]= level;
4407         }
4408     }
4409 }
4410
4411 #ifdef CONFIG_ENCODERS
4412
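/**
 * Rate-distortion optimal ("trellis") quantization of one 8x8 block.
 * The block is transformed and, for each scan position, one or two candidate
 * quantized levels are kept; a dynamic-programming pass over (run, level)
 * pairs then picks, per position, the predecessor that minimizes
 * distortion + lambda * bits, using the per-codec VLC length tables (or the
 * escape length). Finally the best end position is chosen and the surviving
 * coefficients are written back through the IDCT permutation.
 */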
4413 static int dct_quantize_trellis_c(MpegEncContext *s, 
4414                         DCTELEM *block, int n,
4415                         int qscale, int *overflow){
4416     const int *qmat;
4417     const uint8_t *scantable= s->intra_scantable.scantable;
4418     int max=0;
4419     unsigned int threshold1, threshold2;
4420     int bias=0;
4421     int run_tab[65];
4422     int level_tab[65];
4423     int score_tab[65];
4424     int last_run=0;
4425     int last_level=0;
4426     int last_score= 0;
4427     int last_i= 0;
4428     int not_coded_score= 0;
4429     int coeff[3][64];
4430     int coeff_count[64];
4431     int qmul, qadd, start_i, last_non_zero, i, dc;
4432     const int esc_length= s->ac_esc_length;
4433     uint8_t * length;
4434     uint8_t * last_length;
4435     int score_limit=0;
4436     int left_limit= 0;
4437     const int lambda= s->lambda2 >> (FF_LAMBDA_SHIFT - 6);
4438     const int patch_table= s->out_format == FMT_MPEG1 && !s->mb_intra;
4439         
4440     s->dsp.fdct (block);
4441     
4442     if(s->dct_error_sum)
4443         ff_denoise_dct(s, block);
4444     
4445     qmul= qscale*16;
4446     qadd= ((qscale-1)|1)*8;
4447
4448     if (s->mb_intra) {
4449         int q;
4450         if (!s->h263_aic) {
4451             if (n < 4)
4452                 q = s->y_dc_scale;
4453             else
4454                 q = s->c_dc_scale;
4455             q = q << 3;
4456         } else{
4457             /* For AIC we skip quant/dequant of INTRADC */
4458             q = 1 << 3;
4459             qadd=0;
4460         }
4461             
4462         /* note: block[0] is assumed to be positive */
4463         block[0] = (block[0] + (q >> 1)) / q;
4464         start_i = 1;
4465         last_non_zero = 0;
4466         qmat = s->q_intra_matrix[qscale];
4467         if(s->mpeg_quant || s->out_format == FMT_MPEG1)
4468             bias= 1<<(QMAT_SHIFT-1);
4469         length     = s->intra_ac_vlc_length;
4470         last_length= s->intra_ac_vlc_last_length;
4471     } else {
4472         start_i = 0;
4473         last_non_zero = -1;
4474         qmat = s->q_inter_matrix[qscale];
4475         length     = s->inter_ac_vlc_length;
4476         last_length= s->inter_ac_vlc_last_length;
4477     }
4478
4479     threshold1= (1<<QMAT_SHIFT) - bias - 1;
4480     threshold2= (threshold1<<1);
4481
4482     for(i=start_i; i<64; i++) {
4483         const int j = scantable[i];
4484         const int k= i-start_i;
4485         int level = block[j];
4486         level = level * qmat[j];
4487
4488 //        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
4489 //           || bias-level >= (1<<(QMAT_SHIFT - 3))){
4490         if(((unsigned)(level+threshold1))>threshold2){
4491             if(level>0){
4492                 level= (bias + level)>>QMAT_SHIFT;
4493                 coeff[0][k]= level;
4494                 coeff[1][k]= level-1;
4495 //                coeff[2][k]= level-2;
4496             }else{
4497                 level= (bias - level)>>QMAT_SHIFT;
4498                 coeff[0][k]= -level;
4499                 coeff[1][k]= -level+1;
4500 //                coeff[2][k]= -level+2;
4501             }
4502             coeff_count[k]= FFMIN(level, 2);
4503             assert(coeff_count[k]);
4504             max |=level;
4505             last_non_zero = i;
4506         }else{
4507             coeff[0][k]= (level>>31)|1;
4508             coeff_count[k]= 1;
4509         }
4510     }
4511     
4512     *overflow= s->max_qcoeff < max; //overflow might have happened
4513     
4514     if(last_non_zero < start_i){
4515         memset(block + start_i, 0, (64-start_i)*sizeof(DCTELEM));
4516         return last_non_zero;
4517     }
4518
4519     score_tab[0]= 0;
4520     
4521     if(patch_table){
4522 //        length[UNI_AC_ENC_INDEX(0, 63)]=
4523 //        length[UNI_AC_ENC_INDEX(0, 65)]= 2;
4524     }
4525
4526     for(i=0; i<=last_non_zero - start_i; i++){
4527         int level_index, run, j;
4528         const int dct_coeff= block[ scantable[i + start_i] ];
4529         const int zero_distoration= dct_coeff*dct_coeff;
4530         int best_score=256*256*256*120;
4531
4532         last_score += zero_distoration;
4533         not_coded_score += zero_distoration;
4534         for(level_index=0; level_index < coeff_count[i]; level_index++){
4535             int distoration;
4536             int level= coeff[level_index][i];
4537             int unquant_coeff;
4538             
4539             assert(level);
4540
4541             if(s->out_format == FMT_H263){
4542                 if(level>0){
4543                     unquant_coeff= level*qmul + qadd;
4544                 }else{
4545                     unquant_coeff= level*qmul - qadd;
4546                 }
4547             }else{ //MPEG1
4548                 j= s->dsp.idct_permutation[ scantable[i + start_i] ]; //FIXME optimize
4549                 if(s->mb_intra){
4550                     if (level < 0) {
4551                         unquant_coeff = (int)((-level) * qscale * s->intra_matrix[j]) >> 3;
4552                         unquant_coeff = -((unquant_coeff - 1) | 1);
4553                     } else {
4554                         unquant_coeff = (int)(  level  * qscale * s->intra_matrix[j]) >> 3;
4555                         unquant_coeff =   (unquant_coeff - 1) | 1;
4556                     }
4557                 }else{
4558                     if (level < 0) {
4559                         unquant_coeff = ((((-level) << 1) + 1) * qscale * ((int) s->inter_matrix[j])) >> 4;
4560                         unquant_coeff = -((unquant_coeff - 1) | 1);
4561                     } else {
4562                         unquant_coeff = (((  level  << 1) + 1) * qscale * ((int) s->inter_matrix[j])) >> 4;
4563                         unquant_coeff =   (unquant_coeff - 1) | 1;
4564                     }
4565                 }
4566                 unquant_coeff<<= 3;
4567             }
4568
4569             distoration= (unquant_coeff - dct_coeff) * (unquant_coeff - dct_coeff);
4570             level+=64;
4571             if((level&(~127)) == 0){
4572                 for(run=0; run<=i - left_limit; run++){
4573                     int score= distoration + length[UNI_AC_ENC_INDEX(run, level)]*lambda;
4574                     score += score_tab[i-run];
4575                     
4576                     if(score < best_score){
4577                         best_score= 
4578                         score_tab[i+1]= score;
4579                         run_tab[i+1]= run;
4580                         level_tab[i+1]= level-64;
4581                     }
4582                 }
4583
4584                 if(s->out_format == FMT_H263){
4585                     for(run=0; run<=i - left_limit; run++){
4586                         int score= distoration + last_length[UNI_AC_ENC_INDEX(run, level)]*lambda;
4587                         score += score_tab[i-run];
4588                         if(score < last_score){
4589                             last_score= score;
4590                             last_run= run;
4591                             last_level= level-64;
4592                             last_i= i+1;
4593                         }
4594                     }
4595                 }
4596             }else{
4597                 distoration += esc_length*lambda;
4598                 for(run=0; run<=i - left_limit; run++){
4599                     int score= distoration + score_tab[i-run];
4600                     
4601                     if(score < best_score){
4602                         best_score= 
4603                         score_tab[i+1]= score;
4604                         run_tab[i+1]= run;
4605                         level_tab[i+1]= level-64;
4606                     }
4607                 }
4608
4609                 if(s->out_format == FMT_H263){
4610                     for(run=0; run<=i - left_limit; run++){
4611                         int score= distoration + score_tab[i-run];
4612                         if(score < last_score){
4613                             last_score= score;
4614                             last_run= run;
4615                             last_level= level-64;
4616                             last_i= i+1;
4617                         }
4618                     }
4619                 }
4620             }
4621         }
4622
4623         for(j=left_limit; j<=i; j++){
4624             score_tab[j] += zero_distoration;
4625         }
4626         score_limit+= zero_distoration;
4627         if(score_tab[i+1] < score_limit)
4628             score_limit= score_tab[i+1];
4629         
4630         //Note: there is a VLC code in MPEG-4 which is 1 bit shorter than another one with a shorter run and the same level
4631         while(score_tab[ left_limit ] > score_limit + lambda) left_limit++;
4632     
4633         if(patch_table){
4634 //            length[UNI_AC_ENC_INDEX(0, 63)]=
4635 //            length[UNI_AC_ENC_INDEX(0, 65)]= 3;
4636         }
4637     }
4638
4639     if(s->out_format != FMT_H263){
4640         last_score= 256*256*256*120;
4641         for(i= left_limit; i<=last_non_zero - start_i + 1; i++){
4642             int score= score_tab[i];
4643             if(i) score += lambda*2; //FIXME be more exact?
4644
4645             if(score < last_score){
4646                 last_score= score;
4647                 last_i= i;
4648                 last_level= level_tab[i];
4649                 last_run= run_tab[i];
4650             }
4651         }
4652     }
4653
4654     s->coded_score[n] = last_score - not_coded_score;
4655     
4656     dc= block[0];
4657     last_non_zero= last_i - 1 + start_i;
4658     memset(block + start_i, 0, (64-start_i)*sizeof(DCTELEM));
4659     
4660     if(last_non_zero < start_i)
4661         return last_non_zero;
4662
4663     if(last_non_zero == 0 && start_i == 0){
4664         int best_level= 0;
4665         int best_score= dc * dc;
4666         
4667         for(i=0; i<coeff_count[0]; i++){
4668             int level= coeff[i][0];
4669             int unquant_coeff, score, distoration;
4670
4671             if(s->out_format == FMT_H263){
4672                 if(level>0){
4673                     unquant_coeff= (level*qmul + qadd)>>3;
4674                 }else{
4675                     unquant_coeff= (level*qmul - qadd)>>3;
4676                 }
4677             }else{ //MPEG1
4678                     if (level < 0) {
4679                         unquant_coeff = ((((-level) << 1) + 1) * qscale * ((int) s->inter_matrix[0])) >> 4;
4680                         unquant_coeff = -((unquant_coeff - 1) | 1);
4681                     } else {
4682                         unquant_coeff = (((  level  << 1) + 1) * qscale * ((int) s->inter_matrix[0])) >> 4;
4683                         unquant_coeff =   (unquant_coeff - 1) | 1;
4684                     }
4685             }
4686             unquant_coeff = (unquant_coeff + 4) >> 3;
4687             unquant_coeff<<= 3 + 3;
4688
4689             distoration= (unquant_coeff - dc) * (unquant_coeff - dc);
4690             level+=64;
4691             if((level&(~127)) == 0)
4692                 score= distoration + last_length[UNI_AC_ENC_INDEX(0, level)]*lambda;
4693             else
4694                 score= distoration + esc_length*lambda;
4695
4696             if(score < best_score){
4697                 best_score= score;
4698                 best_level= level - 64;
4699             }
4700         }
4701         block[0]= best_level;
4702         s->coded_score[n] = best_score - dc*dc;
4703         if(best_level == 0) return -1;
4704         else                return last_non_zero;
4705     }
4706
4707     i= last_i;
4708     assert(last_level);
4709 //FIXME use permuted scantable
4710     block[ s->dsp.idct_permutation[ scantable[last_non_zero] ] ]= last_level;
4711     i -= last_run + 1;
4712     
4713     for(;i>0 ; i -= run_tab[i] + 1){
4714         const int j= s->dsp.idct_permutation[ scantable[i - 1 + start_i] ];
4715     
4716         block[j]= level_tab[i];
4717         assert(block[j]);
4718     }
4719
4720     return last_non_zero;
4721 }
4722
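/**
 * Plain (non-trellis) quantization of one 8x8 block: forward DCT, optional
 * denoising, then scalar quantization of each coefficient as
 * (|coeff| * qmat[j] + bias) >> QMAT_SHIFT with the sign restored.  Returns
 * the index of the last non-zero coefficient in scan order, flags a possible
 * overflow when a level exceeds max_qcoeff, and finally permutes the non-zero
 * coefficients to match the selected IDCT.
 */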
4723 static int dct_quantize_c(MpegEncContext *s, 
4724                         DCTELEM *block, int n,
4725                         int qscale, int *overflow)
4726 {
4727     int i, j, level, last_non_zero, q;
4728     const int *qmat;
4729     const uint8_t *scantable= s->intra_scantable.scantable;
4730     int bias;
4731     int max=0;
4732     unsigned int threshold1, threshold2;
4733
4734     s->dsp.fdct (block);
4735
4736     if(s->dct_error_sum)
4737         ff_denoise_dct(s, block);
4738
4739     if (s->mb_intra) {
4740         if (!s->h263_aic) {
4741             if (n < 4)
4742                 q = s->y_dc_scale;
4743             else
4744                 q = s->c_dc_scale;
4745             q = q << 3;
4746         } else
4747             /* For AIC we skip quant/dequant of INTRADC */
4748             q = 1 << 3;
4749             
4750         /* note: block[0] is assumed to be positive */
4751         block[0] = (block[0] + (q >> 1)) / q;
4752         i = 1;
4753         last_non_zero = 0;
4754         qmat = s->q_intra_matrix[qscale];
4755         bias= s->intra_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4756     } else {
4757         i = 0;
4758         last_non_zero = -1;
4759         qmat = s->q_inter_matrix[qscale];
4760         bias= s->inter_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4761     }
4762     threshold1= (1<<QMAT_SHIFT) - bias - 1;
4763     threshold2= (threshold1<<1);
4764
4765     for(;i<64;i++) {
4766         j = scantable[i];
4767         level = block[j];
4768         level = level * qmat[j];
4769
4770 //        if(   bias+level >= (1<<QMAT_SHIFT)
4771 //           || bias-level >= (1<<QMAT_SHIFT)){
4772         if(((unsigned)(level+threshold1))>threshold2){
4773             if(level>0){
4774                 level= (bias + level)>>QMAT_SHIFT;
4775                 block[j]= level;
4776             }else{
4777                 level= (bias - level)>>QMAT_SHIFT;
4778                 block[j]= -level;
4779             }
4780             max |=level;
4781             last_non_zero = i;
4782         }else{
4783             block[j]=0;
4784         }
4785     }
4786     *overflow= s->max_qcoeff < max; //overflow might have happened
4787     
4788     /* we need this permutation so that we correct the IDCT, we only permute the !=0 elements */
4789     if (s->dsp.idct_permutation_type != FF_NO_IDCT_PERM)
4790         ff_block_permute(block, s->dsp.idct_permutation, scantable, last_non_zero);
4791
4792     return last_non_zero;
4793 }
4794
4795 #endif //CONFIG_ENCODERS
4796
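/* MPEG-1 intra dequantization: the DC coefficient is scaled by the luma/chroma
   DC scale, every AC coefficient by qscale and the intra matrix (>> 3), and
   each result is forced odd ("oddification") for mismatch control. */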
4797 static void dct_unquantize_mpeg1_intra_c(MpegEncContext *s, 
4798                                    DCTELEM *block, int n, int qscale)
4799 {
4800     int i, level, nCoeffs;
4801     const uint16_t *quant_matrix;
4802
4803     nCoeffs= s->block_last_index[n];
4804     
4805     if (n < 4) 
4806         block[0] = block[0] * s->y_dc_scale;
4807     else
4808         block[0] = block[0] * s->c_dc_scale;
4809     /* XXX: only mpeg1 */
4810     quant_matrix = s->intra_matrix;
4811     for(i=1;i<=nCoeffs;i++) {
4812         int j= s->intra_scantable.permutated[i];
4813         level = block[j];
4814         if (level) {
4815             if (level < 0) {
4816                 level = -level;
4817                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
4818                 level = (level - 1) | 1;
4819                 level = -level;
4820             } else {
4821                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
4822                 level = (level - 1) | 1;
4823             }
4824             block[j] = level;
4825         }
4826     }
4827 }
4828
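/* MPEG-1 inter dequantization: each coefficient becomes
   ((2*|level| + 1) * qscale * inter_matrix[j]) >> 4, forced odd, with the
   original sign restored. */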
4829 static void dct_unquantize_mpeg1_inter_c(MpegEncContext *s, 
4830                                    DCTELEM *block, int n, int qscale)
4831 {
4832     int i, level, nCoeffs;
4833     const uint16_t *quant_matrix;
4834
4835     nCoeffs= s->block_last_index[n];
4836     
4837     quant_matrix = s->inter_matrix;
4838     for(i=0; i<=nCoeffs; i++) {
4839         int j= s->intra_scantable.permutated[i];
4840         level = block[j];
4841         if (level) {
4842             if (level < 0) {
4843                 level = -level;
4844                 level = (((level << 1) + 1) * qscale *
4845                          ((int) (quant_matrix[j]))) >> 4;
4846                 level = (level - 1) | 1;
4847                 level = -level;
4848             } else {
4849                 level = (((level << 1) + 1) * qscale *
4850                          ((int) (quant_matrix[j]))) >> 4;
4851                 level = (level - 1) | 1;
4852             }
4853             block[j] = level;
4854         }
4855     }
4856 }
4857
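/* MPEG-2 intra dequantization: like the MPEG-1 variant (DC scale, then
   qscale * intra_matrix >> 3 for the AC coefficients) but without forcing the
   results odd; with alternate scan all 63 AC positions are walked. */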
4858 static void dct_unquantize_mpeg2_intra_c(MpegEncContext *s, 
4859                                    DCTELEM *block, int n, int qscale)
4860 {
4861     int i, level, nCoeffs;
4862     const uint16_t *quant_matrix;
4863
4864     if(s->alternate_scan) nCoeffs= 63;
4865     else nCoeffs= s->block_last_index[n];
4866     
4867     if (n < 4) 
4868         block[0] = block[0] * s->y_dc_scale;
4869     else
4870         block[0] = block[0] * s->c_dc_scale;
4871     quant_matrix = s->intra_matrix;
4872     for(i=1;i<=nCoeffs;i++) {
4873         int j= s->intra_scantable.permutated[i];
4874         level = block[j];
4875         if (level) {
4876             if (level < 0) {
4877                 level = -level;
4878                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
4879                 level = -level;
4880             } else {
4881                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
4882             }
4883             block[j] = level;
4884         }
4885     }
4886 }
4887
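/* MPEG-2 inter dequantization: ((2*|level| + 1) * qscale * inter_matrix[j]) >> 4
   with the sign restored; mismatch control is applied by toggling the LSB of
   coefficient 63 so that the sum of all reconstructed coefficients is odd. */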
4888 static void dct_unquantize_mpeg2_inter_c(MpegEncContext *s, 
4889                                    DCTELEM *block, int n, int qscale)
4890 {
4891     int i, level, nCoeffs;
4892     const uint16_t *quant_matrix;
4893     int sum=-1;
4894
4895     if(s->alternate_scan) nCoeffs= 63;
4896     else nCoeffs= s->block_last_index[n];
4897     
4898     quant_matrix = s->inter_matrix;
4899     for(i=0; i<=nCoeffs; i++) {
4900         int j= s->intra_scantable.permutated[i];
4901         level = block[j];
4902         if (level) {
4903             if (level < 0) {
4904                 level = -level;
4905                 level = (((level << 1) + 1) * qscale *
4906                          ((int) (quant_matrix[j]))) >> 4;
4907                 level = -level;
4908             } else {
4909                 level = (((level << 1) + 1) * qscale *
4910                          ((int) (quant_matrix[j]))) >> 4;
4911             }
4912             block[j] = level;
4913             sum+=level;
4914         }
4915     }
4916     block[63]^=sum&1;
4917 }
4918
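/* H.263/MPEG-4 style intra dequantization:
   level' = level * 2*qscale +/- ((qscale-1)|1) depending on the sign
   (e.g. qscale=4, level=3 -> 3*8 + 3 = 27); with AC prediction all 63 AC
   coefficients are reconstructed, otherwise only up to the last coded one. */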
4919 static void dct_unquantize_h263_intra_c(MpegEncContext *s, 
4920                                   DCTELEM *block, int n, int qscale)
4921 {
4922     int i, level, qmul, qadd;
4923     int nCoeffs;
4924     
4925     assert(s->block_last_index[n]>=0);
4926     
4927     qmul = qscale << 1;
4928     
4929     if (!s->h263_aic) {
4930         if (n < 4) 
4931             block[0] = block[0] * s->y_dc_scale;
4932         else
4933             block[0] = block[0] * s->c_dc_scale;
4934         qadd = (qscale - 1) | 1;
4935     }else{
4936         qadd = 0;
4937     }
4938     if(s->ac_pred)
4939         nCoeffs=63;
4940     else
4941         nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
4942
4943     for(i=1; i<=nCoeffs; i++) {
4944         level = block[i];
4945         if (level) {
4946             if (level < 0) {
4947                 level = level * qmul - qadd;
4948             } else {
4949                 level = level * qmul + qadd;
4950             }
4951             block[i] = level;
4952         }
4953     }
4954 }
4955
4956 static void dct_unquantize_h263_inter_c(MpegEncContext *s, 
4957                                   DCTELEM *block, int n, int qscale)
4958 {
4959     int i, level, qmul, qadd;
4960     int nCoeffs;
4961     
4962     assert(s->block_last_index[n]>=0);
4963     
4964     qadd = (qscale - 1) | 1;
4965     qmul = qscale << 1;
4966     
4967     nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
4968
4969     for(i=0; i<=nCoeffs; i++) {
4970         level = block[i];
4971         if (level) {
4972             if (level < 0) {
4973                 level = level * qmul - qadd;
4974             } else {
4975                 level = level * qmul + qadd;
4976             }
4977             block[i] = level;
4978         }
4979     }
4980 }
4981
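/* AVOption table used by the MPEG-4 / MSMPEG-4 / WMV1 encoders declared below:
   each entry maps a user-visible option name to a codec context field together
   with its allowed range and default value. */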
4982 static const AVOption mpeg4_options[] =
4983 {
4984     AVOPTION_CODEC_INT("bitrate", "desired video bitrate", bit_rate, 4, 240000000, 800000),
4985     AVOPTION_CODEC_INT("ratetol", "number of bits the bitstream is allowed to diverge from the reference; "
4986                        "the reference can be CBR (for CBR pass1) or VBR (for pass2)",
4987                        bit_rate_tolerance, 4, 240000000, 8000),
4988     AVOPTION_CODEC_INT("qmin", "minimum quantizer", qmin, 1, 31, 2),
4989     AVOPTION_CODEC_INT("qmax", "maximum quantizer", qmax, 1, 31, 31),
4990     AVOPTION_CODEC_STRING("rc_eq", "rate control equation",
4991                           rc_eq, "tex^qComp,option1,options2", 0),
4992     AVOPTION_CODEC_INT("rc_minrate", "rate control minimum bitrate",
4993                        rc_min_rate, 4, 24000000, 0),
4994     AVOPTION_CODEC_INT("rc_maxrate", "rate control maximum bitrate",
4995                        rc_max_rate, 4, 24000000, 0),
4996     AVOPTION_CODEC_DOUBLE("rc_buf_aggresivity", "rate control buffer aggressivity",
4997                           rc_buffer_aggressivity, 4, 24000000, 0),
4998     AVOPTION_CODEC_DOUBLE("rc_initial_cplx", "initial complexity for pass1 ratecontrol",
4999                           rc_initial_cplx, 0., 9999999., 0),
5000     AVOPTION_CODEC_DOUBLE("i_quant_factor", "qscale factor between p and i frames",
5001                           i_quant_factor, 0., 0., 0),
5002     AVOPTION_CODEC_DOUBLE("i_quant_offset", "qscale offset between p and i frames",
5003                           i_quant_offset, -999999., 999999., 0),
5004     AVOPTION_CODEC_INT("dct_algo", "dct algorithm",
5005                        dct_algo, 0, 5, 0), // fixme - "Auto,FastInt,Int,MMX,MLib,Altivec"
5006     AVOPTION_CODEC_DOUBLE("lumi_masking", "luminance masking",
5007                           lumi_masking, 0., 999999., 0),
5008     AVOPTION_CODEC_DOUBLE("temporal_cplx_masking", "temporal complexity masking",
5009                           temporal_cplx_masking, 0., 999999., 0),
5010     AVOPTION_CODEC_DOUBLE("spatial_cplx_masking", "spatial complexity masking",
5011                           spatial_cplx_masking, 0., 999999., 0),
5012     AVOPTION_CODEC_DOUBLE("p_masking", "p block masking",
5013                           p_masking, 0., 999999., 0),
5014     AVOPTION_CODEC_DOUBLE("dark_masking", "darkness masking",
5015                           dark_masking, 0., 999999., 0),
5016     AVOPTION_CODEC_INT("idct_algo", "idct algorithm",
5017                        idct_algo, 0, 8, 0), // fixme - "Auto,Int,Simple,SimpleMMX,LibMPEG2MMX,PS2,MLib,ARM,Altivec"
5018
5019     AVOPTION_CODEC_INT("mb_qmin", "minimum MB quantizer",
5020                        mb_qmin, 0, 8, 0),
5021     AVOPTION_CODEC_INT("mb_qmax", "maximum MB quantizer",
5022                        mb_qmax, 0, 8, 0),
5023
5024     AVOPTION_CODEC_INT("me_cmp", "ME compare function",
5025                        me_cmp, 0, 24000000, 0),
5026     AVOPTION_CODEC_INT("me_sub_cmp", "subpixel ME compare function",
5027                        me_sub_cmp, 0, 24000000, 0),
5028
5029
5030     AVOPTION_CODEC_INT("dia_size", "ME diamond size & shape",
5031                        dia_size, 0, 24000000, 0),
5032     AVOPTION_CODEC_INT("last_predictor_count", "amount of previous MV predictors",
5033                        last_predictor_count, 0, 24000000, 0),
5034
5035     AVOPTION_CODEC_INT("pre_me", "pre pass for ME",
5036                        pre_me, 0, 24000000, 0),
5037     AVOPTION_CODEC_INT("me_pre_cmp", "ME pre pass compare function",
5038                        me_pre_cmp, 0, 24000000, 0),
5039
5040     AVOPTION_CODEC_INT("me_range", "maximum ME search range",
5041                        me_range, 0, 24000000, 0),
5042     AVOPTION_CODEC_INT("pre_dia_size", "ME pre pass diamond size & shape",
5043                        pre_dia_size, 0, 24000000, 0),
5044     AVOPTION_CODEC_INT("me_subpel_quality", "subpel ME quality",
5045                        me_subpel_quality, 0, 24000000, 0),
5048     AVOPTION_CODEC_FLAG("psnr", "calculate PSNR of compressed frames",
5049                         flags, CODEC_FLAG_PSNR, 0),
5050     AVOPTION_CODEC_RCOVERRIDE("rc_override", "ratecontrol override (=startframe,endframe,qscale,quality_factor)",
5051                               rc_override),
5052     AVOPTION_SUB(avoptions_common),
5053     AVOPTION_END()
5054 };
5055
5056 #ifdef CONFIG_ENCODERS
5057
5058 AVCodec mpeg1video_encoder = {
5059     "mpeg1video",
5060     CODEC_TYPE_VIDEO,
5061     CODEC_ID_MPEG1VIDEO,
5062     sizeof(MpegEncContext),
5063     MPV_encode_init,
5064     MPV_encode_picture,
5065     MPV_encode_end,
5066 };
5067
5068 #ifdef CONFIG_RISKY
5069
5070 AVCodec mpeg2video_encoder = {
5071     "mpeg2video",
5072     CODEC_TYPE_VIDEO,
5073     CODEC_ID_MPEG2VIDEO,
5074     sizeof(MpegEncContext),
5075     MPV_encode_init,
5076     MPV_encode_picture,
5077     MPV_encode_end,
5078 };
5079
5080 AVCodec h263_encoder = {
5081     "h263",
5082     CODEC_TYPE_VIDEO,
5083     CODEC_ID_H263,
5084     sizeof(MpegEncContext),
5085     MPV_encode_init,
5086     MPV_encode_picture,
5087     MPV_encode_end,
5088 };
5089
5090 AVCodec h263p_encoder = {
5091     "h263p",
5092     CODEC_TYPE_VIDEO,
5093     CODEC_ID_H263P,
5094     sizeof(MpegEncContext),
5095     MPV_encode_init,
5096     MPV_encode_picture,
5097     MPV_encode_end,
5098 };
5099
5100 AVCodec flv_encoder = {
5101     "flv",
5102     CODEC_TYPE_VIDEO,
5103     CODEC_ID_FLV1,
5104     sizeof(MpegEncContext),
5105     MPV_encode_init,
5106     MPV_encode_picture,
5107     MPV_encode_end,
5108 };
5109
5110 AVCodec rv10_encoder = {
5111     "rv10",
5112     CODEC_TYPE_VIDEO,
5113     CODEC_ID_RV10,
5114     sizeof(MpegEncContext),
5115     MPV_encode_init,
5116     MPV_encode_picture,
5117     MPV_encode_end,
5118 };
5119
5120 AVCodec mpeg4_encoder = {
5121     "mpeg4",
5122     CODEC_TYPE_VIDEO,
5123     CODEC_ID_MPEG4,
5124     sizeof(MpegEncContext),
5125     MPV_encode_init,
5126     MPV_encode_picture,
5127     MPV_encode_end,
5128     .options = mpeg4_options,
5129 };
5130
5131 AVCodec msmpeg4v1_encoder = {
5132     "msmpeg4v1",
5133     CODEC_TYPE_VIDEO,
5134     CODEC_ID_MSMPEG4V1,
5135     sizeof(MpegEncContext),
5136     MPV_encode_init,
5137     MPV_encode_picture,
5138     MPV_encode_end,
5139     .options = mpeg4_options,
5140 };
5141
5142 AVCodec msmpeg4v2_encoder = {
5143     "msmpeg4v2",
5144     CODEC_TYPE_VIDEO,
5145     CODEC_ID_MSMPEG4V2,
5146     sizeof(MpegEncContext),
5147     MPV_encode_init,
5148     MPV_encode_picture,
5149     MPV_encode_end,
5150     .options = mpeg4_options,
5151 };
5152
5153 AVCodec msmpeg4v3_encoder = {
5154     "msmpeg4",
5155     CODEC_TYPE_VIDEO,
5156     CODEC_ID_MSMPEG4V3,
5157     sizeof(MpegEncContext),
5158     MPV_encode_init,
5159     MPV_encode_picture,
5160     MPV_encode_end,
5161     .options = mpeg4_options,
5162 };
5163
5164 AVCodec wmv1_encoder = {
5165     "wmv1",
5166     CODEC_TYPE_VIDEO,
5167     CODEC_ID_WMV1,
5168     sizeof(MpegEncContext),
5169     MPV_encode_init,
5170     MPV_encode_picture,
5171     MPV_encode_end,
5172     .options = mpeg4_options,
5173 };
5174
5175 #endif
5176
5177 AVCodec mjpeg_encoder = {
5178     "mjpeg",
5179     CODEC_TYPE_VIDEO,
5180     CODEC_ID_MJPEG,
5181     sizeof(MpegEncContext),
5182     MPV_encode_init,
5183     MPV_encode_picture,
5184     MPV_encode_end,
5185 };
5186
5187 #endif //CONFIG_ENCODERS