1 /*
2  * The simplest mpeg encoder (well, it was the simplest!)
3  * Copyright (c) 2000,2001 Fabrice Bellard.
4  *
5  * This library is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU Lesser General Public
7  * License as published by the Free Software Foundation; either
8  * version 2 of the License, or (at your option) any later version.
9  *
10  * This library is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * Lesser General Public License for more details.
14  *
15  * You should have received a copy of the GNU Lesser General Public
16  * License along with this library; if not, write to the Free Software
17  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
18  *
19  * 4MV & hq & b-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
20  */
21  
22 /**
23  * @file mpegvideo.c
24  * The simplest mpeg encoder (well, it was the simplest!).
25  */ 
26  
27 #include <limits.h>
28 #include "avcodec.h"
29 #include "dsputil.h"
30 #include "mpegvideo.h"
31
32 #ifdef USE_FASTMEMCPY
33 #include "fastmemcpy.h"
34 #endif
35
36 //#undef NDEBUG
37 //#include <assert.h>
38
39 #ifdef CONFIG_ENCODERS
40 static void encode_picture(MpegEncContext *s, int picture_number);
41 #endif //CONFIG_ENCODERS
42 static void dct_unquantize_mpeg1_c(MpegEncContext *s, 
43                                    DCTELEM *block, int n, int qscale);
44 static void dct_unquantize_mpeg2_c(MpegEncContext *s,
45                                    DCTELEM *block, int n, int qscale);
46 static void dct_unquantize_h263_c(MpegEncContext *s, 
47                                   DCTELEM *block, int n, int qscale);
48 static void draw_edges_c(uint8_t *buf, int wrap, int width, int height, int w);
49 #ifdef CONFIG_ENCODERS
50 static int dct_quantize_c(MpegEncContext *s, DCTELEM *block, int n, int qscale, int *overflow);
51 static int dct_quantize_trellis_c(MpegEncContext *s, DCTELEM *block, int n, int qscale, int *overflow);
52 static int sse_mb(MpegEncContext *s);
53 #endif //CONFIG_ENCODERS
54
55 #ifdef HAVE_XVMC
56 extern int  XVMC_field_start(MpegEncContext*s, AVCodecContext *avctx);
57 extern void XVMC_field_end(MpegEncContext *s);
58 extern void XVMC_decode_mb(MpegEncContext *s, DCTELEM block[6][64]);
59 #endif
60
61 void (*draw_edges)(uint8_t *buf, int wrap, int width, int height, int w)= draw_edges_c;
62
63
64 /* enable all paranoid tests for rounding, overflows, etc... */
65 //#define PARANOID
66
67 //#define DEBUG
68
69
70 /* for jpeg fast DCT */
71 #define CONST_BITS 14
72
73 static const uint16_t aanscales[64] = {
74     /* precomputed values scaled up by 14 bits */
75     16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
76     22725, 31521, 29692, 26722, 22725, 17855, 12299,  6270,
77     21407, 29692, 27969, 25172, 21407, 16819, 11585,  5906,
78     19266, 26722, 25172, 22654, 19266, 15137, 10426,  5315,
79     16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
80     12873, 17855, 16819, 15137, 12873, 10114,  6967,  3552,
81     8867 , 12299, 11585, 10426,  8867,  6967,  4799,  2446,
82     4520 ,  6270,  5906,  5315,  4520,  3552,  2446,  1247
83 };
84
85 static const uint8_t h263_chroma_roundtab[16] = {
86 //  0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15
87     0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2,
88 };
89
90 #ifdef CONFIG_ENCODERS
91 static uint8_t (*default_mv_penalty)[MAX_MV*2+1]=NULL;
92 static uint8_t default_fcode_tab[MAX_MV*2+1];
93
94 enum PixelFormat ff_yuv420p_list[2]= {PIX_FMT_YUV420P, -1};
95
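/**
 * builds the per-qscale quantization factor tables from a quantization matrix,
 * so the quantizer can use a multiply/shift instead of a division.
 * qmat holds 32bit factors (their exact form depends on which fdct is in use),
 * qmat16 holds 16bit factor/bias pairs for the MMX quantizer.
 */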
96 static void convert_matrix(MpegEncContext *s, int (*qmat)[64], uint16_t (*qmat16)[2][64],
97                            const uint16_t *quant_matrix, int bias, int qmin, int qmax)
98 {
99     int qscale;
100
101     for(qscale=qmin; qscale<=qmax; qscale++){
102         int i;
103         if (s->dsp.fdct == ff_jpeg_fdct_islow) {
104             for(i=0;i<64;i++) {
105                 const int j= s->dsp.idct_permutation[i];
106                 /* 16 <= qscale * quant_matrix[i] <= 7905 */
107                 /* 19952         <= aanscales[i] * qscale * quant_matrix[i]           <= 249205026 */
108                 /* (1<<36)/19952 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= (1<<36)/249205026 */
109                 /* 3444240       >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= 275 */
110                 
111                 qmat[qscale][i] = (int)((uint64_t_C(1) << QMAT_SHIFT) / 
112                                 (qscale * quant_matrix[j]));
113             }
114         } else if (s->dsp.fdct == fdct_ifast) {
115             for(i=0;i<64;i++) {
116                 const int j= s->dsp.idct_permutation[i];
117                 /* 16 <= qscale * quant_matrix[i] <= 7905 */
118                 /* 19952         <= aanscales[i] * qscale * quant_matrix[i]           <= 249205026 */
119                 /* (1<<36)/19952 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= (1<<36)/249205026 */
120                 /* 3444240       >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= 275 */
121                 
122                 qmat[qscale][i] = (int)((uint64_t_C(1) << (QMAT_SHIFT + 14)) / 
123                                 (aanscales[i] * qscale * quant_matrix[j]));
124             }
125         } else {
126             for(i=0;i<64;i++) {
127                 const int j= s->dsp.idct_permutation[i];
128                 /* We can safely suppose that 16 <= quant_matrix[i] <= 255
129                    So 16           <= qscale * quant_matrix[i]             <= 7905
130                    so (1<<19) / 16 >= (1<<19) / (qscale * quant_matrix[i]) >= (1<<19) / 7905
131                    so 32768        >= (1<<19) / (qscale * quant_matrix[i]) >= 67
132                 */
133                 qmat[qscale][i] = (int)((uint64_t_C(1) << QMAT_SHIFT) / (qscale * quant_matrix[j]));
134 //                qmat  [qscale][i] = (1 << QMAT_SHIFT_MMX) / (qscale * quant_matrix[i]);
135                 qmat16[qscale][0][i] = (1 << QMAT_SHIFT_MMX) / (qscale * quant_matrix[j]);
136
137                 if(qmat16[qscale][0][i]==0 || qmat16[qscale][0][i]==128*256) qmat16[qscale][0][i]=128*256-1;
138                 qmat16[qscale][1][i]= ROUNDED_DIV(bias<<(16-QUANT_BIAS_SHIFT), qmat16[qscale][0][i]);
139             }
140         }
141     }
142 }
143
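/**
 * derives the current qscale from lambda and clips it to the qmin/qmax range,
 * then updates lambda2 accordingly.
 */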
144 static inline void update_qscale(MpegEncContext *s){
145     s->qscale= (s->lambda*139 + FF_LAMBDA_SCALE*64) >> (FF_LAMBDA_SHIFT + 7);
146     s->qscale= clip(s->qscale, s->avctx->qmin, s->avctx->qmax);
147     
148     s->lambda2= (s->lambda*s->lambda + FF_LAMBDA_SCALE/2) >> FF_LAMBDA_SHIFT;
149 }
150 #endif //CONFIG_ENCODERS
151
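/**
 * initializes a ScanTable: stores the source scan order, the IDCT-permuted
 * scan order and, for each position, the highest permuted index seen so far
 * (raster_end), plus the inverse permutation on PowerPC.
 */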
152 void ff_init_scantable(uint8_t *permutation, ScanTable *st, const uint8_t *src_scantable){
153     int i;
154     int end;
155     
156     st->scantable= src_scantable;
157
158     for(i=0; i<64; i++){
159         int j;
160         j = src_scantable[i];
161         st->permutated[i] = permutation[j];
162 #ifdef ARCH_POWERPC
163         st->inverse[j] = i;
164 #endif
165     }
166     
167     end=-1;
168     for(i=0; i<64; i++){
169         int j;
170         j = st->permutated[i];
171         if(j>end) end=j;
172         st->raster_end[i]= end;
173     }
174 }
175
176 #ifdef CONFIG_ENCODERS
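/**
 * writes a user supplied quantization matrix to the bitstream in zigzag order
 * (preceded by a 1 flag bit), or a single 0 bit if no matrix is given.
 */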
177 void ff_write_quant_matrix(PutBitContext *pb, int16_t *matrix){
178     int i;
179
180     if(matrix){
181         put_bits(pb, 1, 1);
182         for(i=0;i<64;i++) {
183             put_bits(pb, 8, matrix[ ff_zigzag_direct[i] ]);
184         }
185     }else
186         put_bits(pb, 1, 0);
187 }
188 #endif //CONFIG_ENCODERS
189
190 /* init common dct for both encoder and decoder */
191 int DCT_common_init(MpegEncContext *s)
192 {
193     s->dct_unquantize_h263 = dct_unquantize_h263_c;
194     s->dct_unquantize_mpeg1 = dct_unquantize_mpeg1_c;
195     s->dct_unquantize_mpeg2 = dct_unquantize_mpeg2_c;
196
197 #ifdef CONFIG_ENCODERS
198     s->dct_quantize= dct_quantize_c;
199 #endif
200         
201 #ifdef HAVE_MMX
202     MPV_common_init_mmx(s);
203 #endif
204 #ifdef ARCH_ALPHA
205     MPV_common_init_axp(s);
206 #endif
207 #ifdef HAVE_MLIB
208     MPV_common_init_mlib(s);
209 #endif
210 #ifdef HAVE_MMI
211     MPV_common_init_mmi(s);
212 #endif
213 #ifdef ARCH_ARMV4L
214     MPV_common_init_armv4l(s);
215 #endif
216 #ifdef ARCH_POWERPC
217     MPV_common_init_ppc(s);
218 #endif
219
220 #ifdef CONFIG_ENCODERS
221     s->fast_dct_quantize= s->dct_quantize;
222
223     if(s->flags&CODEC_FLAG_TRELLIS_QUANT){
224         s->dct_quantize= dct_quantize_trellis_c; //move before MPV_common_init_*
225     }
226
227 #endif //CONFIG_ENCODERS
228
229     /* load & permute scantables
230        note: only wmv uses different ones
231     */
232     ff_init_scantable(s->dsp.idct_permutation, &s->inter_scantable  , ff_zigzag_direct);
233     ff_init_scantable(s->dsp.idct_permutation, &s->intra_scantable  , ff_zigzag_direct);
234     ff_init_scantable(s->dsp.idct_permutation, &s->intra_h_scantable, ff_alternate_horizontal_scan);
235     ff_init_scantable(s->dsp.idct_permutation, &s->intra_v_scantable, ff_alternate_vertical_scan);
236
237     s->picture_structure= PICT_FRAME;
238     
239     return 0;
240 }
241
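/**
 * copies a Picture and marks the copy as not owning its buffers.
 */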
242 static void copy_picture(Picture *dst, Picture *src){
243     *dst = *src;
244     dst->type= FF_BUFFER_TYPE_COPY;
245 }
246
247 /**
248  * allocates a Picture
249  * The pixels are allocated/set by calling get_buffer() if shared=0
250  */
251 static int alloc_picture(MpegEncContext *s, Picture *pic, int shared){
252     const int big_mb_num= s->mb_stride*(s->mb_height+1) + 1; //the +1 is needed so memset(,,stride*height) doesn't sig11
253     const int mb_array_size= s->mb_stride*s->mb_height;
254     int i;
255     
256     if(shared){
257         assert(pic->data[0]);
258         assert(pic->type == 0 || pic->type == FF_BUFFER_TYPE_SHARED);
259         pic->type= FF_BUFFER_TYPE_SHARED;
260     }else{
261         int r;
262         
263         assert(!pic->data[0]);
264         
265         r= s->avctx->get_buffer(s->avctx, (AVFrame*)pic);
266         
267         if(r<0 || !pic->age || !pic->type || !pic->data[0]){
268             fprintf(stderr, "get_buffer() failed (%d %d %d %p)\n", r, pic->age, pic->type, pic->data[0]);
269             return -1;
270         }
271
272         if(s->linesize && (s->linesize != pic->linesize[0] || s->uvlinesize != pic->linesize[1])){
273             fprintf(stderr, "get_buffer() failed (stride changed)\n");
274             return -1;
275         }
276
277         if(pic->linesize[1] != pic->linesize[2]){
278             fprintf(stderr, "get_buffer() failed (uv stride mismatch)\n");
279             return -1;
280         }
281
282         s->linesize  = pic->linesize[0];
283         s->uvlinesize= pic->linesize[1];
284     }
285     
286     if(pic->qscale_table==NULL){
287         if (s->encoding) {        
288             CHECKED_ALLOCZ(pic->mb_var   , mb_array_size * sizeof(int16_t))
289             CHECKED_ALLOCZ(pic->mc_mb_var, mb_array_size * sizeof(int16_t))
290             CHECKED_ALLOCZ(pic->mb_mean  , mb_array_size * sizeof(int8_t))
291             CHECKED_ALLOCZ(pic->mb_cmp_score, mb_array_size * sizeof(int32_t))
292         }
293
294         CHECKED_ALLOCZ(pic->mbskip_table , mb_array_size * sizeof(uint8_t)+2) //the +2 is for the slice end check
295         CHECKED_ALLOCZ(pic->qscale_table , mb_array_size * sizeof(uint8_t))
296         CHECKED_ALLOCZ(pic->mb_type_base , big_mb_num    * sizeof(int))
297         pic->mb_type= pic->mb_type_base + s->mb_stride+1;
298         if(s->out_format == FMT_H264){
299             for(i=0; i<2; i++){
300                 CHECKED_ALLOCZ(pic->motion_val[i], 2 * 16 * s->mb_num * sizeof(uint16_t))
301                 CHECKED_ALLOCZ(pic->ref_index[i] , 4 * s->mb_num * sizeof(uint8_t))
302             }
303         }
304         pic->qstride= s->mb_stride;
305         CHECKED_ALLOCZ(pic->pan_scan , 1 * sizeof(AVPanScan))
306     }
307
308     //it might be nicer if the application would keep track of these but it would require an API change
309     memmove(s->prev_pict_types+1, s->prev_pict_types, PREV_PICT_TYPES_BUFFER_SIZE-1);
310     s->prev_pict_types[0]= s->pict_type;
311     if(pic->age < PREV_PICT_TYPES_BUFFER_SIZE && s->prev_pict_types[pic->age] == B_TYPE)
312         pic->age= INT_MAX; // skipped MBs in B-frames are quite rare in MPEG-1/2 and it's a bit tricky to skip them anyway
313     
314     return 0;
315 fail: //for the CHECKED_ALLOCZ macro
316     return -1;
317 }
318
319 /**
320  * deallocates a picture
321  */
322 static void free_picture(MpegEncContext *s, Picture *pic){
323     int i;
324
325     if(pic->data[0] && pic->type!=FF_BUFFER_TYPE_SHARED){
326         s->avctx->release_buffer(s->avctx, (AVFrame*)pic);
327     }
328
329     av_freep(&pic->mb_var);
330     av_freep(&pic->mc_mb_var);
331     av_freep(&pic->mb_mean);
332     av_freep(&pic->mb_cmp_score);
333     av_freep(&pic->mbskip_table);
334     av_freep(&pic->qscale_table);
335     av_freep(&pic->mb_type_base);
336     av_freep(&pic->pan_scan);
337     pic->mb_type= NULL;
338     for(i=0; i<2; i++){
339         av_freep(&pic->motion_val[i]);
340         av_freep(&pic->ref_index[i]);
341     }
342     
343     if(pic->type == FF_BUFFER_TYPE_SHARED){
344         for(i=0; i<4; i++){
345             pic->base[i]=
346             pic->data[i]= NULL;
347         }
348         pic->type= 0;        
349     }
350 }
351
352 /* init common structure for both encoder and decoder */
353 int MPV_common_init(MpegEncContext *s)
354 {
355     int y_size, c_size, yc_size, i, mb_array_size, x, y;
356
357     dsputil_init(&s->dsp, s->avctx);
358     DCT_common_init(s);
359
360     s->flags= s->avctx->flags;
361
362     s->mb_width  = (s->width  + 15) / 16;
363     s->mb_height = (s->height + 15) / 16;
364     s->mb_stride = s->mb_width + 1;
365     mb_array_size= s->mb_height * s->mb_stride;
366
367     /* set default edge pos, will be overridden in decode_header if needed */
368     s->h_edge_pos= s->mb_width*16;
369     s->v_edge_pos= s->mb_height*16;
370
371     s->mb_num = s->mb_width * s->mb_height;
372     
373     s->block_wrap[0]=
374     s->block_wrap[1]=
375     s->block_wrap[2]=
376     s->block_wrap[3]= s->mb_width*2 + 2;
377     s->block_wrap[4]=
378     s->block_wrap[5]= s->mb_width + 2;
379
380     y_size = (2 * s->mb_width + 2) * (2 * s->mb_height + 2);
381     c_size = (s->mb_width + 2) * (s->mb_height + 2);
382     yc_size = y_size + 2 * c_size;
383
384     /* convert fourcc to upper case */
385     s->avctx->codec_tag=   toupper( s->avctx->codec_tag     &0xFF)          
386                         + (toupper((s->avctx->codec_tag>>8 )&0xFF)<<8 )
387                         + (toupper((s->avctx->codec_tag>>16)&0xFF)<<16) 
388                         + (toupper((s->avctx->codec_tag>>24)&0xFF)<<24);
389
390     s->avctx->stream_codec_tag=   toupper( s->avctx->stream_codec_tag     &0xFF)          
391                                + (toupper((s->avctx->stream_codec_tag>>8 )&0xFF)<<8 )
392                                + (toupper((s->avctx->stream_codec_tag>>16)&0xFF)<<16) 
393                                + (toupper((s->avctx->stream_codec_tag>>24)&0xFF)<<24);
394
395     CHECKED_ALLOCZ(s->allocated_edge_emu_buffer, (s->width+64)*2*17*2); //(width + edge + align)*interlaced*MBsize*tolerance
396     s->edge_emu_buffer= s->allocated_edge_emu_buffer + (s->width+64)*2*17;
397
398     s->avctx->coded_frame= (AVFrame*)&s->current_picture;
399
400     CHECKED_ALLOCZ(s->mb_index2xy, (s->mb_num+1)*sizeof(int)) //error resilience code looks cleaner with this
401     for(y=0; y<s->mb_height; y++){
402         for(x=0; x<s->mb_width; x++){
403             s->mb_index2xy[ x + y*s->mb_width ] = x + y*s->mb_stride;
404         }
405     }
406     s->mb_index2xy[ s->mb_height*s->mb_width ] = (s->mb_height-1)*s->mb_stride + s->mb_width; //FIXME really needed?
407     
408     if (s->encoding) {
409         int mv_table_size= s->mb_stride * (s->mb_height+2) + 1;
410
411         /* Allocate MV tables */
412         CHECKED_ALLOCZ(s->p_mv_table_base            , mv_table_size * 2 * sizeof(int16_t))
413         CHECKED_ALLOCZ(s->b_forw_mv_table_base       , mv_table_size * 2 * sizeof(int16_t))
414         CHECKED_ALLOCZ(s->b_back_mv_table_base       , mv_table_size * 2 * sizeof(int16_t))
415         CHECKED_ALLOCZ(s->b_bidir_forw_mv_table_base , mv_table_size * 2 * sizeof(int16_t))
416         CHECKED_ALLOCZ(s->b_bidir_back_mv_table_base , mv_table_size * 2 * sizeof(int16_t))
417         CHECKED_ALLOCZ(s->b_direct_mv_table_base     , mv_table_size * 2 * sizeof(int16_t))
418         s->p_mv_table           = s->p_mv_table_base            + s->mb_stride + 1;
419         s->b_forw_mv_table      = s->b_forw_mv_table_base       + s->mb_stride + 1;
420         s->b_back_mv_table      = s->b_back_mv_table_base       + s->mb_stride + 1;
421         s->b_bidir_forw_mv_table= s->b_bidir_forw_mv_table_base + s->mb_stride + 1;
422         s->b_bidir_back_mv_table= s->b_bidir_back_mv_table_base + s->mb_stride + 1;
423         s->b_direct_mv_table    = s->b_direct_mv_table_base     + s->mb_stride + 1;
424
425         //FIXME should be linesize instead of s->width*2 but that isn't known before get_buffer()
426         CHECKED_ALLOCZ(s->me.scratchpad,  s->width*2*16*3*sizeof(uint8_t)) 
427         
428         CHECKED_ALLOCZ(s->me.map      , ME_MAP_SIZE*sizeof(uint32_t))
429         CHECKED_ALLOCZ(s->me.score_map, ME_MAP_SIZE*sizeof(uint32_t))
430
431         if(s->codec_id==CODEC_ID_MPEG4){
432             CHECKED_ALLOCZ(s->tex_pb_buffer, PB_BUFFER_SIZE);
433             CHECKED_ALLOCZ(   s->pb2_buffer, PB_BUFFER_SIZE);
434         }
435         
436         if(s->msmpeg4_version){
437             CHECKED_ALLOCZ(s->ac_stats, 2*2*(MAX_LEVEL+1)*(MAX_RUN+1)*2*sizeof(int));
438         }
439         CHECKED_ALLOCZ(s->avctx->stats_out, 256);
440
441         /* Allocate MB type table */
442         CHECKED_ALLOCZ(s->mb_type  , mb_array_size * sizeof(uint8_t)) //needed for encoding
443         
444         CHECKED_ALLOCZ(s->lambda_table, mb_array_size * sizeof(int))
445         
446         CHECKED_ALLOCZ(s->q_intra_matrix, 64*32 * sizeof(int))
447         CHECKED_ALLOCZ(s->q_inter_matrix, 64*32 * sizeof(int))
448         CHECKED_ALLOCZ(s->q_intra_matrix16, 64*32*2 * sizeof(uint16_t))
449         CHECKED_ALLOCZ(s->q_inter_matrix16, 64*32*2 * sizeof(uint16_t))
450     }
451         
452     CHECKED_ALLOCZ(s->picture, MAX_PICTURE_COUNT * sizeof(Picture))
453
454     CHECKED_ALLOCZ(s->error_status_table, mb_array_size*sizeof(uint8_t))
455     
456     if (s->out_format == FMT_H263 || s->encoding) {
457         int size;
458
459         /* MV prediction */
460         size = (2 * s->mb_width + 2) * (2 * s->mb_height + 2);
461         CHECKED_ALLOCZ(s->motion_val, size * 2 * sizeof(int16_t));
462     }
463
464     if(s->codec_id==CODEC_ID_MPEG4){
465         /* interlaced direct mode decoding tables */
466         CHECKED_ALLOCZ(s->field_mv_table, mb_array_size*2*2 * sizeof(int16_t))
467         CHECKED_ALLOCZ(s->field_select_table, mb_array_size*2* sizeof(int8_t))
468     }
469     if (s->out_format == FMT_H263) {
470         /* ac values */
471         CHECKED_ALLOCZ(s->ac_val[0], yc_size * sizeof(int16_t) * 16);
472         s->ac_val[1] = s->ac_val[0] + y_size;
473         s->ac_val[2] = s->ac_val[1] + c_size;
474         
475         /* cbp values */
476         CHECKED_ALLOCZ(s->coded_block, y_size);
477         
478         /* divx501 bitstream reorder buffer */
479         CHECKED_ALLOCZ(s->bitstream_buffer, BITSTREAM_BUFFER_SIZE);
480
481         /* cbp, ac_pred, pred_dir */
482         CHECKED_ALLOCZ(s->cbp_table  , mb_array_size * sizeof(uint8_t))
483         CHECKED_ALLOCZ(s->pred_dir_table, mb_array_size * sizeof(uint8_t))
484     }
485     
486     if (s->h263_pred || s->h263_plus || !s->encoding) {
487         /* dc values */
488         //MN: we need these for error resilience of intra-frames
489         CHECKED_ALLOCZ(s->dc_val[0], yc_size * sizeof(int16_t));
490         s->dc_val[1] = s->dc_val[0] + y_size;
491         s->dc_val[2] = s->dc_val[1] + c_size;
492         for(i=0;i<yc_size;i++)
493             s->dc_val[0][i] = 1024;
494     }
495
496     /* which MBs are intra blocks */
497     CHECKED_ALLOCZ(s->mbintra_table, mb_array_size);
498     memset(s->mbintra_table, 1, mb_array_size);
499     
500     /* default structure is frame */
501     s->picture_structure = PICT_FRAME;
502     
503     /* init macroblock skip table */
504     CHECKED_ALLOCZ(s->mbskip_table, mb_array_size+2);
505     //Note the +1 is for a quicker mpeg4 slice_end detection
506     CHECKED_ALLOCZ(s->prev_pict_types, PREV_PICT_TYPES_BUFFER_SIZE);
507     
508     s->block= s->blocks[0];
509
510     s->parse_context.state= -1;
511
512     s->context_initialized = 1;
513     return 0;
514  fail:
515     MPV_common_end(s);
516     return -1;
517 }
518
519
520 //extern int sads;
521
522 /* free common structure for both encoder and decoder */
523 void MPV_common_end(MpegEncContext *s)
524 {
525     int i;
526
527     av_freep(&s->parse_context.buffer);
528     s->parse_context.buffer_size=0;
529
530     av_freep(&s->mb_type);
531     av_freep(&s->p_mv_table_base);
532     av_freep(&s->b_forw_mv_table_base);
533     av_freep(&s->b_back_mv_table_base);
534     av_freep(&s->b_bidir_forw_mv_table_base);
535     av_freep(&s->b_bidir_back_mv_table_base);
536     av_freep(&s->b_direct_mv_table_base);
537     s->p_mv_table= NULL;
538     s->b_forw_mv_table= NULL;
539     s->b_back_mv_table= NULL;
540     s->b_bidir_forw_mv_table= NULL;
541     s->b_bidir_back_mv_table= NULL;
542     s->b_direct_mv_table= NULL;
543     
544     av_freep(&s->motion_val);
545     av_freep(&s->dc_val[0]);
546     av_freep(&s->ac_val[0]);
547     av_freep(&s->coded_block);
548     av_freep(&s->mbintra_table);
549     av_freep(&s->cbp_table);
550     av_freep(&s->pred_dir_table);
551     av_freep(&s->me.scratchpad);
552     av_freep(&s->me.map);
553     av_freep(&s->me.score_map);
554     
555     av_freep(&s->mbskip_table);
556     av_freep(&s->prev_pict_types);
557     av_freep(&s->bitstream_buffer);
558     av_freep(&s->tex_pb_buffer);
559     av_freep(&s->pb2_buffer);
560     av_freep(&s->allocated_edge_emu_buffer); s->edge_emu_buffer= NULL;
561     av_freep(&s->field_mv_table);
562     av_freep(&s->field_select_table);
563     av_freep(&s->avctx->stats_out);
564     av_freep(&s->ac_stats);
565     av_freep(&s->error_status_table);
566     av_freep(&s->mb_index2xy);
567     av_freep(&s->lambda_table);
568     av_freep(&s->q_intra_matrix);
569     av_freep(&s->q_inter_matrix);
570     av_freep(&s->q_intra_matrix16);
571     av_freep(&s->q_inter_matrix16);
572
573     for(i=0; i<MAX_PICTURE_COUNT; i++){
574         free_picture(s, &s->picture[i]);
575     }
576     av_freep(&s->picture);
577     avcodec_default_free_buffers(s->avctx);
578     s->context_initialized = 0;
579     s->last_picture_ptr=
580     s->next_picture_ptr=
581     s->current_picture_ptr= NULL;
582 }
583
584 #ifdef CONFIG_ENCODERS
585
586 /* init video encoder */
587 int MPV_encode_init(AVCodecContext *avctx)
588 {
589     MpegEncContext *s = avctx->priv_data;
590     int i, dummy;
591     int chroma_h_shift, chroma_v_shift;
592
593     avctx->pix_fmt = PIX_FMT_YUV420P; // FIXME
594
595     s->bit_rate = avctx->bit_rate;
596     s->bit_rate_tolerance = avctx->bit_rate_tolerance;
597     s->width = avctx->width;
598     s->height = avctx->height;
599     if(avctx->gop_size > 600){
600         fprintf(stderr, "Warning: keyframe interval too large, reducing it ...\n");
601         avctx->gop_size=600;
602     }
603     s->gop_size = avctx->gop_size;
604     s->rtp_mode = avctx->rtp_mode;
605     s->rtp_payload_size = avctx->rtp_payload_size;
606     if (avctx->rtp_callback)
607         s->rtp_callback = avctx->rtp_callback;
608     s->max_qdiff= avctx->max_qdiff;
609     s->qcompress= avctx->qcompress;
610     s->qblur= avctx->qblur;
611     s->avctx = avctx;
612     s->flags= avctx->flags;
613     s->max_b_frames= avctx->max_b_frames;
614     s->b_frame_strategy= avctx->b_frame_strategy;
615     s->codec_id= avctx->codec->id;
616     s->luma_elim_threshold  = avctx->luma_elim_threshold;
617     s->chroma_elim_threshold= avctx->chroma_elim_threshold;
618     s->strict_std_compliance= avctx->strict_std_compliance;
619     s->data_partitioning= avctx->flags & CODEC_FLAG_PART;
620     s->quarter_sample= (avctx->flags & CODEC_FLAG_QPEL)!=0;
621     s->mpeg_quant= avctx->mpeg_quant;
622
623     if (s->gop_size <= 1) {
624         s->intra_only = 1;
625         s->gop_size = 12;
626     } else {
627         s->intra_only = 0;
628     }
629
630     s->me_method = avctx->me_method;
631
632     /* Fixed QSCALE */
633     s->fixed_qscale = (avctx->flags & CODEC_FLAG_QSCALE);
634     
635     s->adaptive_quant= (   s->avctx->lumi_masking
636                         || s->avctx->dark_masking
637                         || s->avctx->temporal_cplx_masking 
638                         || s->avctx->spatial_cplx_masking
639                         || s->avctx->p_masking)
640                        && !s->fixed_qscale;
641     
642     s->progressive_sequence= !(avctx->flags & CODEC_FLAG_INTERLACED_DCT);
643
644     if((s->flags & CODEC_FLAG_4MV) && s->codec_id != CODEC_ID_MPEG4){
645         fprintf(stderr, "4MV not supported by codec\n");
646         return -1;
647     }
648     
649     if(s->quarter_sample && s->codec_id != CODEC_ID_MPEG4){
650         fprintf(stderr, "qpel not supported by codec\n");
651         return -1;
652     }
653
654     if(s->data_partitioning && s->codec_id != CODEC_ID_MPEG4){
655         fprintf(stderr, "data partitioning not supported by codec\n");
656         return -1;
657     }
658     
659     if(s->max_b_frames && s->codec_id != CODEC_ID_MPEG4 && s->codec_id != CODEC_ID_MPEG1VIDEO && s->codec_id != CODEC_ID_MPEG2VIDEO){
660         fprintf(stderr, "B-frames not supported by codec\n");
661         return -1;
662     }
663     
664     if(s->mpeg_quant && s->codec_id != CODEC_ID_MPEG4){ //FIXME mpeg2 uses that too
665         fprintf(stderr, "mpeg2 style quantization not supported by codec\n");
666         return -1;
667     }
668         
669     if((s->flags & CODEC_FLAG_CBP_RD) && !(s->flags & CODEC_FLAG_TRELLIS_QUANT)){
670         fprintf(stderr, "CBP RD needs trellis quant\n");
671         return -1;
672     }
673
674     if(s->codec_id==CODEC_ID_MJPEG){
675         s->intra_quant_bias= 1<<(QUANT_BIAS_SHIFT-1); //(a + x/2)/x
676         s->inter_quant_bias= 0;
677     }else if(s->mpeg_quant || s->codec_id==CODEC_ID_MPEG1VIDEO || s->codec_id==CODEC_ID_MPEG2VIDEO){
678         s->intra_quant_bias= 3<<(QUANT_BIAS_SHIFT-3); //(a + x*3/8)/x
679         s->inter_quant_bias= 0;
680     }else{
681         s->intra_quant_bias=0;
682         s->inter_quant_bias=-(1<<(QUANT_BIAS_SHIFT-2)); //(a - x/4)/x
683     }
684     
685     if(avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
686         s->intra_quant_bias= avctx->intra_quant_bias;
687     if(avctx->inter_quant_bias != FF_DEFAULT_QUANT_BIAS)
688         s->inter_quant_bias= avctx->inter_quant_bias;
689         
690     avcodec_get_chroma_sub_sample(avctx->pix_fmt, &chroma_h_shift, &chroma_v_shift);
691
692     av_reduce(&s->time_increment_resolution, &dummy, s->avctx->frame_rate, s->avctx->frame_rate_base, (1<<16)-1);
693     s->time_increment_bits = av_log2(s->time_increment_resolution - 1) + 1;
694
695     switch(avctx->codec->id) {
696     case CODEC_ID_MPEG1VIDEO:
697         s->out_format = FMT_MPEG1;
698         s->low_delay= 0; //s->max_b_frames ? 0 : 1;
699         avctx->delay= s->low_delay ? 0 : (s->max_b_frames + 1);
700         break;
701     case CODEC_ID_MPEG2VIDEO:
702         s->out_format = FMT_MPEG1;
703         s->low_delay= 0; //s->max_b_frames ? 0 : 1;
704         avctx->delay= s->low_delay ? 0 : (s->max_b_frames + 1);
705         s->rtp_mode= 1; // mpeg2 must have slices
706         if(s->rtp_payload_size == 0) s->rtp_payload_size= 256*256*256;
707         break;
708     case CODEC_ID_LJPEG:
709     case CODEC_ID_MJPEG:
710         s->out_format = FMT_MJPEG;
711         s->intra_only = 1; /* force intra only for jpeg */
712         s->mjpeg_write_tables = 1; /* write all tables */
713         s->mjpeg_data_only_frames = 0; /* write all the needed headers */
714         s->mjpeg_vsample[0] = 1<<chroma_v_shift;
715         s->mjpeg_vsample[1] = 1;
716         s->mjpeg_vsample[2] = 1; 
717         s->mjpeg_hsample[0] = 1<<chroma_h_shift;
718         s->mjpeg_hsample[1] = 1; 
719         s->mjpeg_hsample[2] = 1; 
720         if (mjpeg_init(s) < 0)
721             return -1;
722         avctx->delay=0;
723         s->low_delay=1;
724         break;
725 #ifdef CONFIG_RISKY
726     case CODEC_ID_H263:
727         if (h263_get_picture_format(s->width, s->height) == 7) {
728             printf("Input picture size isn't suitable for h263 codec! try h263+\n");
729             return -1;
730         }
731         s->out_format = FMT_H263;
732         avctx->delay=0;
733         s->low_delay=1;
734         break;
735     case CODEC_ID_H263P:
736         s->out_format = FMT_H263;
737         s->h263_plus = 1;
738         /* Fx */
739         s->unrestricted_mv=(avctx->flags & CODEC_FLAG_H263P_UMV) ? 1:0;
740         s->h263_aic= (avctx->flags & CODEC_FLAG_H263P_AIC) ? 1:0;
741         /* /Fx */
742         /* These are just to be sure */
743         s->umvplus = 1;
744         avctx->delay=0;
745         s->low_delay=1;
746         break;
747     case CODEC_ID_FLV1:
748         s->out_format = FMT_H263;
749         s->h263_flv = 2; /* format = 1; 11-bit codes */
750         s->unrestricted_mv = 1;
751         s->rtp_mode=0; /* don't allow GOB */
752         avctx->delay=0;
753         s->low_delay=1;
754         break;
755     case CODEC_ID_RV10:
756         s->out_format = FMT_H263;
757         s->h263_rv10 = 1;
758         avctx->delay=0;
759         s->low_delay=1;
760         break;
761     case CODEC_ID_MPEG4:
762         s->out_format = FMT_H263;
763         s->h263_pred = 1;
764         s->unrestricted_mv = 1;
765         s->low_delay= s->max_b_frames ? 0 : 1;
766         avctx->delay= s->low_delay ? 0 : (s->max_b_frames + 1);
767         break;
768     case CODEC_ID_MSMPEG4V1:
769         s->out_format = FMT_H263;
770         s->h263_msmpeg4 = 1;
771         s->h263_pred = 1;
772         s->unrestricted_mv = 1;
773         s->msmpeg4_version= 1;
774         avctx->delay=0;
775         s->low_delay=1;
776         break;
777     case CODEC_ID_MSMPEG4V2:
778         s->out_format = FMT_H263;
779         s->h263_msmpeg4 = 1;
780         s->h263_pred = 1;
781         s->unrestricted_mv = 1;
782         s->msmpeg4_version= 2;
783         avctx->delay=0;
784         s->low_delay=1;
785         break;
786     case CODEC_ID_MSMPEG4V3:
787         s->out_format = FMT_H263;
788         s->h263_msmpeg4 = 1;
789         s->h263_pred = 1;
790         s->unrestricted_mv = 1;
791         s->msmpeg4_version= 3;
792         s->flipflop_rounding=1;
793         avctx->delay=0;
794         s->low_delay=1;
795         break;
796     case CODEC_ID_WMV1:
797         s->out_format = FMT_H263;
798         s->h263_msmpeg4 = 1;
799         s->h263_pred = 1;
800         s->unrestricted_mv = 1;
801         s->msmpeg4_version= 4;
802         s->flipflop_rounding=1;
803         avctx->delay=0;
804         s->low_delay=1;
805         break;
806     case CODEC_ID_WMV2:
807         s->out_format = FMT_H263;
808         s->h263_msmpeg4 = 1;
809         s->h263_pred = 1;
810         s->unrestricted_mv = 1;
811         s->msmpeg4_version= 5;
812         s->flipflop_rounding=1;
813         avctx->delay=0;
814         s->low_delay=1;
815         break;
816 #endif
817     default:
818         return -1;
819     }
820     
821     { /* set up some safe defaults, some codecs might override them later */
822         static int done=0;
823         if(!done){
824             int i;
825             done=1;
826
827             default_mv_penalty= av_mallocz( sizeof(uint8_t)*(MAX_FCODE+1)*(2*MAX_MV+1) );
828             memset(default_mv_penalty, 0, sizeof(uint8_t)*(MAX_FCODE+1)*(2*MAX_MV+1));
829             memset(default_fcode_tab , 0, sizeof(uint8_t)*(2*MAX_MV+1));
830
831             for(i=-16; i<16; i++){
832                 default_fcode_tab[i + MAX_MV]= 1;
833             }
834         }
835     }
836     s->me.mv_penalty= default_mv_penalty;
837     s->fcode_tab= default_fcode_tab;
838     s->y_dc_scale_table=
839     s->c_dc_scale_table= ff_mpeg1_dc_scale_table;
840  
841     /* don't use the mv_penalty table for crap MVs as it would be confused */
842     //FIXME remove after fixing / removing old ME
843     if (s->me_method < ME_EPZS) s->me.mv_penalty = default_mv_penalty;
844
845     s->encoding = 1;
846
847     /* init */
848     if (MPV_common_init(s) < 0)
849         return -1;
850     
851     ff_init_me(s);
852
853 #ifdef CONFIG_ENCODERS
854 #ifdef CONFIG_RISKY
855     if (s->out_format == FMT_H263)
856         h263_encode_init(s);
857     if(s->msmpeg4_version)
858         ff_msmpeg4_encode_init(s);
859 #endif
860     if (s->out_format == FMT_MPEG1)
861         ff_mpeg1_encode_init(s);
862 #endif
863
864     /* init default q matrix */
865     for(i=0;i<64;i++) {
866         int j= s->dsp.idct_permutation[i];
867 #ifdef CONFIG_RISKY
868         if(s->codec_id==CODEC_ID_MPEG4 && s->mpeg_quant){
869             s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
870             s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
871         }else if(s->out_format == FMT_H263){
872             s->intra_matrix[j] =
873             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
874         }else
875 #endif
876         { /* mpeg1/2 */
877             s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
878             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
879         }
880         if(s->avctx->intra_matrix)
881             s->intra_matrix[j] = s->avctx->intra_matrix[i];
882         if(s->avctx->inter_matrix)
883             s->inter_matrix[j] = s->avctx->inter_matrix[i];
884     }
885
886     /* precompute matrix */
887     /* for mjpeg, we do include qscale in the matrix */
888     if (s->out_format != FMT_MJPEG) {
889         convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16, 
890                        s->intra_matrix, s->intra_quant_bias, 1, 31);
891         convert_matrix(s, s->q_inter_matrix, s->q_inter_matrix16, 
892                        s->inter_matrix, s->inter_quant_bias, 1, 31);
893     }
894
895     if(ff_rate_control_init(s) < 0)
896         return -1;
897
898     s->picture_number = 0;
899     s->picture_in_gop_number = 0;
900     s->fake_picture_number = 0;
901     /* motion detector init */
902     s->f_code = 1;
903     s->b_code = 1;
904
905     return 0;
906 }
907
908 int MPV_encode_end(AVCodecContext *avctx)
909 {
910     MpegEncContext *s = avctx->priv_data;
911
912 #ifdef STATS
913     print_stats();
914 #endif
915
916     ff_rate_control_uninit(s);
917
918     MPV_common_end(s);
919     if (s->out_format == FMT_MJPEG)
920         mjpeg_close(s);
921
922     av_freep(&avctx->extradata);
923       
924     return 0;
925 }
926
927 #endif //CONFIG_ENCODERS
928
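/**
 * fills in the derived max_level[], max_run[] and index_run[] tables of an
 * RLTable from its run/level tables, separately for the non-last and last
 * coefficient groups.
 */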
929 void init_rl(RLTable *rl)
930 {
931     int8_t max_level[MAX_RUN+1], max_run[MAX_LEVEL+1];
932     uint8_t index_run[MAX_RUN+1];
933     int last, run, level, start, end, i;
934
935     /* compute max_level[], max_run[] and index_run[] */
936     for(last=0;last<2;last++) {
937         if (last == 0) {
938             start = 0;
939             end = rl->last;
940         } else {
941             start = rl->last;
942             end = rl->n;
943         }
944
945         memset(max_level, 0, MAX_RUN + 1);
946         memset(max_run, 0, MAX_LEVEL + 1);
947         memset(index_run, rl->n, MAX_RUN + 1);
948         for(i=start;i<end;i++) {
949             run = rl->table_run[i];
950             level = rl->table_level[i];
951             if (index_run[run] == rl->n)
952                 index_run[run] = i;
953             if (level > max_level[run])
954                 max_level[run] = level;
955             if (run > max_run[level])
956                 max_run[level] = run;
957         }
958         rl->max_level[last] = av_malloc(MAX_RUN + 1);
959         memcpy(rl->max_level[last], max_level, MAX_RUN + 1);
960         rl->max_run[last] = av_malloc(MAX_LEVEL + 1);
961         memcpy(rl->max_run[last], max_run, MAX_LEVEL + 1);
962         rl->index_run[last] = av_malloc(MAX_RUN + 1);
963         memcpy(rl->index_run[last], index_run, MAX_RUN + 1);
964     }
965 }
966
967 /* draw the edges of width 'w' of an image of size width, height */
968 //FIXME check that this is ok for mpeg4 interlaced
969 static void draw_edges_c(uint8_t *buf, int wrap, int width, int height, int w)
970 {
971     uint8_t *ptr, *last_line;
972     int i;
973
974     last_line = buf + (height - 1) * wrap;
975     for(i=0;i<w;i++) {
976         /* top and bottom */
977         memcpy(buf - (i + 1) * wrap, buf, width);
978         memcpy(last_line + (i + 1) * wrap, last_line, width);
979     }
980     /* left and right */
981     ptr = buf;
982     for(i=0;i<height;i++) {
983         memset(ptr - w, ptr[0], w);
984         memset(ptr + width, ptr[width-1], w);
985         ptr += wrap;
986     }
987     /* corners */
988     for(i=0;i<w;i++) {
989         memset(buf - (i + 1) * wrap - w, buf[0], w); /* top left */
990         memset(buf - (i + 1) * wrap + width, buf[width-1], w); /* top right */
991         memset(last_line + (i + 1) * wrap - w, last_line[0], w); /* bottom left */
992         memset(last_line + (i + 1) * wrap + width, last_line[width-1], w); /* bottom right */
993     }
994 }
995
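/**
 * returns the index of an unused entry in the picture pool; shared pictures
 * need an entry with no type assigned, otherwise entries that already had a
 * buffer type assigned are tried first.
 */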
996 static int find_unused_picture(MpegEncContext *s, int shared){
997     int i;
998     
999     if(shared){
1000         for(i=0; i<MAX_PICTURE_COUNT; i++){
1001             if(s->picture[i].data[0]==NULL && s->picture[i].type==0) break;
1002         }
1003     }else{
1004         for(i=0; i<MAX_PICTURE_COUNT; i++){
1005             if(s->picture[i].data[0]==NULL && s->picture[i].type!=0) break; //FIXME
1006         }
1007         for(i=0; i<MAX_PICTURE_COUNT; i++){
1008             if(s->picture[i].data[0]==NULL) break;
1009         }
1010     }
1011
1012     assert(i<MAX_PICTURE_COUNT);
1013     return i;
1014 }
1015
1016 /* generic function for encode/decode called before a frame is coded/decoded */
1017 int MPV_frame_start(MpegEncContext *s, AVCodecContext *avctx)
1018 {
1019     int i;
1020     AVFrame *pic;
1021
1022     s->mb_skiped = 0;
1023
1024     assert(s->last_picture_ptr==NULL || s->out_format != FMT_H264 || s->codec_id == CODEC_ID_SVQ3);
1025
1026     /* mark&release old frames */
1027     if (s->pict_type != B_TYPE && s->last_picture_ptr && s->last_picture_ptr->data[0]) {
1028         avctx->release_buffer(avctx, (AVFrame*)s->last_picture_ptr);
1029
1030         /* release forgotten pictures */
1031         /* if(mpeg124/h263) */
1032         if(!s->encoding){
1033             for(i=0; i<MAX_PICTURE_COUNT; i++){
1034                 if(s->picture[i].data[0] && &s->picture[i] != s->next_picture_ptr && s->picture[i].reference){
1035                     fprintf(stderr, "releasing zombie picture\n");
1036                     avctx->release_buffer(avctx, (AVFrame*)&s->picture[i]);                
1037                 }
1038             }
1039         }
1040     }
1041 alloc:
1042     if(!s->encoding){
1043         /* release non-reference frames */
1044         for(i=0; i<MAX_PICTURE_COUNT; i++){
1045             if(s->picture[i].data[0] && !s->picture[i].reference /*&& s->picture[i].type!=FF_BUFFER_TYPE_SHARED*/){
1046                 s->avctx->release_buffer(s->avctx, (AVFrame*)&s->picture[i]);
1047             }
1048         }
1049
1050         i= find_unused_picture(s, 0);
1051     
1052         pic= (AVFrame*)&s->picture[i];
1053         pic->reference= s->pict_type != B_TYPE ? 3 : 0;
1054
1055         if(s->current_picture_ptr)
1056             pic->coded_picture_number= s->current_picture_ptr->coded_picture_number+1;
1057         
1058         if( alloc_picture(s, (Picture*)pic, 0) < 0)
1059             return -1;
1060
1061         s->current_picture_ptr= &s->picture[i];
1062     }
1063
1064     s->current_picture_ptr->pict_type= s->pict_type;
1065 //    if(s->flags && CODEC_FLAG_QSCALE) 
1066   //      s->current_picture_ptr->quality= s->new_picture_ptr->quality;
1067     s->current_picture_ptr->key_frame= s->pict_type == I_TYPE;
1068
1069     copy_picture(&s->current_picture, s->current_picture_ptr);
1070   
1071   if(s->out_format != FMT_H264 || s->codec_id == CODEC_ID_SVQ3){
1072     if (s->pict_type != B_TYPE) {
1073         s->last_picture_ptr= s->next_picture_ptr;
1074         s->next_picture_ptr= s->current_picture_ptr;
1075     }
1076     
1077     if(s->last_picture_ptr) copy_picture(&s->last_picture, s->last_picture_ptr);
1078     if(s->next_picture_ptr) copy_picture(&s->next_picture, s->next_picture_ptr);
1079     
1080     if(s->pict_type != I_TYPE && (s->last_picture_ptr==NULL || s->last_picture_ptr->data[0]==NULL)){
1081         fprintf(stderr, "warning: first frame is not a keyframe\n");
1082         assert(s->pict_type != B_TYPE); //these should have been dropped if we don't have a reference
1083         goto alloc;
1084     }
1085
1086     assert(s->pict_type == I_TYPE || (s->last_picture_ptr && s->last_picture_ptr->data[0]));
1087
1088     if(s->picture_structure!=PICT_FRAME){
1089         int i;
1090         for(i=0; i<4; i++){
1091             if(s->picture_structure == PICT_BOTTOM_FIELD){
1092                  s->current_picture.data[i] += s->current_picture.linesize[i];
1093             } 
1094             s->current_picture.linesize[i] *= 2;
1095             s->last_picture.linesize[i] *=2;
1096             s->next_picture.linesize[i] *=2;
1097         }
1098     }
1099   }
1100    
1101     s->hurry_up= s->avctx->hurry_up;
1102     s->error_resilience= avctx->error_resilience;
1103
1104     /* set dequantizer, we can't do it during init as it might change for mpeg4
1105        and we can't do it in the header decode as init isn't called for mpeg4 there yet */
1106     if(s->mpeg_quant || s->codec_id == CODEC_ID_MPEG2VIDEO) 
1107         s->dct_unquantize = s->dct_unquantize_mpeg2;
1108     else if(s->out_format == FMT_H263)
1109         s->dct_unquantize = s->dct_unquantize_h263;
1110     else 
1111         s->dct_unquantize = s->dct_unquantize_mpeg1;
1112
1113 #ifdef HAVE_XVMC
1114     if(s->avctx->xvmc_acceleration)
1115         return XVMC_field_start(s, avctx);
1116 #endif
1117     return 0;
1118 }
1119
1120 /* generic function for encode/decode called after a frame has been coded/decoded */
1121 void MPV_frame_end(MpegEncContext *s)
1122 {
1123     int i;
1124     /* draw edge for correct motion prediction if outside */
1125 #ifdef HAVE_XVMC
1126 //just to make sure that all data is rendered.
1127     if(s->avctx->xvmc_acceleration){
1128         XVMC_field_end(s);
1129     }else
1130 #endif
1131     if(s->unrestricted_mv && s->pict_type != B_TYPE && !s->intra_only && !(s->flags&CODEC_FLAG_EMU_EDGE)) {
1132             draw_edges(s->current_picture.data[0], s->linesize  , s->h_edge_pos   , s->v_edge_pos   , EDGE_WIDTH  );
1133             draw_edges(s->current_picture.data[1], s->uvlinesize, s->h_edge_pos>>1, s->v_edge_pos>>1, EDGE_WIDTH/2);
1134             draw_edges(s->current_picture.data[2], s->uvlinesize, s->h_edge_pos>>1, s->v_edge_pos>>1, EDGE_WIDTH/2);
1135     }
1136     emms_c();
1137     
1138     s->last_pict_type    = s->pict_type;
1139     if(s->pict_type!=B_TYPE){
1140         s->last_non_b_pict_type= s->pict_type;
1141     }
1142 #if 0
1143         /* copy back current_picture variables */
1144     for(i=0; i<MAX_PICTURE_COUNT; i++){
1145         if(s->picture[i].data[0] == s->current_picture.data[0]){
1146             s->picture[i]= s->current_picture;
1147             break;
1148         }    
1149     }
1150     assert(i<MAX_PICTURE_COUNT);
1151 #endif    
1152
1153     if(s->encoding){
1154         /* release non-reference frames */
1155         for(i=0; i<MAX_PICTURE_COUNT; i++){
1156             if(s->picture[i].data[0] && !s->picture[i].reference /*&& s->picture[i].type!=FF_BUFFER_TYPE_SHARED*/){
1157                 s->avctx->release_buffer(s->avctx, (AVFrame*)&s->picture[i]);
1158             }
1159         }
1160     }
1161     // clear copies, to avoid confusion
1162 #if 0
1163     memset(&s->last_picture, 0, sizeof(Picture));
1164     memset(&s->next_picture, 0, sizeof(Picture));
1165     memset(&s->current_picture, 0, sizeof(Picture));
1166 #endif
1167 }
1168
1169 /**
1170  * draws a line from (ex, ey) -> (sx, sy).
1171  * @param w width of the image
1172  * @param h height of the image
1173  * @param stride stride/linesize of the image
1174  * @param color color of the line
1175  */
1176 static void draw_line(uint8_t *buf, int sx, int sy, int ex, int ey, int w, int h, int stride, int color){
1177     int t, x, y, f;
1178     
1179     sx= clip(sx, 0, w-1);
1180     sy= clip(sy, 0, h-1);
1181     ex= clip(ex, 0, w-1);
1182     ey= clip(ey, 0, h-1);
1183     
1184     buf[sy*stride + sx]+= color;
1185     
1186     if(ABS(ex - sx) > ABS(ey - sy)){
1187         if(sx > ex){
1188             t=sx; sx=ex; ex=t;
1189             t=sy; sy=ey; ey=t;
1190         }
1191         buf+= sx + sy*stride;
1192         ex-= sx;
1193         f= ((ey-sy)<<16)/ex;
1194         for(x= 0; x <= ex; x++){
1195             y= ((x*f) + (1<<15))>>16;
1196             buf[y*stride + x]+= color;
1197         }
1198     }else{
1199         if(sy > ey){
1200             t=sx; sx=ex; ex=t;
1201             t=sy; sy=ey; ey=t;
1202         }
1203         buf+= sx + sy*stride;
1204         ey-= sy;
1205         if(ey) f= ((ex-sx)<<16)/ey;
1206         else   f= 0;
1207         for(y= 0; y <= ey; y++){
1208             x= ((y*f) + (1<<15))>>16;
1209             buf[y*stride + x]+= color;
1210         }
1211     }
1212 }
1213
1214 /**
1215  * draws an arrow from (ex, ey) -> (sx, sy).
1216  * @param w width of the image
1217  * @param h height of the image
1218  * @param stride stride/linesize of the image
1219  * @param color color of the arrow
1220  */
1221 static void draw_arrow(uint8_t *buf, int sx, int sy, int ex, int ey, int w, int h, int stride, int color){ 
1222     int dx,dy;
1223
1224     sx= clip(sx, -100, w+100);
1225     sy= clip(sy, -100, h+100);
1226     ex= clip(ex, -100, w+100);
1227     ey= clip(ey, -100, h+100);
1228     
1229     dx= ex - sx;
1230     dy= ey - sy;
1231     
1232     if(dx*dx + dy*dy > 3*3){
1233         int rx=  dx + dy;
1234         int ry= -dx + dy;
1235         int length= ff_sqrt((rx*rx + ry*ry)<<8);
1236         
1237         //FIXME subpixel accuracy
1238         rx= ROUNDED_DIV(rx*3<<4, length);
1239         ry= ROUNDED_DIV(ry*3<<4, length);
1240         
1241         draw_line(buf, sx, sy, sx + rx, sy + ry, w, h, stride, color);
1242         draw_line(buf, sx, sy, sx - ry, sy + rx, w, h, stride, color);
1243     }
1244     draw_line(buf, sx, sy, ex, ey, w, h, stride, color);
1245 }
1246
1247 /**
1248  * prints debuging info for the given picture.
1249  * prints debugging info for the given picture.
1250 void ff_print_debug_info(MpegEncContext *s, Picture *pict){
1251
1252     if(!pict || !pict->mb_type) return;
1253
1254     if(s->avctx->debug&(FF_DEBUG_SKIP | FF_DEBUG_QP | FF_DEBUG_MB_TYPE)){
1255         int x,y;
1256
1257         for(y=0; y<s->mb_height; y++){
1258             for(x=0; x<s->mb_width; x++){
1259                 if(s->avctx->debug&FF_DEBUG_SKIP){
1260                     int count= s->mbskip_table[x + y*s->mb_stride];
1261                     if(count>9) count=9;
1262                     printf("%1d", count);
1263                 }
1264                 if(s->avctx->debug&FF_DEBUG_QP){
1265                     printf("%2d", pict->qscale_table[x + y*s->mb_stride]);
1266                 }
1267                 if(s->avctx->debug&FF_DEBUG_MB_TYPE){
1268                     int mb_type= pict->mb_type[x + y*s->mb_stride];
1269                     
1270                     //Type & MV direction
1271                     if(IS_PCM(mb_type))
1272                         printf("P");
1273                     else if(IS_INTRA(mb_type) && IS_ACPRED(mb_type))
1274                         printf("A");
1275                     else if(IS_INTRA4x4(mb_type))
1276                         printf("i");
1277                     else if(IS_INTRA16x16(mb_type))
1278                         printf("I");
1279                     else if(IS_DIRECT(mb_type) && IS_SKIP(mb_type))
1280                         printf("d");
1281                     else if(IS_DIRECT(mb_type))
1282                         printf("D");
1283                     else if(IS_GMC(mb_type) && IS_SKIP(mb_type))
1284                         printf("g");
1285                     else if(IS_GMC(mb_type))
1286                         printf("G");
1287                     else if(IS_SKIP(mb_type))
1288                         printf("S");
1289                     else if(!USES_LIST(mb_type, 1))
1290                         printf(">");
1291                     else if(!USES_LIST(mb_type, 0))
1292                         printf("<");
1293                     else{
1294                         assert(USES_LIST(mb_type, 0) && USES_LIST(mb_type, 1));
1295                         printf("X");
1296                     }
1297                     
1298                     //segmentation
1299                     if(IS_8X8(mb_type))
1300                         printf("+");
1301                     else if(IS_16X8(mb_type))
1302                         printf("-");
1303                     else if(IS_8X16(mb_type))
1304                         printf("¦");
1305                     else if(IS_INTRA(mb_type) || IS_16X16(mb_type))
1306                         printf(" ");
1307                     else
1308                         printf("?");
1309                     
1310                         
1311                     if(IS_INTERLACED(mb_type) && s->codec_id == CODEC_ID_H264)
1312                         printf("=");
1313                     else
1314                         printf(" ");
1315                 }
1316 //                printf(" ");
1317             }
1318             printf("\n");
1319         }
1320     }
1321     
1322     if((s->avctx->debug&FF_DEBUG_VIS_MV) && s->motion_val){
1323         const int shift= 1 + s->quarter_sample;
1324         int mb_y;
1325         uint8_t *ptr= pict->data[0];
1326         s->low_delay=0; //needed to see the vectors without trashing the buffers
1327
1328         for(mb_y=0; mb_y<s->mb_height; mb_y++){
1329             int mb_x;
1330             for(mb_x=0; mb_x<s->mb_width; mb_x++){
1331                 const int mb_index= mb_x + mb_y*s->mb_stride;
1332                 if(IS_8X8(s->current_picture.mb_type[mb_index])){
1333                     int i;
1334                     for(i=0; i<4; i++){
1335                         int sx= mb_x*16 + 4 + 8*(i&1);
1336                         int sy= mb_y*16 + 4 + 8*(i>>1);
1337                         int xy= 1 + mb_x*2 + (i&1) + (mb_y*2 + 1 + (i>>1))*(s->mb_width*2 + 2);
1338                         int mx= (s->motion_val[xy][0]>>shift) + sx;
1339                         int my= (s->motion_val[xy][1]>>shift) + sy;
1340                         draw_arrow(ptr, sx, sy, mx, my, s->width, s->height, s->linesize, 100);
1341                     }
1342                 }else{
1343                     int sx= mb_x*16 + 8;
1344                     int sy= mb_y*16 + 8;
1345                     int xy= 1 + mb_x*2 + (mb_y*2 + 1)*(s->mb_width*2 + 2);
1346                     int mx= (s->motion_val[xy][0]>>shift) + sx;
1347                     int my= (s->motion_val[xy][1]>>shift) + sy;
1348                     draw_arrow(ptr, sx, sy, mx, my, s->width, s->height, s->linesize, 100);
1349                 }
1350                 s->mbskip_table[mb_index]=0;
1351             }
1352         }
1353     }
1354 }
1355
1356 #ifdef CONFIG_ENCODERS
1357
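/**
 * returns the sum of absolute errors between a 16x16 block and a constant
 * reference value.
 */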
1358 static int get_sae(uint8_t *src, int ref, int stride){
1359     int x,y;
1360     int acc=0;
1361     
1362     for(y=0; y<16; y++){
1363         for(x=0; x<16; x++){
1364             acc+= ABS(src[x+y*stride] - ref);
1365         }
1366     }
1367     
1368     return acc;
1369 }
1370
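/**
 * estimates the number of macroblocks that would likely be coded as intra,
 * by comparing each 16x16 block's SAD against the reference frame with its
 * deviation from the block's own mean.
 */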
1371 static int get_intra_count(MpegEncContext *s, uint8_t *src, uint8_t *ref, int stride){
1372     int x, y, w, h;
1373     int acc=0;
1374     
1375     w= s->width &~15;
1376     h= s->height&~15;
1377     
1378     for(y=0; y<h; y+=16){
1379         for(x=0; x<w; x+=16){
1380             int offset= x + y*stride;
1381             int sad = s->dsp.pix_abs16x16(src + offset, ref + offset, stride);
1382             int mean= (s->dsp.pix_sum(src + offset, stride) + 128)>>8;
1383             int sae = get_sae(src + offset, mean, stride);
1384             
1385             acc+= sae + 500 < sad;
1386         }
1387     }
1388     return acc;
1389 }
1390
1391
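/**
 * queues a new input frame for encoding; the frame data is referenced
 * directly when possible, otherwise it is copied into an internal buffer.
 */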
1392 static int load_input_picture(MpegEncContext *s, AVFrame *pic_arg){
1393     AVFrame *pic=NULL;
1394     int i;
1395     const int encoding_delay= s->max_b_frames;
1396     int direct=1;
1397     
1398   if(pic_arg){
1399     if(encoding_delay && !(s->flags&CODEC_FLAG_INPUT_PRESERVED)) direct=0;
1400     if(pic_arg->linesize[0] != s->linesize) direct=0;
1401     if(pic_arg->linesize[1] != s->uvlinesize) direct=0;
1402     if(pic_arg->linesize[2] != s->uvlinesize) direct=0;
1403   
1404 //    printf("%d %d %d %d\n",pic_arg->linesize[0], pic_arg->linesize[1], s->linesize, s->uvlinesize);
1405     
1406     if(direct){
1407         i= find_unused_picture(s, 1);
1408
1409         pic= (AVFrame*)&s->picture[i];
1410         pic->reference= 3;
1411     
1412         for(i=0; i<4; i++){
1413             pic->data[i]= pic_arg->data[i];
1414             pic->linesize[i]= pic_arg->linesize[i];
1415         }
1416         alloc_picture(s, (Picture*)pic, 1);
1417     }else{
1418         int offset= 16;
1419         i= find_unused_picture(s, 0);
1420
1421         pic= (AVFrame*)&s->picture[i];
1422         pic->reference= 3;
1423
1424         alloc_picture(s, (Picture*)pic, 0);
1425
1426         if(   pic->data[0] + offset == pic_arg->data[0] 
1427            && pic->data[1] + offset == pic_arg->data[1]
1428            && pic->data[2] + offset == pic_arg->data[2]){
1429        // empty
1430         }else{
1431             int h_chroma_shift, v_chroma_shift;
1432             avcodec_get_chroma_sub_sample(s->avctx->pix_fmt, &h_chroma_shift, &v_chroma_shift);
1433         
1434             for(i=0; i<3; i++){
1435                 int src_stride= pic_arg->linesize[i];
1436                 int dst_stride= i ? s->uvlinesize : s->linesize;
1437                 int h_shift= i ? h_chroma_shift : 0;
1438                 int v_shift= i ? v_chroma_shift : 0;
1439                 int w= s->width >>h_shift;
1440                 int h= s->height>>v_shift;
1441                 uint8_t *src= pic_arg->data[i];
1442                 uint8_t *dst= pic->data[i] + offset;
1443             
1444                 if(src_stride==dst_stride)
1445                     memcpy(dst, src, src_stride*h);
1446                 else{
1447                     while(h--){
1448                         memcpy(dst, src, w);
1449                         dst += dst_stride;
1450                         src += src_stride;
1451                     }
1452                 }
1453             }
1454         }
1455     }
1456     pic->quality= pic_arg->quality;
1457     pic->pict_type= pic_arg->pict_type;
1458     pic->pts = pic_arg->pts;
1459     
1460     if(s->input_picture[encoding_delay])
1461         pic->display_picture_number= s->input_picture[encoding_delay]->display_picture_number + 1;
1462     
1463   }
1464
1465     /* shift buffer entries */
1466     for(i=1; i<MAX_PICTURE_COUNT /*s->encoding_delay+1*/; i++)
1467         s->input_picture[i-1]= s->input_picture[i];
1468         
1469     s->input_picture[encoding_delay]= (Picture*)pic;
1470
1471     return 0;
1472 }
1473
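/**
 * chooses the next picture to encode, deciding how many B-frames precede the
 * next reference frame and reordering the input queue accordingly.
 */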
1474 static void select_input_picture(MpegEncContext *s){
1475     int i;
1476     int coded_pic_num=0;    
1477
1478     if(s->reordered_input_picture[0])
1479         coded_pic_num= s->reordered_input_picture[0]->coded_picture_number + 1;
1480
1481     for(i=1; i<MAX_PICTURE_COUNT; i++)
1482         s->reordered_input_picture[i-1]= s->reordered_input_picture[i];
1483     s->reordered_input_picture[MAX_PICTURE_COUNT-1]= NULL;
1484
1485     /* set next picture types & ordering */
1486     if(s->reordered_input_picture[0]==NULL && s->input_picture[0]){
1487         if(/*s->picture_in_gop_number >= s->gop_size ||*/ s->next_picture_ptr==NULL || s->intra_only){
1488             s->reordered_input_picture[0]= s->input_picture[0];
1489             s->reordered_input_picture[0]->pict_type= I_TYPE;
1490             s->reordered_input_picture[0]->coded_picture_number= coded_pic_num;
1491         }else{
1492             int b_frames;
1493             
1494             if(s->flags&CODEC_FLAG_PASS2){
1495                 for(i=0; i<s->max_b_frames+1; i++){
1496                     int pict_num= s->input_picture[0]->display_picture_number + i;
1497                     int pict_type= s->rc_context.entry[pict_num].new_pict_type;
1498                     s->input_picture[i]->pict_type= pict_type;
1499                     
1500                     if(i + 1 >= s->rc_context.num_entries) break;
1501                 }
1502             }
1503
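                 /* decide how many B-frames to code before the next reference frame:
                  * either the user forced the picture types above (PASS2 or explicit
                  * pict_type), or strategy 0 simply uses max_b_frames, or strategy 1
                  * scores the candidates with get_intra_count() and cuts the B-frame
                  * run short where the content changes too much */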
1504             if(s->input_picture[0]->pict_type){
1505                 /* user selected pict_type */
1506                 for(b_frames=0; b_frames<s->max_b_frames+1; b_frames++){
1507                     if(s->input_picture[b_frames]->pict_type!=B_TYPE) break;
1508                 }
1509             
1510                 if(b_frames > s->max_b_frames){
1511                     fprintf(stderr, "warning, too many bframes in a row\n");
1512                     b_frames = s->max_b_frames;
1513                 }
1514             }else if(s->b_frame_strategy==0){
1515                 b_frames= s->max_b_frames;
1516                 while(b_frames && !s->input_picture[b_frames]) b_frames--;
1517             }else if(s->b_frame_strategy==1){
1518                 for(i=1; i<s->max_b_frames+1; i++){
1519                     if(s->input_picture[i] && s->input_picture[i]->b_frame_score==0){
1520                         s->input_picture[i]->b_frame_score= 
1521                             get_intra_count(s, s->input_picture[i  ]->data[0], 
1522                                                s->input_picture[i-1]->data[0], s->linesize) + 1;
1523                     }
1524                 }
1525                 for(i=0; i<s->max_b_frames; i++){
1526                     if(s->input_picture[i]==NULL || s->input_picture[i]->b_frame_score - 1 > s->mb_num/40) break;
1527                 }
1528                                 
1529                 b_frames= FFMAX(0, i-1);
1530                 
1531                 /* reset scores */
1532                 for(i=0; i<b_frames+1; i++){
1533                     s->input_picture[i]->b_frame_score=0;
1534                 }
1535             }else{
1536                 fprintf(stderr, "illegal b frame strategy\n");
1537                 b_frames=0;
1538             }
1539
1540             emms_c();
1541 //static int b_count=0;
1542 //b_count+= b_frames;
1543 //printf("b_frames: %d\n", b_count);
1544                         
1545             s->reordered_input_picture[0]= s->input_picture[b_frames];
1546             if(   s->picture_in_gop_number + b_frames >= s->gop_size 
1547                || s->reordered_input_picture[0]->pict_type== I_TYPE)
1548                 s->reordered_input_picture[0]->pict_type= I_TYPE;
1549             else
1550                 s->reordered_input_picture[0]->pict_type= P_TYPE;
1551             s->reordered_input_picture[0]->coded_picture_number= coded_pic_num;
1552             for(i=0; i<b_frames; i++){
1553                 coded_pic_num++;
1554                 s->reordered_input_picture[i+1]= s->input_picture[i];
1555                 s->reordered_input_picture[i+1]->pict_type= B_TYPE;
1556                 s->reordered_input_picture[i+1]->coded_picture_number= coded_pic_num;
1557             }
1558         }
1559     }
1560     
1561     if(s->reordered_input_picture[0]){
1562         s->reordered_input_picture[0]->reference= s->reordered_input_picture[0]->pict_type!=B_TYPE ? 3 : 0;
1563
1564         copy_picture(&s->new_picture, s->reordered_input_picture[0]);
1565
1566         if(s->reordered_input_picture[0]->type == FF_BUFFER_TYPE_SHARED){
1567             // input is a shared picture, so we can't modify it -> allocate a new one & ensure that the shared one is reusable
1568         
1569             int i= find_unused_picture(s, 0);
1570             Picture *pic= &s->picture[i];
1571
1572             /* mark us unused / free shared pic */
1573             for(i=0; i<4; i++)
1574                 s->reordered_input_picture[0]->data[i]= NULL;
1575             s->reordered_input_picture[0]->type= 0;
1576             
1577             //FIXME bad, copy * except
1578             pic->pict_type = s->reordered_input_picture[0]->pict_type;
1579             pic->quality   = s->reordered_input_picture[0]->quality;
1580             pic->coded_picture_number = s->reordered_input_picture[0]->coded_picture_number;
1581             pic->reference = s->reordered_input_picture[0]->reference;
1582             pic->pts = s->reordered_input_picture[0]->pts;
1583             
1584             alloc_picture(s, pic, 0);
1585
1586             s->current_picture_ptr= pic;
1587         }else{
1588             // input is not a shared pix -> reuse buffer for current_pix
1589
1590             assert(   s->reordered_input_picture[0]->type==FF_BUFFER_TYPE_USER 
1591                    || s->reordered_input_picture[0]->type==FF_BUFFER_TYPE_INTERNAL);
1592             
1593             s->current_picture_ptr= s->reordered_input_picture[0];
1594             for(i=0; i<4; i++){
1595                 s->new_picture.data[i]+=16;
1596             }
1597         }
1598         copy_picture(&s->current_picture, s->current_picture_ptr);
1599     
1600         s->picture_number= s->new_picture.display_picture_number;
1601 //printf("dpn:%d\n", s->picture_number);
1602     }else{
1603        memset(&s->new_picture, 0, sizeof(Picture));
1604     }
1605 }
1606
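     /* Encoder entry point; the MpegEncContext based encoders use this as their
      * encode() callback.  A minimal usage sketch through the public libavcodec
      * API of this era -- only a sketch: registration, frame rate / bit rate / GOP
      * settings, output buffer sizing and error handling are all omitted:
      *
      *     AVCodecContext *c  = avcodec_alloc_context();
      *     AVFrame        *in = avcodec_alloc_frame();
      *     c->width   = 352;
      *     c->height  = 288;
      *     c->pix_fmt = PIX_FMT_YUV420P;    // the only pixel format accepted below
      *     avcodec_open(c, avcodec_find_encoder(CODEC_ID_MPEG1VIDEO));
      *     // ... fill in->data[] / in->linesize[] with one YUV420P frame ...
      *     int size = avcodec_encode_video(c, outbuf, outbuf_size, in);
      */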
1607 int MPV_encode_picture(AVCodecContext *avctx,
1608                        unsigned char *buf, int buf_size, void *data)
1609 {
1610     MpegEncContext *s = avctx->priv_data;
1611     AVFrame *pic_arg = data;
1612     int i;
1613
1614     if(avctx->pix_fmt != PIX_FMT_YUV420P){
1615         fprintf(stderr, "this codec supports only YUV420P\n");
1616         return -1;
1617     }
1618     
1619     init_put_bits(&s->pb, buf, buf_size);
1620
1621     s->picture_in_gop_number++;
1622
1623     load_input_picture(s, pic_arg);
1624     
1625     select_input_picture(s);
1626     
1627     /* output? */
1628     if(s->new_picture.data[0]){
1629
1630         s->pict_type= s->new_picture.pict_type;
1631 //emms_c();
1632 //printf("qs:%f %f %d\n", s->new_picture.quality, s->current_picture.quality, s->qscale);
1633         MPV_frame_start(s, avctx);
1634
1635         encode_picture(s, s->picture_number);
1636         
1637         avctx->real_pict_num  = s->picture_number;
1638         avctx->header_bits = s->header_bits;
1639         avctx->mv_bits     = s->mv_bits;
1640         avctx->misc_bits   = s->misc_bits;
1641         avctx->i_tex_bits  = s->i_tex_bits;
1642         avctx->p_tex_bits  = s->p_tex_bits;
1643         avctx->i_count     = s->i_count;
1644         avctx->p_count     = s->mb_num - s->i_count - s->skip_count; //FIXME f/b_count in avctx
1645         avctx->skip_count  = s->skip_count;
1646
1647         MPV_frame_end(s);
1648
1649         if (s->out_format == FMT_MJPEG)
1650             mjpeg_picture_trailer(s);
1651         
1652         if(s->flags&CODEC_FLAG_PASS1)
1653             ff_write_pass1_stats(s);
1654
1655         for(i=0; i<4; i++){
1656             avctx->error[i] += s->current_picture_ptr->error[i];
1657         }
1658     }
1659
1660     s->input_picture_number++;
1661
1662     flush_put_bits(&s->pb);
1663     s->frame_bits  = (pbBufPtr(&s->pb) - s->pb.buf) * 8;
1664     
1665     s->total_bits += s->frame_bits;
1666     avctx->frame_bits  = s->frame_bits;
1667     
1668     return pbBufPtr(&s->pb) - s->pb.buf;
1669 }
1670
1671 #endif //CONFIG_ENCODERS
1672
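     /* MPEG-4 global motion compensation: gmc1_motion() handles the common
      * one-warp-point case (a pure translation of the whole macroblock), while
      * gmc_motion() below handles the general case using the affine parameters
      * in s->sprite_delta[][] */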
1673 static inline void gmc1_motion(MpegEncContext *s,
1674                                uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1675                                int dest_offset,
1676                                uint8_t **ref_picture, int src_offset)
1677 {
1678     uint8_t *ptr;
1679     int offset, src_x, src_y, linesize, uvlinesize;
1680     int motion_x, motion_y;
1681     int emu=0;
1682
1683     motion_x= s->sprite_offset[0][0];
1684     motion_y= s->sprite_offset[0][1];
1685     src_x = s->mb_x * 16 + (motion_x >> (s->sprite_warping_accuracy+1));
1686     src_y = s->mb_y * 16 + (motion_y >> (s->sprite_warping_accuracy+1));
1687     motion_x<<=(3-s->sprite_warping_accuracy);
1688     motion_y<<=(3-s->sprite_warping_accuracy);
1689     src_x = clip(src_x, -16, s->width);
1690     if (src_x == s->width)
1691         motion_x =0;
1692     src_y = clip(src_y, -16, s->height);
1693     if (src_y == s->height)
1694         motion_y =0;
1695
1696     linesize = s->linesize;
1697     uvlinesize = s->uvlinesize;
1698     
1699     ptr = ref_picture[0] + (src_y * linesize) + src_x + src_offset;
1700
1701     dest_y+=dest_offset;
1702     if(s->flags&CODEC_FLAG_EMU_EDGE){
1703         if(   (unsigned)src_x >= s->h_edge_pos - 17
1704            || (unsigned)src_y >= s->v_edge_pos - 17){
1705             ff_emulated_edge_mc(s->edge_emu_buffer, ptr, linesize, 17, 17, src_x, src_y, s->h_edge_pos, s->v_edge_pos);
1706             ptr= s->edge_emu_buffer;
1707         }
1708     }
1709     
1710     if((motion_x|motion_y)&7){
1711         s->dsp.gmc1(dest_y  , ptr  , linesize, 16, motion_x&15, motion_y&15, 128 - s->no_rounding);
1712         s->dsp.gmc1(dest_y+8, ptr+8, linesize, 16, motion_x&15, motion_y&15, 128 - s->no_rounding);
1713     }else{
1714         int dxy;
1715         
1716         dxy= ((motion_x>>3)&1) | ((motion_y>>2)&2);
1717         if (s->no_rounding){
1718             s->dsp.put_no_rnd_pixels_tab[0][dxy](dest_y, ptr, linesize, 16);
1719         }else{
1720             s->dsp.put_pixels_tab       [0][dxy](dest_y, ptr, linesize, 16);
1721         }
1722     }
1723     
1724     if(s->flags&CODEC_FLAG_GRAY) return;
1725
1726     motion_x= s->sprite_offset[1][0];
1727     motion_y= s->sprite_offset[1][1];
1728     src_x = s->mb_x * 8 + (motion_x >> (s->sprite_warping_accuracy+1));
1729     src_y = s->mb_y * 8 + (motion_y >> (s->sprite_warping_accuracy+1));
1730     motion_x<<=(3-s->sprite_warping_accuracy);
1731     motion_y<<=(3-s->sprite_warping_accuracy);
1732     src_x = clip(src_x, -8, s->width>>1);
1733     if (src_x == s->width>>1)
1734         motion_x =0;
1735     src_y = clip(src_y, -8, s->height>>1);
1736     if (src_y == s->height>>1)
1737         motion_y =0;
1738
1739     offset = (src_y * uvlinesize) + src_x + (src_offset>>1);
1740     ptr = ref_picture[1] + offset;
1741     if(s->flags&CODEC_FLAG_EMU_EDGE){
1742         if(   (unsigned)src_x >= (s->h_edge_pos>>1) - 9
1743            || (unsigned)src_y >= (s->v_edge_pos>>1) - 9){
1744             ff_emulated_edge_mc(s->edge_emu_buffer, ptr, uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
1745             ptr= s->edge_emu_buffer;
1746             emu=1;
1747         }
1748     }
1749     s->dsp.gmc1(dest_cb + (dest_offset>>1), ptr, uvlinesize, 8, motion_x&15, motion_y&15, 128 - s->no_rounding);
1750     
1751     ptr = ref_picture[2] + offset;
1752     if(emu){
1753         ff_emulated_edge_mc(s->edge_emu_buffer, ptr, uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
1754         ptr= s->edge_emu_buffer;
1755     }
1756     s->dsp.gmc1(dest_cr + (dest_offset>>1), ptr, uvlinesize, 8, motion_x&15, motion_y&15, 128 - s->no_rounding);
1757     
1758     return;
1759 }
1760
1761 static inline void gmc_motion(MpegEncContext *s,
1762                                uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1763                                int dest_offset,
1764                                uint8_t **ref_picture, int src_offset)
1765 {
1766     uint8_t *ptr;
1767     int linesize, uvlinesize;
1768     const int a= s->sprite_warping_accuracy;
1769     int ox, oy;
1770
1771     linesize = s->linesize;
1772     uvlinesize = s->uvlinesize;
1773
1774     ptr = ref_picture[0] + src_offset;
1775
1776     dest_y+=dest_offset;
1777     
1778     ox= s->sprite_offset[0][0] + s->sprite_delta[0][0]*s->mb_x*16 + s->sprite_delta[0][1]*s->mb_y*16;
1779     oy= s->sprite_offset[0][1] + s->sprite_delta[1][0]*s->mb_x*16 + s->sprite_delta[1][1]*s->mb_y*16;
1780
1781     s->dsp.gmc(dest_y, ptr, linesize, 16,
1782            ox, 
1783            oy, 
1784            s->sprite_delta[0][0], s->sprite_delta[0][1],
1785            s->sprite_delta[1][0], s->sprite_delta[1][1], 
1786            a+1, (1<<(2*a+1)) - s->no_rounding,
1787            s->h_edge_pos, s->v_edge_pos);
1788     s->dsp.gmc(dest_y+8, ptr, linesize, 16,
1789            ox + s->sprite_delta[0][0]*8, 
1790            oy + s->sprite_delta[1][0]*8, 
1791            s->sprite_delta[0][0], s->sprite_delta[0][1],
1792            s->sprite_delta[1][0], s->sprite_delta[1][1], 
1793            a+1, (1<<(2*a+1)) - s->no_rounding,
1794            s->h_edge_pos, s->v_edge_pos);
1795
1796     if(s->flags&CODEC_FLAG_GRAY) return;
1797
1798
1799     dest_cb+=dest_offset>>1;
1800     dest_cr+=dest_offset>>1;
1801     
1802     ox= s->sprite_offset[1][0] + s->sprite_delta[0][0]*s->mb_x*8 + s->sprite_delta[0][1]*s->mb_y*8;
1803     oy= s->sprite_offset[1][1] + s->sprite_delta[1][0]*s->mb_x*8 + s->sprite_delta[1][1]*s->mb_y*8;
1804
1805     ptr = ref_picture[1] + (src_offset>>1);
1806     s->dsp.gmc(dest_cb, ptr, uvlinesize, 8,
1807            ox, 
1808            oy, 
1809            s->sprite_delta[0][0], s->sprite_delta[0][1],
1810            s->sprite_delta[1][0], s->sprite_delta[1][1], 
1811            a+1, (1<<(2*a+1)) - s->no_rounding,
1812            s->h_edge_pos>>1, s->v_edge_pos>>1);
1813     
1814     ptr = ref_picture[2] + (src_offset>>1);
1815     s->dsp.gmc(dest_cr, ptr, uvlinesize, 8,
1816            ox, 
1817            oy, 
1818            s->sprite_delta[0][0], s->sprite_delta[0][1],
1819            s->sprite_delta[1][0], s->sprite_delta[1][1], 
1820            a+1, (1<<(2*a+1)) - s->no_rounding,
1821            s->h_edge_pos>>1, s->v_edge_pos>>1);
1822 }
1823
1824 /**
1825  * Copies a rectangular area of samples to a temporary buffer and replicates the border samples.
1826  * @param buf destination buffer
1827  * @param src source buffer
1828  * @param linesize number of bytes between 2 vertically adjacent samples in both the source and destination buffers
1829  * @param block_w width of block
1830  * @param block_h height of block
1831  * @param src_x x coordinate of the top left sample of the block in the source buffer
1832  * @param src_y y coordinate of the top left sample of the block in the source buffer
1833  * @param w width of the source buffer
1834  * @param h height of the source buffer
1835  */
1836 void ff_emulated_edge_mc(uint8_t *buf, uint8_t *src, int linesize, int block_w, int block_h, 
1837                                     int src_x, int src_y, int w, int h){
1838     int x, y;
1839     int start_y, start_x, end_y, end_x;
1840
1841     if(src_y>= h){
1842         src+= (h-1-src_y)*linesize;
1843         src_y=h-1;
1844     }else if(src_y<=-block_h){
1845         src+= (1-block_h-src_y)*linesize;
1846         src_y=1-block_h;
1847     }
1848     if(src_x>= w){
1849         src+= (w-1-src_x);
1850         src_x=w-1;
1851     }else if(src_x<=-block_w){
1852         src+= (1-block_w-src_x);
1853         src_x=1-block_w;
1854     }
1855
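         /* after the clamping above the requested block overlaps the source by at
          * least one row and one column; [start,end) marks the part that really
          * exists, everything outside it is filled by replicating the nearest
          * edge samples */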
1856     start_y= FFMAX(0, -src_y);
1857     start_x= FFMAX(0, -src_x);
1858     end_y= FFMIN(block_h, h-src_y);
1859     end_x= FFMIN(block_w, w-src_x);
1860
1861     // copy existing part
1862     for(y=start_y; y<end_y; y++){
1863         for(x=start_x; x<end_x; x++){
1864             buf[x + y*linesize]= src[x + y*linesize];
1865         }
1866     }
1867
1868     //top
1869     for(y=0; y<start_y; y++){
1870         for(x=start_x; x<end_x; x++){
1871             buf[x + y*linesize]= buf[x + start_y*linesize];
1872         }
1873     }
1874
1875     //bottom
1876     for(y=end_y; y<block_h; y++){
1877         for(x=start_x; x<end_x; x++){
1878             buf[x + y*linesize]= buf[x + (end_y-1)*linesize];
1879         }
1880     }
1881                                     
1882     for(y=0; y<block_h; y++){
1883        //left
1884         for(x=0; x<start_x; x++){
1885             buf[x + y*linesize]= buf[start_x + y*linesize];
1886         }
1887        
1888        //right
1889         for(x=end_x; x<block_w; x++){
1890             buf[x + y*linesize]= buf[end_x - 1 + y*linesize];
1891         }
1892     }
1893 }
1894
1895
1896 /* apply one mpeg motion vector to the three components */
1897 static inline void mpeg_motion(MpegEncContext *s,
1898                                uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1899                                int dest_offset,
1900                                uint8_t **ref_picture, int src_offset,
1901                                int field_based, op_pixels_func (*pix_op)[4],
1902                                int motion_x, int motion_y, int h)
1903 {
1904     uint8_t *ptr;
1905     int dxy, offset, mx, my, src_x, src_y, height, v_edge_pos, linesize, uvlinesize;
1906     int emu=0;
1907 #if 0    
1908 if(s->quarter_sample)
1909 {
1910     motion_x>>=1;
1911     motion_y>>=1;
1912 }
1913 #endif
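         /* dxy encodes the half-pel fraction of the motion vector: bit 0 = horizontal
          * half-pel, bit 1 = vertical half-pel; it selects the interpolation function,
          * pix_op[0][dxy] for the 16-wide luma block and pix_op[1][dxy] for chroma */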
1914     dxy = ((motion_y & 1) << 1) | (motion_x & 1);
1915     src_x = s->mb_x * 16 + (motion_x >> 1);
1916     src_y = s->mb_y * (16 >> field_based) + (motion_y >> 1);
1917                 
1918     /* WARNING: do not forget half pels */
1919     height = s->height >> field_based;
1920     v_edge_pos = s->v_edge_pos >> field_based;
1921     src_x = clip(src_x, -16, s->width);
1922     if (src_x == s->width)
1923         dxy &= ~1;
1924     src_y = clip(src_y, -16, height);
1925     if (src_y == height)
1926         dxy &= ~2;
1927     linesize   = s->current_picture.linesize[0] << field_based;
1928     uvlinesize = s->current_picture.linesize[1] << field_based;
1929     ptr = ref_picture[0] + (src_y * linesize) + (src_x) + src_offset;
1930     dest_y += dest_offset;
1931
1932     if(s->unrestricted_mv && (s->flags&CODEC_FLAG_EMU_EDGE)){
1933         if(   (unsigned)src_x > s->h_edge_pos - (motion_x&1) - 16
1934            || (unsigned)src_y >    v_edge_pos - (motion_y&1) - h){
1935             ff_emulated_edge_mc(s->edge_emu_buffer, ptr - src_offset, s->linesize, 17, 17+field_based,  //FIXME linesize? and uv below
1936                              src_x, src_y<<field_based, s->h_edge_pos, s->v_edge_pos);
1937             ptr= s->edge_emu_buffer + src_offset;
1938             emu=1;
1939         }
1940     }
1941     pix_op[0][dxy](dest_y, ptr, linesize, h);
1942
1943     if(s->flags&CODEC_FLAG_GRAY) return;
1944
1945     if (s->out_format == FMT_H263) {
1946         dxy = 0;
1947         if ((motion_x & 3) != 0)
1948             dxy |= 1;
1949         if ((motion_y & 3) != 0)
1950             dxy |= 2;
1951         mx = motion_x >> 2;
1952         my = motion_y >> 2;
1953     } else {
1954         mx = motion_x / 2;
1955         my = motion_y / 2;
1956         dxy = ((my & 1) << 1) | (mx & 1);
1957         mx >>= 1;
1958         my >>= 1;
1959     }
1960     
1961     src_x = s->mb_x * 8 + mx;
1962     src_y = s->mb_y * (8 >> field_based) + my;
1963     src_x = clip(src_x, -8, s->width >> 1);
1964     if (src_x == (s->width >> 1))
1965         dxy &= ~1;
1966     src_y = clip(src_y, -8, height >> 1);
1967     if (src_y == (height >> 1))
1968         dxy &= ~2;
1969     offset = (src_y * uvlinesize) + src_x + (src_offset >> 1);
1970     ptr = ref_picture[1] + offset;
1971     if(emu){
1972         ff_emulated_edge_mc(s->edge_emu_buffer, ptr - (src_offset >> 1), s->uvlinesize, 9, 9+field_based, 
1973                          src_x, src_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
1974         ptr= s->edge_emu_buffer + (src_offset >> 1);
1975     }
1976     pix_op[1][dxy](dest_cb + (dest_offset >> 1), ptr, uvlinesize, h >> 1);
1977
1978     ptr = ref_picture[2] + offset;
1979     if(emu){
1980         ff_emulated_edge_mc(s->edge_emu_buffer, ptr - (src_offset >> 1), s->uvlinesize, 9, 9+field_based, 
1981                          src_x, src_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
1982         ptr= s->edge_emu_buffer + (src_offset >> 1);
1983     }
1984     pix_op[1][dxy](dest_cr + (dest_offset >> 1), ptr, uvlinesize, h >> 1);
1985 }
1986
1987 static inline void qpel_motion(MpegEncContext *s,
1988                                uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1989                                int dest_offset,
1990                                uint8_t **ref_picture, int src_offset,
1991                                int field_based, op_pixels_func (*pix_op)[4],
1992                                qpel_mc_func (*qpix_op)[16],
1993                                int motion_x, int motion_y, int h)
1994 {
1995     uint8_t *ptr;
1996     int dxy, offset, mx, my, src_x, src_y, height, v_edge_pos, linesize, uvlinesize;
1997     int emu=0;
1998
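         /* quarter-pel: dxy packs the two 2-bit fractional MV parts into one of 16
          * sub-pel positions used to index qpix_op[][dxy] for luma; chroma is still
          * interpolated with half-pel accuracy through pix_op[1][] further down */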
1999     dxy = ((motion_y & 3) << 2) | (motion_x & 3);
2000     src_x = s->mb_x * 16 + (motion_x >> 2);
2001     src_y = s->mb_y * (16 >> field_based) + (motion_y >> 2);
2002
2003     height = s->height >> field_based;
2004     v_edge_pos = s->v_edge_pos >> field_based;
2005     src_x = clip(src_x, -16, s->width);
2006     if (src_x == s->width)
2007         dxy &= ~3;
2008     src_y = clip(src_y, -16, height);
2009     if (src_y == height)
2010         dxy &= ~12;
2011     linesize = s->linesize << field_based;
2012     uvlinesize = s->uvlinesize << field_based;
2013     ptr = ref_picture[0] + (src_y * linesize) + src_x + src_offset;
2014     dest_y += dest_offset;
2015 //printf("%d %d %d\n", src_x, src_y, dxy);
2016     
2017     if(s->flags&CODEC_FLAG_EMU_EDGE){
2018         if(   (unsigned)src_x > s->h_edge_pos - (motion_x&3) - 16 
2019            || (unsigned)src_y >    v_edge_pos - (motion_y&3) - h  ){
2020             ff_emulated_edge_mc(s->edge_emu_buffer, ptr - src_offset, s->linesize, 17, 17+field_based, 
2021                              src_x, src_y<<field_based, s->h_edge_pos, s->v_edge_pos);
2022             ptr= s->edge_emu_buffer + src_offset;
2023             emu=1;
2024         }
2025     }
2026     if(!field_based)
2027         qpix_op[0][dxy](dest_y, ptr, linesize);
2028     else{
2029         //damn interlaced mode
2030         //FIXME boundary mirroring is not exactly correct here
2031         qpix_op[1][dxy](dest_y  , ptr  , linesize);
2032         qpix_op[1][dxy](dest_y+8, ptr+8, linesize);
2033     }
2034
2035     if(s->flags&CODEC_FLAG_GRAY) return;
2036
2037     if(field_based){
2038         mx= motion_x/2;
2039         my= motion_y>>1;
2040     }else if(s->workaround_bugs&FF_BUG_QPEL_CHROMA2){
2041         static const int rtab[8]= {0,0,1,1,0,0,0,1};
2042         mx= (motion_x>>1) + rtab[motion_x&7];
2043         my= (motion_y>>1) + rtab[motion_y&7];
2044     }else if(s->workaround_bugs&FF_BUG_QPEL_CHROMA){
2045         mx= (motion_x>>1)|(motion_x&1);
2046         my= (motion_y>>1)|(motion_y&1);
2047     }else{
2048         mx= motion_x/2;
2049         my= motion_y/2;
2050     }
2051     mx= (mx>>1)|(mx&1);
2052     my= (my>>1)|(my&1);
2053
2054     dxy= (mx&1) | ((my&1)<<1);
2055     mx>>=1;
2056     my>>=1;
2057
2058     src_x = s->mb_x * 8 + mx;
2059     src_y = s->mb_y * (8 >> field_based) + my;
2060     src_x = clip(src_x, -8, s->width >> 1);
2061     if (src_x == (s->width >> 1))
2062         dxy &= ~1;
2063     src_y = clip(src_y, -8, height >> 1);
2064     if (src_y == (height >> 1))
2065         dxy &= ~2;
2066
2067     offset = (src_y * uvlinesize) + src_x + (src_offset >> 1);
2068     ptr = ref_picture[1] + offset;
2069     if(emu){
2070         ff_emulated_edge_mc(s->edge_emu_buffer, ptr - (src_offset >> 1), s->uvlinesize, 9, 9 + field_based, 
2071                          src_x, src_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
2072         ptr= s->edge_emu_buffer + (src_offset >> 1);
2073     }
2074     pix_op[1][dxy](dest_cb + (dest_offset >> 1), ptr,  uvlinesize, h >> 1);
2075     
2076     ptr = ref_picture[2] + offset;
2077     if(emu){
2078         ff_emulated_edge_mc(s->edge_emu_buffer, ptr - (src_offset >> 1), s->uvlinesize, 9, 9 + field_based, 
2079                          src_x, src_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
2080         ptr= s->edge_emu_buffer + (src_offset >> 1);
2081     }
2082     pix_op[1][dxy](dest_cr + (dest_offset >> 1), ptr,  uvlinesize, h >> 1);
2083 }
2084
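     /* derive a chroma motion vector component from a luma one (or from the sum of
      * four 8x8 luma vectors) using the H.263 style rounding defined by
      * h263_chroma_roundtab[] */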
2085 inline int ff_h263_round_chroma(int x){
2086     if (x >= 0)
2087         return  (h263_chroma_roundtab[x & 0xf] + ((x >> 3) & ~1));
2088     else {
2089         x = -x;
2090         return -(h263_chroma_roundtab[x & 0xf] + ((x >> 3) & ~1));
2091     }
2092 }
2093
2094 /**
2095  * motion compensation of a single macroblock
2096  * @param s context
2097  * @param dest_y luma destination pointer
2098  * @param dest_cb chroma cb/u destination pointer
2099  * @param dest_cr chroma cr/v destination pointer
2100  * @param dir direction (0->forward, 1->backward)
2101  * @param ref_picture array[3] of pointers to the 3 planes of the reference picture
2102  * @param pix_op halfpel motion compensation function (average or put normally)
2103  * @param qpix_op qpel motion compensation function (average or put normally)
2104  * the motion vectors are taken from s->mv and the MV type from s->mv_type
2105  */
2106 static inline void MPV_motion(MpegEncContext *s, 
2107                               uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2108                               int dir, uint8_t **ref_picture, 
2109                               op_pixels_func (*pix_op)[4], qpel_mc_func (*qpix_op)[16])
2110 {
2111     int dxy, offset, mx, my, src_x, src_y, motion_x, motion_y;
2112     int mb_x, mb_y, i;
2113     uint8_t *ptr, *dest;
2114     int emu=0;
2115
2116     mb_x = s->mb_x;
2117     mb_y = s->mb_y;
2118
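         /* s->mv_type selects the prediction mode:
          *   MV_TYPE_16X16: one vector for the whole macroblock (or the GMC / qpel /
          *                  mspel variants handled below)
          *   MV_TYPE_8X8:   four vectors, one per 8x8 luma block, plus one derived
          *                  chroma vector
          *   MV_TYPE_FIELD: field based prediction (two vectors in frame pictures)
          *   MV_TYPE_16X8:  two vectors for the upper and lower 16x8 halves
          *   MV_TYPE_DMV:   dual prime style put/avg of same and opposite parity
          *                  predictions */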
2119     switch(s->mv_type) {
2120     case MV_TYPE_16X16:
2121 #ifdef CONFIG_RISKY
2122         if(s->mcsel){
2123             if(s->real_sprite_warping_points==1){
2124                 gmc1_motion(s, dest_y, dest_cb, dest_cr, 0,
2125                             ref_picture, 0);
2126             }else{
2127                 gmc_motion(s, dest_y, dest_cb, dest_cr, 0,
2128                             ref_picture, 0);
2129             }
2130         }else if(s->quarter_sample){
2131             qpel_motion(s, dest_y, dest_cb, dest_cr, 0,
2132                         ref_picture, 0,
2133                         0, pix_op, qpix_op,
2134                         s->mv[dir][0][0], s->mv[dir][0][1], 16);
2135         }else if(s->mspel){
2136             ff_mspel_motion(s, dest_y, dest_cb, dest_cr,
2137                         ref_picture, pix_op,
2138                         s->mv[dir][0][0], s->mv[dir][0][1], 16);
2139         }else
2140 #endif
2141         {
2142             mpeg_motion(s, dest_y, dest_cb, dest_cr, 0,
2143                         ref_picture, 0,
2144                         0, pix_op,
2145                         s->mv[dir][0][0], s->mv[dir][0][1], 16);
2146         }           
2147         break;
2148     case MV_TYPE_8X8:
2149         mx = 0;
2150         my = 0;
2151         if(s->quarter_sample){
2152             for(i=0;i<4;i++) {
2153                 motion_x = s->mv[dir][i][0];
2154                 motion_y = s->mv[dir][i][1];
2155
2156                 dxy = ((motion_y & 3) << 2) | (motion_x & 3);
2157                 src_x = mb_x * 16 + (motion_x >> 2) + (i & 1) * 8;
2158                 src_y = mb_y * 16 + (motion_y >> 2) + (i >>1) * 8;
2159                     
2160                 /* WARNING: do not forget half pels */
2161                 src_x = clip(src_x, -16, s->width);
2162                 if (src_x == s->width)
2163                     dxy &= ~3;
2164                 src_y = clip(src_y, -16, s->height);
2165                 if (src_y == s->height)
2166                     dxy &= ~12;
2167                     
2168                 ptr = ref_picture[0] + (src_y * s->linesize) + (src_x);
2169                 if(s->flags&CODEC_FLAG_EMU_EDGE){
2170                     if(   (unsigned)src_x > s->h_edge_pos - (motion_x&3) - 8 
2171                        || (unsigned)src_y > s->v_edge_pos - (motion_y&3) - 8 ){
2172                         ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->linesize, 9, 9, src_x, src_y, s->h_edge_pos, s->v_edge_pos);
2173                         ptr= s->edge_emu_buffer;
2174                     }
2175                 }
2176                 dest = dest_y + ((i & 1) * 8) + (i >> 1) * 8 * s->linesize;
2177                 qpix_op[1][dxy](dest, ptr, s->linesize);
2178
2179                 mx += s->mv[dir][i][0]/2;
2180                 my += s->mv[dir][i][1]/2;
2181             }
2182         }else{
2183             for(i=0;i<4;i++) {
2184                 motion_x = s->mv[dir][i][0];
2185                 motion_y = s->mv[dir][i][1];
2186
2187                 dxy = ((motion_y & 1) << 1) | (motion_x & 1);
2188                 src_x = mb_x * 16 + (motion_x >> 1) + (i & 1) * 8;
2189                 src_y = mb_y * 16 + (motion_y >> 1) + (i >>1) * 8;
2190                     
2191                 /* WARNING: do not forget half pels */
2192                 src_x = clip(src_x, -16, s->width);
2193                 if (src_x == s->width)
2194                     dxy &= ~1;
2195                 src_y = clip(src_y, -16, s->height);
2196                 if (src_y == s->height)
2197                     dxy &= ~2;
2198                     
2199                 ptr = ref_picture[0] + (src_y * s->linesize) + (src_x);
2200                 if(s->flags&CODEC_FLAG_EMU_EDGE){
2201                     if(   (unsigned)src_x > s->h_edge_pos - (motion_x&1) - 8
2202                        || (unsigned)src_y > s->v_edge_pos - (motion_y&1) - 8){
2203                         ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->linesize, 9, 9, src_x, src_y, s->h_edge_pos, s->v_edge_pos);
2204                         ptr= s->edge_emu_buffer;
2205                     }
2206                 }
2207                 dest = dest_y + ((i & 1) * 8) + (i >> 1) * 8 * s->linesize;
2208                 pix_op[1][dxy](dest, ptr, s->linesize, 8);
2209
2210                 mx += s->mv[dir][i][0];
2211                 my += s->mv[dir][i][1];
2212             }
2213         }
2214
2215         if(s->flags&CODEC_FLAG_GRAY) break;
2216         /* In case of 8X8, we construct a single chroma motion vector
2217            with a special rounding */
2218         mx= ff_h263_round_chroma(mx);
2219         my= ff_h263_round_chroma(my);
2220         dxy = ((my & 1) << 1) | (mx & 1);
2221         mx >>= 1;
2222         my >>= 1;
2223
2224         src_x = mb_x * 8 + mx;
2225         src_y = mb_y * 8 + my;
2226         src_x = clip(src_x, -8, s->width/2);
2227         if (src_x == s->width/2)
2228             dxy &= ~1;
2229         src_y = clip(src_y, -8, s->height/2);
2230         if (src_y == s->height/2)
2231             dxy &= ~2;
2232         
2233         offset = (src_y * (s->uvlinesize)) + src_x;
2234         ptr = ref_picture[1] + offset;
2235         if(s->flags&CODEC_FLAG_EMU_EDGE){
2236                 if(   (unsigned)src_x > (s->h_edge_pos>>1) - (dxy &1) - 8
2237                    || (unsigned)src_y > (s->v_edge_pos>>1) - (dxy>>1) - 8){
2238                     ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
2239                     ptr= s->edge_emu_buffer;
2240                     emu=1;
2241                 }
2242             }
2243         pix_op[1][dxy](dest_cb, ptr, s->uvlinesize, 8);
2244
2245         ptr = ref_picture[2] + offset;
2246         if(emu){
2247             ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
2248             ptr= s->edge_emu_buffer;
2249         }
2250         pix_op[1][dxy](dest_cr, ptr, s->uvlinesize, 8);
2251         break;
2252     case MV_TYPE_FIELD:
2253         if (s->picture_structure == PICT_FRAME) {
2254             if(s->quarter_sample){
2255                 /* top field */
2256                 qpel_motion(s, dest_y, dest_cb, dest_cr, 0,
2257                             ref_picture, s->field_select[dir][0] ? s->linesize : 0,
2258                             1, pix_op, qpix_op,
2259                             s->mv[dir][0][0], s->mv[dir][0][1], 8);
2260                 /* bottom field */
2261                 qpel_motion(s, dest_y, dest_cb, dest_cr, s->linesize,
2262                             ref_picture, s->field_select[dir][1] ? s->linesize : 0,
2263                             1, pix_op, qpix_op,
2264                             s->mv[dir][1][0], s->mv[dir][1][1], 8);
2265             }else{
2266                 /* top field */       
2267                 mpeg_motion(s, dest_y, dest_cb, dest_cr, 0,
2268                             ref_picture, s->field_select[dir][0] ? s->linesize : 0,
2269                             1, pix_op,
2270                             s->mv[dir][0][0], s->mv[dir][0][1], 8);
2271                 /* bottom field */
2272                 mpeg_motion(s, dest_y, dest_cb, dest_cr, s->linesize,
2273                             ref_picture, s->field_select[dir][1] ? s->linesize : 0,
2274                             1, pix_op,
2275                             s->mv[dir][1][0], s->mv[dir][1][1], 8);
2276             }
2277         } else {
2278             int offset;
2279             if(s->picture_structure == s->field_select[dir][0] + 1 || s->pict_type == B_TYPE || s->first_field){
2280                 offset= s->field_select[dir][0] ? s->linesize : 0;
2281             }else{
2282                 ref_picture= s->current_picture.data;
2283                 offset= s->field_select[dir][0] ? s->linesize : -s->linesize; 
2284             } 
2285
2286             mpeg_motion(s, dest_y, dest_cb, dest_cr, 0,
2287                         ref_picture, offset,
2288                         0, pix_op,
2289                         s->mv[dir][0][0], s->mv[dir][0][1], 16);
2290         }
2291         break;
2292     case MV_TYPE_16X8:{
2293         int offset;
2294          uint8_t ** ref2picture;
2295
2296             if(s->picture_structure == s->field_select[dir][0] + 1 || s->pict_type == B_TYPE || s->first_field){
2297                 ref2picture= ref_picture;
2298                 offset= s->field_select[dir][0] ? s->linesize : 0;
2299             }else{
2300                 ref2picture= s->current_picture.data;
2301                 offset= s->field_select[dir][0] ? s->linesize : -s->linesize; 
2302             } 
2303
2304             mpeg_motion(s, dest_y, dest_cb, dest_cr, 0,
2305                         ref2picture, offset,
2306                         0, pix_op,
2307                         s->mv[dir][0][0], s->mv[dir][0][1], 8);
2308
2309
2310             if(s->picture_structure == s->field_select[dir][1] + 1 || s->pict_type == B_TYPE || s->first_field){
2311                 ref2picture= ref_picture;
2312                 offset= s->field_select[dir][1] ? s->linesize : 0;
2313             }else{
2314                 ref2picture= s->current_picture.data;
2315                 offset= s->field_select[dir][1] ? s->linesize : -s->linesize; 
2316             } 
2317             // I know it is ugly, but this is the only way to fool emu_edge without rewriting mpeg_motion
2318             mpeg_motion(s, dest_y+16*s->linesize, dest_cb+8*s->uvlinesize, dest_cr+8*s->uvlinesize,
2319                         0,
2320                         ref2picture, offset,
2321                         0, pix_op,
2322                         s->mv[dir][1][0], s->mv[dir][1][1]+16, 8);
2323         }
2324         
2325         break;
2326     case MV_TYPE_DMV:
2327     {
2328     op_pixels_func (*dmv_pix_op)[4];
2329     int offset;
2330
2331         dmv_pix_op = s->dsp.put_pixels_tab;
2332
2333         if(s->picture_structure == PICT_FRAME){
2334             //put top field from top field
2335             mpeg_motion(s, dest_y, dest_cb, dest_cr, 0,
2336                         ref_picture, 0,
2337                         1, dmv_pix_op,
2338                         s->mv[dir][0][0], s->mv[dir][0][1], 8);
2339             //put bottom field from bottom field
2340             mpeg_motion(s, dest_y, dest_cb, dest_cr, s->linesize,
2341                         ref_picture, s->linesize,
2342                         1, dmv_pix_op,
2343                         s->mv[dir][0][0], s->mv[dir][0][1], 8);
2344
2345             dmv_pix_op = s->dsp.avg_pixels_tab; 
2346         
2347             //avg top field from bottom field
2348             mpeg_motion(s, dest_y, dest_cb, dest_cr, 0,
2349                         ref_picture, s->linesize,
2350                         1, dmv_pix_op,
2351                         s->mv[dir][2][0], s->mv[dir][2][1], 8);
2352             //avg bottom field from top field
2353             mpeg_motion(s, dest_y, dest_cb, dest_cr, s->linesize,
2354                         ref_picture, 0,
2355                         1, dmv_pix_op,
2356                         s->mv[dir][3][0], s->mv[dir][3][1], 8);
2357
2358         }else{
2359             offset=(s->picture_structure == PICT_BOTTOM_FIELD)? 
2360                          s->linesize : 0;
2361
2362             //put field from the same parity
2363             //same parity is never in the same frame
2364             mpeg_motion(s, dest_y, dest_cb, dest_cr, 0,
2365                         ref_picture,offset,
2366                         0,dmv_pix_op,
2367                         s->mv[dir][0][0],s->mv[dir][0][1],16);
2368
2369             // after put we make avg of the same block
2370             dmv_pix_op=s->dsp.avg_pixels_tab; 
2371
2372             //opposite parity is always in the same frame if this is second field
2373             if(!s->first_field){
2374                 ref_picture = s->current_picture.data;    
2375                 //the top field is one linesize from the frame's beginning
2376                 offset=(s->picture_structure == PICT_BOTTOM_FIELD)? 
2377                         -s->linesize : s->linesize;
2378             }else 
2379                 offset=(s->picture_structure == PICT_BOTTOM_FIELD)? 
2380                         0 : s->linesize;
2381
2382             //avg field from the opposite parity
2383             mpeg_motion(s, dest_y, dest_cb, dest_cr,0,
2384                         ref_picture, offset,
2385                         0,dmv_pix_op,
2386                         s->mv[dir][2][0],s->mv[dir][2][1],16);
2387         }
2388     }
2389     break;
2390
2391     }
2392 }
2393
2394
2395 /* put block[] to dest[] */
2396 static inline void put_dct(MpegEncContext *s, 
2397                            DCTELEM *block, int i, uint8_t *dest, int line_size)
2398 {
2399     s->dct_unquantize(s, block, i, s->qscale);
2400     s->dsp.idct_put (dest, line_size, block);
2401 }
2402
2403 /* add block[] to dest[] */
2404 static inline void add_dct(MpegEncContext *s, 
2405                            DCTELEM *block, int i, uint8_t *dest, int line_size)
2406 {
2407     if (s->block_last_index[i] >= 0) {
2408         s->dsp.idct_add (dest, line_size, block);
2409     }
2410 }
2411
2412 static inline void add_dequant_dct(MpegEncContext *s, 
2413                            DCTELEM *block, int i, uint8_t *dest, int line_size)
2414 {
2415     if (s->block_last_index[i] >= 0) {
2416         s->dct_unquantize(s, block, i, s->qscale);
2417
2418         s->dsp.idct_add (dest, line_size, block);
2419     }
2420 }
2421
2422 /**
2423  * cleans dc, ac, coded_block for the current non-intra MB
2424  */
2425 void ff_clean_intra_table_entries(MpegEncContext *s)
2426 {
2427     int wrap = s->block_wrap[0];
2428     int xy = s->block_index[0];
2429     
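         /* reset the luma DC predictors to 1024 (the DC coefficient of a uniform
          * mid-grey (128) block under the usual 8x8 DCT scaling) */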
2430     s->dc_val[0][xy           ] = 
2431     s->dc_val[0][xy + 1       ] = 
2432     s->dc_val[0][xy     + wrap] =
2433     s->dc_val[0][xy + 1 + wrap] = 1024;
2434     /* ac pred */
2435     memset(s->ac_val[0][xy       ], 0, 32 * sizeof(int16_t));
2436     memset(s->ac_val[0][xy + wrap], 0, 32 * sizeof(int16_t));
2437     if (s->msmpeg4_version>=3) {
2438         s->coded_block[xy           ] =
2439         s->coded_block[xy + 1       ] =
2440         s->coded_block[xy     + wrap] =
2441         s->coded_block[xy + 1 + wrap] = 0;
2442     }
2443     /* chroma */
2444     wrap = s->block_wrap[4];
2445     xy = s->mb_x + 1 + (s->mb_y + 1) * wrap;
2446     s->dc_val[1][xy] =
2447     s->dc_val[2][xy] = 1024;
2448     /* ac pred */
2449     memset(s->ac_val[1][xy], 0, 16 * sizeof(int16_t));
2450     memset(s->ac_val[2][xy], 0, 16 * sizeof(int16_t));
2451     
2452     s->mbintra_table[s->mb_x + s->mb_y*s->mb_stride]= 0;
2453 }
2454
2455 /* generic function called after a macroblock has been parsed by the
2456    decoder or after it has been encoded by the encoder.
2457
2458    Important variables used:
2459    s->mb_intra : true if intra macroblock
2460    s->mv_dir   : motion vector direction
2461    s->mv_type  : motion vector type
2462    s->mv       : motion vector
2463    s->interlaced_dct : true if interlaced dct used (mpeg2)
2464  */
2465 void MPV_decode_mb(MpegEncContext *s, DCTELEM block[6][64])
2466 {
2467     int mb_x, mb_y;
2468     const int mb_xy = s->mb_y * s->mb_stride + s->mb_x;
2469 #ifdef HAVE_XVMC
2470     if(s->avctx->xvmc_acceleration){
2471         XVMC_decode_mb(s,block);
2472         return;
2473     }
2474 #endif
2475
2476     mb_x = s->mb_x;
2477     mb_y = s->mb_y;
2478
2479     s->current_picture.qscale_table[mb_xy]= s->qscale;
2480
2481     /* update DC predictors for P macroblocks */
2482     if (!s->mb_intra) {
2483         if (s->h263_pred || s->h263_aic) {
2484             if(s->mbintra_table[mb_xy])
2485                 ff_clean_intra_table_entries(s);
2486         } else {
2487             s->last_dc[0] =
2488             s->last_dc[1] =
2489             s->last_dc[2] = 128 << s->intra_dc_precision;
2490         }
2491     }
2492     else if (s->h263_pred || s->h263_aic)
2493         s->mbintra_table[mb_xy]=1;
2494
2495     if ((s->flags&CODEC_FLAG_PSNR) || !(s->encoding && (s->intra_only || s->pict_type==B_TYPE))) { //FIXME precalc
2496         uint8_t *dest_y, *dest_cb, *dest_cr;
2497         int dct_linesize, dct_offset;
2498         op_pixels_func (*op_pix)[4];
2499         qpel_mc_func (*op_qpix)[16];
2500         const int linesize= s->current_picture.linesize[0]; //not s->linesize as this would be wrong for field pics
2501         const int uvlinesize= s->current_picture.linesize[1];
2502
2503         /* avoid copy if macroblock skipped in last frame too */
2504         /* do this only during decoding, as the buffers may get trashed a bit during encoding */
2505         if(!s->encoding){
2506             uint8_t *mbskip_ptr = &s->mbskip_table[mb_xy];
2507             const int age= s->current_picture.age;
2508
2509             assert(age);
2510
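                 /* mbskip_table[] counts for how many consecutive frames this MB has
                  * been skipped; once the count reaches the age of the current picture,
                  * the data already in the frame buffer is up to date and we can return
                  * without redoing the motion copy */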
2511             if (s->mb_skiped) {
2512                 s->mb_skiped= 0;
2513                 assert(s->pict_type!=I_TYPE);
2514  
2515                 (*mbskip_ptr) ++; /* indicate that this time we skipped it */
2516                 if(*mbskip_ptr >99) *mbskip_ptr= 99;
2517
2518                 /* if previous was skipped too, then nothing to do !  */
2519                 if (*mbskip_ptr >= age && s->current_picture.reference){
2520                     return;
2521                 }
2522             } else if(!s->current_picture.reference){
2523                 (*mbskip_ptr) ++; /* increase counter so the age can be compared cleanly */
2524                 if(*mbskip_ptr >99) *mbskip_ptr= 99;
2525             } else{
2526                 *mbskip_ptr = 0; /* not skipped */
2527             }
2528         }
2529
2530         if (s->interlaced_dct) {
2531             dct_linesize = linesize * 2;
2532             dct_offset = linesize;
2533         } else {
2534             dct_linesize = linesize;
2535             dct_offset = linesize * 8;
2536         }
2537         
2538         dest_y=  s->dest[0];
2539         dest_cb= s->dest[1];
2540         dest_cr= s->dest[2];
2541
2542         if (!s->mb_intra) {
2543             /* motion handling */
2544             /* decoding or more than one mb_type (MC was already done otherwise) */
2545             if(!s->encoding){
2546                 if ((!s->no_rounding) || s->pict_type==B_TYPE){                
2547                     op_pix = s->dsp.put_pixels_tab;
2548                     op_qpix= s->dsp.put_qpel_pixels_tab;
2549                 }else{
2550                     op_pix = s->dsp.put_no_rnd_pixels_tab;
2551                     op_qpix= s->dsp.put_no_rnd_qpel_pixels_tab;
2552                 }
2553
2554                 if (s->mv_dir & MV_DIR_FORWARD) {
2555                     MPV_motion(s, dest_y, dest_cb, dest_cr, 0, s->last_picture.data, op_pix, op_qpix);
2556                     op_pix = s->dsp.avg_pixels_tab;
2557                     op_qpix= s->dsp.avg_qpel_pixels_tab;
2558                 }
2559                 if (s->mv_dir & MV_DIR_BACKWARD) {
2560                     MPV_motion(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.data, op_pix, op_qpix);
2561                 }
2562             }
2563
2564             /* skip dequant / idct if we are really late ;) */
2565             if(s->hurry_up>1) return;
2566
2567             /* add dct residue */
2568             if(s->encoding || !(   s->h263_msmpeg4 || s->codec_id==CODEC_ID_MPEG1VIDEO || s->codec_id==CODEC_ID_MPEG2VIDEO
2569                                 || (s->codec_id==CODEC_ID_MPEG4 && !s->mpeg_quant))){
2570                 add_dequant_dct(s, block[0], 0, dest_y, dct_linesize);
2571                 add_dequant_dct(s, block[1], 1, dest_y + 8, dct_linesize);
2572                 add_dequant_dct(s, block[2], 2, dest_y + dct_offset, dct_linesize);
2573                 add_dequant_dct(s, block[3], 3, dest_y + dct_offset + 8, dct_linesize);
2574
2575                 if(!(s->flags&CODEC_FLAG_GRAY)){
2576                     add_dequant_dct(s, block[4], 4, dest_cb, uvlinesize);
2577                     add_dequant_dct(s, block[5], 5, dest_cr, uvlinesize);
2578                 }
2579             } else if(s->codec_id != CODEC_ID_WMV2){
2580                 add_dct(s, block[0], 0, dest_y, dct_linesize);
2581                 add_dct(s, block[1], 1, dest_y + 8, dct_linesize);
2582                 add_dct(s, block[2], 2, dest_y + dct_offset, dct_linesize);
2583                 add_dct(s, block[3], 3, dest_y + dct_offset + 8, dct_linesize);
2584
2585                 if(!(s->flags&CODEC_FLAG_GRAY)){
2586                     add_dct(s, block[4], 4, dest_cb, uvlinesize);
2587                     add_dct(s, block[5], 5, dest_cr, uvlinesize);
2588                 }
2589             } 
2590 #ifdef CONFIG_RISKY
2591             else{
2592                 ff_wmv2_add_mb(s, block, dest_y, dest_cb, dest_cr);
2593             }
2594 #endif
2595         } else {
2596             /* dct only in intra block */
2597             if(s->encoding || !(s->codec_id==CODEC_ID_MPEG1VIDEO || s->codec_id==CODEC_ID_MPEG2VIDEO)){
2598                 put_dct(s, block[0], 0, dest_y, dct_linesize);
2599                 put_dct(s, block[1], 1, dest_y + 8, dct_linesize);
2600                 put_dct(s, block[2], 2, dest_y + dct_offset, dct_linesize);
2601                 put_dct(s, block[3], 3, dest_y + dct_offset + 8, dct_linesize);
2602
2603                 if(!(s->flags&CODEC_FLAG_GRAY)){
2604                     put_dct(s, block[4], 4, dest_cb, uvlinesize);
2605                     put_dct(s, block[5], 5, dest_cr, uvlinesize);
2606                 }
2607             }else{
2608                 s->dsp.idct_put(dest_y                 , dct_linesize, block[0]);
2609                 s->dsp.idct_put(dest_y              + 8, dct_linesize, block[1]);
2610                 s->dsp.idct_put(dest_y + dct_offset    , dct_linesize, block[2]);
2611                 s->dsp.idct_put(dest_y + dct_offset + 8, dct_linesize, block[3]);
2612
2613                 if(!(s->flags&CODEC_FLAG_GRAY)){
2614                     s->dsp.idct_put(dest_cb, uvlinesize, block[4]);
2615                     s->dsp.idct_put(dest_cr, uvlinesize, block[5]);
2616                 }
2617             }
2618         }
2619     }
2620 }
2621
2622 #ifdef CONFIG_ENCODERS
2623
2624 static inline void dct_single_coeff_elimination(MpegEncContext *s, int n, int threshold)
2625 {
2626     static const char tab[64]=
2627         {3,2,2,1,1,1,1,1,
2628          1,1,1,1,1,1,1,1,
2629          1,1,1,1,1,1,1,1,
2630          0,0,0,0,0,0,0,0,
2631          0,0,0,0,0,0,0,0,
2632          0,0,0,0,0,0,0,0,
2633          0,0,0,0,0,0,0,0,
2634          0,0,0,0,0,0,0,0};
2635     int score=0;
2636     int run=0;
2637     int i;
2638     DCTELEM *block= s->block[n];
2639     const int last_index= s->block_last_index[n];
2640     int skip_dc;
2641
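         /* heuristic: if the block only contains a few isolated +-1 coefficients
          * (each weighted by tab[] according to the zero run preceding it) and their
          * total score stays below the threshold, the block is zeroed below because
          * coding it is not considered worth the bits; a negative threshold means
          * "use its absolute value and allow the DC coefficient to be zeroed too" */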
2642     if(threshold<0){
2643         skip_dc=0;
2644         threshold= -threshold;
2645     }else
2646         skip_dc=1;
2647
2648     /* are all the coefficients which we could set to zero already zero? */
2649     if(last_index<=skip_dc - 1) return;
2650
2651     for(i=0; i<=last_index; i++){
2652         const int j = s->intra_scantable.permutated[i];
2653         const int level = ABS(block[j]);
2654         if(level==1){
2655             if(skip_dc && i==0) continue;
2656             score+= tab[run];
2657             run=0;
2658         }else if(level>1){
2659             return;
2660         }else{
2661             run++;
2662         }
2663     }
2664     if(score >= threshold) return;
2665     for(i=skip_dc; i<=last_index; i++){
2666         const int j = s->intra_scantable.permutated[i];
2667         block[j]=0;
2668     }
2669     if(block[0]) s->block_last_index[n]= 0;
2670     else         s->block_last_index[n]= -1;
2671 }
2672
2673 static inline void clip_coeffs(MpegEncContext *s, DCTELEM *block, int last_index)
2674 {
2675     int i;
2676     const int maxlevel= s->max_qcoeff;
2677     const int minlevel= s->min_qcoeff;
2678     
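         /* clip the quantized coefficients to [s->min_qcoeff, s->max_qcoeff];
          * the intra DC coefficient is left untouched */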
2679     if(s->mb_intra){
2680         i=1; //skip clipping of intra dc
2681     }else
2682         i=0;
2683     
2684     for(;i<=last_index; i++){
2685         const int j= s->intra_scantable.permutated[i];
2686         int level = block[j];
2687        
2688         if     (level>maxlevel) level=maxlevel;
2689         else if(level<minlevel) level=minlevel;
2690
2691         block[j]= level;
2692     }
2693 }
2694
2695 #if 0
2696 static int pix_vcmp16x8(uint8_t *s, int stride){ //FIXME move to dsputil & optimize
2697     int score=0;
2698     int x,y;
2699     
2700     for(y=0; y<7; y++){
2701         for(x=0; x<16; x+=4){
2702             score+= ABS(s[x  ] - s[x  +stride]) + ABS(s[x+1] - s[x+1+stride]) 
2703                    +ABS(s[x+2] - s[x+2+stride]) + ABS(s[x+3] - s[x+3+stride]);
2704         }
2705         s+= stride;
2706     }
2707     
2708     return score;
2709 }
2710
2711 static int pix_diff_vcmp16x8(uint8_t *s1, uint8_t*s2, int stride){ //FIXME move to dsputil & optimize
2712     int score=0;
2713     int x,y;
2714     
2715     for(y=0; y<7; y++){
2716         for(x=0; x<16; x++){
2717             score+= ABS(s1[x  ] - s2[x ] - s1[x  +stride] + s2[x +stride]);
2718         }
2719         s1+= stride;
2720         s2+= stride;
2721     }
2722     
2723     return score;
2724 }
2725 #else
2726 #define SQ(a) ((a)*(a))
2727
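     /* measure the vertical activity of a 16x8 area as the sum of squared differences
      * between vertically adjacent lines (pix_vcmp16x8), or of the corresponding
      * difference signal between two pictures (pix_diff_vcmp16x8); used below to
      * choose between a progressive and an interlaced DCT */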
2728 static int pix_vcmp16x8(uint8_t *s, int stride){ //FIXME move to dsputil & optimize
2729     int score=0;
2730     int x,y;
2731     
2732     for(y=0; y<7; y++){
2733         for(x=0; x<16; x+=4){
2734             score+= SQ(s[x  ] - s[x  +stride]) + SQ(s[x+1] - s[x+1+stride]) 
2735                    +SQ(s[x+2] - s[x+2+stride]) + SQ(s[x+3] - s[x+3+stride]);
2736         }
2737         s+= stride;
2738     }
2739     
2740     return score;
2741 }
2742
2743 static int pix_diff_vcmp16x8(uint8_t *s1, uint8_t*s2, int stride){ //FIXME move to dsputil & optimize
2744     int score=0;
2745     int x,y;
2746     
2747     for(y=0; y<7; y++){
2748         for(x=0; x<16; x++){
2749             score+= SQ(s1[x  ] - s2[x ] - s1[x  +stride] + s2[x +stride]);
2750         }
2751         s1+= stride;
2752         s2+= stride;
2753     }
2754     
2755     return score;
2756 }
2757
2758 #endif
2759
2760 #endif //CONFIG_ENCODERS
2761
2762 /**
2763  * Passes a completed horizontal band of the picture to the draw_horiz_band() callback of the AVCodecContext, if one is set.
2764  * @param h the normal height; this will be reduced automatically if needed for the last row
2765  */
2766 void ff_draw_horiz_band(MpegEncContext *s, int y, int h){
2767     if (s->avctx->draw_horiz_band) {
2768         AVFrame *src;
2769         int offset[4];
2770         
2771         if(s->picture_structure != PICT_FRAME){
2772             h <<= 1;
2773             y <<= 1;
2774             if(s->first_field  && !(s->avctx->slice_flags&SLICE_FLAG_ALLOW_FIELD)) return;
2775         }
2776
2777         h= FFMIN(h, s->height - y);
2778
2779         if(s->pict_type==B_TYPE || s->low_delay || (s->avctx->slice_flags&SLICE_FLAG_CODED_ORDER)) 
2780             src= (AVFrame*)s->current_picture_ptr;
2781         else if(s->last_picture_ptr)
2782             src= (AVFrame*)s->last_picture_ptr;
2783         else
2784             return;
2785             
2786         if(s->pict_type==B_TYPE && s->picture_structure == PICT_FRAME && s->out_format != FMT_H264){
2787             offset[0]=
2788             offset[1]=
2789             offset[2]=
2790             offset[3]= 0;
2791         }else{
2792             offset[0]= y * s->linesize;
2793             offset[1]= 
2794             offset[2]= (y>>1) * s->uvlinesize;
2795             offset[3]= 0;
2796         }
2797
2798         emms_c();
2799
2800         s->avctx->draw_horiz_band(s->avctx, src, offset,
2801                                   y, s->picture_structure, h);
2802     }
2803 }
2804
2805 void ff_init_block_index(MpegEncContext *s){ //FIXME maybe rename
2806     const int linesize= s->current_picture.linesize[0]; //not s->linesize as this would be wrong for field pics
2807     const int uvlinesize= s->current_picture.linesize[1];
2808         
2809     s->block_index[0]= s->block_wrap[0]*(s->mb_y*2 + 1) - 1 + s->mb_x*2;
2810     s->block_index[1]= s->block_wrap[0]*(s->mb_y*2 + 1)     + s->mb_x*2;
2811     s->block_index[2]= s->block_wrap[0]*(s->mb_y*2 + 2) - 1 + s->mb_x*2;
2812     s->block_index[3]= s->block_wrap[0]*(s->mb_y*2 + 2)     + s->mb_x*2;
2813     s->block_index[4]= s->block_wrap[4]*(s->mb_y + 1)                    + s->block_wrap[0]*(s->mb_height*2 + 2) + s->mb_x;
2814     s->block_index[5]= s->block_wrap[4]*(s->mb_y + 1 + s->mb_height + 2) + s->block_wrap[0]*(s->mb_height*2 + 2) + s->mb_x;
2815     
2816     if(s->pict_type==B_TYPE && s->avctx->draw_horiz_band && s->picture_structure==PICT_FRAME){
2817         s->dest[0] = s->current_picture.data[0] + s->mb_x * 16 - 16;
2818         s->dest[1] = s->current_picture.data[1] + s->mb_x * 8 - 8;
2819         s->dest[2] = s->current_picture.data[2] + s->mb_x * 8 - 8;
2820     }else{
2821         s->dest[0] = s->current_picture.data[0] + (s->mb_y * 16* linesize  ) + s->mb_x * 16 - 16;
2822         s->dest[1] = s->current_picture.data[1] + (s->mb_y * 8 * uvlinesize) + s->mb_x * 8 - 8;
2823         s->dest[2] = s->current_picture.data[2] + (s->mb_y * 8 * uvlinesize) + s->mb_x * 8 - 8;
2824     }    
2825 }
2826
2827 #ifdef CONFIG_ENCODERS
2828
2829 static void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
2830 {
2831     const int mb_x= s->mb_x;
2832     const int mb_y= s->mb_y;
2833     int i;
2834     int skip_dct[6];
2835     int dct_offset   = s->linesize*8; //default for progressive frames
2836     
2837     for(i=0; i<6; i++) skip_dct[i]=0;
2838     
2839     if(s->adaptive_quant){
2840         const int last_qp= s->qscale;
2841         const int mb_xy= mb_x + mb_y*s->mb_stride;
2842
2843         s->lambda= s->lambda_table[mb_xy];
2844         update_qscale(s);
2845         s->dquant= s->qscale - last_qp;
2846
2847         if(s->out_format==FMT_H263)
2848             s->dquant= clip(s->dquant, -2, 2); //FIXME RD
2849             
2850         if(s->codec_id==CODEC_ID_MPEG4){        
2851             if(!s->mb_intra){
2852                 if((s->mv_dir&MV_DIRECT) || s->mv_type==MV_TYPE_8X8)
2853                     s->dquant=0;
2854             }
2855         }
2856         s->qscale= last_qp + s->dquant;
2857         s->y_dc_scale= s->y_dc_scale_table[ s->qscale ];
2858         s->c_dc_scale= s->c_dc_scale_table[ s->qscale ];
2859     }
2860
2861     if (s->mb_intra) {
2862         uint8_t *ptr;
2863         int wrap_y;
2864         int emu=0;
2865
2866         wrap_y = s->linesize;
2867         ptr = s->new_picture.data[0] + (mb_y * 16 * wrap_y) + mb_x * 16;
2868
2869         if(mb_x*16+16 > s->width || mb_y*16+16 > s->height){
2870             ff_emulated_edge_mc(s->edge_emu_buffer, ptr, wrap_y, 16, 16, mb_x*16, mb_y*16, s->width, s->height);
2871             ptr= s->edge_emu_buffer;
2872             emu=1;
2873         }
2874         
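        /* Interlaced DCT decision (intra case): compare vertical activity of
           the MB sampled with frame line spacing vs. field line spacing; if
           separate field DCTs look clearly cheaper (margin of 100), switch to
           field DCT by doubling the luma stride and pointing the lower two
           luma blocks at the second field. */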
2875         if(s->flags&CODEC_FLAG_INTERLACED_DCT){
2876             int progressive_score, interlaced_score;
2877             
2878             progressive_score= pix_vcmp16x8(ptr, wrap_y  ) + pix_vcmp16x8(ptr + wrap_y*8, wrap_y );
2879             interlaced_score = pix_vcmp16x8(ptr, wrap_y*2) + pix_vcmp16x8(ptr + wrap_y  , wrap_y*2);
2880             
2881             if(progressive_score > interlaced_score + 100){
2882                 s->interlaced_dct=1;
2883             
2884                 dct_offset= wrap_y;
2885                 wrap_y<<=1;
2886             }else
2887                 s->interlaced_dct=0;
2888         }
2889         
2890         s->dsp.get_pixels(s->block[0], ptr                 , wrap_y);
2891         s->dsp.get_pixels(s->block[1], ptr              + 8, wrap_y);
2892         s->dsp.get_pixels(s->block[2], ptr + dct_offset    , wrap_y);
2893         s->dsp.get_pixels(s->block[3], ptr + dct_offset + 8, wrap_y);
2894
2895         if(s->flags&CODEC_FLAG_GRAY){
2896             skip_dct[4]= 1;
2897             skip_dct[5]= 1;
2898         }else{
2899             int wrap_c = s->uvlinesize;
2900             ptr = s->new_picture.data[1] + (mb_y * 8 * wrap_c) + mb_x * 8;
2901             if(emu){
2902                 ff_emulated_edge_mc(s->edge_emu_buffer, ptr, wrap_c, 8, 8, mb_x*8, mb_y*8, s->width>>1, s->height>>1);
2903                 ptr= s->edge_emu_buffer;
2904             }
2905             s->dsp.get_pixels(s->block[4], ptr, wrap_c);
2906
2907             ptr = s->new_picture.data[2] + (mb_y * 8 * wrap_c) + mb_x * 8;
2908             if(emu){
2909                 ff_emulated_edge_mc(s->edge_emu_buffer, ptr, wrap_c, 8, 8, mb_x*8, mb_y*8, s->width>>1, s->height>>1);
2910                 ptr= s->edge_emu_buffer;
2911             }
2912             s->dsp.get_pixels(s->block[5], ptr, wrap_c);
2913         }
2914     }else{
2915         op_pixels_func (*op_pix)[4];
2916         qpel_mc_func (*op_qpix)[16];
2917         uint8_t *dest_y, *dest_cb, *dest_cr;
2918         uint8_t *ptr_y, *ptr_cb, *ptr_cr;
2919         int wrap_y, wrap_c;
2920         int emu=0;
2921
2922         dest_y  = s->dest[0];
2923         dest_cb = s->dest[1];
2924         dest_cr = s->dest[2];
2925         wrap_y = s->linesize;
2926         wrap_c = s->uvlinesize;
2927         ptr_y  = s->new_picture.data[0] + (mb_y * 16 * wrap_y) + mb_x * 16;
2928         ptr_cb = s->new_picture.data[1] + (mb_y * 8 * wrap_c) + mb_x * 8;
2929         ptr_cr = s->new_picture.data[2] + (mb_y * 8 * wrap_c) + mb_x * 8;
2930
2931         if ((!s->no_rounding) || s->pict_type==B_TYPE){
2932             op_pix = s->dsp.put_pixels_tab;
2933             op_qpix= s->dsp.put_qpel_pixels_tab;
2934         }else{
2935             op_pix = s->dsp.put_no_rnd_pixels_tab;
2936             op_qpix= s->dsp.put_no_rnd_qpel_pixels_tab;
2937         }
2938
2939         if (s->mv_dir & MV_DIR_FORWARD) {
2940             MPV_motion(s, dest_y, dest_cb, dest_cr, 0, s->last_picture.data, op_pix, op_qpix);
2941             op_pix = s->dsp.avg_pixels_tab;
2942             op_qpix= s->dsp.avg_qpel_pixels_tab;
2943         }
2944         if (s->mv_dir & MV_DIR_BACKWARD) {
2945             MPV_motion(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.data, op_pix, op_qpix);
2946         }
2947
2948         if(mb_x*16+16 > s->width || mb_y*16+16 > s->height){
2949             ff_emulated_edge_mc(s->edge_emu_buffer, ptr_y, wrap_y, 16, 16, mb_x*16, mb_y*16, s->width, s->height);
2950             ptr_y= s->edge_emu_buffer;
2951             emu=1;
2952         }
2953         
2954         if(s->flags&CODEC_FLAG_INTERLACED_DCT){
2955             int progressive_score, interlaced_score;
2956             
2957             progressive_score= pix_diff_vcmp16x8(ptr_y           , dest_y           , wrap_y  ) 
2958                              + pix_diff_vcmp16x8(ptr_y + wrap_y*8, dest_y + wrap_y*8, wrap_y  );
2959             interlaced_score = pix_diff_vcmp16x8(ptr_y           , dest_y           , wrap_y*2)
2960                              + pix_diff_vcmp16x8(ptr_y + wrap_y  , dest_y + wrap_y  , wrap_y*2);
2961             
2962             if(progressive_score > interlaced_score + 600){
2963                 s->interlaced_dct=1;
2964             
2965                 dct_offset= wrap_y;
2966                 wrap_y<<=1;
2967             }else
2968                 s->interlaced_dct=0;
2969         }
2970         
2971         s->dsp.diff_pixels(s->block[0], ptr_y                 , dest_y                 , wrap_y);
2972         s->dsp.diff_pixels(s->block[1], ptr_y              + 8, dest_y              + 8, wrap_y);
2973         s->dsp.diff_pixels(s->block[2], ptr_y + dct_offset    , dest_y + dct_offset    , wrap_y);
2974         s->dsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8, dest_y + dct_offset + 8, wrap_y);
2975         
2976         if(s->flags&CODEC_FLAG_GRAY){
2977             skip_dct[4]= 1;
2978             skip_dct[5]= 1;
2979         }else{
2980             if(emu){
2981                 ff_emulated_edge_mc(s->edge_emu_buffer, ptr_cb, wrap_c, 8, 8, mb_x*8, mb_y*8, s->width>>1, s->height>>1);
2982                 ptr_cb= s->edge_emu_buffer;
2983             }
2984             s->dsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
2985             if(emu){
2986                 ff_emulated_edge_mc(s->edge_emu_buffer, ptr_cr, wrap_c, 8, 8, mb_x*8, mb_y*8, s->width>>1, s->height>>1);
2987                 ptr_cr= s->edge_emu_buffer;
2988             }
2989             s->dsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
2990         }
2991         /* pre quantization */         
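        /* If the motion-compensated residual of this MB is already small,
           skip the DCT of any 8x8 block whose SAD against the prediction is
           below roughly 20*qscale: such a block would quantize to all zeros
           anyway, so it is flagged in skip_dct[] and its block_last_index is
           later set to -1. */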
2992         if(s->current_picture.mc_mb_var[s->mb_stride*mb_y+ mb_x]<2*s->qscale*s->qscale){
2993             //FIXME optimize
2994             if(s->dsp.pix_abs8x8(ptr_y               , dest_y               , wrap_y) < 20*s->qscale) skip_dct[0]= 1;
2995             if(s->dsp.pix_abs8x8(ptr_y            + 8, dest_y            + 8, wrap_y) < 20*s->qscale) skip_dct[1]= 1;
2996             if(s->dsp.pix_abs8x8(ptr_y +dct_offset   , dest_y +dct_offset   , wrap_y) < 20*s->qscale) skip_dct[2]= 1;
2997             if(s->dsp.pix_abs8x8(ptr_y +dct_offset+ 8, dest_y +dct_offset+ 8, wrap_y) < 20*s->qscale) skip_dct[3]= 1;
2998             if(s->dsp.pix_abs8x8(ptr_cb              , dest_cb              , wrap_c) < 20*s->qscale) skip_dct[4]= 1;
2999             if(s->dsp.pix_abs8x8(ptr_cr              , dest_cr              , wrap_c) < 20*s->qscale) skip_dct[5]= 1;
3000 #if 0
3001 {
3002  static int stat[7];
3003  int num=0;
3004  for(i=0; i<6; i++)
3005   if(skip_dct[i]) num++;
3006  stat[num]++;
3007  
3008  if(s->mb_x==0 && s->mb_y==0){
3009   for(i=0; i<7; i++){
3010    printf("%6d %1d\n", stat[i], i);
3011   }
3012  }
3013 }
3014 #endif
3015         }
3016
3017     }
3018             
3019     /* DCT & quantize */
3020     if(s->out_format==FMT_MJPEG){
3021         for(i=0;i<6;i++) {
3022             int overflow;
3023             s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, 8, &overflow);
3024             if (overflow) clip_coeffs(s, s->block[i], s->block_last_index[i]);
3025         }
3026     }else{
3027         for(i=0;i<6;i++) {
3028             if(!skip_dct[i]){
3029                 int overflow;
3030                 s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow);
3031             // FIXME we could decide to change the quantizer instead of clipping
3032             // JS: I don't think that would be a good idea, it could lower quality instead
3033             //     of improving it. Just INTRADC clipping deserves changes in the quantizer
3034                 if (overflow) clip_coeffs(s, s->block[i], s->block_last_index[i]);
3035             }else
3036                 s->block_last_index[i]= -1;
3037         }
3038         
3039         if(s->luma_elim_threshold && !s->mb_intra)
3040             for(i=0; i<4; i++)
3041                 dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
3042         if(s->chroma_elim_threshold && !s->mb_intra)
3043             for(i=4; i<6; i++)
3044                 dct_single_coeff_elimination(s, i, s->chroma_elim_threshold);
3045
3046         if(s->flags & CODEC_FLAG_CBP_RD){
3047             for(i=0;i<6;i++) {
3048                 if(s->block_last_index[i] == -1)
3049                     s->coded_score[i]= INT_MAX/256;
3050             }
3051         }
3052     }
3053
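    /* Gray-only (CODEC_FLAG_GRAY) intra MBs: the chroma blocks are replaced
       by a flat mid-gray block, i.e. only a DC coefficient of 1024 (the DC of
       a uniform block of 128s) divided by the chroma DC scale, with rounding. */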
3054     if((s->flags&CODEC_FLAG_GRAY) && s->mb_intra){
3055         s->block_last_index[4]=
3056         s->block_last_index[5]= 0;
3057         s->block[4][0]=
3058         s->block[5][0]= (1024 + s->c_dc_scale/2)/ s->c_dc_scale;
3059     }
3060
3061     /* huffman encode */
3062     switch(s->codec_id){ //FIXME funct ptr could be slightly faster
3063     case CODEC_ID_MPEG1VIDEO:
3064     case CODEC_ID_MPEG2VIDEO:
3065         mpeg1_encode_mb(s, s->block, motion_x, motion_y); break;
3066 #ifdef CONFIG_RISKY
3067     case CODEC_ID_MPEG4:
3068         mpeg4_encode_mb(s, s->block, motion_x, motion_y); break;
3069     case CODEC_ID_MSMPEG4V2:
3070     case CODEC_ID_MSMPEG4V3:
3071     case CODEC_ID_WMV1:
3072         msmpeg4_encode_mb(s, s->block, motion_x, motion_y); break;
3073     case CODEC_ID_WMV2:
3074          ff_wmv2_encode_mb(s, s->block, motion_x, motion_y); break;
3075     case CODEC_ID_H263:
3076     case CODEC_ID_H263P:
3077     case CODEC_ID_FLV1:
3078     case CODEC_ID_RV10:
3079         h263_encode_mb(s, s->block, motion_x, motion_y); break;
3080 #endif
3081     case CODEC_ID_MJPEG:
3082         mjpeg_encode_mb(s, s->block); break;
3083     default:
3084         assert(0);
3085     }
3086 }
3087
3088 #endif //CONFIG_ENCODERS
3089
3090 /**
3091  * combines the (truncated) bitstream into a complete frame
3092  * @returns -1 if no complete frame could be created
3093  */
3094 int ff_combine_frame( MpegEncContext *s, int next, uint8_t **buf, int *buf_size){
3095     ParseContext *pc= &s->parse_context;
3096
3097 #if 0
3098     if(pc->overread){
3099         printf("overread %d, state:%X next:%d index:%d o_index:%d\n", pc->overread, pc->state, next, pc->index, pc->overread_index);
3100         printf("%X %X %X %X\n", (*buf)[0], (*buf)[1],(*buf)[2],(*buf)[3]);
3101     }
3102 #endif
3103
3104     /* copy overread bytes from the last frame into the buffer */
3105     for(; pc->overread>0; pc->overread--){
3106         pc->buffer[pc->index++]= pc->buffer[pc->overread_index++];
3107     }
3108     
3109     pc->last_index= pc->index;
3110
3111     /* copy into the buffer and return */
3112     if(next == END_NOT_FOUND){
3113         pc->buffer= av_fast_realloc(pc->buffer, &pc->buffer_size, (*buf_size) + pc->index + FF_INPUT_BUFFER_PADDING_SIZE);
3114
3115         memcpy(&pc->buffer[pc->index], *buf, *buf_size);
3116         pc->index += *buf_size;
3117         return -1;
3118     }
3119
3120     *buf_size=
3121     pc->overread_index= pc->index + next;
3122     
3123     /* append to buffer */
3124     if(pc->index){
3125         pc->buffer= av_fast_realloc(pc->buffer, &pc->buffer_size, next + pc->index + FF_INPUT_BUFFER_PADDING_SIZE);
3126
3127         memcpy(&pc->buffer[pc->index], *buf, next + FF_INPUT_BUFFER_PADDING_SIZE );
3128         pc->index = 0;
3129         *buf= pc->buffer;
3130     }
3131
3132     /* store overread bytes */
3133     for(;next < 0; next++){
3134         pc->state = (pc->state<<8) | pc->buffer[pc->last_index + next];
3135         pc->overread++;
3136     }
3137
3138 #if 0
3139     if(pc->overread){
3140         printf("overread %d, state:%X next:%d index:%d o_index:%d\n", pc->overread, pc->state, next, pc->index, pc->overread_index);
3141         printf("%X %X %X %X\n", (*buf)[0], (*buf)[1],(*buf)[2],(*buf)[3]);
3142     }
3143 #endif
3144
3145     return 0;
3146 }
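/* Rough usage sketch (not part of this file): a decoder supporting
 * CODEC_FLAG_TRUNCATED would feed each input chunk through ff_combine_frame()
 * and only decode once a complete frame is available. find_frame_end() is a
 * hypothetical helper returning the offset of the next frame boundary, or
 * END_NOT_FOUND if the chunk ends mid-frame:
 *
 *     if(s->flags & CODEC_FLAG_TRUNCATED){
 *         int next= find_frame_end(s, buf, buf_size);
 *         if(ff_combine_frame(s, next, &buf, &buf_size) < 0)
 *             return buf_size; // no complete frame yet, wait for more input
 *     }
 *     // buf / buf_size now describe exactly one complete frame
 */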
3147
3148 void ff_mpeg_flush(AVCodecContext *avctx){
3149     int i;
3150     MpegEncContext *s = avctx->priv_data;
3151     
3152     for(i=0; i<MAX_PICTURE_COUNT; i++){
3153        if(s->picture[i].data[0] && (   s->picture[i].type == FF_BUFFER_TYPE_INTERNAL
3154                                     || s->picture[i].type == FF_BUFFER_TYPE_USER))
3155         avctx->release_buffer(avctx, (AVFrame*)&s->picture[i]);
3156     }
3157     s->last_picture_ptr = s->next_picture_ptr = NULL;
3158     
3159     s->parse_context.state= -1;
3160     s->parse_context.frame_start_found= 0;
3161     s->parse_context.overread= 0;
3162     s->parse_context.overread_index= 0;
3163     s->parse_context.index= 0;
3164     s->parse_context.last_index= 0;
3165 }
3166
3167 #ifdef CONFIG_ENCODERS
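/**
 * copies the bitstream in src into pb.
 * @param length number of bits to copy; whole 16 bit words are written first,
 *               then the remaining 0..15 bits
 */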
3168 void ff_copy_bits(PutBitContext *pb, uint8_t *src, int length)
3169 {
3170     int bytes= length>>4;
3171     int bits= length&15;
3172     int i;
3173
3174     if(length==0) return;
3175
3176     for(i=0; i<bytes; i++) put_bits(pb, 16, be2me_16(((uint16_t*)src)[i]));
3177     put_bits(pb, bits, be2me_16(((uint16_t*)src)[i])>>(16-bits));
3178 }
3179
3180 static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int type){
3181     int i;
3182
3183     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
3184
3185     /* mpeg1 */
3186     d->mb_skip_run= s->mb_skip_run;
3187     for(i=0; i<3; i++)
3188         d->last_dc[i]= s->last_dc[i];
3189     
3190     /* statistics */
3191     d->mv_bits= s->mv_bits;
3192     d->i_tex_bits= s->i_tex_bits;
3193     d->p_tex_bits= s->p_tex_bits;
3194     d->i_count= s->i_count;
3195     d->f_count= s->f_count;
3196     d->b_count= s->b_count;
3197     d->skip_count= s->skip_count;
3198     d->misc_bits= s->misc_bits;
3199     d->last_bits= 0;
3200
3201     d->mb_skiped= 0;
3202     d->qscale= s->qscale;
3203 }
3204
3205 static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){
3206     int i;
3207
3208     memcpy(d->mv, s->mv, 2*4*2*sizeof(int)); 
3209     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
3210     
3211     /* mpeg1 */
3212     d->mb_skip_run= s->mb_skip_run;
3213     for(i=0; i<3; i++)
3214         d->last_dc[i]= s->last_dc[i];
3215     
3216     /* statistics */
3217     d->mv_bits= s->mv_bits;
3218     d->i_tex_bits= s->i_tex_bits;
3219     d->p_tex_bits= s->p_tex_bits;
3220     d->i_count= s->i_count;
3221     d->f_count= s->f_count;
3222     d->b_count= s->b_count;
3223     d->skip_count= s->skip_count;
3224     d->misc_bits= s->misc_bits;
3225
3226     d->mb_intra= s->mb_intra;
3227     d->mb_skiped= s->mb_skiped;
3228     d->mv_type= s->mv_type;
3229     d->mv_dir= s->mv_dir;
3230     d->pb= s->pb;
3231     if(s->data_partitioning){
3232         d->pb2= s->pb2;
3233         d->tex_pb= s->tex_pb;
3234     }
3235     d->block= s->block;
3236     for(i=0; i<6; i++)
3237         d->block_last_index[i]= s->block_last_index[i];
3238     d->interlaced_dct= s->interlaced_dct;
3239     d->qscale= s->qscale;
3240 }
3241
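/**
 * trial-encodes one macroblock as the given type for the bit/RD MB decision.
 * The MB is written into one of two alternating bit buffers; for
 * FF_MB_DECISION_RD it is also reconstructed so its SSE can be added as
 * lambda2-weighted distortion. If the resulting score beats *dmin, the
 * context of the best candidate so far is saved into *best.
 */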
3242 static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type, 
3243                            PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2],
3244                            int *dmin, int *next_block, int motion_x, int motion_y)
3245 {
3246     int score;
3247     uint8_t *dest_backup[3];
3248     
3249     copy_context_before_encode(s, backup, type);
3250
3251     s->block= s->blocks[*next_block];
3252     s->pb= pb[*next_block];
3253     if(s->data_partitioning){
3254         s->pb2   = pb2   [*next_block];
3255         s->tex_pb= tex_pb[*next_block];
3256     }
3257     
3258     if(*next_block){
3259         memcpy(dest_backup, s->dest, sizeof(s->dest));
3260         s->dest[0] = s->me.scratchpad;
3261         s->dest[1] = s->me.scratchpad + 16;
3262         s->dest[2] = s->me.scratchpad + 16 + 8;
3263         assert(2*s->uvlinesize == s->linesize); //should be no prob for encoding
3264         assert(s->linesize >= 64); //FIXME
3265     }
3266
3267     encode_mb(s, motion_x, motion_y);
3268     
3269     score= get_bit_count(&s->pb);
3270     if(s->data_partitioning){
3271         score+= get_bit_count(&s->pb2);
3272         score+= get_bit_count(&s->tex_pb);
3273     }
3274    
3275     if(s->avctx->mb_decision == FF_MB_DECISION_RD){
3276         MPV_decode_mb(s, s->block);
3277
3278         score *= s->lambda2;
3279         score += sse_mb(s) << FF_LAMBDA_SHIFT;
3280     }
3281     
3282     if(*next_block){
3283         memcpy(s->dest, dest_backup, sizeof(s->dest));
3284     }
3285
3286     if(score<*dmin){
3287         *dmin= score;
3288         *next_block^=1;
3289
3290         copy_context_after_encode(best, s, type);
3291     }
3292 }
3293                 
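/* sum of squared errors between two w x h blocks:
 *     SSE = sum over x,y of (src1[x + y*stride] - src2[x + y*stride])^2
 * The common 16x16 and 8x8 cases go through the (possibly SIMD-optimized)
 * dsputil functions, everything else through the scalar loop below. */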
3294 static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride){
3295     uint32_t *sq = squareTbl + 256;
3296     int acc=0;
3297     int x,y;
3298     
3299     if(w==16 && h==16) 
3300         return s->dsp.sse[0](NULL, src1, src2, stride);
3301     else if(w==8 && h==8)
3302         return s->dsp.sse[1](NULL, src1, src2, stride);
3303     
3304     for(y=0; y<h; y++){
3305         for(x=0; x<w; x++){
3306             acc+= sq[src1[x + y*stride] - src2[x + y*stride]];
3307         } 
3308     }
3309     
3310     assert(acc>=0);
3311     
3312     return acc;
3313 }
3314
3315 static int sse_mb(MpegEncContext *s){
3316     int w= 16;
3317     int h= 16;
3318
3319     if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
3320     if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
3321
3322     if(w==16 && h==16)
3323         return  s->dsp.sse[0](NULL, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize)
3324                +s->dsp.sse[1](NULL, s->new_picture.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize)
3325                +s->dsp.sse[1](NULL, s->new_picture.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize);
3326     else
3327         return  sse(s, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], w, h, s->linesize)
3328                +sse(s, s->new_picture.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize)
3329                +sse(s, s->new_picture.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize);
3330 }
3331
3332 static void encode_picture(MpegEncContext *s, int picture_number)
3333 {
3334     int mb_x, mb_y, pdif = 0;
3335     int i;
3336     int bits;
3337     MpegEncContext best_s, backup_s;
3338     uint8_t bit_buf[2][3000];
3339     uint8_t bit_buf2[2][3000];
3340     uint8_t bit_buf_tex[2][3000];
3341     PutBitContext pb[2], pb2[2], tex_pb[2];
3342
3343     for(i=0; i<2; i++){
3344         init_put_bits(&pb    [i], bit_buf    [i], 3000);
3345         init_put_bits(&pb2   [i], bit_buf2   [i], 3000);
3346         init_put_bits(&tex_pb[i], bit_buf_tex[i], 3000);
3347     }
3348
3349     s->picture_number = picture_number;
3350     
3351     /* Reset the average MB variance */
3352     s->current_picture.mb_var_sum = 0;
3353     s->current_picture.mc_mb_var_sum = 0;
3354
3355 #ifdef CONFIG_RISKY
3356     /* we need to initialize some time vars before we can encode b-frames */
3357     // RAL: Condition added for MPEG1VIDEO
3358     if (s->codec_id == CODEC_ID_MPEG1VIDEO || s->codec_id == CODEC_ID_MPEG2VIDEO || (s->h263_pred && !s->h263_msmpeg4))
3359         ff_set_mpeg4_time(s, s->picture_number); 
3360 #endif
3361         
3362     s->scene_change_score=0;
3363     
3364     s->lambda= s->current_picture_ptr->quality; //FIXME qscale / ... stuff for ME rate distortion
3365     
3366     if(s->pict_type==I_TYPE){
3367         if(s->msmpeg4_version >= 3) s->no_rounding=1;
3368         else                        s->no_rounding=0;
3369     }else if(s->pict_type!=B_TYPE){
3370         if(s->flipflop_rounding || s->codec_id == CODEC_ID_H263P || s->codec_id == CODEC_ID_MPEG4)
3371             s->no_rounding ^= 1;          
3372     }
3373     
3374     /* Estimate motion for every MB */
3375     s->mb_intra=0; //for the rate distortion & bit compare functions
3376     if(s->pict_type != I_TYPE){
3377         if(s->pict_type != B_TYPE){
3378             if((s->avctx->pre_me && s->last_non_b_pict_type==I_TYPE) || s->avctx->pre_me==2){
3379                 s->me.pre_pass=1;
3380                 s->me.dia_size= s->avctx->pre_dia_size;
3381
3382                 for(mb_y=s->mb_height-1; mb_y >=0 ; mb_y--) {
3383                     for(mb_x=s->mb_width-1; mb_x >=0 ; mb_x--) {
3384                         s->mb_x = mb_x;
3385                         s->mb_y = mb_y;
3386                         ff_pre_estimate_p_frame_motion(s, mb_x, mb_y);
3387                     }
3388                 }
3389                 s->me.pre_pass=0;
3390             }
3391         }
3392
3393         s->me.dia_size= s->avctx->dia_size;
3394         for(mb_y=0; mb_y < s->mb_height; mb_y++) {
3395             s->block_index[0]= s->block_wrap[0]*(mb_y*2 + 1) - 1;
3396             s->block_index[1]= s->block_wrap[0]*(mb_y*2 + 1);
3397             s->block_index[2]= s->block_wrap[0]*(mb_y*2 + 2) - 1;
3398             s->block_index[3]= s->block_wrap[0]*(mb_y*2 + 2);
3399             for(mb_x=0; mb_x < s->mb_width; mb_x++) {
3400                 s->mb_x = mb_x;
3401                 s->mb_y = mb_y;
3402                 s->block_index[0]+=2;
3403                 s->block_index[1]+=2;
3404                 s->block_index[2]+=2;
3405                 s->block_index[3]+=2;
3406                 
3407                 /* compute motion vector & mb_type and store in context */
3408                 if(s->pict_type==B_TYPE)
3409                     ff_estimate_b_frame_motion(s, mb_x, mb_y);
3410                 else
3411                     ff_estimate_p_frame_motion(s, mb_x, mb_y);
3412             }
3413         }
3414     }else /* if(s->pict_type == I_TYPE) */{
3415         /* I-Frame */
3416         //FIXME do we need to zero them?
3417         memset(s->motion_val[0], 0, sizeof(int16_t)*(s->mb_width*2 + 2)*(s->mb_height*2 + 2)*2);
3418         memset(s->p_mv_table   , 0, sizeof(int16_t)*(s->mb_stride)*s->mb_height*2);
3419         memset(s->mb_type      , MB_TYPE_INTRA, sizeof(uint8_t)*s->mb_stride*s->mb_height);
3420         
3421         if(!s->fixed_qscale){
3422             /* finding spatial complexity for I-frame rate control */
3423             for(mb_y=0; mb_y < s->mb_height; mb_y++) {
3424                 for(mb_x=0; mb_x < s->mb_width; mb_x++) {
3425                     int xx = mb_x * 16;
3426                     int yy = mb_y * 16;
3427                     uint8_t *pix = s->new_picture.data[0] + (yy * s->linesize) + xx;
3428                     int varc;
3429                     int sum = s->dsp.pix_sum(pix, s->linesize);
3430     
3431                     varc = (s->dsp.pix_norm1(pix, s->linesize) - (((unsigned)(sum*sum))>>8) + 500 + 128)>>8;
3432
3433                     s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc;
3434                     s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
3435                     s->current_picture.mb_var_sum    += varc;
3436                 }
3437             }
3438         }
3439     }
3440     emms_c();
3441
3442     if(s->scene_change_score > s->avctx->scenechange_threshold && s->pict_type == P_TYPE){
3443         s->pict_type= I_TYPE;
3444         memset(s->mb_type   , MB_TYPE_INTRA, sizeof(uint8_t)*s->mb_stride*s->mb_height);
3445 //printf("Scene change detected, encoding as I Frame %d %d\n", s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
3446     }
3447
3448     if(!s->umvplus){
3449         if(s->pict_type==P_TYPE || s->pict_type==S_TYPE) {
3450             s->f_code= ff_get_best_fcode(s, s->p_mv_table, MB_TYPE_INTER);
3451         
3452             ff_fix_long_p_mvs(s);
3453         }
3454
3455         if(s->pict_type==B_TYPE){
3456             int a, b;
3457
3458             a = ff_get_best_fcode(s, s->b_forw_mv_table, MB_TYPE_FORWARD);
3459             b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, MB_TYPE_BIDIR);
3460             s->f_code = FFMAX(a, b);
3461
3462             a = ff_get_best_fcode(s, s->b_back_mv_table, MB_TYPE_BACKWARD);
3463             b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, MB_TYPE_BIDIR);
3464             s->b_code = FFMAX(a, b);
3465
3466             ff_fix_long_b_mvs(s, s->b_forw_mv_table, s->f_code, MB_TYPE_FORWARD);
3467             ff_fix_long_b_mvs(s, s->b_back_mv_table, s->b_code, MB_TYPE_BACKWARD);
3468             ff_fix_long_b_mvs(s, s->b_bidir_forw_mv_table, s->f_code, MB_TYPE_BIDIR);
3469             ff_fix_long_b_mvs(s, s->b_bidir_back_mv_table, s->b_code, MB_TYPE_BIDIR);
3470         }
3471     }
3472     
3473     if (!s->fixed_qscale) 
3474         s->current_picture.quality = ff_rate_estimate_qscale(s);
3475
3476     if(s->adaptive_quant){
3477 #ifdef CONFIG_RISKY
3478         switch(s->codec_id){
3479         case CODEC_ID_MPEG4:
3480             ff_clean_mpeg4_qscales(s);
3481             break;
3482         case CODEC_ID_H263:
3483         case CODEC_ID_H263P:
3484         case CODEC_ID_FLV1:
3485             ff_clean_h263_qscales(s);
3486             break;
3487         }
3488 #endif
3489
3490         s->lambda= s->lambda_table[0];
3491         //FIXME broken
3492     }else
3493         s->lambda= s->current_picture.quality;
3494 //printf("%d %d\n", s->avctx->global_quality, s->current_picture.quality);
3495     update_qscale(s);
3496     
3497     if(s->qscale < 3 && s->max_qcoeff<=128 && s->pict_type==I_TYPE && !(s->flags & CODEC_FLAG_QSCALE)) 
3498         s->qscale= 3; //reduce clipping problems
3499         
3500     if (s->out_format == FMT_MJPEG) {
3501         /* for mjpeg, we do include qscale in the matrix */
3502         s->intra_matrix[0] = ff_mpeg1_default_intra_matrix[0];
3503         for(i=1;i<64;i++){
3504             int j= s->dsp.idct_permutation[i];
3505
3506             s->intra_matrix[j] = CLAMP_TO_8BIT((ff_mpeg1_default_intra_matrix[i] * s->qscale) >> 3);
3507         }
3508         convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16, 
3509                        s->intra_matrix, s->intra_quant_bias, 8, 8);
3510     }
3511     
3512     //FIXME var duplication
3513     s->current_picture.key_frame= s->pict_type == I_TYPE;
3514     s->current_picture.pict_type= s->pict_type;
3515
3516     if(s->current_picture.key_frame)
3517         s->picture_in_gop_number=0;
3518
3519     s->last_bits= get_bit_count(&s->pb);
3520     switch(s->out_format) {
3521     case FMT_MJPEG:
3522         mjpeg_picture_header(s);
3523         break;
3524 #ifdef CONFIG_RISKY
3525     case FMT_H263:
3526         if (s->codec_id == CODEC_ID_WMV2) 
3527             ff_wmv2_encode_picture_header(s, picture_number);
3528         else if (s->h263_msmpeg4) 
3529             msmpeg4_encode_picture_header(s, picture_number);
3530         else if (s->h263_pred)
3531             mpeg4_encode_picture_header(s, picture_number);
3532         else if (s->h263_rv10) 
3533             rv10_encode_picture_header(s, picture_number);
3534         else if (s->codec_id == CODEC_ID_FLV1)
3535             ff_flv_encode_picture_header(s, picture_number);
3536         else
3537             h263_encode_picture_header(s, picture_number);
3538         break;
3539 #endif
3540     case FMT_MPEG1:
3541         mpeg1_encode_picture_header(s, picture_number);
3542         break;
3543     case FMT_H264:
3544         break;
3545     }
3546     bits= get_bit_count(&s->pb);
3547     s->header_bits= bits - s->last_bits;
3548     s->last_bits= bits;
3549     s->mv_bits=0;
3550     s->misc_bits=0;
3551     s->i_tex_bits=0;
3552     s->p_tex_bits=0;
3553     s->i_count=0;
3554     s->f_count=0;
3555     s->b_count=0;
3556     s->skip_count=0;
3557
3558     for(i=0; i<3; i++){
3559         /* init last dc values */
3560         /* note: quant matrix value (8) is implied here */
3561         s->last_dc[i] = 128;
3562         
3563         s->current_picture_ptr->error[i] = 0;
3564     }
3565     s->mb_skip_run = 0;
3566     s->last_mv[0][0][0] = 0;
3567     s->last_mv[0][0][1] = 0;
3568     s->last_mv[1][0][0] = 0;
3569     s->last_mv[1][0][1] = 0;
3570      
3571     s->last_mv_dir = 0;
3572
3573 #ifdef CONFIG_RISKY
3574     switch(s->codec_id){
3575     case CODEC_ID_H263:
3576     case CODEC_ID_H263P:
3577     case CODEC_ID_FLV1:
3578         s->gob_index = ff_h263_get_gob_height(s);
3579         break;
3580     case CODEC_ID_MPEG4:
3581         if(s->partitioned_frame)
3582             ff_mpeg4_init_partitions(s);
3583         break;
3584     }
3585 #endif
3586
3587     s->resync_mb_x=0;
3588     s->resync_mb_y=0;
3589     s->first_slice_line = 1;
3590     s->ptr_lastgob = s->pb.buf;
3591     for(mb_y=0; mb_y < s->mb_height; mb_y++) {
3592         s->mb_x=0;
3593         s->mb_y= mb_y;
3594
3595         s->y_dc_scale= s->y_dc_scale_table[ s->qscale ];
3596         s->c_dc_scale= s->c_dc_scale_table[ s->qscale ];
3597         ff_init_block_index(s);
3598         
3599         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
3600             const int xy= mb_y*s->mb_stride + mb_x;
3601             int mb_type= s->mb_type[xy];
3602 //            int d;
3603             int dmin= INT_MAX;
3604
3605             s->mb_x = mb_x;
3606             ff_update_block_index(s);
3607
3608             /* write gob / video packet header  */
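            /* Once the bits written since the last resync point exceed
               rtp_payload_size, emit the codec specific resync header
               (MPEG-4 video packet, MPEG-1/2 slice header or H.263 GOB
               header), reset the predictors and remember the new resync
               position. */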
3609 #ifdef CONFIG_RISKY
3610             if(s->rtp_mode && mb_y + mb_x>0){
3611                 int current_packet_size, is_gob_start;
3612                 
3613                 current_packet_size= pbBufPtr(&s->pb) - s->ptr_lastgob;
3614                 is_gob_start=0;
3615                 
3616                 if(s->codec_id==CODEC_ID_MPEG4){
3617                     if(current_packet_size >= s->rtp_payload_size){
3618
3619                         if(s->partitioned_frame){
3620                             ff_mpeg4_merge_partitions(s);
3621                             ff_mpeg4_init_partitions(s);
3622                         }
3623                         ff_mpeg4_encode_video_packet_header(s);
3624
3625                         if(s->flags&CODEC_FLAG_PASS1){
3626                             int bits= get_bit_count(&s->pb);
3627                             s->misc_bits+= bits - s->last_bits;
3628                             s->last_bits= bits;
3629                         }
3630                         ff_mpeg4_clean_buffers(s);
3631                         is_gob_start=1;
3632                     }
3633                 }else if(s->codec_id==CODEC_ID_MPEG1VIDEO){
3634                     if(   current_packet_size >= s->rtp_payload_size 
3635                        && s->mb_skip_run==0){
3636                         ff_mpeg1_encode_slice_header(s);
3637                         ff_mpeg1_clean_buffers(s);
3638                         is_gob_start=1;
3639                     }
3640                 }else if(s->codec_id==CODEC_ID_MPEG2VIDEO){
3641                     if(   (   current_packet_size >= s->rtp_payload_size || mb_x==0)
3642                        && s->mb_skip_run==0){
3643                         ff_mpeg1_encode_slice_header(s);
3644                         ff_mpeg1_clean_buffers(s);
3645                         is_gob_start=1;
3646                     }
3647                 }else{
3648                     if(current_packet_size >= s->rtp_payload_size
3649                        && s->mb_x==0 && s->mb_y%s->gob_index==0){
3650                        
3651                         h263_encode_gob_header(s, mb_y);                       
3652                         is_gob_start=1;
3653                     }
3654                 }
3655
3656                 if(is_gob_start){
3657                     s->ptr_lastgob = pbBufPtr(&s->pb);
3658                     s->first_slice_line=1;
3659                     s->resync_mb_x=mb_x;
3660                     s->resync_mb_y=mb_y;
3661                 }
3662             }
3663 #endif
3664
3665             if(  (s->resync_mb_x   == s->mb_x)
3666                && s->resync_mb_y+1 == s->mb_y){
3667                 s->first_slice_line=0; 
3668             }
3669
3670             s->mb_skiped=0;
3671
3672             if(mb_type & (mb_type-1)){ // more than 1 MB type possible
3673                 int next_block=0;
3674                 int pb_bits_count, pb2_bits_count, tex_pb_bits_count;
3675
3676                 copy_context_before_encode(&backup_s, s, -1);
3677                 backup_s.pb= s->pb;
3678                 best_s.data_partitioning= s->data_partitioning;
3679                 best_s.partitioned_frame= s->partitioned_frame;
3680                 if(s->data_partitioning){
3681                     backup_s.pb2= s->pb2;
3682                     backup_s.tex_pb= s->tex_pb;
3683                 }
3684
3685                 if(mb_type&MB_TYPE_INTER){
3686                     s->mv_dir = MV_DIR_FORWARD;
3687                     s->mv_type = MV_TYPE_16X16;
3688                     s->mb_intra= 0;
3689                     s->mv[0][0][0] = s->p_mv_table[xy][0];
3690                     s->mv[0][0][1] = s->p_mv_table[xy][1];
3691                     encode_mb_hq(s, &backup_s, &best_s, MB_TYPE_INTER, pb, pb2, tex_pb, 
3692                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
3693                 }
3694                 if(mb_type&MB_TYPE_SKIPED){
3695                     s->mv_dir = MV_DIR_FORWARD;
3696                     s->mv_type = MV_TYPE_16X16;
3697                     s->mb_intra= 0;
3698                     s->mv[0][0][0] = 0;
3699                     s->mv[0][0][1] = 0;
3700                     encode_mb_hq(s, &backup_s, &best_s, MB_TYPE_SKIPED, pb, pb2, tex_pb, 
3701                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
3702                 }
3703                 if(mb_type&MB_TYPE_INTER4V){                 
3704                     s->mv_dir = MV_DIR_FORWARD;
3705                     s->mv_type = MV_TYPE_8X8;
3706                     s->mb_intra= 0;
3707                     for(i=0; i<4; i++){
3708                         s->mv[0][i][0] = s->motion_val[s->block_index[i]][0];
3709                         s->mv[0][i][1] = s->motion_val[s->block_index[i]][1];
3710                     }
3711                     encode_mb_hq(s, &backup_s, &best_s, MB_TYPE_INTER4V, pb, pb2, tex_pb, 
3712                                  &dmin, &next_block, 0, 0);
3713                 }
3714                 if(mb_type&MB_TYPE_FORWARD){
3715                     s->mv_dir = MV_DIR_FORWARD;
3716                     s->mv_type = MV_TYPE_16X16;
3717                     s->mb_intra= 0;
3718                     s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
3719                     s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
3720                     encode_mb_hq(s, &backup_s, &best_s, MB_TYPE_FORWARD, pb, pb2, tex_pb, 
3721                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
3722                 }
3723                 if(mb_type&MB_TYPE_BACKWARD){
3724                     s->mv_dir = MV_DIR_BACKWARD;
3725                     s->mv_type = MV_TYPE_16X16;
3726                     s->mb_intra= 0;
3727                     s->mv[1][0][0] = s->b_back_mv_table[xy][0];
3728                     s->mv[1][0][1] = s->b_back_mv_table[xy][1];
3729                     encode_mb_hq(s, &backup_s, &best_s, MB_TYPE_BACKWARD, pb, pb2, tex_pb, 
3730                                  &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]);
3731                 }
3732                 if(mb_type&MB_TYPE_BIDIR){
3733                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3734                     s->mv_type = MV_TYPE_16X16;
3735                     s->mb_intra= 0;
3736                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
3737                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
3738                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
3739                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
3740                     encode_mb_hq(s, &backup_s, &best_s, MB_TYPE_BIDIR, pb, pb2, tex_pb, 
3741                                  &dmin, &next_block, 0, 0);
3742                 }
3743                 if(mb_type&MB_TYPE_DIRECT){
3744                     int mx= s->b_direct_mv_table[xy][0];
3745                     int my= s->b_direct_mv_table[xy][1];
3746                     
3747                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
3748                     s->mb_intra= 0;
3749 #ifdef CONFIG_RISKY
3750                     ff_mpeg4_set_direct_mv(s, mx, my);
3751 #endif
3752                     encode_mb_hq(s, &backup_s, &best_s, MB_TYPE_DIRECT, pb, pb2, tex_pb, 
3753                                  &dmin, &next_block, mx, my);
3754                 }
3755                 if(mb_type&MB_TYPE_INTRA){
3756                     s->mv_dir = 0;
3757                     s->mv_type = MV_TYPE_16X16;
3758                     s->mb_intra= 1;
3759                     s->mv[0][0][0] = 0;
3760                     s->mv[0][0][1] = 0;
3761                     encode_mb_hq(s, &backup_s, &best_s, MB_TYPE_INTRA, pb, pb2, tex_pb, 
3762                                  &dmin, &next_block, 0, 0);
3763                     if(s->h263_pred || s->h263_aic){
3764                         if(best_s.mb_intra)
3765                             s->mbintra_table[mb_x + mb_y*s->mb_stride]=1;
3766                         else
3767                             ff_clean_intra_table_entries(s); //old mode?
3768                     }
3769                 }
3770                 copy_context_after_encode(s, &best_s, -1);
3771                 
3772                 pb_bits_count= get_bit_count(&s->pb);
3773                 flush_put_bits(&s->pb);
3774                 ff_copy_bits(&backup_s.pb, bit_buf[next_block^1], pb_bits_count);
3775                 s->pb= backup_s.pb;
3776                 
3777                 if(s->data_partitioning){
3778                     pb2_bits_count= get_bit_count(&s->pb2);
3779                     flush_put_bits(&s->pb2);
3780                     ff_copy_bits(&backup_s.pb2, bit_buf2[next_block^1], pb2_bits_count);
3781                     s->pb2= backup_s.pb2;
3782                     
3783                     tex_pb_bits_count= get_bit_count(&s->tex_pb);
3784                     flush_put_bits(&s->tex_pb);
3785                     ff_copy_bits(&backup_s.tex_pb, bit_buf_tex[next_block^1], tex_pb_bits_count);
3786                     s->tex_pb= backup_s.tex_pb;
3787                 }
3788                 s->last_bits= get_bit_count(&s->pb);
3789                
3790 #ifdef CONFIG_RISKY
3791                 if (s->out_format == FMT_H263 && s->pict_type!=B_TYPE)
3792                     ff_h263_update_motion_val(s);
3793 #endif
3794         
3795                 if(next_block==0){
3796                     s->dsp.put_pixels_tab[0][0](s->dest[0], s->me.scratchpad     , s->linesize  ,16);
3797                     s->dsp.put_pixels_tab[1][0](s->dest[1], s->me.scratchpad + 16, s->uvlinesize, 8);
3798                     s->dsp.put_pixels_tab[1][0](s->dest[2], s->me.scratchpad + 24, s->uvlinesize, 8);
3799                 }
3800
3801                 if(s->avctx->mb_decision == FF_MB_DECISION_BITS)
3802                     MPV_decode_mb(s, s->block);
3803             } else {
3804                 int motion_x, motion_y;
3805                 int intra_score;
3806                 int inter_score= s->current_picture.mb_cmp_score[mb_x + mb_y*s->mb_stride];
3807                 
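              /* Simple intra/inter decision: estimate the intra cost by
                 comparing each plane of the MB against a flat block filled
                 with its mean, add a bias depending on the comparison
                 function, and switch the MB to intra if that beats the inter
                 score stored by motion estimation. */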
3808               if(s->avctx->mb_decision==FF_MB_DECISION_SIMPLE && s->pict_type==P_TYPE){ //FIXME check if the mess is useful at all
3809                 /* get luma score */
3810                 if((s->avctx->mb_cmp&0xFF)==FF_CMP_SSE){
3811                     intra_score= (s->current_picture.mb_var[mb_x + mb_y*s->mb_stride]<<8) - 500; //FIXME don't scale it down so we don't have to fix it
3812                 }else{
3813                     uint8_t *dest_y;
3814
3815                     int mean= s->current_picture.mb_mean[mb_x + mb_y*s->mb_stride]; //FIXME
3816                     mean*= 0x01010101;
3817                     
3818                     dest_y  = s->new_picture.data[0] + (mb_y * 16 * s->linesize    ) + mb_x * 16;
3819                 
3820                     for(i=0; i<16; i++){
3821                         *(uint32_t*)(&s->me.scratchpad[i*s->linesize+ 0]) = mean;
3822                         *(uint32_t*)(&s->me.scratchpad[i*s->linesize+ 4]) = mean;
3823                         *(uint32_t*)(&s->me.scratchpad[i*s->linesize+ 8]) = mean;
3824                         *(uint32_t*)(&s->me.scratchpad[i*s->linesize+12]) = mean;
3825                     }
3826
3827                     s->mb_intra=1;
3828                     intra_score= s->dsp.mb_cmp[0](s, s->me.scratchpad, dest_y, s->linesize);
3829                                         
3830 /*                    printf("intra:%7d inter:%7d var:%7d mc_var.%7d\n", intra_score>>8, inter_score>>8, 
3831                         s->current_picture.mb_var[mb_x + mb_y*s->mb_stride],
3832                         s->current_picture.mc_mb_var[mb_x + mb_y*s->mb_stride]);*/
3833                 }
3834                 
3835                 /* get chroma score */
3836                 if(s->avctx->mb_cmp&FF_CMP_CHROMA){
3837                     int i;
3838                     
3839                     s->mb_intra=1;
3840                     for(i=1; i<3; i++){
3841                         uint8_t *dest_c;
3842                         int mean;
3843                         
3844                         if(s->out_format == FMT_H263){
3845                             mean= (s->dc_val[i][mb_x + (mb_y+1)*(s->mb_width+2)] + 4)>>3; //FIXME not exact but simple ;)
3846                         }else{
3847                             mean= (s->last_dc[i] + 4)>>3;
3848                         }
3849                         dest_c = s->new_picture.data[i] + (mb_y * 8  * (s->uvlinesize)) + mb_x * 8;
3850                         
3851                         mean*= 0x01010101;
3852                         for(i=0; i<8; i++){
3853                             *(uint32_t*)(&s->me.scratchpad[i*s->uvlinesize+ 0]) = mean;
3854                             *(uint32_t*)(&s->me.scratchpad[i*s->uvlinesize+ 4]) = mean;
3855                         }
3856                         
3857                         intra_score+= s->dsp.mb_cmp[1](s, s->me.scratchpad, dest_c, s->uvlinesize);
3858                     }                
3859                 }
3860
3861                 /* bias */
3862                 switch(s->avctx->mb_cmp&0xFF){
3863                 default:
3864                 case FF_CMP_SAD:
3865                     intra_score+= 32*s->qscale;
3866                     break;
3867                 case FF_CMP_SSE:
3868                     intra_score+= 24*s->qscale*s->qscale;
3869                     break;
3870                 case FF_CMP_SATD:
3871                     intra_score+= 96*s->qscale;
3872                     break;
3873                 case FF_CMP_DCT:
3874                     intra_score+= 48*s->qscale;
3875                     break;
3876                 case FF_CMP_BIT:
3877                     intra_score+= 16;
3878                     break;
3879                 case FF_CMP_PSNR:
3880                 case FF_CMP_RD:
3881                     intra_score+= (s->qscale*s->qscale*109*8 + 64)>>7;
3882                     break;
3883                 }
3884
3885                 if(intra_score < inter_score)
3886                     mb_type= MB_TYPE_INTRA;
3887               }  
3888                 
3889                 s->mv_type=MV_TYPE_16X16;
3890                 // only one MB-Type possible
3891                 
3892                 switch(mb_type){
3893                 case MB_TYPE_INTRA:
3894                     s->mv_dir = 0;
3895                     s->mb_intra= 1;
3896                     motion_x= s->mv[0][0][0] = 0;
3897                     motion_y= s->mv[0][0][1] = 0;
3898                     break;
3899                 case MB_TYPE_INTER:
3900                     s->mv_dir = MV_DIR_FORWARD;
3901                     s->mb_intra= 0;
3902                     motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
3903                     motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
3904                     break;
3905                 case MB_TYPE_INTER4V:
3906                     s->mv_dir = MV_DIR_FORWARD;
3907                     s->mv_type = MV_TYPE_8X8;
3908                     s->mb_intra= 0;
3909                     for(i=0; i<4; i++){
3910                         s->mv[0][i][0] = s->motion_val[s->block_index[i]][0];
3911                         s->mv[0][i][1] = s->motion_val[s->block_index[i]][1];
3912                     }
3913                     motion_x= motion_y= 0;
3914                     break;
3915                 case MB_TYPE_DIRECT:
3916                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
3917                     s->mb_intra= 0;
3918                     motion_x=s->b_direct_mv_table[xy][0];
3919                     motion_y=s->b_direct_mv_table[xy][1];
3920 #ifdef CONFIG_RISKY
3921                     ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
3922 #endif
3923                     break;
3924                 case MB_TYPE_BIDIR:
3925                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3926                     s->mb_intra= 0;
3927                     motion_x=0;
3928                     motion_y=0;
3929                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
3930                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
3931                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
3932                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
3933                     break;
3934                 case MB_TYPE_BACKWARD:
3935                     s->mv_dir = MV_DIR_BACKWARD;
3936                     s->mb_intra= 0;
3937                     motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
3938                     motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
3939                     break;
3940                 case MB_TYPE_FORWARD:
3941                     s->mv_dir = MV_DIR_FORWARD;
3942                     s->mb_intra= 0;
3943                     motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
3944                     motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
3945 //                    printf(" %d %d ", motion_x, motion_y);
3946                     break;
3947                 default:
3948                     motion_x=motion_y=0; //gcc warning fix
3949                     printf("illegal MB type\n");
3950                 }
3951
3952                 encode_mb(s, motion_x, motion_y);
3953
3954                 // RAL: Update last macroblock type
3955                 s->last_mv_dir = s->mv_dir;
3956             
3957 #ifdef CONFIG_RISKY
3958                 if (s->out_format == FMT_H263 && s->pict_type!=B_TYPE)
3959                     ff_h263_update_motion_val(s);
3960 #endif
3961                 
3962                 MPV_decode_mb(s, s->block);
3963             }
3964
3965             /* clean the MV table in I/P/S frames for direct mode in B frames */
3966             if(s->mb_intra /* && I,P,S_TYPE */){
3967                 s->p_mv_table[xy][0]=0;
3968                 s->p_mv_table[xy][1]=0;
3969             }
3970             
3971             if(s->flags&CODEC_FLAG_PSNR){
3972                 int w= 16;
3973                 int h= 16;
3974
3975                 if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
3976                 if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
3977
3978                 s->current_picture_ptr->error[0] += sse(
3979                     s, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
3980                     s->dest[0], w, h, s->linesize);
3981                 s->current_picture_ptr->error[1] += sse(
3982                     s, s->new_picture.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,
3983                     s->dest[1], w>>1, h>>1, s->uvlinesize);
3984                 s->current_picture_ptr->error[2] += sse(
3985                     s, s->new_picture.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,
3986                     s->dest[2], w>>1, h>>1, s->uvlinesize);
3987             }
3988 //printf("MB %d %d bits\n", s->mb_x+s->mb_y*s->mb_stride, get_bit_count(&s->pb));
3989         }
3990     }
3991     emms_c();
3992
3993 #ifdef CONFIG_RISKY
3994     if(s->codec_id==CODEC_ID_MPEG4 && s->partitioned_frame)
3995         ff_mpeg4_merge_partitions(s);
3996
3997     if (s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type == I_TYPE)
3998         msmpeg4_encode_ext_header(s);
3999
4000     if(s->codec_id==CODEC_ID_MPEG4) 
4001         ff_mpeg4_stuffing(&s->pb);
4002 #endif
4003
4004     //if (s->gob_number)
4005     //    fprintf(stderr,"\nNumber of GOB: %d", s->gob_number);
4006     
4007     /* Send the last GOB if RTP */    
4008     if (s->rtp_mode) {
4009         flush_put_bits(&s->pb);
4010         pdif = pbBufPtr(&s->pb) - s->ptr_lastgob;
4011         /* Call the RTP callback to send the last GOB */
4012         if (s->rtp_callback)
4013             s->rtp_callback(s->ptr_lastgob, pdif, s->gob_number);
4014         s->ptr_lastgob = pbBufPtr(&s->pb);
4015         //fprintf(stderr,"\nGOB: %2d size: %d (last)", s->gob_number, pdif);
4016     }
4017 }
4018
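/**
 * rate-distortion optimized ("trellis") quantization.
 * For each coefficient in scan order up to two candidate levels are kept
 * (the rounded quantized level and level-1); a dynamic program over run
 * lengths then picks, per position, the choice minimizing
 *     distortion + lambda * bits(run, level)
 * using the codec's VLC length tables, and finally selects the cheapest
 * position at which to end the block.
 */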
4019 static int dct_quantize_trellis_c(MpegEncContext *s, 
4020                         DCTELEM *block, int n,
4021                         int qscale, int *overflow){
4022     const int *qmat;
4023     const uint8_t *scantable= s->intra_scantable.scantable;
4024     int max=0;
4025     unsigned int threshold1, threshold2;
4026     int bias=0;
4027     int run_tab[65];
4028     int level_tab[65];
4029     int score_tab[65];
4030     int last_run=0;
4031     int last_level=0;
4032     int last_score= 0;
4033     int last_i= 0;
4034     int not_coded_score= 0;
4035     int coeff[3][64];
4036     int coeff_count[64];
4037     int qmul, qadd, start_i, last_non_zero, i, dc;
4038     const int esc_length= s->ac_esc_length;
4039     uint8_t * length;
4040     uint8_t * last_length;
4041     int score_limit=0;
4042     int left_limit= 0;
4043     const int lambda= s->lambda2 >> (FF_LAMBDA_SHIFT - 6);
4044     const int patch_table= s->out_format == FMT_MPEG1 && !s->mb_intra;
4045         
4046     s->dsp.fdct (block);
4047
4048     qmul= qscale*16;
4049     qadd= ((qscale-1)|1)*8;
4050
4051     if (s->mb_intra) {
4052         int q;
4053         if (!s->h263_aic) {
4054             if (n < 4)
4055                 q = s->y_dc_scale;
4056             else
4057                 q = s->c_dc_scale;
4058             q = q << 3;
4059         } else{
4060             /* For AIC we skip quant/dequant of INTRADC */
4061             q = 1 << 3;
4062             qadd=0;
4063         }
4064             
4065         /* note: block[0] is assumed to be positive */
4066         block[0] = (block[0] + (q >> 1)) / q;
4067         start_i = 1;
4068         last_non_zero = 0;
4069         qmat = s->q_intra_matrix[qscale];
4070         if(s->mpeg_quant || s->out_format == FMT_MPEG1)
4071             bias= 1<<(QMAT_SHIFT-1);
4072         length     = s->intra_ac_vlc_length;
4073         last_length= s->intra_ac_vlc_last_length;
4074     } else {
4075         start_i = 0;
4076         last_non_zero = -1;
4077         qmat = s->q_inter_matrix[qscale];
4078         length     = s->inter_ac_vlc_length;
4079         last_length= s->inter_ac_vlc_last_length;
4080     }
4081
4082     threshold1= (1<<QMAT_SHIFT) - bias - 1;
4083     threshold2= (threshold1<<1);
4084
4085     for(i=start_i; i<64; i++) {
4086         const int j = scantable[i];
4087         const int k= i-start_i;
4088         int level = block[j];
4089         level = level * qmat[j];
4090
4091 //        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
4092 //           || bias-level >= (1<<(QMAT_SHIFT - 3))){
4093         if(((unsigned)(level+threshold1))>threshold2){
4094             if(level>0){
4095                 level= (bias + level)>>QMAT_SHIFT;
4096                 coeff[0][k]= level;
4097                 coeff[1][k]= level-1;
4098 //                coeff[2][k]= level-2;
4099             }else{
4100                 level= (bias - level)>>QMAT_SHIFT;
4101                 coeff[0][k]= -level;
4102                 coeff[1][k]= -level+1;
4103 //                coeff[2][k]= -level+2;
4104             }
4105             coeff_count[k]= FFMIN(level, 2);
4106             assert(coeff_count[k]);
4107             max |=level;
4108             last_non_zero = i;
4109         }else{
4110             coeff[0][k]= (level>>31)|1;
4111             coeff_count[k]= 1;
4112         }
4113     }
4114     
4115     *overflow= s->max_qcoeff < max; //overflow might have happened
4116     
4117     if(last_non_zero < start_i){
4118         memset(block + start_i, 0, (64-start_i)*sizeof(DCTELEM));
4119         return last_non_zero;
4120     }
4121
4122     score_tab[0]= 0;
4123     
4124     if(patch_table){
4125 //        length[UNI_AC_ENC_INDEX(0, 63)]=
4126 //        length[UNI_AC_ENC_INDEX(0, 65)]= 2;
4127     }
4128
4129     for(i=0; i<=last_non_zero - start_i; i++){
4130         int level_index, run, j;
4131         const int dct_coeff= block[ scantable[i + start_i] ];
4132         const int zero_distoration= dct_coeff*dct_coeff;
4133         int best_score=256*256*256*120;
4134
4135         last_score += zero_distoration;
4136         not_coded_score += zero_distoration;
4137         for(level_index=0; level_index < coeff_count[i]; level_index++){
4138             int distoration;
4139             int level= coeff[level_index][i];
4140             int unquant_coeff;
4141             
4142             assert(level);
4143
4144             if(s->out_format == FMT_H263){
4145                 if(level>0){
4146                     unquant_coeff= level*qmul + qadd;
4147                 }else{
4148                     unquant_coeff= level*qmul - qadd;
4149                 }
4150             }else{ //MPEG1
4151                 j= s->dsp.idct_permutation[ scantable[i + start_i] ]; //FIXME optimize
4152                 if(s->mb_intra){
4153                     if (level < 0) {
4154                         unquant_coeff = (int)((-level) * qscale * s->intra_matrix[j]) >> 3;
4155                         unquant_coeff = -((unquant_coeff - 1) | 1);
4156                     } else {
4157                         unquant_coeff = (int)(  level  * qscale * s->intra_matrix[j]) >> 3;
4158                         unquant_coeff =   (unquant_coeff - 1) | 1;
4159                     }
4160                 }else{
4161                     if (level < 0) {
4162                         unquant_coeff = ((((-level) << 1) + 1) * qscale * ((int) s->inter_matrix[j])) >> 4;
4163                         unquant_coeff = -((unquant_coeff - 1) | 1);
4164                     } else {
4165                         unquant_coeff = (((  level  << 1) + 1) * qscale * ((int) s->inter_matrix[j])) >> 4;
4166                         unquant_coeff =   (unquant_coeff - 1) | 1;
4167                     }
4168                 }
4169                 unquant_coeff<<= 3;
4170             }
4171
4172             distoration= (unquant_coeff - dct_coeff) * (unquant_coeff - dct_coeff);
4173             level+=64;
4174             if((level&(~127)) == 0){
4175                 for(run=0; run<=i - left_limit; run++){
4176                     int score= distoration + length[UNI_AC_ENC_INDEX(run, level)]*lambda;
4177                     score += score_tab[i-run];
4178                     
4179                     if(score < best_score){
4180                         best_score= 
4181                         score_tab[i+1]= score;
4182                         run_tab[i+1]= run;
4183                         level_tab[i+1]= level-64;
4184                     }
4185                 }
4186
4187                 if(s->out_format == FMT_H263){
4188                     for(run=0; run<=i - left_limit; run++){
4189                         int score= distoration + last_length[UNI_AC_ENC_INDEX(run, level)]*lambda;
4190                         score += score_tab[i-run];
4191                         if(score < last_score){
4192                             last_score= score;
4193                             last_run= run;
4194                             last_level= level-64;
4195                             last_i= i+1;
4196                         }
4197                     }
4198                 }
4199             }else{
4200                 distoration += esc_length*lambda;
4201                 for(run=0; run<=i - left_limit; run++){
4202                     int score= distoration + score_tab[i-run];
4203                     
4204                     if(score < best_score){
4205                         best_score= 
4206                         score_tab[i+1]= score;
4207                         run_tab[i+1]= run;
4208                         level_tab[i+1]= level-64;
4209                     }
4210                 }
4211
4212                 if(s->out_format == FMT_H263){
4213                     for(run=0; run<=i - left_limit; run++){
4214                         int score= distoration + score_tab[i-run];
4215                         if(score < last_score){
4216                             last_score= score;
4217                             last_run= run;
4218                             last_level= level-64;
4219                             last_i= i+1;
4220                         }
4221                     }
4222                 }
4223             }
4224         }
4225
4226         for(j=left_limit; j<=i; j++){
4227             score_tab[j] += zero_distoration;
4228         }
4229         score_limit+= zero_distoration;
4230         if(score_tab[i+1] < score_limit)
4231             score_limit= score_tab[i+1];
4232         
4233         //Note: there is a vlc code in mpeg4 which is 1 bit shorter than another one with a shorter run and the same level
4234         while(score_tab[ left_limit ] > score_limit + lambda) left_limit++;
4235     
4236         if(patch_table){
4237 //            length[UNI_AC_ENC_INDEX(0, 63)]=
4238 //            length[UNI_AC_ENC_INDEX(0, 65)]= 3;
4239         }
4240     }
4241
4242     if(s->out_format != FMT_H263){
4243         last_score= 256*256*256*120;
4244         for(i= left_limit; i<=last_non_zero - start_i + 1; i++){
4245             int score= score_tab[i];
4246             if(i) score += lambda*2; //FIXME could this be more exact?
4247
4248             if(score < last_score){
4249                 last_score= score;
4250                 last_i= i;
4251                 last_level= level_tab[i];
4252                 last_run= run_tab[i];
4253             }
4254         }
4255     }
4256
4257     s->coded_score[n] = last_score - not_coded_score;
4258     
4259     dc= block[0];
4260     last_non_zero= last_i - 1 + start_i;
4261     memset(block + start_i, 0, (64-start_i)*sizeof(DCTELEM));
4262     
4263     if(last_non_zero < start_i)
4264         return last_non_zero;
4265
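    /* Only the DC coefficient survived: re-evaluate each candidate DC level
     * against its rate-distortion cost and drop the block (return -1) if not
     * coding it at all is cheaper. */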
4266     if(last_non_zero == 0 && start_i == 0){
4267         int best_level= 0;
4268         int best_score= dc * dc;
4269         
4270         for(i=0; i<coeff_count[0]; i++){
4271             int level= coeff[i][0];
4272             int unquant_coeff, score, distoration;
4273
4274             if(s->out_format == FMT_H263){
4275                 if(level>0){
4276                     unquant_coeff= (level*qmul + qadd)>>3;
4277                 }else{
4278                     unquant_coeff= (level*qmul - qadd)>>3;
4279                 }
4280             }else{ //MPEG1
4281                     if (level < 0) {
4282                         unquant_coeff = ((((-level) << 1) + 1) * qscale * ((int) s->inter_matrix[0])) >> 4;
4283                         unquant_coeff = -((unquant_coeff - 1) | 1);
4284                     } else {
4285                         unquant_coeff = (((  level  << 1) + 1) * qscale * ((int) s->inter_matrix[0])) >> 4;
4286                         unquant_coeff =   (unquant_coeff - 1) | 1;
4287                     }
4288             }
4289             unquant_coeff = (unquant_coeff + 4) >> 3;
4290             unquant_coeff<<= 3 + 3;
4291
4292             distoration= (unquant_coeff - dc) * (unquant_coeff - dc);
4293             level+=64;
4294             if((level&(~127)) == 0)
4295                 score= distoration + last_length[UNI_AC_ENC_INDEX(0, level)]*lambda;
4296             else
4297                 score= distoration + esc_length*lambda;
4298
4299             if(score < best_score){
4300                 best_score= score;
4301                 best_level= level - 64;
4302             }
4303         }
4304         block[0]= best_level;
4305         s->coded_score[n] = best_score - dc*dc;
4306         if(best_level == 0) return -1;
4307         else                return last_non_zero;
4308     }
4309
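    /* Trace the winning path backwards through run_tab[]/level_tab[], writing
     * the surviving levels into the block at their IDCT-permuted positions. */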
4310     i= last_i;
4311     assert(last_level);
4312 //FIXME use permutated scantable
4313     block[ s->dsp.idct_permutation[ scantable[last_non_zero] ] ]= last_level;
4314     i -= last_run + 1;
4315     
4316     for(;i>0 ; i -= run_tab[i] + 1){
4317         const int j= s->dsp.idct_permutation[ scantable[i - 1 + start_i] ];
4318     
4319         block[j]= level_tab[i];
4320         assert(block[j]);
4321     }
4322
4323     return last_non_zero;
4324 }
4325
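/**
 * Plain C quantizer: runs the forward DCT on the block, then quantizes the
 * coefficients in scan order using the precomputed q_intra/q_inter matrices
 * and the configured quantizer bias.
 *
 * @return index of the last non-zero coefficient; *overflow is set if a
 *         quantized coefficient might exceed s->max_qcoeff.
 */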
4326 static int dct_quantize_c(MpegEncContext *s, 
4327                         DCTELEM *block, int n,
4328                         int qscale, int *overflow)
4329 {
4330     int i, j, level, last_non_zero, q;
4331     const int *qmat;
4332     const uint8_t *scantable= s->intra_scantable.scantable;
4333     int bias;
4334     int max=0;
4335     unsigned int threshold1, threshold2;
4336
4337     s->dsp.fdct (block);
4338
4339     if (s->mb_intra) {
4340         if (!s->h263_aic) {
4341             if (n < 4)
4342                 q = s->y_dc_scale;
4343             else
4344                 q = s->c_dc_scale;
4345             q = q << 3;
4346         } else
4347             /* For AIC we skip quant/dequant of INTRADC */
4348             q = 1 << 3;
4349             
4350         /* note: block[0] is assumed to be positive */
4351         block[0] = (block[0] + (q >> 1)) / q;
4352         i = 1;
4353         last_non_zero = 0;
4354         qmat = s->q_intra_matrix[qscale];
4355         bias= s->intra_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4356     } else {
4357         i = 0;
4358         last_non_zero = -1;
4359         qmat = s->q_inter_matrix[qscale];
4360         bias= s->inter_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4361     }
4362     threshold1= (1<<QMAT_SHIFT) - bias - 1;
4363     threshold2= (threshold1<<1);
4364
4365     for(;i<64;i++) {
4366         j = scantable[i];
4367         level = block[j];
4368         level = level * qmat[j];
4369
4370 //        if(   bias+level >= (1<<QMAT_SHIFT)
4371 //           || bias-level >= (1<<QMAT_SHIFT)){
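        /* same test as the commented-out form above, folded into a single
           unsigned comparison (threshold1 = (1<<QMAT_SHIFT) - bias - 1) */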
4372         if(((unsigned)(level+threshold1))>threshold2){
4373             if(level>0){
4374                 level= (bias + level)>>QMAT_SHIFT;
4375                 block[j]= level;
4376             }else{
4377                 level= (bias - level)>>QMAT_SHIFT;
4378                 block[j]= -level;
4379             }
4380             max |=level;
4381             last_non_zero = i;
4382         }else{
4383             block[j]=0;
4384         }
4385     }
4386     *overflow= s->max_qcoeff < max; //overflow might have happened
4387     
4388     /* we need this permutation so that we correct the IDCT; we only permute the non-zero elements */
4389     if (s->dsp.idct_permutation_type != FF_NO_IDCT_PERM)
4390         ff_block_permute(block, s->dsp.idct_permutation, scantable, last_non_zero);
4391
4392     return last_non_zero;
4393 }
4394
4395 #endif //CONFIG_ENCODERS
4396
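/**
 * MPEG-1 inverse quantization (C reference): rescales the coefficients up to
 * block_last_index with the intra or inter matrix and forces every non-zero
 * reconstructed level to be odd, as MPEG-1 mismatch control requires.
 */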
4397 static void dct_unquantize_mpeg1_c(MpegEncContext *s, 
4398                                    DCTELEM *block, int n, int qscale)
4399 {
4400     int i, level, nCoeffs;
4401     const uint16_t *quant_matrix;
4402
4403     nCoeffs= s->block_last_index[n];
4404     
4405     if (s->mb_intra) {
4406         if (n < 4) 
4407             block[0] = block[0] * s->y_dc_scale;
4408         else
4409             block[0] = block[0] * s->c_dc_scale;
4410         /* XXX: only mpeg1 */
4411         quant_matrix = s->intra_matrix;
4412         for(i=1;i<=nCoeffs;i++) {
4413             int j= s->intra_scantable.permutated[i];
4414             level = block[j];
4415             if (level) {
4416                 if (level < 0) {
4417                     level = -level;
4418                     level = (int)(level * qscale * quant_matrix[j]) >> 3;
4419                     level = (level - 1) | 1;
4420                     level = -level;
4421                 } else {
4422                     level = (int)(level * qscale * quant_matrix[j]) >> 3;
4423                     level = (level - 1) | 1;
4424                 }
4425 #ifdef PARANOID
4426                 if (level < -2048 || level > 2047)
4427                     fprintf(stderr, "unquant error %d %d\n", i, level);
4428 #endif
4429                 block[j] = level;
4430             }
4431         }
4432     } else {
4433         i = 0;
4434         quant_matrix = s->inter_matrix;
4435         for(;i<=nCoeffs;i++) {
4436             int j= s->intra_scantable.permutated[i];
4437             level = block[j];
4438             if (level) {
4439                 if (level < 0) {
4440                     level = -level;
4441                     level = (((level << 1) + 1) * qscale *
4442                              ((int) (quant_matrix[j]))) >> 4;
4443                     level = (level - 1) | 1;
4444                     level = -level;
4445                 } else {
4446                     level = (((level << 1) + 1) * qscale *
4447                              ((int) (quant_matrix[j]))) >> 4;
4448                     level = (level - 1) | 1;
4449                 }
4450 #ifdef PARANOID
4451                 if (level < -2048 || level > 2047)
4452                     fprintf(stderr, "unquant error %d %d\n", i, level);
4453 #endif
4454                 block[j] = level;
4455             }
4456         }
4457     }
4458 }
4459
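/**
 * MPEG-2 inverse quantization (C reference).  Unlike the MPEG-1 variant the
 * levels are not forced odd; for non-intra blocks mismatch control is applied
 * to the last coefficient instead (see below).
 */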
4460 static void dct_unquantize_mpeg2_c(MpegEncContext *s, 
4461                                    DCTELEM *block, int n, int qscale)
4462 {
4463     int i, level, nCoeffs;
4464     const uint16_t *quant_matrix;
4465
4466     if(s->alternate_scan) nCoeffs= 63;
4467     else nCoeffs= s->block_last_index[n];
4468     
4469     if (s->mb_intra) {
4470         if (n < 4) 
4471             block[0] = block[0] * s->y_dc_scale;
4472         else
4473             block[0] = block[0] * s->c_dc_scale;
4474         quant_matrix = s->intra_matrix;
4475         for(i=1;i<=nCoeffs;i++) {
4476             int j= s->intra_scantable.permutated[i];
4477             level = block[j];
4478             if (level) {
4479                 if (level < 0) {
4480                     level = -level;
4481                     level = (int)(level * qscale * quant_matrix[j]) >> 3;
4482                     level = -level;
4483                 } else {
4484                     level = (int)(level * qscale * quant_matrix[j]) >> 3;
4485                 }
4486 #ifdef PARANOID
4487                 if (level < -2048 || level > 2047)
4488                     fprintf(stderr, "unquant error %d %d\n", i, level);
4489 #endif
4490                 block[j] = level;
4491             }
4492         }
4493     } else {
4494         int sum=-1;
4495         i = 0;
4496         quant_matrix = s->inter_matrix;
4497         for(;i<=nCoeffs;i++) {
4498             int j= s->intra_scantable.permutated[i];
4499             level = block[j];
4500             if (level) {
4501                 if (level < 0) {
4502                     level = -level;
4503                     level = (((level << 1) + 1) * qscale *
4504                              ((int) (quant_matrix[j]))) >> 4;
4505                     level = -level;
4506                 } else {
4507                     level = (((level << 1) + 1) * qscale *
4508                              ((int) (quant_matrix[j]))) >> 4;
4509                 }
4510 #ifdef PARANOID
4511                 if (level < -2048 || level > 2047)
4512                     fprintf(stderr, "unquant error %d %d\n", i, level);
4513 #endif
4514                 block[j] = level;
4515                 sum+=level;
4516             }
4517         }
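        /* MPEG-2 mismatch control: flip the LSB of coefficient 63 when the
           sum of all reconstructed levels would otherwise be even */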
4518         block[63]^=sum&1;
4519     }
4520 }
4521
4522
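/**
 * H.263/MPEG-4 style inverse quantization (C reference): reconstructs the
 * levels as level*qmul +/- qadd (qmul = 2*qscale, qadd = (qscale-1)|1); for
 * intra blocks the DC coefficient is instead rescaled by y_dc_scale/c_dc_scale,
 * unless advanced intra coding (h263_aic) is used, in which case qadd is zero.
 */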
4523 static void dct_unquantize_h263_c(MpegEncContext *s, 
4524                                   DCTELEM *block, int n, int qscale)
4525 {
4526     int i, level, qmul, qadd;
4527     int nCoeffs;
4528     
4529     assert(s->block_last_index[n]>=0);
4530     
4531     qadd = (qscale - 1) | 1;
4532     qmul = qscale << 1;
4533     
4534     if (s->mb_intra) {
4535         if (!s->h263_aic) {
4536             if (n < 4) 
4537                 block[0] = block[0] * s->y_dc_scale;
4538             else
4539                 block[0] = block[0] * s->c_dc_scale;
4540         }else
4541             qadd = 0;
4542         i = 1;
4543         nCoeffs= 63; //does not always use the zigzag table
4544     } else {
4545         i = 0;
4546         nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
4547     }
4548
4549     for(;i<=nCoeffs;i++) {
4550         level = block[i];
4551         if (level) {
4552             if (level < 0) {
4553                 level = level * qmul - qadd;
4554             } else {
4555                 level = level * qmul + qadd;
4556             }
4557 #ifdef PARANOID
4558                 if (level < -2048 || level > 2047)
4559                     fprintf(stderr, "unquant error %d %d\n", i, level);
4560 #endif
4561             block[i] = level;
4562         }
4563     }
4564 }
4565
4566
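/* AVOption table attached (via .options) to several of the encoders declared
 * below; despite its name it is not specific to MPEG-4. */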
4567 static const AVOption mpeg4_options[] =
4568 {
4569     AVOPTION_CODEC_INT("bitrate", "desired video bitrate", bit_rate, 4, 240000000, 800000),
4570     AVOPTION_CODEC_INT("ratetol", "number of bits the bitstream is allowed to diverge from the reference"
4571                        "the reference can be CBR (for CBR pass1) or VBR (for pass2)",
4572                        bit_rate_tolerance, 4, 240000000, 8000),
4573     AVOPTION_CODEC_INT("qmin", "minimum quantizer", qmin, 1, 31, 2),
4574     AVOPTION_CODEC_INT("qmax", "maximum quantizer", qmax, 1, 31, 31),
4575     AVOPTION_CODEC_STRING("rc_eq", "rate control equation",
4576                           rc_eq, "tex^qComp,option1,options2", 0),
4577     AVOPTION_CODEC_INT("rc_minrate", "rate control minimum bitrate",
4578                        rc_min_rate, 4, 24000000, 0),
4579     AVOPTION_CODEC_INT("rc_maxrate", "rate control maximum bitrate",
4580                        rc_max_rate, 4, 24000000, 0),
4581     AVOPTION_CODEC_DOUBLE("rc_buf_aggresivity", "rate control buffer aggresivity",
4582                           rc_buffer_aggressivity, 4, 24000000, 0),
4583     AVOPTION_CODEC_DOUBLE("rc_initial_cplx", "initial complexity for pass1 ratecontrol",
4584                           rc_initial_cplx, 0., 9999999., 0),
4585     AVOPTION_CODEC_DOUBLE("i_quant_factor", "qscale factor between p and i frames",
4586                           i_quant_factor, 0., 0., 0),
4587     AVOPTION_CODEC_DOUBLE("i_quant_offset", "qscale offset between p and i frames",
4588                           i_quant_offset, -999999., 999999., 0),
4589     AVOPTION_CODEC_INT("dct_algo", "dct alghorithm",
4590                        dct_algo, 0, 5, 0), // fixme - "Auto,FastInt,Int,MMX,MLib,Altivec"
4591     AVOPTION_CODEC_DOUBLE("lumi_masking", "luminance masking",
4592                           lumi_masking, 0., 999999., 0),
4593     AVOPTION_CODEC_DOUBLE("temporal_cplx_masking", "temporary complexity masking",
4594                           temporal_cplx_masking, 0., 999999., 0),
4595     AVOPTION_CODEC_DOUBLE("spatial_cplx_masking", "spatial complexity masking",
4596                           spatial_cplx_masking, 0., 999999., 0),
4597     AVOPTION_CODEC_DOUBLE("p_masking", "p block masking",
4598                           p_masking, 0., 999999., 0),
4599     AVOPTION_CODEC_DOUBLE("dark_masking", "darkness masking",
4600                           dark_masking, 0., 999999., 0),
4601     AVOPTION_CODEC_INT("idct_algo", "idct alghorithm",
4602                        idct_algo, 0, 8, 0), // fixme - "Auto,Int,Simple,SimpleMMX,LibMPEG2MMX,PS2,MLib,ARM,Altivec"
4603
4604     AVOPTION_CODEC_INT("mb_qmin", "minimum MB quantizer",
4605                        mb_qmin, 0, 8, 0),
4606     AVOPTION_CODEC_INT("mb_qmax", "maximum MB quantizer",
4607                        mb_qmax, 0, 8, 0),
4608
4609     AVOPTION_CODEC_INT("me_cmp", "ME compare function",
4610                        me_cmp, 0, 24000000, 0),
4611     AVOPTION_CODEC_INT("me_sub_cmp", "subpixel ME compare function",
4612                        me_sub_cmp, 0, 24000000, 0),
4613
4614
4615     AVOPTION_CODEC_INT("dia_size", "ME diamond size & shape",
4616                        dia_size, 0, 24000000, 0),
4617     AVOPTION_CODEC_INT("last_predictor_count", "amount of previous MV predictors",
4618                        last_predictor_count, 0, 24000000, 0),
4619
4620     AVOPTION_CODEC_INT("pre_me", "pre pass for ME",
4621                        pre_me, 0, 24000000, 0),
4622     AVOPTION_CODEC_INT("me_pre_cmp", "ME pre pass compare function",
4623                        me_pre_cmp, 0, 24000000, 0),
4624
4625     AVOPTION_CODEC_INT("me_range", "maximum ME search range",
4626                        me_range, 0, 24000000, 0),
4627     AVOPTION_CODEC_INT("pre_dia_size", "ME pre pass diamod size & shape",
4628                        pre_dia_size, 0, 24000000, 0),
4629     AVOPTION_CODEC_INT("me_subpel_quality", "subpel ME quality",
4630                        me_subpel_quality, 0, 24000000, 0),
4631     AVOPTION_CODEC_INT("me_range", "maximum ME search range",
4632                        me_range, 0, 24000000, 0),
4633     AVOPTION_CODEC_FLAG("psnr", "calculate PSNR of compressed frames",
4634                         flags, CODEC_FLAG_PSNR, 0),
4635     AVOPTION_CODEC_RCOVERRIDE("rc_override", "ratecontrol override (=startframe,endframe,qscale,quality_factor)",
4636                               rc_override),
4637     AVOPTION_SUB(avoptions_common),
4638     AVOPTION_END()
4639 };
4640
4641 #ifdef CONFIG_ENCODERS
4642
4643 AVCodec mpeg1video_encoder = {
4644     "mpeg1video",
4645     CODEC_TYPE_VIDEO,
4646     CODEC_ID_MPEG1VIDEO,
4647     sizeof(MpegEncContext),
4648     MPV_encode_init,
4649     MPV_encode_picture,
4650     MPV_encode_end,
4651 };
4652
4653 #ifdef CONFIG_RISKY
4654
4655 AVCodec mpeg2video_encoder = {
4656     "mpeg2video",
4657     CODEC_TYPE_VIDEO,
4658     CODEC_ID_MPEG2VIDEO,
4659     sizeof(MpegEncContext),
4660     MPV_encode_init,
4661     MPV_encode_picture,
4662     MPV_encode_end,
4663 };
4664
4665 AVCodec h263_encoder = {
4666     "h263",
4667     CODEC_TYPE_VIDEO,
4668     CODEC_ID_H263,
4669     sizeof(MpegEncContext),
4670     MPV_encode_init,
4671     MPV_encode_picture,
4672     MPV_encode_end,
4673 };
4674
4675 AVCodec h263p_encoder = {
4676     "h263p",
4677     CODEC_TYPE_VIDEO,
4678     CODEC_ID_H263P,
4679     sizeof(MpegEncContext),
4680     MPV_encode_init,
4681     MPV_encode_picture,
4682     MPV_encode_end,
4683 };
4684
4685 AVCodec flv_encoder = {
4686     "flv",
4687     CODEC_TYPE_VIDEO,
4688     CODEC_ID_FLV1,
4689     sizeof(MpegEncContext),
4690     MPV_encode_init,
4691     MPV_encode_picture,
4692     MPV_encode_end,
4693 };
4694
4695 AVCodec rv10_encoder = {
4696     "rv10",
4697     CODEC_TYPE_VIDEO,
4698     CODEC_ID_RV10,
4699     sizeof(MpegEncContext),
4700     MPV_encode_init,
4701     MPV_encode_picture,
4702     MPV_encode_end,
4703 };
4704
4705 AVCodec mpeg4_encoder = {
4706     "mpeg4",
4707     CODEC_TYPE_VIDEO,
4708     CODEC_ID_MPEG4,
4709     sizeof(MpegEncContext),
4710     MPV_encode_init,
4711     MPV_encode_picture,
4712     MPV_encode_end,
4713     .options = mpeg4_options,
4714 };
4715
4716 AVCodec msmpeg4v1_encoder = {
4717     "msmpeg4v1",
4718     CODEC_TYPE_VIDEO,
4719     CODEC_ID_MSMPEG4V1,
4720     sizeof(MpegEncContext),
4721     MPV_encode_init,
4722     MPV_encode_picture,
4723     MPV_encode_end,
4724     .options = mpeg4_options,
4725 };
4726
4727 AVCodec msmpeg4v2_encoder = {
4728     "msmpeg4v2",
4729     CODEC_TYPE_VIDEO,
4730     CODEC_ID_MSMPEG4V2,
4731     sizeof(MpegEncContext),
4732     MPV_encode_init,
4733     MPV_encode_picture,
4734     MPV_encode_end,
4735     .options = mpeg4_options,
4736 };
4737
4738 AVCodec msmpeg4v3_encoder = {
4739     "msmpeg4",
4740     CODEC_TYPE_VIDEO,
4741     CODEC_ID_MSMPEG4V3,
4742     sizeof(MpegEncContext),
4743     MPV_encode_init,
4744     MPV_encode_picture,
4745     MPV_encode_end,
4746     .options = mpeg4_options,
4747 };
4748
4749 AVCodec wmv1_encoder = {
4750     "wmv1",
4751     CODEC_TYPE_VIDEO,
4752     CODEC_ID_WMV1,
4753     sizeof(MpegEncContext),
4754     MPV_encode_init,
4755     MPV_encode_picture,
4756     MPV_encode_end,
4757     .options = mpeg4_options,
4758 };
4759
4760 #endif
4761
4762 AVCodec mjpeg_encoder = {
4763     "mjpeg",
4764     CODEC_TYPE_VIDEO,
4765     CODEC_ID_MJPEG,
4766     sizeof(MpegEncContext),
4767     MPV_encode_init,
4768     MPV_encode_picture,
4769     MPV_encode_end,
4770 };
4771
4772 #endif //CONFIG_ENCODERS
4773