/* Extracted from the git.sesse.net FFmpeg web blob view of
 * libavcodec/mpegvideo.c, at commit "Reorder and factorize mb_type ifs,
 * 1 cpu cycle faster and simpler." */
1 /*
2  * The simplest mpeg encoder (well, it was the simplest!)
3  * Copyright (c) 2000,2001 Fabrice Bellard
4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5  *
6  * 4MV & hq & B-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
7  *
8  * This file is part of FFmpeg.
9  *
10  * FFmpeg is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * FFmpeg is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with FFmpeg; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23  */
24
25 /**
26  * @file libavcodec/mpegvideo.c
27  * The simplest mpeg encoder (well, it was the simplest!).
28  */
29
30 #include "avcodec.h"
31 #include "dsputil.h"
32 #include "mpegvideo.h"
33 #include "mpegvideo_common.h"
34 #include "mjpegenc.h"
35 #include "msmpeg4.h"
36 #include "faandct.h"
37 #include "xvmc_internal.h"
38 #include <limits.h>
39
40 //#undef NDEBUG
41 //#include <assert.h>
42
43 static void dct_unquantize_mpeg1_intra_c(MpegEncContext *s,
44                                    DCTELEM *block, int n, int qscale);
45 static void dct_unquantize_mpeg1_inter_c(MpegEncContext *s,
46                                    DCTELEM *block, int n, int qscale);
47 static void dct_unquantize_mpeg2_intra_c(MpegEncContext *s,
48                                    DCTELEM *block, int n, int qscale);
49 static void dct_unquantize_mpeg2_intra_bitexact(MpegEncContext *s,
50                                    DCTELEM *block, int n, int qscale);
51 static void dct_unquantize_mpeg2_inter_c(MpegEncContext *s,
52                                    DCTELEM *block, int n, int qscale);
53 static void dct_unquantize_h263_intra_c(MpegEncContext *s,
54                                   DCTELEM *block, int n, int qscale);
55 static void dct_unquantize_h263_inter_c(MpegEncContext *s,
56                                   DCTELEM *block, int n, int qscale);
57
58
59 /* enable all paranoid tests for rounding, overflows, etc... */
60 //#define PARANOID
61
62 //#define DEBUG
63
64
/* Default chroma qscale mapping: identity, i.e. chroma uses the same
 * quantizer scale as luma. Codecs with a nonlinear mapping install their
 * own table instead. */
static const uint8_t ff_default_chroma_qscale_table[32]={
//  0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
    0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
};
69
/* MPEG-1 style DC scale: a constant 8 for every qscale value.
 * Indexed by qscale (128 entries so larger qscale ranges are safe). */
const uint8_t ff_mpeg1_dc_scale_table[128]={
//  0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
    8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
    8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
    8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
    8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
};
77
/* PIX_FMT_NONE-terminated list of supported output formats for plain
 * (software-only) 4:2:0 decoders. */
const enum PixelFormat ff_pixfmt_list_420[] = {
    PIX_FMT_YUV420P,
    PIX_FMT_NONE
};
82
/* Like ff_pixfmt_list_420 but with hardware-accelerated formats listed
 * first, so get_format() prefers hwaccel when available. */
const enum PixelFormat ff_hwaccel_pixfmt_list_420[] = {
    PIX_FMT_DXVA2_VLD,
    PIX_FMT_VAAPI_VLD,
    PIX_FMT_YUV420P,
    PIX_FMT_NONE
};
89
/**
 * Scans [p, end) for an MPEG start code (00 00 01 xx), carrying the last
 * four bytes seen in *state so a code split across buffer boundaries is
 * still found on the next call.
 * @return pointer just past the byte following the 00 00 01 prefix,
 *         or end if no start code was found
 */
const uint8_t *ff_find_start_code(const uint8_t * restrict p, const uint8_t *end, uint32_t * restrict state){
    int i;

    assert(p<=end);
    if(p>=end)
        return end;

    /* Feed up to 3 bytes one at a time: combined with the carried *state
       this detects a start code straddling the previous buffer's end. */
    for(i=0; i<3; i++){
        uint32_t tmp= *state << 8;
        *state= tmp + *(p++);
        if(tmp == 0x100 || p==end)
            return p;
    }

    /* Fast scan: inspect bytes sparsely and advance by the largest step
       that provably cannot jump over a 00 00 01 pattern. */
    while(p<end){
        if     (p[-1] > 1      ) p+= 3;
        else if(p[-2]          ) p+= 2;
        else if(p[-3]|(p[-1]-1)) p++;
        else{
            p++;
            break;
        }
    }

    /* Reload the 4-byte state window from the final position (clamped so
       we never read past end). */
    p= FFMIN(p, end)-4;
    *state= AV_RB32(p);

    return p+4;
}
119
120 /* init common dct for both encoder and decoder */
/* init common dct for both encoder and decoder */
av_cold int ff_dct_common_init(MpegEncContext *s)
{
    /* install the portable C dequantizers; arch-specific init below may
       override them with optimized versions */
    s->dct_unquantize_h263_intra = dct_unquantize_h263_intra_c;
    s->dct_unquantize_h263_inter = dct_unquantize_h263_inter_c;
    s->dct_unquantize_mpeg1_intra = dct_unquantize_mpeg1_intra_c;
    s->dct_unquantize_mpeg1_inter = dct_unquantize_mpeg1_inter_c;
    s->dct_unquantize_mpeg2_intra = dct_unquantize_mpeg2_intra_c;
    if(s->flags & CODEC_FLAG_BITEXACT)
        s->dct_unquantize_mpeg2_intra = dct_unquantize_mpeg2_intra_bitexact;
    s->dct_unquantize_mpeg2_inter = dct_unquantize_mpeg2_inter_c;

    /* exactly one architecture-specific initializer is compiled in */
#if   HAVE_MMX
    MPV_common_init_mmx(s);
#elif ARCH_ALPHA
    MPV_common_init_axp(s);
#elif CONFIG_MLIB
    MPV_common_init_mlib(s);
#elif HAVE_MMI
    MPV_common_init_mmi(s);
#elif ARCH_ARM
    MPV_common_init_arm(s);
#elif HAVE_ALTIVEC
    MPV_common_init_altivec(s);
#elif ARCH_BFIN
    MPV_common_init_bfin(s);
#endif

    /* load & permutate scantables
       note: only wmv uses different ones
    */
    if(s->alternate_scan){
        ff_init_scantable(s->dsp.idct_permutation, &s->inter_scantable  , ff_alternate_vertical_scan);
        ff_init_scantable(s->dsp.idct_permutation, &s->intra_scantable  , ff_alternate_vertical_scan);
    }else{
        ff_init_scantable(s->dsp.idct_permutation, &s->inter_scantable  , ff_zigzag_direct);
        ff_init_scantable(s->dsp.idct_permutation, &s->intra_scantable  , ff_zigzag_direct);
    }
    ff_init_scantable(s->dsp.idct_permutation, &s->intra_h_scantable, ff_alternate_horizontal_scan);
    ff_init_scantable(s->dsp.idct_permutation, &s->intra_v_scantable, ff_alternate_vertical_scan);

    return 0;
}
163
164 void ff_copy_picture(Picture *dst, Picture *src){
165     *dst = *src;
166     dst->type= FF_BUFFER_TYPE_COPY;
167 }
168
169 /**
170  * Releases a frame buffer
171  */
/**
 * Releases a frame buffer
 * Returns the pixel buffer to the application via release_buffer() and
 * frees any hwaccel private data attached to the picture.
 */
static void free_frame_buffer(MpegEncContext *s, Picture *pic)
{
    s->avctx->release_buffer(s->avctx, (AVFrame*)pic);
    av_freep(&pic->hwaccel_picture_private);
}
177
178 /**
179  * Allocates a frame buffer
180  */
/**
 * Allocates a frame buffer
 * Allocates hwaccel private data if needed, obtains the pixel buffer via
 * get_buffer() and validates the strides the application returned.
 * @return 0 on success, -1 on failure (nothing left allocated on failure)
 */
static int alloc_frame_buffer(MpegEncContext *s, Picture *pic)
{
    int r;

    if (s->avctx->hwaccel) {
        assert(!pic->hwaccel_picture_private);
        if (s->avctx->hwaccel->priv_data_size) {
            pic->hwaccel_picture_private = av_mallocz(s->avctx->hwaccel->priv_data_size);
            if (!pic->hwaccel_picture_private) {
                av_log(s->avctx, AV_LOG_ERROR, "alloc_frame_buffer() failed (hwaccel private data allocation)\n");
                return -1;
            }
        }
    }

    r = s->avctx->get_buffer(s->avctx, (AVFrame*)pic);

    /* the application must deliver a buffer with valid age/type/data */
    if (r<0 || !pic->age || !pic->type || !pic->data[0]) {
        av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed (%d %d %d %p)\n", r, pic->age, pic->type, pic->data[0]);
        av_freep(&pic->hwaccel_picture_private);
        return -1;
    }

    /* strides must stay constant once the first frame fixed them */
    if (s->linesize && (s->linesize != pic->linesize[0] || s->uvlinesize != pic->linesize[1])) {
        av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed (stride changed)\n");
        free_frame_buffer(s, pic);
        return -1;
    }

    /* the code elsewhere assumes both chroma planes share one stride */
    if (pic->linesize[1] != pic->linesize[2]) {
        av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed (uv stride mismatch)\n");
        free_frame_buffer(s, pic);
        return -1;
    }

    return 0;
}
218
219 /**
220  * allocates a Picture
221  * The pixels are allocated/set by calling get_buffer() if shared=0
222  */
223 int ff_alloc_picture(MpegEncContext *s, Picture *pic, int shared){
224     const int big_mb_num= s->mb_stride*(s->mb_height+1) + 1; //the +1 is needed so memset(,,stride*height) does not sig11
225     const int mb_array_size= s->mb_stride*s->mb_height;
226     const int b8_array_size= s->b8_stride*s->mb_height*2;
227     const int b4_array_size= s->b4_stride*s->mb_height*4;
228     int i;
229     int r= -1;
230
231     if(shared){
232         assert(pic->data[0]);
233         assert(pic->type == 0 || pic->type == FF_BUFFER_TYPE_SHARED);
234         pic->type= FF_BUFFER_TYPE_SHARED;
235     }else{
236         assert(!pic->data[0]);
237
238         if (alloc_frame_buffer(s, pic) < 0)
239             return -1;
240
241         s->linesize  = pic->linesize[0];
242         s->uvlinesize= pic->linesize[1];
243     }
244
245     if(pic->qscale_table==NULL){
246         if (s->encoding) {
247             FF_ALLOCZ_OR_GOTO(s->avctx, pic->mb_var   , mb_array_size * sizeof(int16_t)  , fail)
248             FF_ALLOCZ_OR_GOTO(s->avctx, pic->mc_mb_var, mb_array_size * sizeof(int16_t)  , fail)
249             FF_ALLOCZ_OR_GOTO(s->avctx, pic->mb_mean  , mb_array_size * sizeof(int8_t )  , fail)
250         }
251
252         FF_ALLOCZ_OR_GOTO(s->avctx, pic->mbskip_table , mb_array_size * sizeof(uint8_t)+2, fail) //the +2 is for the slice end check
253         FF_ALLOCZ_OR_GOTO(s->avctx, pic->qscale_table , mb_array_size * sizeof(uint8_t)  , fail)
254         FF_ALLOCZ_OR_GOTO(s->avctx, pic->mb_type_base , (big_mb_num + s->mb_stride) * sizeof(uint32_t), fail)
255         pic->mb_type= pic->mb_type_base + 2*s->mb_stride+1;
256         if(s->out_format == FMT_H264){
257             for(i=0; i<2; i++){
258                 FF_ALLOCZ_OR_GOTO(s->avctx, pic->motion_val_base[i], 2 * (b4_array_size+4)  * sizeof(int16_t), fail)
259                 pic->motion_val[i]= pic->motion_val_base[i]+4;
260                 FF_ALLOCZ_OR_GOTO(s->avctx, pic->ref_index[i], b8_array_size * sizeof(uint8_t), fail)
261             }
262             pic->motion_subsample_log2= 2;
263         }else if(s->out_format == FMT_H263 || s->encoding || (s->avctx->debug&FF_DEBUG_MV) || (s->avctx->debug_mv)){
264             for(i=0; i<2; i++){
265                 FF_ALLOCZ_OR_GOTO(s->avctx, pic->motion_val_base[i], 2 * (b8_array_size+4) * sizeof(int16_t), fail)
266                 pic->motion_val[i]= pic->motion_val_base[i]+4;
267                 FF_ALLOCZ_OR_GOTO(s->avctx, pic->ref_index[i], b8_array_size * sizeof(uint8_t), fail)
268             }
269             pic->motion_subsample_log2= 3;
270         }
271         if(s->avctx->debug&FF_DEBUG_DCT_COEFF) {
272             FF_ALLOCZ_OR_GOTO(s->avctx, pic->dct_coeff, 64 * mb_array_size * sizeof(DCTELEM)*6, fail)
273         }
274         pic->qstride= s->mb_stride;
275         FF_ALLOCZ_OR_GOTO(s->avctx, pic->pan_scan , 1 * sizeof(AVPanScan), fail)
276     }
277
278     /* It might be nicer if the application would keep track of these
279      * but it would require an API change. */
280     memmove(s->prev_pict_types+1, s->prev_pict_types, PREV_PICT_TYPES_BUFFER_SIZE-1);
281     s->prev_pict_types[0]= s->dropable ? FF_B_TYPE : s->pict_type;
282     if(pic->age < PREV_PICT_TYPES_BUFFER_SIZE && s->prev_pict_types[pic->age] == FF_B_TYPE)
283         pic->age= INT_MAX; // Skipped MBs in B-frames are quite rare in MPEG-1/2 and it is a bit tricky to skip them anyway.
284
285     return 0;
286 fail: //for the FF_ALLOCZ_OR_GOTO macro
287     if(r>=0)
288         free_frame_buffer(s, pic);
289     return -1;
290 }
291
292 /**
293  * deallocates a picture
294  */
295 static void free_picture(MpegEncContext *s, Picture *pic){
296     int i;
297
298     if(pic->data[0] && pic->type!=FF_BUFFER_TYPE_SHARED){
299         free_frame_buffer(s, pic);
300     }
301
302     av_freep(&pic->mb_var);
303     av_freep(&pic->mc_mb_var);
304     av_freep(&pic->mb_mean);
305     av_freep(&pic->mbskip_table);
306     av_freep(&pic->qscale_table);
307     av_freep(&pic->mb_type_base);
308     av_freep(&pic->dct_coeff);
309     av_freep(&pic->pan_scan);
310     pic->mb_type= NULL;
311     for(i=0; i<2; i++){
312         av_freep(&pic->motion_val_base[i]);
313         av_freep(&pic->ref_index[i]);
314     }
315
316     if(pic->type == FF_BUFFER_TYPE_SHARED){
317         for(i=0; i<4; i++){
318             pic->base[i]=
319             pic->data[i]= NULL;
320         }
321         pic->type= 0;
322     }
323 }
324
/**
 * Allocates the per-thread scratch buffers of a (possibly duplicated)
 * context: edge emulation buffer, ME/RD/OBMC scratchpads and DCT blocks.
 * @return 0 on success, -1 on allocation failure (caller cleans up via
 *         MPV_common_end())
 */
static int init_duplicate_context(MpegEncContext *s, MpegEncContext *base){
    int i;

    // edge emu needs blocksize + filter length - 1 (=17x17 for halfpel / 21x21 for h264)
    FF_ALLOCZ_OR_GOTO(s->avctx, s->allocated_edge_emu_buffer, (s->width+64)*2*21*2, fail); //(width + edge + align)*interlaced*MBsize*tolerance
    s->edge_emu_buffer= s->allocated_edge_emu_buffer + (s->width+64)*2*21;

     //FIXME should be linesize instead of s->width*2 but that is not known before get_buffer()
    FF_ALLOCZ_OR_GOTO(s->avctx, s->me.scratchpad,  (s->width+64)*4*16*2*sizeof(uint8_t), fail)
    /* the scratchpads alias one allocation; they are never live at the
       same time */
    s->me.temp=         s->me.scratchpad;
    s->rd_scratchpad=   s->me.scratchpad;
    s->b_scratchpad=    s->me.scratchpad;
    s->obmc_scratchpad= s->me.scratchpad + 16;
    if (s->encoding) {
        FF_ALLOCZ_OR_GOTO(s->avctx, s->me.map      , ME_MAP_SIZE*sizeof(uint32_t), fail)
        FF_ALLOCZ_OR_GOTO(s->avctx, s->me.score_map, ME_MAP_SIZE*sizeof(uint32_t), fail)
        if(s->avctx->noise_reduction){
            FF_ALLOCZ_OR_GOTO(s->avctx, s->dct_error_sum, 2 * 64 * sizeof(int), fail)
        }
    }
    FF_ALLOCZ_OR_GOTO(s->avctx, s->blocks, 64*12*2 * sizeof(DCTELEM), fail)
    s->block= s->blocks[0];

    for(i=0;i<12;i++){
        s->pblocks[i] = &s->block[i];
    }
    return 0;
fail:
    return -1; //free() through MPV_common_end()
}
355
356 static void free_duplicate_context(MpegEncContext *s){
357     if(s==NULL) return;
358
359     av_freep(&s->allocated_edge_emu_buffer); s->edge_emu_buffer= NULL;
360     av_freep(&s->me.scratchpad);
361     s->me.temp=
362     s->rd_scratchpad=
363     s->b_scratchpad=
364     s->obmc_scratchpad= NULL;
365
366     av_freep(&s->dct_error_sum);
367     av_freep(&s->me.map);
368     av_freep(&s->me.score_map);
369     av_freep(&s->blocks);
370     s->block= NULL;
371 }
372
/**
 * Saves the per-thread fields of src into bak so they can be restored
 * after the wholesale memcpy in ff_update_duplicate_context().
 */
static void backup_duplicate_context(MpegEncContext *bak, MpegEncContext *src){
#define COPY(a) bak->a= src->a
    COPY(allocated_edge_emu_buffer);
    COPY(edge_emu_buffer);
    COPY(me.scratchpad);
    COPY(me.temp);
    COPY(rd_scratchpad);
    COPY(b_scratchpad);
    COPY(obmc_scratchpad);
    COPY(me.map);
    COPY(me.score_map);
    COPY(blocks);
    COPY(block);
    COPY(start_mb_y);
    COPY(end_mb_y);
    COPY(me.map_generation);
    COPY(pb);
    COPY(dct_error_sum);
    COPY(dct_count[0]);
    COPY(dct_count[1]);
#undef COPY
}
395
/**
 * Copies the shared state of src into dst while preserving dst's own
 * per-thread buffers (scratchpads, bitstream writer, slice range, ...).
 */
void ff_update_duplicate_context(MpegEncContext *dst, MpegEncContext *src){
    MpegEncContext bak;
    int i;
    //FIXME copy only needed parts
//START_TIMER
    /* save dst's thread-local fields, clone src wholesale, then restore */
    backup_duplicate_context(&bak, dst);
    memcpy(dst, src, sizeof(MpegEncContext));
    backup_duplicate_context(dst, &bak);
    /* pblocks must point into dst's own block array, not src's */
    for(i=0;i<12;i++){
        dst->pblocks[i] = &dst->block[i];
    }
//STOP_TIMER("update_duplicate_context") //about 10k cycles / 0.01 sec for 1000frames on 1ghz with 2 threads
}
409
410 /**
411  * sets the given MpegEncContext to common defaults (same for encoding and decoding).
412  * the changed fields will not depend upon the prior state of the MpegEncContext.
413  */
414 void MPV_common_defaults(MpegEncContext *s){
415     s->y_dc_scale_table=
416     s->c_dc_scale_table= ff_mpeg1_dc_scale_table;
417     s->chroma_qscale_table= ff_default_chroma_qscale_table;
418     s->progressive_frame= 1;
419     s->progressive_sequence= 1;
420     s->picture_structure= PICT_FRAME;
421
422     s->coded_picture_number = 0;
423     s->picture_number = 0;
424     s->input_picture_number = 0;
425
426     s->picture_in_gop_number = 0;
427
428     s->f_code = 1;
429     s->b_code = 1;
430 }
431
432 /**
433  * sets the given MpegEncContext to defaults for decoding.
434  * the changed fields will not depend upon the prior state of the MpegEncContext.
435  */
/**
 * sets the given MpegEncContext to defaults for decoding.
 * the changed fields will not depend upon the prior state of the MpegEncContext.
 * Currently identical to the common defaults; kept as a separate entry
 * point so decoder-only defaults can be added without touching encoders.
 */
void MPV_decode_defaults(MpegEncContext *s){
    MPV_common_defaults(s);
}
439
440 /**
441  * init common structure for both encoder and decoder.
442  * this assumes that some variables like width/height are already set
443  */
/**
 * init common structure for both encoder and decoder.
 * this assumes that some variables like width/height are already set
 */
av_cold int MPV_common_init(MpegEncContext *s)
{
    int y_size, c_size, yc_size, i, mb_array_size, mv_table_size, x, y, threads;

    /* interlaced MPEG-2 needs an even number of macroblock rows */
    if(s->codec_id == CODEC_ID_MPEG2VIDEO && !s->progressive_sequence)
        s->mb_height = (s->height + 31) / 32 * 2;
    else
        s->mb_height = (s->height + 15) / 16;

    if(s->avctx->pix_fmt == PIX_FMT_NONE){
        av_log(s->avctx, AV_LOG_ERROR, "decoding to PIX_FMT_NONE is not supported.\n");
        return -1;
    }

    /* slice threading cannot use more threads than there are MB rows */
    if(s->avctx->thread_count > MAX_THREADS || (s->avctx->thread_count > s->mb_height && s->mb_height)){
        av_log(s->avctx, AV_LOG_ERROR, "too many threads\n");
        return -1;
    }

    if((s->width || s->height) && avcodec_check_dimensions(s->avctx, s->width, s->height))
        return -1;

    dsputil_init(&s->dsp, s->avctx);
    ff_dct_common_init(s);

    s->flags= s->avctx->flags;
    s->flags2= s->avctx->flags2;

    /* derived geometry; the strides include one extra column of padding */
    s->mb_width  = (s->width  + 15) / 16;
    s->mb_stride = s->mb_width + 1;
    s->b8_stride = s->mb_width*2 + 1;
    s->b4_stride = s->mb_width*4 + 1;
    mb_array_size= s->mb_height * s->mb_stride;
    mv_table_size= (s->mb_height+2) * s->mb_stride + 1;

    /* set chroma shifts */
    avcodec_get_chroma_sub_sample(s->avctx->pix_fmt,&(s->chroma_x_shift),
                                                    &(s->chroma_y_shift) );

    /* set default edge pos, will be overriden in decode_header if needed */
    s->h_edge_pos= s->mb_width*16;
    s->v_edge_pos= s->mb_height*16;

    s->mb_num = s->mb_width * s->mb_height;

    s->block_wrap[0]=
    s->block_wrap[1]=
    s->block_wrap[2]=
    s->block_wrap[3]= s->b8_stride;
    s->block_wrap[4]=
    s->block_wrap[5]= s->mb_stride;

    y_size = s->b8_stride * (2 * s->mb_height + 1);
    c_size = s->mb_stride * (s->mb_height + 1);
    yc_size = y_size + 2 * c_size;

    /* convert fourcc to upper case */
    s->codec_tag=          toupper( s->avctx->codec_tag     &0xFF)
                        + (toupper((s->avctx->codec_tag>>8 )&0xFF)<<8 )
                        + (toupper((s->avctx->codec_tag>>16)&0xFF)<<16)
                        + (toupper((s->avctx->codec_tag>>24)&0xFF)<<24);

    s->stream_codec_tag=          toupper( s->avctx->stream_codec_tag     &0xFF)
                               + (toupper((s->avctx->stream_codec_tag>>8 )&0xFF)<<8 )
                               + (toupper((s->avctx->stream_codec_tag>>16)&0xFF)<<16)
                               + (toupper((s->avctx->stream_codec_tag>>24)&0xFF)<<24);

    s->avctx->coded_frame= (AVFrame*)&s->current_picture;

    /* mapping from linear MB index to (possibly padded) stride position */
    FF_ALLOCZ_OR_GOTO(s->avctx, s->mb_index2xy, (s->mb_num+1)*sizeof(int), fail) //error ressilience code looks cleaner with this
    for(y=0; y<s->mb_height; y++){
        for(x=0; x<s->mb_width; x++){
            s->mb_index2xy[ x + y*s->mb_width ] = x + y*s->mb_stride;
        }
    }
    s->mb_index2xy[ s->mb_height*s->mb_width ] = (s->mb_height-1)*s->mb_stride + s->mb_width; //FIXME really needed?

    if (s->encoding) {
        /* Allocate MV tables */
        FF_ALLOCZ_OR_GOTO(s->avctx, s->p_mv_table_base            , mv_table_size * 2 * sizeof(int16_t), fail)
        FF_ALLOCZ_OR_GOTO(s->avctx, s->b_forw_mv_table_base       , mv_table_size * 2 * sizeof(int16_t), fail)
        FF_ALLOCZ_OR_GOTO(s->avctx, s->b_back_mv_table_base       , mv_table_size * 2 * sizeof(int16_t), fail)
        FF_ALLOCZ_OR_GOTO(s->avctx, s->b_bidir_forw_mv_table_base , mv_table_size * 2 * sizeof(int16_t), fail)
        FF_ALLOCZ_OR_GOTO(s->avctx, s->b_bidir_back_mv_table_base , mv_table_size * 2 * sizeof(int16_t), fail)
        FF_ALLOCZ_OR_GOTO(s->avctx, s->b_direct_mv_table_base     , mv_table_size * 2 * sizeof(int16_t), fail)
        /* the usable tables start one row + one column into the base
           allocation so neighbor accesses at the border stay in bounds */
        s->p_mv_table           = s->p_mv_table_base            + s->mb_stride + 1;
        s->b_forw_mv_table      = s->b_forw_mv_table_base       + s->mb_stride + 1;
        s->b_back_mv_table      = s->b_back_mv_table_base       + s->mb_stride + 1;
        s->b_bidir_forw_mv_table= s->b_bidir_forw_mv_table_base + s->mb_stride + 1;
        s->b_bidir_back_mv_table= s->b_bidir_back_mv_table_base + s->mb_stride + 1;
        s->b_direct_mv_table    = s->b_direct_mv_table_base     + s->mb_stride + 1;

        if(s->msmpeg4_version){
            FF_ALLOCZ_OR_GOTO(s->avctx, s->ac_stats, 2*2*(MAX_LEVEL+1)*(MAX_RUN+1)*2*sizeof(int), fail);
        }
        FF_ALLOCZ_OR_GOTO(s->avctx, s->avctx->stats_out, 256, fail);

        /* Allocate MB type table */
        FF_ALLOCZ_OR_GOTO(s->avctx, s->mb_type  , mb_array_size * sizeof(uint16_t), fail) //needed for encoding

        FF_ALLOCZ_OR_GOTO(s->avctx, s->lambda_table, mb_array_size * sizeof(int), fail)

        FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix  , 64*32   * sizeof(int), fail)
        FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix  , 64*32   * sizeof(int), fail)
        FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix16, 64*32*2 * sizeof(uint16_t), fail)
        FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix16, 64*32*2 * sizeof(uint16_t), fail)
        FF_ALLOCZ_OR_GOTO(s->avctx, s->input_picture, MAX_PICTURE_COUNT * sizeof(Picture*), fail)
        FF_ALLOCZ_OR_GOTO(s->avctx, s->reordered_input_picture, MAX_PICTURE_COUNT * sizeof(Picture*), fail)

        if(s->avctx->noise_reduction){
            FF_ALLOCZ_OR_GOTO(s->avctx, s->dct_offset, 2 * 64 * sizeof(uint16_t), fail)
        }
    }
    FF_ALLOCZ_OR_GOTO(s->avctx, s->picture, MAX_PICTURE_COUNT * sizeof(Picture), fail)
    for(i = 0; i < MAX_PICTURE_COUNT; i++) {
        avcodec_get_frame_defaults((AVFrame *)&s->picture[i]);
    }

    FF_ALLOCZ_OR_GOTO(s->avctx, s->error_status_table, mb_array_size*sizeof(uint8_t), fail)

    if(s->codec_id==CODEC_ID_MPEG4 || (s->flags & CODEC_FLAG_INTERLACED_ME)){
        /* interlaced direct mode decoding tables */
            for(i=0; i<2; i++){
                int j, k;
                for(j=0; j<2; j++){
                    for(k=0; k<2; k++){
                        FF_ALLOCZ_OR_GOTO(s->avctx,    s->b_field_mv_table_base[i][j][k], mv_table_size * 2 * sizeof(int16_t), fail)
                        s->b_field_mv_table[i][j][k] = s->b_field_mv_table_base[i][j][k] + s->mb_stride + 1;
                    }
                    FF_ALLOCZ_OR_GOTO(s->avctx, s->b_field_select_table [i][j], mb_array_size * 2 * sizeof(uint8_t), fail)
                    FF_ALLOCZ_OR_GOTO(s->avctx, s->p_field_mv_table_base[i][j], mv_table_size * 2 * sizeof(int16_t), fail)
                    s->p_field_mv_table[i][j] = s->p_field_mv_table_base[i][j]+ s->mb_stride + 1;
                }
                FF_ALLOCZ_OR_GOTO(s->avctx, s->p_field_select_table[i], mb_array_size * 2 * sizeof(uint8_t), fail)
            }
    }
    if (s->out_format == FMT_H263) {
        /* ac values */
        FF_ALLOCZ_OR_GOTO(s->avctx, s->ac_val_base, yc_size * sizeof(int16_t) * 16, fail);
        s->ac_val[0] = s->ac_val_base + s->b8_stride + 1;
        s->ac_val[1] = s->ac_val_base + y_size + s->mb_stride + 1;
        s->ac_val[2] = s->ac_val[1] + c_size;

        /* cbp values */
        FF_ALLOCZ_OR_GOTO(s->avctx, s->coded_block_base, y_size, fail);
        s->coded_block= s->coded_block_base + s->b8_stride + 1;

        /* cbp, ac_pred, pred_dir */
        FF_ALLOCZ_OR_GOTO(s->avctx, s->cbp_table     , mb_array_size * sizeof(uint8_t), fail)
        FF_ALLOCZ_OR_GOTO(s->avctx, s->pred_dir_table, mb_array_size * sizeof(uint8_t), fail)
    }

    if (s->h263_pred || s->h263_plus || !s->encoding) {
        /* dc values */
        //MN: we need these for error resilience of intra-frames
        FF_ALLOCZ_OR_GOTO(s->avctx, s->dc_val_base, yc_size * sizeof(int16_t), fail);
        s->dc_val[0] = s->dc_val_base + s->b8_stride + 1;
        s->dc_val[1] = s->dc_val_base + y_size + s->mb_stride + 1;
        s->dc_val[2] = s->dc_val[1] + c_size;
        /* 1024 is the neutral DC predictor value */
        for(i=0;i<yc_size;i++)
            s->dc_val_base[i] = 1024;
    }

    /* which mb is a intra block */
    FF_ALLOCZ_OR_GOTO(s->avctx, s->mbintra_table, mb_array_size, fail);
    memset(s->mbintra_table, 1, mb_array_size);

    /* init macroblock skip table */
    FF_ALLOCZ_OR_GOTO(s->avctx, s->mbskip_table, mb_array_size+2, fail);
    //Note the +1 is for a quicker mpeg4 slice_end detection
    FF_ALLOCZ_OR_GOTO(s->avctx, s->prev_pict_types, PREV_PICT_TYPES_BUFFER_SIZE, fail);

    s->parse_context.state= -1;
    if((s->avctx->debug&(FF_DEBUG_VIS_QP|FF_DEBUG_VIS_MB_TYPE)) || (s->avctx->debug_mv)){
       s->visualization_buffer[0] = av_malloc((s->mb_width*16 + 2*EDGE_WIDTH) * s->mb_height*16 + 2*EDGE_WIDTH);
       s->visualization_buffer[1] = av_malloc((s->mb_width*16 + 2*EDGE_WIDTH) * s->mb_height*16 + 2*EDGE_WIDTH);
       s->visualization_buffer[2] = av_malloc((s->mb_width*16 + 2*EDGE_WIDTH) * s->mb_height*16 + 2*EDGE_WIDTH);
    }

    s->context_initialized = 1;

    /* thread 0 is the main context; the others are full memcpy clones */
    s->thread_context[0]= s;
    threads = s->avctx->thread_count;

    for(i=1; i<threads; i++){
        s->thread_context[i]= av_malloc(sizeof(MpegEncContext));
        memcpy(s->thread_context[i], s, sizeof(MpegEncContext));
    }

    /* give every thread its own scratch buffers and its slice of MB rows */
    for(i=0; i<threads; i++){
        if(init_duplicate_context(s->thread_context[i], s) < 0)
           goto fail;
        s->thread_context[i]->start_mb_y= (s->mb_height*(i  ) + s->avctx->thread_count/2) / s->avctx->thread_count;
        s->thread_context[i]->end_mb_y  = (s->mb_height*(i+1) + s->avctx->thread_count/2) / s->avctx->thread_count;
    }

    return 0;
 fail:
    MPV_common_end(s);
    return -1;
}
645
646 /* init common structure for both encoder and decoder */
/* init common structure for both encoder and decoder */
/* (frees everything MPV_common_init() allocated; safe to call on a
 * partially initialized context since av_freep() tolerates NULL) */
void MPV_common_end(MpegEncContext *s)
{
    int i, j, k;

    /* per-thread scratch buffers; thread_context[0] is s itself and is
       not freed as a context */
    for(i=0; i<s->avctx->thread_count; i++){
        free_duplicate_context(s->thread_context[i]);
    }
    for(i=1; i<s->avctx->thread_count; i++){
        av_freep(&s->thread_context[i]);
    }

    av_freep(&s->parse_context.buffer);
    s->parse_context.buffer_size=0;

    /* encoder MV tables */
    av_freep(&s->mb_type);
    av_freep(&s->p_mv_table_base);
    av_freep(&s->b_forw_mv_table_base);
    av_freep(&s->b_back_mv_table_base);
    av_freep(&s->b_bidir_forw_mv_table_base);
    av_freep(&s->b_bidir_back_mv_table_base);
    av_freep(&s->b_direct_mv_table_base);
    s->p_mv_table= NULL;
    s->b_forw_mv_table= NULL;
    s->b_back_mv_table= NULL;
    s->b_bidir_forw_mv_table= NULL;
    s->b_bidir_back_mv_table= NULL;
    s->b_direct_mv_table= NULL;
    /* interlaced direct mode tables */
    for(i=0; i<2; i++){
        for(j=0; j<2; j++){
            for(k=0; k<2; k++){
                av_freep(&s->b_field_mv_table_base[i][j][k]);
                s->b_field_mv_table[i][j][k]=NULL;
            }
            av_freep(&s->b_field_select_table[i][j]);
            av_freep(&s->p_field_mv_table_base[i][j]);
            s->p_field_mv_table[i][j]=NULL;
        }
        av_freep(&s->p_field_select_table[i]);
    }

    av_freep(&s->dc_val_base);
    av_freep(&s->ac_val_base);
    av_freep(&s->coded_block_base);
    av_freep(&s->mbintra_table);
    av_freep(&s->cbp_table);
    av_freep(&s->pred_dir_table);

    av_freep(&s->mbskip_table);
    av_freep(&s->prev_pict_types);
    av_freep(&s->bitstream_buffer);
    s->allocated_bitstream_buffer_size=0;

    av_freep(&s->avctx->stats_out);
    av_freep(&s->ac_stats);
    av_freep(&s->error_status_table);
    av_freep(&s->mb_index2xy);
    av_freep(&s->lambda_table);
    av_freep(&s->q_intra_matrix);
    av_freep(&s->q_inter_matrix);
    av_freep(&s->q_intra_matrix16);
    av_freep(&s->q_inter_matrix16);
    av_freep(&s->input_picture);
    av_freep(&s->reordered_input_picture);
    av_freep(&s->dct_offset);

    /* release all pictures before freeing the picture array itself */
    if(s->picture){
        for(i=0; i<MAX_PICTURE_COUNT; i++){
            free_picture(s, &s->picture[i]);
        }
    }
    av_freep(&s->picture);
    s->context_initialized = 0;
    s->last_picture_ptr=
    s->next_picture_ptr=
    s->current_picture_ptr= NULL;
    s->linesize= s->uvlinesize= 0;

    for(i=0; i<3; i++)
        av_freep(&s->visualization_buffer[i]);

    avcodec_default_free_buffers(s->avctx);
}
729
/**
 * Builds the max_level[], max_run[] and index_run[] lookup tables of an
 * RLTable from its (run, level) code list.
 * @param static_store if non-NULL, preallocated storage that packs the
 *        three tables back to back per "last" class; the function is then
 *        a no-op on repeat calls. If NULL the tables are av_malloc()ed.
 */
void init_rl(RLTable *rl, uint8_t static_store[2][2*MAX_RUN + MAX_LEVEL + 3])
{
    int8_t max_level[MAX_RUN+1], max_run[MAX_LEVEL+1];
    uint8_t index_run[MAX_RUN+1];
    int last, run, level, start, end, i;

    /* If table is static, we can quit if rl->max_level[0] is not NULL */
    if(static_store && rl->max_level[0])
        return;

    /* compute max_level[], max_run[] and index_run[] */
    for(last=0;last<2;last++) {
        /* codes [0, rl->last) are "not last coefficient",
           codes [rl->last, rl->n) are "last coefficient" */
        if (last == 0) {
            start = 0;
            end = rl->last;
        } else {
            start = rl->last;
            end = rl->n;
        }

        /* index_run entries equal to rl->n mean "no code for this run" */
        memset(max_level, 0, MAX_RUN + 1);
        memset(max_run, 0, MAX_LEVEL + 1);
        memset(index_run, rl->n, MAX_RUN + 1);
        for(i=start;i<end;i++) {
            run = rl->table_run[i];
            level = rl->table_level[i];
            if (index_run[run] == rl->n)
                index_run[run] = i;
            if (level > max_level[run])
                max_level[run] = level;
            if (run > max_run[level])
                max_run[level] = run;
        }
        /* static_store[last] is partitioned as:
           [0, MAX_RUN] max_level, then [.., +MAX_LEVEL] max_run,
           then [.., +MAX_RUN] index_run */
        if(static_store)
            rl->max_level[last] = static_store[last];
        else
            rl->max_level[last] = av_malloc(MAX_RUN + 1);
        memcpy(rl->max_level[last], max_level, MAX_RUN + 1);
        if(static_store)
            rl->max_run[last] = static_store[last] + MAX_RUN + 1;
        else
            rl->max_run[last] = av_malloc(MAX_LEVEL + 1);
        memcpy(rl->max_run[last], max_run, MAX_LEVEL + 1);
        if(static_store)
            rl->index_run[last] = static_store[last] + MAX_RUN + MAX_LEVEL + 2;
        else
            rl->index_run[last] = av_malloc(MAX_RUN + 1);
        memcpy(rl->index_run[last], index_run, MAX_RUN + 1);
    }
}
780
/**
 * Precomputes rl_vlc[q] for every qscale q: a VLC table whose entries
 * carry the already-dequantized level, so decoding avoids a separate
 * dequantization step per coefficient.
 */
void init_vlc_rl(RLTable *rl)
{
    int i, q;

    for(q=0; q<32; q++){
        /* MPEG-style dequantization: level*2*q + (q-1 rounded to odd) */
        int qmul= q*2;
        int qadd= (q-1)|1;

        /* q==0 means "no dequantization": identity mapping */
        if(q==0){
            qmul=1;
            qadd=0;
        }
        for(i=0; i<rl->vlc.table_size; i++){
            int code= rl->vlc.table[i][0];
            int len = rl->vlc.table[i][1];
            int level, run;

            if(len==0){ // illegal code
                run= 66;
                level= MAX_LEVEL;
            }else if(len<0){ //more bits needed
                run= 0;
                level= code;
            }else{
                if(code==rl->n){ //esc
                    run= 66;
                    level= 0;
                }else{
                    run=   rl->table_run  [code] + 1;
                    level= rl->table_level[code] * qmul + qadd;
                    if(code >= rl->last) run+=192; // flag "last coefficient" codes
                }
            }
            rl->rl_vlc[q][i].len= len;
            rl->rl_vlc[q][i].level= level;
            rl->rl_vlc[q][i].run= run;
        }
    }
}
820
821 int ff_find_unused_picture(MpegEncContext *s, int shared){
822     int i;
823
824     if(shared){
825         for(i=0; i<MAX_PICTURE_COUNT; i++){
826             if(s->picture[i].data[0]==NULL && s->picture[i].type==0) return i;
827         }
828     }else{
829         for(i=0; i<MAX_PICTURE_COUNT; i++){
830             if(s->picture[i].data[0]==NULL && s->picture[i].type!=0) return i; //FIXME
831         }
832         for(i=0; i<MAX_PICTURE_COUNT; i++){
833             if(s->picture[i].data[0]==NULL) return i;
834         }
835     }
836
837     av_log(s->avctx, AV_LOG_FATAL, "Internal error, picture buffer overflow\n");
838     /* We could return -1, but the codec would crash trying to draw into a
839      * non-existing frame anyway. This is safer than waiting for a random crash.
840      * Also the return of this is never useful, an encoder must only allocate
841      * as much as allowed in the specification. This has no relationship to how
842      * much libavcodec could allocate (and MAX_PICTURE_COUNT is always large
843      * enough for such valid streams).
844      * Plus, a decoder has to check stream validity and remove frames if too
845      * many reference frames are around. Waiting for "OOM" is not correct at
846      * all. Similarly, missing reference frames have to be replaced by
847      * interpolated/MC frames, anything else is a bug in the codec ...
848      */
849     abort();
850     return -1;
851 }
852
853 static void update_noise_reduction(MpegEncContext *s){
854     int intra, i;
855
856     for(intra=0; intra<2; intra++){
857         if(s->dct_count[intra] > (1<<16)){
858             for(i=0; i<64; i++){
859                 s->dct_error_sum[intra][i] >>=1;
860             }
861             s->dct_count[intra] >>= 1;
862         }
863
864         for(i=0; i<64; i++){
865             s->dct_offset[intra][i]= (s->avctx->noise_reduction * s->dct_count[intra] + s->dct_error_sum[intra][i]/2) / (s->dct_error_sum[intra][i]+1);
866         }
867     }
868 }
869
/**
 * generic function for encode/decode called after coding/decoding the header and before a frame is coded/decoded
 *
 * Releases stale reference frames, picks/allocates the current picture,
 * rotates last/next picture pointers, allocates dummy frames for broken
 * streams, adjusts line sizes for field pictures and selects the
 * dequantizer functions.
 * @return 0 on success, -1 if a picture allocation failed
 */
int MPV_frame_start(MpegEncContext *s, AVCodecContext *avctx)
{
    int i;
    Picture *pic;
    s->mb_skipped = 0;

    assert(s->last_picture_ptr==NULL || s->out_format != FMT_H264 || s->codec_id == CODEC_ID_SVQ3);

    /* mark&release old frames */
    if (s->pict_type != FF_B_TYPE && s->last_picture_ptr && s->last_picture_ptr != s->next_picture_ptr && s->last_picture_ptr->data[0]) {
      if(s->out_format != FMT_H264 || s->codec_id == CODEC_ID_SVQ3){
          free_frame_buffer(s, s->last_picture_ptr);

        /* release forgotten pictures */
        /* if(mpeg124/h263) */
        if(!s->encoding){
            for(i=0; i<MAX_PICTURE_COUNT; i++){
                /* a referenced picture that is neither next nor last should
                   not exist anymore at this point */
                if(s->picture[i].data[0] && &s->picture[i] != s->next_picture_ptr && s->picture[i].reference){
                    av_log(avctx, AV_LOG_ERROR, "releasing zombie picture\n");
                    free_frame_buffer(s, &s->picture[i]);
                }
            }
        }
      }
    }

    if(!s->encoding){
        /* release non reference frames */
        for(i=0; i<MAX_PICTURE_COUNT; i++){
            if(s->picture[i].data[0] && !s->picture[i].reference /*&& s->picture[i].type!=FF_BUFFER_TYPE_SHARED*/){
                free_frame_buffer(s, &s->picture[i]);
            }
        }

        if(s->current_picture_ptr && s->current_picture_ptr->data[0]==NULL)
            pic= s->current_picture_ptr; //we already have a unused image (maybe it was set before reading the header)
        else{
            i= ff_find_unused_picture(s, 0);
            pic= &s->picture[i];
        }

        /* reference: 0 = not referenced, 3 = both fields referenced,
           for H.264 the current picture_structure is used instead */
        pic->reference= 0;
        if (!s->dropable){
            if (s->codec_id == CODEC_ID_H264)
                pic->reference = s->picture_structure;
            else if (s->pict_type != FF_B_TYPE)
                pic->reference = 3;
        }

        pic->coded_picture_number= s->coded_picture_number++;

        if(ff_alloc_picture(s, pic, 0) < 0)
            return -1;

        s->current_picture_ptr= pic;
        s->current_picture_ptr->top_field_first= s->top_field_first; //FIXME use only the vars from current_pic
        s->current_picture_ptr->interlaced_frame= !s->progressive_frame && !s->progressive_sequence;
    }

    s->current_picture_ptr->pict_type= s->pict_type;
//    if(s->flags && CODEC_FLAG_QSCALE)
  //      s->current_picture_ptr->quality= s->new_picture_ptr->quality;
    s->current_picture_ptr->key_frame= s->pict_type == FF_I_TYPE;

    ff_copy_picture(&s->current_picture, s->current_picture_ptr);

    /* rotate the reference pictures: current becomes next, next becomes
       last (B frames and dropable frames are never referenced) */
    if (s->pict_type != FF_B_TYPE) {
        s->last_picture_ptr= s->next_picture_ptr;
        if(!s->dropable)
            s->next_picture_ptr= s->current_picture_ptr;
    }
/*    av_log(s->avctx, AV_LOG_DEBUG, "L%p N%p C%p L%p N%p C%p type:%d drop:%d\n", s->last_picture_ptr, s->next_picture_ptr,s->current_picture_ptr,
        s->last_picture_ptr    ? s->last_picture_ptr->data[0] : NULL,
        s->next_picture_ptr    ? s->next_picture_ptr->data[0] : NULL,
        s->current_picture_ptr ? s->current_picture_ptr->data[0] : NULL,
        s->pict_type, s->dropable);*/

    if(s->codec_id != CODEC_ID_H264){
        /* tolerate broken streams that start on a non-keyframe or a B frame
           by synthesizing dummy reference pictures */
        if((s->last_picture_ptr==NULL || s->last_picture_ptr->data[0]==NULL) && s->pict_type!=FF_I_TYPE){
            av_log(avctx, AV_LOG_ERROR, "warning: first frame is no keyframe\n");
            /* Allocate a dummy frame */
            i= ff_find_unused_picture(s, 0);
            s->last_picture_ptr= &s->picture[i];
            if(ff_alloc_picture(s, s->last_picture_ptr, 0) < 0)
                return -1;
        }
        if((s->next_picture_ptr==NULL || s->next_picture_ptr->data[0]==NULL) && s->pict_type==FF_B_TYPE){
            /* Allocate a dummy frame */
            i= ff_find_unused_picture(s, 0);
            s->next_picture_ptr= &s->picture[i];
            if(ff_alloc_picture(s, s->next_picture_ptr, 0) < 0)
                return -1;
        }
    }

    if(s->last_picture_ptr) ff_copy_picture(&s->last_picture, s->last_picture_ptr);
    if(s->next_picture_ptr) ff_copy_picture(&s->next_picture, s->next_picture_ptr);

    assert(s->pict_type == FF_I_TYPE || (s->last_picture_ptr && s->last_picture_ptr->data[0]));

    /* field pictures: double the line sizes so the second field is skipped,
       and start on the second line for the bottom field */
    if(s->picture_structure!=PICT_FRAME && s->out_format != FMT_H264){
        int i;
        for(i=0; i<4; i++){
            if(s->picture_structure == PICT_BOTTOM_FIELD){
                 s->current_picture.data[i] += s->current_picture.linesize[i];
            }
            s->current_picture.linesize[i] *= 2;
            s->last_picture.linesize[i] *=2;
            s->next_picture.linesize[i] *=2;
        }
    }

    s->hurry_up= s->avctx->hurry_up;
    s->error_recognition= avctx->error_recognition;

    /* set dequantizer, we can't do it during init as it might change for mpeg4
       and we can't do it in the header decode as init is not called for mpeg4 there yet */
    if(s->mpeg_quant || s->codec_id == CODEC_ID_MPEG2VIDEO){
        s->dct_unquantize_intra = s->dct_unquantize_mpeg2_intra;
        s->dct_unquantize_inter = s->dct_unquantize_mpeg2_inter;
    }else if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
        s->dct_unquantize_intra = s->dct_unquantize_h263_intra;
        s->dct_unquantize_inter = s->dct_unquantize_h263_inter;
    }else{
        s->dct_unquantize_intra = s->dct_unquantize_mpeg1_intra;
        s->dct_unquantize_inter = s->dct_unquantize_mpeg1_inter;
    }

    if(s->dct_error_sum){
        assert(s->avctx->noise_reduction && s->encoding);

        update_noise_reduction(s);
    }

    if(CONFIG_MPEG_XVMC_DECODER && s->avctx->xvmc_acceleration)
        return ff_xvmc_field_start(s, avctx);

    return 0;
}
1012
/* generic function for encode/decode called after a frame has been coded/decoded */
/* Draws padding edges around the reconstructed picture (for unrestricted
 * motion vectors), updates per-frame bookkeeping, and on the encoder side
 * releases non-reference frames. */
void MPV_frame_end(MpegEncContext *s)
{
    int i;
    /* draw edge for correct motion prediction if outside */
    //just to make sure that all data is rendered.
    if(CONFIG_MPEG_XVMC_DECODER && s->avctx->xvmc_acceleration){
        ff_xvmc_field_end(s);
    }else if(!s->avctx->hwaccel
       && !(s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
       && s->unrestricted_mv
       && s->current_picture.reference
       && !s->intra_only
       && !(s->flags&CODEC_FLAG_EMU_EDGE)) {
            /* chroma planes get half-sized edges (4:2:0 subsampling here) */
            s->dsp.draw_edges(s->current_picture.data[0], s->linesize  , s->h_edge_pos   , s->v_edge_pos   , EDGE_WIDTH  );
            s->dsp.draw_edges(s->current_picture.data[1], s->uvlinesize, s->h_edge_pos>>1, s->v_edge_pos>>1, EDGE_WIDTH/2);
            s->dsp.draw_edges(s->current_picture.data[2], s->uvlinesize, s->h_edge_pos>>1, s->v_edge_pos>>1, EDGE_WIDTH/2);
    }
    emms_c();

    s->last_pict_type    = s->pict_type;
    s->last_lambda_for[s->pict_type]= s->current_picture_ptr->quality;
    if(s->pict_type!=FF_B_TYPE){
        s->last_non_b_pict_type= s->pict_type;
    }
#if 0
        /* copy back current_picture variables */
    for(i=0; i<MAX_PICTURE_COUNT; i++){
        if(s->picture[i].data[0] == s->current_picture.data[0]){
            s->picture[i]= s->current_picture;
            break;
        }
    }
    assert(i<MAX_PICTURE_COUNT);
#endif

    if(s->encoding){
        /* release non-reference frames */
        for(i=0; i<MAX_PICTURE_COUNT; i++){
            if(s->picture[i].data[0] && !s->picture[i].reference /*&& s->picture[i].type!=FF_BUFFER_TYPE_SHARED*/){
                free_frame_buffer(s, &s->picture[i]);
            }
        }
    }
    // clear copies, to avoid confusion
#if 0
    memset(&s->last_picture, 0, sizeof(Picture));
    memset(&s->next_picture, 0, sizeof(Picture));
    memset(&s->current_picture, 0, sizeof(Picture));
#endif
    s->avctx->coded_frame= (AVFrame*)s->current_picture_ptr;
}
1065
1066 /**
1067  * draws an line from (ex, ey) -> (sx, sy).
1068  * @param w width of the image
1069  * @param h height of the image
1070  * @param stride stride/linesize of the image
1071  * @param color color of the arrow
1072  */
1073 static void draw_line(uint8_t *buf, int sx, int sy, int ex, int ey, int w, int h, int stride, int color){
1074     int x, y, fr, f;
1075
1076     sx= av_clip(sx, 0, w-1);
1077     sy= av_clip(sy, 0, h-1);
1078     ex= av_clip(ex, 0, w-1);
1079     ey= av_clip(ey, 0, h-1);
1080
1081     buf[sy*stride + sx]+= color;
1082
1083     if(FFABS(ex - sx) > FFABS(ey - sy)){
1084         if(sx > ex){
1085             FFSWAP(int, sx, ex);
1086             FFSWAP(int, sy, ey);
1087         }
1088         buf+= sx + sy*stride;
1089         ex-= sx;
1090         f= ((ey-sy)<<16)/ex;
1091         for(x= 0; x <= ex; x++){
1092             y = (x*f)>>16;
1093             fr= (x*f)&0xFFFF;
1094             buf[ y   *stride + x]+= (color*(0x10000-fr))>>16;
1095             buf[(y+1)*stride + x]+= (color*         fr )>>16;
1096         }
1097     }else{
1098         if(sy > ey){
1099             FFSWAP(int, sx, ex);
1100             FFSWAP(int, sy, ey);
1101         }
1102         buf+= sx + sy*stride;
1103         ey-= sy;
1104         if(ey) f= ((ex-sx)<<16)/ey;
1105         else   f= 0;
1106         for(y= 0; y <= ey; y++){
1107             x = (y*f)>>16;
1108             fr= (y*f)&0xFFFF;
1109             buf[y*stride + x  ]+= (color*(0x10000-fr))>>16;
1110             buf[y*stride + x+1]+= (color*         fr )>>16;
1111         }
1112     }
1113 }
1114
1115 /**
1116  * draws an arrow from (ex, ey) -> (sx, sy).
1117  * @param w width of the image
1118  * @param h height of the image
1119  * @param stride stride/linesize of the image
1120  * @param color color of the arrow
1121  */
1122 static void draw_arrow(uint8_t *buf, int sx, int sy, int ex, int ey, int w, int h, int stride, int color){
1123     int dx,dy;
1124
1125     sx= av_clip(sx, -100, w+100);
1126     sy= av_clip(sy, -100, h+100);
1127     ex= av_clip(ex, -100, w+100);
1128     ey= av_clip(ey, -100, h+100);
1129
1130     dx= ex - sx;
1131     dy= ey - sy;
1132
1133     if(dx*dx + dy*dy > 3*3){
1134         int rx=  dx + dy;
1135         int ry= -dx + dy;
1136         int length= ff_sqrt((rx*rx + ry*ry)<<8);
1137
1138         //FIXME subpixel accuracy
1139         rx= ROUNDED_DIV(rx*3<<4, length);
1140         ry= ROUNDED_DIV(ry*3<<4, length);
1141
1142         draw_line(buf, sx, sy, sx + rx, sy + ry, w, h, stride, color);
1143         draw_line(buf, sx, sy, sx - ry, sy + rx, w, h, stride, color);
1144     }
1145     draw_line(buf, sx, sy, ex, ey, w, h, stride, color);
1146 }
1147
/**
 * prints debuging info for the given picture.
 *
 * Depending on s->avctx->debug / debug_mv this emits a textual per-MB dump
 * (skip count, QP, MB type) to the log and/or paints motion vectors, QP and
 * MB types directly into a copy of the picture (s->visualization_buffer).
 */
void ff_print_debug_info(MpegEncContext *s, AVFrame *pict){

    if(s->avctx->hwaccel || !pict || !pict->mb_type) return;

    /* ---- textual per-macroblock dump ---- */
    if(s->avctx->debug&(FF_DEBUG_SKIP | FF_DEBUG_QP | FF_DEBUG_MB_TYPE)){
        int x,y;

        av_log(s->avctx,AV_LOG_DEBUG,"New frame, type: ");
        switch (pict->pict_type) {
            case FF_I_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"I\n"); break;
            case FF_P_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"P\n"); break;
            case FF_B_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"B\n"); break;
            case FF_S_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"S\n"); break;
            case FF_SI_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"SI\n"); break;
            case FF_SP_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"SP\n"); break;
        }
        for(y=0; y<s->mb_height; y++){
            for(x=0; x<s->mb_width; x++){
                if(s->avctx->debug&FF_DEBUG_SKIP){
                    int count= s->mbskip_table[x + y*s->mb_stride];
                    if(count>9) count=9;  /* single digit output */
                    av_log(s->avctx, AV_LOG_DEBUG, "%1d", count);
                }
                if(s->avctx->debug&FF_DEBUG_QP){
                    av_log(s->avctx, AV_LOG_DEBUG, "%2d", pict->qscale_table[x + y*s->mb_stride]);
                }
                if(s->avctx->debug&FF_DEBUG_MB_TYPE){
                    int mb_type= pict->mb_type[x + y*s->mb_stride];
                    //Type & MV direction
                    if(IS_PCM(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "P");
                    else if(IS_INTRA(mb_type) && IS_ACPRED(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "A");
                    else if(IS_INTRA4x4(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "i");
                    else if(IS_INTRA16x16(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "I");
                    else if(IS_DIRECT(mb_type) && IS_SKIP(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "d");
                    else if(IS_DIRECT(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "D");
                    else if(IS_GMC(mb_type) && IS_SKIP(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "g");
                    else if(IS_GMC(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "G");
                    else if(IS_SKIP(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "S");
                    else if(!USES_LIST(mb_type, 1))
                        av_log(s->avctx, AV_LOG_DEBUG, ">");
                    else if(!USES_LIST(mb_type, 0))
                        av_log(s->avctx, AV_LOG_DEBUG, "<");
                    else{
                        assert(USES_LIST(mb_type, 0) && USES_LIST(mb_type, 1));
                        av_log(s->avctx, AV_LOG_DEBUG, "X");
                    }

                    //segmentation
                    if(IS_8X8(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "+");
                    else if(IS_16X8(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "-");
                    else if(IS_8X16(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "|");
                    else if(IS_INTRA(mb_type) || IS_16X16(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, " ");
                    else
                        av_log(s->avctx, AV_LOG_DEBUG, "?");


                    if(IS_INTERLACED(mb_type) && s->codec_id == CODEC_ID_H264)
                        av_log(s->avctx, AV_LOG_DEBUG, "=");
                    else
                        av_log(s->avctx, AV_LOG_DEBUG, " ");
                }
//                av_log(s->avctx, AV_LOG_DEBUG, " ");
            }
            av_log(s->avctx, AV_LOG_DEBUG, "\n");
        }
    }

    /* ---- visual overlay painted into a private copy of the picture ---- */
    if((s->avctx->debug&(FF_DEBUG_VIS_QP|FF_DEBUG_VIS_MB_TYPE)) || (s->avctx->debug_mv)){
        const int shift= 1 + s->quarter_sample;
        int mb_y;
        uint8_t *ptr;
        int i;
        int h_chroma_shift, v_chroma_shift, block_height;
        const int width = s->avctx->width;
        const int height= s->avctx->height;
        const int mv_sample_log2= 4 - pict->motion_subsample_log2;
        const int mv_stride= (s->mb_width << mv_sample_log2) + (s->codec_id == CODEC_ID_H264 ? 0 : 1);
        s->low_delay=0; //needed to see the vectors without trashing the buffers

        /* draw into a copy so the reference frames stay untouched */
        avcodec_get_chroma_sub_sample(s->avctx->pix_fmt, &h_chroma_shift, &v_chroma_shift);
        for(i=0; i<3; i++){
            memcpy(s->visualization_buffer[i], pict->data[i], (i==0) ? pict->linesize[i]*height:pict->linesize[i]*height >> v_chroma_shift);
            pict->data[i]= s->visualization_buffer[i];
        }
        pict->type= FF_BUFFER_TYPE_COPY;
        ptr= pict->data[0];
        block_height = 16>>v_chroma_shift;

        for(mb_y=0; mb_y<s->mb_height; mb_y++){
            int mb_x;
            for(mb_x=0; mb_x<s->mb_width; mb_x++){
                const int mb_index= mb_x + mb_y*s->mb_stride;
                /* motion vectors, one pass per requested type
                   (P forward, B forward, B backward) */
                if((s->avctx->debug_mv) && pict->motion_val){
                  int type;
                  for(type=0; type<3; type++){
                    int direction = 0;
                    switch (type) {
                      case 0: if ((!(s->avctx->debug_mv&FF_DEBUG_VIS_MV_P_FOR)) || (pict->pict_type!=FF_P_TYPE))
                                continue;
                              direction = 0;
                              break;
                      case 1: if ((!(s->avctx->debug_mv&FF_DEBUG_VIS_MV_B_FOR)) || (pict->pict_type!=FF_B_TYPE))
                                continue;
                              direction = 0;
                              break;
                      case 2: if ((!(s->avctx->debug_mv&FF_DEBUG_VIS_MV_B_BACK)) || (pict->pict_type!=FF_B_TYPE))
                                continue;
                              direction = 1;
                              break;
                    }
                    if(!USES_LIST(pict->mb_type[mb_index], direction))
                        continue;

                    /* one arrow per partition: 4 for 8x8, 2 for 16x8/8x16,
                       1 for 16x16 */
                    if(IS_8X8(pict->mb_type[mb_index])){
                      int i;
                      for(i=0; i<4; i++){
                        int sx= mb_x*16 + 4 + 8*(i&1);
                        int sy= mb_y*16 + 4 + 8*(i>>1);
                        int xy= (mb_x*2 + (i&1) + (mb_y*2 + (i>>1))*mv_stride) << (mv_sample_log2-1);
                        int mx= (pict->motion_val[direction][xy][0]>>shift) + sx;
                        int my= (pict->motion_val[direction][xy][1]>>shift) + sy;
                        draw_arrow(ptr, sx, sy, mx, my, width, height, s->linesize, 100);
                      }
                    }else if(IS_16X8(pict->mb_type[mb_index])){
                      int i;
                      for(i=0; i<2; i++){
                        int sx=mb_x*16 + 8;
                        int sy=mb_y*16 + 4 + 8*i;
                        int xy= (mb_x*2 + (mb_y*2 + i)*mv_stride) << (mv_sample_log2-1);
                        int mx=(pict->motion_val[direction][xy][0]>>shift);
                        int my=(pict->motion_val[direction][xy][1]>>shift);

                        if(IS_INTERLACED(pict->mb_type[mb_index]))
                            my*=2;

                        draw_arrow(ptr, sx, sy, mx+sx, my+sy, width, height, s->linesize, 100);
                      }
                    }else if(IS_8X16(pict->mb_type[mb_index])){
                      int i;
                      for(i=0; i<2; i++){
                        int sx=mb_x*16 + 4 + 8*i;
                        int sy=mb_y*16 + 8;
                        int xy= (mb_x*2 + i + mb_y*2*mv_stride) << (mv_sample_log2-1);
                        int mx=(pict->motion_val[direction][xy][0]>>shift);
                        int my=(pict->motion_val[direction][xy][1]>>shift);

                        if(IS_INTERLACED(pict->mb_type[mb_index]))
                            my*=2;

                        draw_arrow(ptr, sx, sy, mx+sx, my+sy, width, height, s->linesize, 100);
                      }
                    }else{
                      int sx= mb_x*16 + 8;
                      int sy= mb_y*16 + 8;
                      int xy= (mb_x + mb_y*mv_stride) << mv_sample_log2;
                      int mx= (pict->motion_val[direction][xy][0]>>shift) + sx;
                      int my= (pict->motion_val[direction][xy][1]>>shift) + sy;
                      draw_arrow(ptr, sx, sy, mx, my, width, height, s->linesize, 100);
                    }
                  }
                }
                /* QP visualization: flood chroma with a gray proportional
                   to the macroblock's qscale */
                if((s->avctx->debug&FF_DEBUG_VIS_QP) && pict->motion_val){
                    uint64_t c= (pict->qscale_table[mb_index]*128/31) * 0x0101010101010101ULL;
                    int y;
                    for(y=0; y<block_height; y++){
                        *(uint64_t*)(pict->data[1] + 8*mb_x + (block_height*mb_y + y)*pict->linesize[1])= c;
                        *(uint64_t*)(pict->data[2] + 8*mb_x + (block_height*mb_y + y)*pict->linesize[2])= c;
                    }
                }
                /* MB-type visualization: color-code chroma by type, then
                   overlay the partition grid on luma */
                if((s->avctx->debug&FF_DEBUG_VIS_MB_TYPE) && pict->motion_val){
                    int mb_type= pict->mb_type[mb_index];
                    uint64_t u,v;
                    int y;
/* U/V from a hue angle (degrees) and saturation radius */
#define COLOR(theta, r)\
u= (int)(128 + r*cos(theta*3.141592/180));\
v= (int)(128 + r*sin(theta*3.141592/180));


                    u=v=128;
                    if(IS_PCM(mb_type)){
                        COLOR(120,48)
                    }else if((IS_INTRA(mb_type) && IS_ACPRED(mb_type)) || IS_INTRA16x16(mb_type)){
                        COLOR(30,48)
                    }else if(IS_INTRA4x4(mb_type)){
                        COLOR(90,48)
                    }else if(IS_DIRECT(mb_type) && IS_SKIP(mb_type)){
//                        COLOR(120,48)
                    }else if(IS_DIRECT(mb_type)){
                        COLOR(150,48)
                    }else if(IS_GMC(mb_type) && IS_SKIP(mb_type)){
                        COLOR(170,48)
                    }else if(IS_GMC(mb_type)){
                        COLOR(190,48)
                    }else if(IS_SKIP(mb_type)){
//                        COLOR(180,48)
                    }else if(!USES_LIST(mb_type, 1)){
                        COLOR(240,48)
                    }else if(!USES_LIST(mb_type, 0)){
                        COLOR(0,48)
                    }else{
                        assert(USES_LIST(mb_type, 0) && USES_LIST(mb_type, 1));
                        COLOR(300,48)
                    }

                    u*= 0x0101010101010101ULL;
                    v*= 0x0101010101010101ULL;
                    for(y=0; y<block_height; y++){
                        *(uint64_t*)(pict->data[1] + 8*mb_x + (block_height*mb_y + y)*pict->linesize[1])= u;
                        *(uint64_t*)(pict->data[2] + 8*mb_x + (block_height*mb_y + y)*pict->linesize[2])= v;
                    }

                    //segmentation
                    if(IS_8X8(mb_type) || IS_16X8(mb_type)){
                        *(uint64_t*)(pict->data[0] + 16*mb_x + 0 + (16*mb_y + 8)*pict->linesize[0])^= 0x8080808080808080ULL;
                        *(uint64_t*)(pict->data[0] + 16*mb_x + 8 + (16*mb_y + 8)*pict->linesize[0])^= 0x8080808080808080ULL;
                    }
                    if(IS_8X8(mb_type) || IS_8X16(mb_type)){
                        for(y=0; y<16; y++)
                            pict->data[0][16*mb_x + 8 + (16*mb_y + y)*pict->linesize[0]]^= 0x80;
                    }
                    if(IS_8X8(mb_type) && mv_sample_log2 >= 2){
                        int dm= 1 << (mv_sample_log2-2);
                        for(i=0; i<4; i++){
                            int sx= mb_x*16 + 8*(i&1);
                            int sy= mb_y*16 + 8*(i>>1);
                            int xy= (mb_x*2 + (i&1) + (mb_y*2 + (i>>1))*mv_stride) << (mv_sample_log2-1);
                            //FIXME bidir
                            int32_t *mv = (int32_t*)&pict->motion_val[0][xy];
                            /* mark sub-partitions only where the 8x8 block's
                               vectors actually differ */
                            if(mv[0] != mv[dm] || mv[dm*mv_stride] != mv[dm*(mv_stride+1)])
                                for(y=0; y<8; y++)
                                    pict->data[0][sx + 4 + (sy + y)*pict->linesize[0]]^= 0x80;
                            if(mv[0] != mv[dm*mv_stride] || mv[dm] != mv[dm*(mv_stride+1)])
                                *(uint64_t*)(pict->data[0] + sx + (sy + 4)*pict->linesize[0])^= 0x8080808080808080ULL;
                        }
                    }

                    if(IS_INTERLACED(mb_type) && s->codec_id == CODEC_ID_H264){
                        // hmm
                    }
                }
                s->mbskip_table[mb_index]=0;
            }
        }
    }
}
1409
/**
 * Half-pel motion compensation for one block in lowres mode.
 * Converts the motion vector to the lowres coordinate space, falls back to
 * ff_emulated_edge_mc() when the source block crosses the picture edge, and
 * applies the chroma-style MC function with the sub-pel fractions.
 * @return 1 if edge emulation was needed, 0 otherwise
 */
static inline int hpel_motion_lowres(MpegEncContext *s,
                                  uint8_t *dest, uint8_t *src,
                                  int field_based, int field_select,
                                  int src_x, int src_y,
                                  int width, int height, int stride,
                                  int h_edge_pos, int v_edge_pos,
                                  int w, int h, h264_chroma_mc_func *pix_op,
                                  int motion_x, int motion_y)
{
    const int lowres= s->avctx->lowres;
    const int op_index= FFMIN(lowres, 2);
    const int s_mask= (2<<lowres)-1;  /* sub-pel fraction mask at this lowres */
    int emu=0;
    int sx, sy;

    if(s->quarter_sample){
        /* qpel vectors are approximated as hpel in lowres */
        motion_x/=2;
        motion_y/=2;
    }

    /* split the vector into integer position and sub-pel fraction */
    sx= motion_x & s_mask;
    sy= motion_y & s_mask;
    src_x += motion_x >> (lowres+1);
    src_y += motion_y >> (lowres+1);

    src += src_y * stride + src_x;

    /* unsigned compare also catches negative src_x/src_y */
    if(   (unsigned)src_x > h_edge_pos                 - (!!sx) - w
       || (unsigned)src_y >(v_edge_pos >> field_based) - (!!sy) - h){
        ff_emulated_edge_mc(s->edge_emu_buffer, src, s->linesize, w+1, (h+1)<<field_based,
                            src_x, src_y<<field_based, h_edge_pos, v_edge_pos);
        src= s->edge_emu_buffer;
        emu=1;
    }

    /* rescale the fractions to the 1/8-pel range pix_op expects */
    sx= (sx << 2) >> lowres;
    sy= (sy << 2) >> lowres;
    if(field_select)
        src += s->linesize;
    pix_op[op_index](dest, src, stride, h, sx, sy);
    return emu;
}
1452
/**
 * Apply one MPEG motion vector to the three components (Y, Cb, Cr) at
 * reduced ("lowres") resolution.
 *
 * @param s            codec context (supplies lowres factor, edge positions, linesizes)
 * @param dest_y/cb/cr destination pointers for the three planes
 * @param field_based  nonzero for field-based prediction (doubles the effective linesize)
 * @param bottom_field nonzero when writing the bottom field (destination is offset by one line)
 * @param field_select which field of the reference to read from
 * @param ref_picture  array[3] of pointers to the reference planes
 * @param pix_op       chroma-style MC functions indexed by log2 block size
 * @param motion_x/y   motion vector in the stream's native (half/quarter-pel) units
 * @param h            height of the luma area to predict, in lowres pixels
 * @param mb_y         macroblock row (passed separately so field callers can halve it)
 */
static av_always_inline void mpeg_motion_lowres(MpegEncContext *s,
                               uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                               int field_based, int bottom_field, int field_select,
                               uint8_t **ref_picture, h264_chroma_mc_func *pix_op,
                               int motion_x, int motion_y, int h, int mb_y)
{
    uint8_t *ptr_y, *ptr_cb, *ptr_cr;
    int mx, my, src_x, src_y, uvsrc_x, uvsrc_y, uvlinesize, linesize, sx, sy, uvsx, uvsy;
    const int lowres= s->avctx->lowres;
    const int op_index= FFMIN(lowres, 2);       /* pix_op table only has entries for 8/4/2-wide blocks */
    const int block_s= 8>>lowres;               /* chroma / 8x8 block size at this lowres factor */
    const int s_mask= (2<<lowres)-1;            /* mask extracting the subpel fraction at this lowres */
    const int h_edge_pos = s->h_edge_pos >> lowres;
    const int v_edge_pos = s->v_edge_pos >> lowres;
    linesize   = s->current_picture.linesize[0] << field_based;
    uvlinesize = s->current_picture.linesize[1] << field_based;

    if(s->quarter_sample){ //FIXME obviously not perfect but qpel will not work in lowres anyway
        motion_x/=2;
        motion_y/=2;
    }

    if(field_based){
        /* compensate for the vertical subpel shift between fields */
        motion_y += (bottom_field - field_select)*((1<<lowres)-1);
    }

    /* split MV into integer sample position and subpel fraction */
    sx= motion_x & s_mask;
    sy= motion_y & s_mask;
    src_x = s->mb_x*2*block_s               + (motion_x >> (lowres+1));
    src_y =(   mb_y*2*block_s>>field_based) + (motion_y >> (lowres+1));

    /* derive the chroma MV; the rule depends on the bitstream format */
    if (s->out_format == FMT_H263) {
        uvsx = ((motion_x>>1) & s_mask) | (sx&1);
        uvsy = ((motion_y>>1) & s_mask) | (sy&1);
        uvsrc_x = src_x>>1;
        uvsrc_y = src_y>>1;
    }else if(s->out_format == FMT_H261){//even chroma mv's are full pel in H261
        mx = motion_x / 4;
        my = motion_y / 4;
        uvsx = (2*mx) & s_mask;
        uvsy = (2*my) & s_mask;
        uvsrc_x = s->mb_x*block_s               + (mx >> lowres);
        uvsrc_y =    mb_y*block_s               + (my >> lowres);
    } else {
        /* MPEG-1/2 style: chroma MV is luma MV halved */
        mx = motion_x / 2;
        my = motion_y / 2;
        uvsx = mx & s_mask;
        uvsy = my & s_mask;
        uvsrc_x = s->mb_x*block_s               + (mx >> (lowres+1));
        uvsrc_y =(   mb_y*block_s>>field_based) + (my >> (lowres+1));
    }

    ptr_y  = ref_picture[0] + src_y * linesize + src_x;
    ptr_cb = ref_picture[1] + uvsrc_y * uvlinesize + uvsrc_x;
    ptr_cr = ref_picture[2] + uvsrc_y * uvlinesize + uvsrc_x;

    /* if the read would cross the padded picture edge, go through the
       edge-emulation buffer instead */
    if(   (unsigned)src_x > h_edge_pos                 - (!!sx) - 2*block_s
       || (unsigned)src_y >(v_edge_pos >> field_based) - (!!sy) - h){
            ff_emulated_edge_mc(s->edge_emu_buffer, ptr_y, s->linesize, 17, 17+field_based,
                             src_x, src_y<<field_based, h_edge_pos, v_edge_pos);
            ptr_y = s->edge_emu_buffer;
            if(!CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
                uint8_t *uvbuf= s->edge_emu_buffer+18*s->linesize;
                ff_emulated_edge_mc(uvbuf  , ptr_cb, s->uvlinesize, 9, 9+field_based,
                                 uvsrc_x, uvsrc_y<<field_based, h_edge_pos>>1, v_edge_pos>>1);
                ff_emulated_edge_mc(uvbuf+16, ptr_cr, s->uvlinesize, 9, 9+field_based,
                                 uvsrc_x, uvsrc_y<<field_based, h_edge_pos>>1, v_edge_pos>>1);
                ptr_cb= uvbuf;
                ptr_cr= uvbuf+16;
            }
    }

    if(bottom_field){ //FIXME use this for field pix too instead of the obnoxious hack which changes picture.data
        dest_y += s->linesize;
        dest_cb+= s->uvlinesize;
        dest_cr+= s->uvlinesize;
    }

    if(field_select){
        /* read from the other field of the (interleaved) reference */
        ptr_y += s->linesize;
        ptr_cb+= s->uvlinesize;
        ptr_cr+= s->uvlinesize;
    }

    /* rescale subpel fraction to the 1/8-pel units the chroma MC functions expect */
    sx= (sx << 2) >> lowres;
    sy= (sy << 2) >> lowres;
    pix_op[lowres-1](dest_y, ptr_y, linesize, h, sx, sy);

    if(!CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
        uvsx= (uvsx << 2) >> lowres;
        uvsy= (uvsy << 2) >> lowres;
        pix_op[op_index](dest_cb, ptr_cb, uvlinesize, h >> s->chroma_y_shift, uvsx, uvsy);
        pix_op[op_index](dest_cr, ptr_cr, uvlinesize, h >> s->chroma_y_shift, uvsx, uvsy);
    }
    //FIXME h261 lowres loop filter
}
1550
/**
 * Chroma motion compensation for 4MV (8x8) macroblocks at lowres.
 * The four luma MVs are summed by the caller into (mx, my); here that sum is
 * rounded into one chroma MV (H.263 rounding) and applied to Cb and Cr.
 *
 * @param s          codec context
 * @param dest_cb/cr chroma destination pointers
 * @param ref_picture array[3] of reference plane pointers (indices 1/2 used)
 * @param pix_op     chroma MC functions indexed by log2 block size
 * @param mx, my     sum of the four luma motion vector components
 */
static inline void chroma_4mv_motion_lowres(MpegEncContext *s,
                                     uint8_t *dest_cb, uint8_t *dest_cr,
                                     uint8_t **ref_picture,
                                     h264_chroma_mc_func *pix_op,
                                     int mx, int my){
    const int lowres= s->avctx->lowres;
    const int op_index= FFMIN(lowres, 2);
    const int block_s= 8>>lowres;
    const int s_mask= (2<<lowres)-1;
    const int h_edge_pos = s->h_edge_pos >> (lowres+1);
    const int v_edge_pos = s->v_edge_pos >> (lowres+1);
    int emu=0, src_x, src_y, offset, sx, sy;
    uint8_t *ptr;

    if(s->quarter_sample){
        mx/=2;
        my/=2;
    }

    /* In case of 8X8, we construct a single chroma motion vector
       with a special rounding */
    mx= ff_h263_round_chroma(mx);
    my= ff_h263_round_chroma(my);

    /* split into integer position and subpel fraction */
    sx= mx & s_mask;
    sy= my & s_mask;
    src_x = s->mb_x*block_s + (mx >> (lowres+1));
    src_y = s->mb_y*block_s + (my >> (lowres+1));

    offset = src_y * s->uvlinesize + src_x;
    ptr = ref_picture[1] + offset;
    if(s->flags&CODEC_FLAG_EMU_EDGE){
        if(   (unsigned)src_x > h_edge_pos - (!!sx) - block_s
           || (unsigned)src_y > v_edge_pos - (!!sy) - block_s){
            ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, src_x, src_y, h_edge_pos, v_edge_pos);
            ptr= s->edge_emu_buffer;
            emu=1;      /* remember so the Cr plane is edge-emulated too */
        }
    }
    /* rescale subpel fraction to 1/8-pel units for the MC functions */
    sx= (sx << 2) >> lowres;
    sy= (sy << 2) >> lowres;
    pix_op[op_index](dest_cb, ptr, s->uvlinesize, block_s, sx, sy);

    ptr = ref_picture[2] + offset;
    if(emu){
        /* Cr overwrites the emu buffer previously holding Cb; Cb was already consumed */
        ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, src_x, src_y, h_edge_pos, v_edge_pos);
        ptr= s->edge_emu_buffer;
    }
    pix_op[op_index](dest_cr, ptr, s->uvlinesize, block_s, sx, sy);
}
1601
/**
 * motion compensation of a single macroblock (lowres variant)
 * @param s context
 * @param dest_y luma destination pointer
 * @param dest_cb chroma cb/u destination pointer
 * @param dest_cr chroma cr/v destination pointer
 * @param dir direction (0->forward, 1->backward)
 * @param ref_picture array[3] of pointers to the 3 planes of the reference picture
 * @param pix_op halfpel motion compensation function (average or put normally)
 * the motion vectors are taken from s->mv and the MV type from s->mv_type
 */
static inline void MPV_motion_lowres(MpegEncContext *s,
                              uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                              int dir, uint8_t **ref_picture,
                              h264_chroma_mc_func *pix_op)
{
    int mx, my;
    int mb_x, mb_y, i;
    const int lowres= s->avctx->lowres;
    const int block_s= 8>>lowres;

    mb_x = s->mb_x;
    mb_y = s->mb_y;

    switch(s->mv_type) {
    case MV_TYPE_16X16:
        /* one MV for the whole macroblock */
        mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
                    0, 0, 0,
                    ref_picture, pix_op,
                    s->mv[dir][0][0], s->mv[dir][0][1], 2*block_s, mb_y);
        break;
    case MV_TYPE_8X8:
        /* four luma MVs; chroma gets the rounded sum of them */
        mx = 0;
        my = 0;
            for(i=0;i<4;i++) {
                hpel_motion_lowres(s, dest_y + ((i & 1) + (i >> 1) * s->linesize)*block_s,
                            ref_picture[0], 0, 0,
                            (2*mb_x + (i & 1))*block_s, (2*mb_y + (i >>1))*block_s,
                            s->width, s->height, s->linesize,
                            s->h_edge_pos >> lowres, s->v_edge_pos >> lowres,
                            block_s, block_s, pix_op,
                            s->mv[dir][i][0], s->mv[dir][i][1]);

                mx += s->mv[dir][i][0];
                my += s->mv[dir][i][1];
            }

        if(!CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY))
            chroma_4mv_motion_lowres(s, dest_cb, dest_cr, ref_picture, pix_op, mx, my);
        break;
    case MV_TYPE_FIELD:
        if (s->picture_structure == PICT_FRAME) {
            /* top field */
            mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
                        1, 0, s->field_select[dir][0],
                        ref_picture, pix_op,
                        s->mv[dir][0][0], s->mv[dir][0][1], block_s, mb_y);
            /* bottom field */
            mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
                        1, 1, s->field_select[dir][1],
                        ref_picture, pix_op,
                        s->mv[dir][1][0], s->mv[dir][1][1], block_s, mb_y);
        } else {
            /* field picture: same-parity reference may live in the current picture */
            if(s->picture_structure != s->field_select[dir][0] + 1 && s->pict_type != FF_B_TYPE && !s->first_field){
                ref_picture= s->current_picture_ptr->data;
            }

            mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
                        0, 0, s->field_select[dir][0],
                        ref_picture, pix_op,
                        s->mv[dir][0][0], s->mv[dir][0][1], 2*block_s, mb_y>>1);
        }
        break;
    case MV_TYPE_16X8:
        /* two MVs, each predicting a 16x8 half of the macroblock */
        for(i=0; i<2; i++){
            uint8_t ** ref2picture;

            if(s->picture_structure == s->field_select[dir][i] + 1 || s->pict_type == FF_B_TYPE || s->first_field){
                ref2picture= ref_picture;
            }else{
                ref2picture= s->current_picture_ptr->data;
            }

            mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
                        0, 0, s->field_select[dir][i],
                        ref2picture, pix_op,
                        s->mv[dir][i][0], s->mv[dir][i][1] + 2*block_s*i, block_s, mb_y>>1);

            dest_y += 2*block_s*s->linesize;
            dest_cb+= (2*block_s>>s->chroma_y_shift)*s->uvlinesize;
            dest_cr+= (2*block_s>>s->chroma_y_shift)*s->uvlinesize;
        }
        break;
    case MV_TYPE_DMV:
        /* dual-prime: predictions from both parities are averaged */
        if(s->picture_structure == PICT_FRAME){
            for(i=0; i<2; i++){
                int j;
                for(j=0; j<2; j++){
                    mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
                                1, j, j^i,
                                ref_picture, pix_op,
                                s->mv[dir][2*i + j][0], s->mv[dir][2*i + j][1], block_s, mb_y);
                }
                pix_op = s->dsp.avg_h264_chroma_pixels_tab;
            }
        }else{
            for(i=0; i<2; i++){
                mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
                            0, 0, s->picture_structure != i+1,
                            ref_picture, pix_op,
                            s->mv[dir][2*i][0],s->mv[dir][2*i][1],2*block_s, mb_y>>1);

                // after put we make avg of the same block
                pix_op = s->dsp.avg_h264_chroma_pixels_tab;

                //opposite parity is always in the same frame if this is second field
                if(!s->first_field){
                    ref_picture = s->current_picture_ptr->data;
                }
            }
        }
    break;
    default: assert(0);
    }
}
1727
1728 /* put block[] to dest[] */
1729 static inline void put_dct(MpegEncContext *s,
1730                            DCTELEM *block, int i, uint8_t *dest, int line_size, int qscale)
1731 {
1732     s->dct_unquantize_intra(s, block, i, qscale);
1733     s->dsp.idct_put (dest, line_size, block);
1734 }
1735
1736 /* add block[] to dest[] */
1737 static inline void add_dct(MpegEncContext *s,
1738                            DCTELEM *block, int i, uint8_t *dest, int line_size)
1739 {
1740     if (s->block_last_index[i] >= 0) {
1741         s->dsp.idct_add (dest, line_size, block);
1742     }
1743 }
1744
1745 static inline void add_dequant_dct(MpegEncContext *s,
1746                            DCTELEM *block, int i, uint8_t *dest, int line_size, int qscale)
1747 {
1748     if (s->block_last_index[i] >= 0) {
1749         s->dct_unquantize_inter(s, block, i, qscale);
1750
1751         s->dsp.idct_add (dest, line_size, block);
1752     }
1753 }
1754
1755 /**
1756  * cleans dc, ac, coded_block for the current non intra MB
1757  */
1758 void ff_clean_intra_table_entries(MpegEncContext *s)
1759 {
1760     int wrap = s->b8_stride;
1761     int xy = s->block_index[0];
1762
1763     s->dc_val[0][xy           ] =
1764     s->dc_val[0][xy + 1       ] =
1765     s->dc_val[0][xy     + wrap] =
1766     s->dc_val[0][xy + 1 + wrap] = 1024;
1767     /* ac pred */
1768     memset(s->ac_val[0][xy       ], 0, 32 * sizeof(int16_t));
1769     memset(s->ac_val[0][xy + wrap], 0, 32 * sizeof(int16_t));
1770     if (s->msmpeg4_version>=3) {
1771         s->coded_block[xy           ] =
1772         s->coded_block[xy + 1       ] =
1773         s->coded_block[xy     + wrap] =
1774         s->coded_block[xy + 1 + wrap] = 0;
1775     }
1776     /* chroma */
1777     wrap = s->mb_stride;
1778     xy = s->mb_x + s->mb_y * wrap;
1779     s->dc_val[1][xy] =
1780     s->dc_val[2][xy] = 1024;
1781     /* ac pred */
1782     memset(s->ac_val[1][xy], 0, 16 * sizeof(int16_t));
1783     memset(s->ac_val[2][xy], 0, 16 * sizeof(int16_t));
1784
1785     s->mbintra_table[xy]= 0;
1786 }
1787
/* generic function called after a macroblock has been parsed by the
   decoder or after it has been encoded by the encoder.

   Important variables used:
   s->mb_intra : true if intra macroblock
   s->mv_dir   : motion vector direction
   s->mv_type  : motion vector type
   s->mv       : motion vector
   s->interlaced_dct : true if interlaced dct used (mpeg2)

   lowres_flag and is_mpeg12 are constant at every call site (see
   MPV_decode_mb), so the always-inline expansion lets the compiler strip
   the unused code paths for each specialization.
 */
static av_always_inline
void MPV_decode_mb_internal(MpegEncContext *s, DCTELEM block[12][64],
                            int lowres_flag, int is_mpeg12)
{
    const int mb_xy = s->mb_y * s->mb_stride + s->mb_x;
    if(CONFIG_MPEG_XVMC_DECODER && s->avctx->xvmc_acceleration){
        ff_xvmc_decode_mb(s);//xvmc uses pblocks
        return;
    }

    if(s->avctx->debug&FF_DEBUG_DCT_COEFF) {
       /* save DCT coefficients */
       int i,j;
       DCTELEM *dct = &s->current_picture.dct_coeff[mb_xy*64*6];
       for(i=0; i<6; i++)
           for(j=0; j<64; j++)
               *dct++ = block[i][s->dsp.idct_permutation[j]];
    }

    s->current_picture.qscale_table[mb_xy]= s->qscale;

    /* update DC predictors for P macroblocks */
    if (!s->mb_intra) {
        if (!is_mpeg12 && (s->h263_pred || s->h263_aic)) {
            if(s->mbintra_table[mb_xy])
                ff_clean_intra_table_entries(s);
        } else {
            s->last_dc[0] =
            s->last_dc[1] =
            s->last_dc[2] = 128 << s->intra_dc_precision;
        }
    }
    else if (!is_mpeg12 && (s->h263_pred || s->h263_aic))
        s->mbintra_table[mb_xy]=1;

    /* reconstruct only when the pixels are actually needed (decoding, PSNR
       computation, or encoder modes that reference reconstructed frames) */
    if ((s->flags&CODEC_FLAG_PSNR) || !(s->encoding && (s->intra_only || s->pict_type==FF_B_TYPE) && s->avctx->mb_decision != FF_MB_DECISION_RD)) { //FIXME precalc
        uint8_t *dest_y, *dest_cb, *dest_cr;
        int dct_linesize, dct_offset;
        op_pixels_func (*op_pix)[4];
        qpel_mc_func (*op_qpix)[16];
        const int linesize= s->current_picture.linesize[0]; //not s->linesize as this would be wrong for field pics
        const int uvlinesize= s->current_picture.linesize[1];
        const int readable= s->pict_type != FF_B_TYPE || s->encoding || s->avctx->draw_horiz_band || lowres_flag;
        const int block_size= lowres_flag ? 8>>s->avctx->lowres : 8;

        /* avoid copy if macroblock skipped in last frame too */
        /* skip only during decoding as we might trash the buffers during encoding a bit */
        if(!s->encoding){
            uint8_t *mbskip_ptr = &s->mbskip_table[mb_xy];
            const int age= s->current_picture.age;

            assert(age);

            if (s->mb_skipped) {
                s->mb_skipped= 0;
                assert(s->pict_type!=FF_I_TYPE);

                (*mbskip_ptr) ++; /* indicate that this time we skipped it */
                if(*mbskip_ptr >99) *mbskip_ptr= 99;

                /* if previous was skipped too, then nothing to do !  */
                if (*mbskip_ptr >= age && s->current_picture.reference){
                    return;
                }
            } else if(!s->current_picture.reference){
                (*mbskip_ptr) ++; /* increase counter so the age can be compared cleanly */
                if(*mbskip_ptr >99) *mbskip_ptr= 99;
            } else{
                *mbskip_ptr = 0; /* not skipped */
            }
        }

        /* interlaced DCT interleaves the two fields: double stride, rows offset by one line */
        dct_linesize = linesize << s->interlaced_dct;
        dct_offset =(s->interlaced_dct)? linesize : linesize*block_size;

        if(readable){
            dest_y=  s->dest[0];
            dest_cb= s->dest[1];
            dest_cr= s->dest[2];
        }else{
            /* B-frame pixels may still be needed as-is; reconstruct into scratch */
            dest_y = s->b_scratchpad;
            dest_cb= s->b_scratchpad+16*linesize;
            dest_cr= s->b_scratchpad+32*linesize;
        }

        if (!s->mb_intra) {
            /* motion handling */
            /* decoding or more than one mb_type (MC was already done otherwise) */
            if(!s->encoding){
                if(lowres_flag){
                    h264_chroma_mc_func *op_pix = s->dsp.put_h264_chroma_pixels_tab;

                    if (s->mv_dir & MV_DIR_FORWARD) {
                        MPV_motion_lowres(s, dest_y, dest_cb, dest_cr, 0, s->last_picture.data, op_pix);
                        op_pix = s->dsp.avg_h264_chroma_pixels_tab;
                    }
                    if (s->mv_dir & MV_DIR_BACKWARD) {
                        MPV_motion_lowres(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.data, op_pix);
                    }
                }else{
                    op_qpix= s->me.qpel_put;
                    if ((!s->no_rounding) || s->pict_type==FF_B_TYPE){
                        op_pix = s->dsp.put_pixels_tab;
                    }else{
                        op_pix = s->dsp.put_no_rnd_pixels_tab;
                    }
                    if (s->mv_dir & MV_DIR_FORWARD) {
                        MPV_motion(s, dest_y, dest_cb, dest_cr, 0, s->last_picture.data, op_pix, op_qpix);
                        op_pix = s->dsp.avg_pixels_tab;
                        op_qpix= s->me.qpel_avg;
                    }
                    if (s->mv_dir & MV_DIR_BACKWARD) {
                        MPV_motion(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.data, op_pix, op_qpix);
                    }
                }
            }

            /* skip dequant / idct if we are really late ;) */
            if(s->hurry_up>1) goto skip_idct;
            if(s->avctx->skip_idct){
                if(  (s->avctx->skip_idct >= AVDISCARD_NONREF && s->pict_type == FF_B_TYPE)
                   ||(s->avctx->skip_idct >= AVDISCARD_NONKEY && s->pict_type != FF_I_TYPE)
                   || s->avctx->skip_idct >= AVDISCARD_ALL)
                    goto skip_idct;
            }

            /* add dct residue */
            if(s->encoding || !(   s->h263_msmpeg4 || s->codec_id==CODEC_ID_MPEG1VIDEO || s->codec_id==CODEC_ID_MPEG2VIDEO
                                || (s->codec_id==CODEC_ID_MPEG4 && !s->mpeg_quant))){
                add_dequant_dct(s, block[0], 0, dest_y                          , dct_linesize, s->qscale);
                add_dequant_dct(s, block[1], 1, dest_y              + block_size, dct_linesize, s->qscale);
                add_dequant_dct(s, block[2], 2, dest_y + dct_offset             , dct_linesize, s->qscale);
                add_dequant_dct(s, block[3], 3, dest_y + dct_offset + block_size, dct_linesize, s->qscale);

                if(!CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
                    if (s->chroma_y_shift){
                        add_dequant_dct(s, block[4], 4, dest_cb, uvlinesize, s->chroma_qscale);
                        add_dequant_dct(s, block[5], 5, dest_cr, uvlinesize, s->chroma_qscale);
                    }else{
                        dct_linesize >>= 1;
                        dct_offset >>=1;
                        add_dequant_dct(s, block[4], 4, dest_cb,              dct_linesize, s->chroma_qscale);
                        add_dequant_dct(s, block[5], 5, dest_cr,              dct_linesize, s->chroma_qscale);
                        add_dequant_dct(s, block[6], 6, dest_cb + dct_offset, dct_linesize, s->chroma_qscale);
                        add_dequant_dct(s, block[7], 7, dest_cr + dct_offset, dct_linesize, s->chroma_qscale);
                    }
                }
            } else if(is_mpeg12 || (s->codec_id != CODEC_ID_WMV2)){
                /* blocks were already dequantized during parsing */
                add_dct(s, block[0], 0, dest_y                          , dct_linesize);
                add_dct(s, block[1], 1, dest_y              + block_size, dct_linesize);
                add_dct(s, block[2], 2, dest_y + dct_offset             , dct_linesize);
                add_dct(s, block[3], 3, dest_y + dct_offset + block_size, dct_linesize);

                if(!CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
                    if(s->chroma_y_shift){//Chroma420
                        add_dct(s, block[4], 4, dest_cb, uvlinesize);
                        add_dct(s, block[5], 5, dest_cr, uvlinesize);
                    }else{
                        //chroma422
                        dct_linesize = uvlinesize << s->interlaced_dct;
                        dct_offset =(s->interlaced_dct)? uvlinesize : uvlinesize*8;

                        add_dct(s, block[4], 4, dest_cb, dct_linesize);
                        add_dct(s, block[5], 5, dest_cr, dct_linesize);
                        add_dct(s, block[6], 6, dest_cb+dct_offset, dct_linesize);
                        add_dct(s, block[7], 7, dest_cr+dct_offset, dct_linesize);
                        if(!s->chroma_x_shift){//Chroma444
                            add_dct(s, block[8], 8, dest_cb+8, dct_linesize);
                            add_dct(s, block[9], 9, dest_cr+8, dct_linesize);
                            add_dct(s, block[10], 10, dest_cb+8+dct_offset, dct_linesize);
                            add_dct(s, block[11], 11, dest_cr+8+dct_offset, dct_linesize);
                        }
                    }
                }//fi gray
            }
            else if (CONFIG_WMV2_DECODER || CONFIG_WMV2_ENCODER) {
                ff_wmv2_add_mb(s, block, dest_y, dest_cb, dest_cr);
            }
        } else {
            /* dct only in intra block */
            if(s->encoding || !(s->codec_id==CODEC_ID_MPEG1VIDEO || s->codec_id==CODEC_ID_MPEG2VIDEO)){
                put_dct(s, block[0], 0, dest_y                          , dct_linesize, s->qscale);
                put_dct(s, block[1], 1, dest_y              + block_size, dct_linesize, s->qscale);
                put_dct(s, block[2], 2, dest_y + dct_offset             , dct_linesize, s->qscale);
                put_dct(s, block[3], 3, dest_y + dct_offset + block_size, dct_linesize, s->qscale);

                if(!CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
                    if(s->chroma_y_shift){
                        put_dct(s, block[4], 4, dest_cb, uvlinesize, s->chroma_qscale);
                        put_dct(s, block[5], 5, dest_cr, uvlinesize, s->chroma_qscale);
                    }else{
                        dct_offset >>=1;
                        dct_linesize >>=1;
                        put_dct(s, block[4], 4, dest_cb,              dct_linesize, s->chroma_qscale);
                        put_dct(s, block[5], 5, dest_cr,              dct_linesize, s->chroma_qscale);
                        put_dct(s, block[6], 6, dest_cb + dct_offset, dct_linesize, s->chroma_qscale);
                        put_dct(s, block[7], 7, dest_cr + dct_offset, dct_linesize, s->chroma_qscale);
                    }
                }
            }else{
                /* MPEG-1/2 intra blocks are already dequantized; only IDCT remains */
                s->dsp.idct_put(dest_y                          , dct_linesize, block[0]);
                s->dsp.idct_put(dest_y              + block_size, dct_linesize, block[1]);
                s->dsp.idct_put(dest_y + dct_offset             , dct_linesize, block[2]);
                s->dsp.idct_put(dest_y + dct_offset + block_size, dct_linesize, block[3]);

                if(!CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
                    if(s->chroma_y_shift){
                        s->dsp.idct_put(dest_cb, uvlinesize, block[4]);
                        s->dsp.idct_put(dest_cr, uvlinesize, block[5]);
                    }else{

                        dct_linesize = uvlinesize << s->interlaced_dct;
                        dct_offset =(s->interlaced_dct)? uvlinesize : uvlinesize*8;

                        s->dsp.idct_put(dest_cb,              dct_linesize, block[4]);
                        s->dsp.idct_put(dest_cr,              dct_linesize, block[5]);
                        s->dsp.idct_put(dest_cb + dct_offset, dct_linesize, block[6]);
                        s->dsp.idct_put(dest_cr + dct_offset, dct_linesize, block[7]);
                        if(!s->chroma_x_shift){//Chroma444
                            s->dsp.idct_put(dest_cb + 8,              dct_linesize, block[8]);
                            s->dsp.idct_put(dest_cr + 8,              dct_linesize, block[9]);
                            s->dsp.idct_put(dest_cb + 8 + dct_offset, dct_linesize, block[10]);
                            s->dsp.idct_put(dest_cr + 8 + dct_offset, dct_linesize, block[11]);
                        }
                    }
                }//gray
            }
        }
skip_idct:
        if(!readable){
            /* reconstruction went to scratch; copy it to the visible buffers */
            s->dsp.put_pixels_tab[0][0](s->dest[0], dest_y ,   linesize,16);
            s->dsp.put_pixels_tab[s->chroma_x_shift][0](s->dest[1], dest_cb, uvlinesize,16 >> s->chroma_y_shift);
            s->dsp.put_pixels_tab[s->chroma_x_shift][0](s->dest[2], dest_cr, uvlinesize,16 >> s->chroma_y_shift);
        }
    }
}
2034
2035 void MPV_decode_mb(MpegEncContext *s, DCTELEM block[12][64]){
2036 #if !CONFIG_SMALL
2037     if(s->out_format == FMT_MPEG1) {
2038         if(s->avctx->lowres) MPV_decode_mb_internal(s, block, 1, 1);
2039         else                 MPV_decode_mb_internal(s, block, 0, 1);
2040     } else
2041 #endif
2042     if(s->avctx->lowres) MPV_decode_mb_internal(s, block, 1, 0);
2043     else                  MPV_decode_mb_internal(s, block, 0, 0);
2044 }
2045
2046 /**
2047  *
2048  * @param h is the normal height, this will be reduced automatically if needed for the last row
2049  */
2050 void ff_draw_horiz_band(MpegEncContext *s, int y, int h){
2051     if (s->avctx->draw_horiz_band) {
2052         AVFrame *src;
2053         const int field_pic= s->picture_structure != PICT_FRAME;
2054         int offset[4];
2055
2056         h= FFMIN(h, (s->avctx->height>>field_pic) - y);
2057
2058         if(field_pic && !(s->avctx->slice_flags&SLICE_FLAG_ALLOW_FIELD)){
2059             h <<= 1;
2060             y <<= 1;
2061             if(s->first_field) return;
2062         }
2063
2064         if(s->pict_type==FF_B_TYPE || s->low_delay || (s->avctx->slice_flags&SLICE_FLAG_CODED_ORDER))
2065             src= (AVFrame*)s->current_picture_ptr;
2066         else if(s->last_picture_ptr)
2067             src= (AVFrame*)s->last_picture_ptr;
2068         else
2069             return;
2070
2071         if(s->pict_type==FF_B_TYPE && s->picture_structure == PICT_FRAME && s->out_format != FMT_H264){
2072             offset[0]=
2073             offset[1]=
2074             offset[2]=
2075             offset[3]= 0;
2076         }else{
2077             offset[0]= y * s->linesize;
2078             offset[1]=
2079             offset[2]= (y >> s->chroma_y_shift) * s->uvlinesize;
2080             offset[3]= 0;
2081         }
2082
2083         emms_c();
2084
2085         s->avctx->draw_horiz_band(s->avctx, src, offset,
2086                                   y, s->picture_structure, h);
2087     }
2088 }
2089
2090 void ff_init_block_index(MpegEncContext *s){ //FIXME maybe rename
2091     const int linesize= s->current_picture.linesize[0]; //not s->linesize as this would be wrong for field pics
2092     const int uvlinesize= s->current_picture.linesize[1];
2093     const int mb_size= 4 - s->avctx->lowres;
2094
2095     s->block_index[0]= s->b8_stride*(s->mb_y*2    ) - 2 + s->mb_x*2;
2096     s->block_index[1]= s->b8_stride*(s->mb_y*2    ) - 1 + s->mb_x*2;
2097     s->block_index[2]= s->b8_stride*(s->mb_y*2 + 1) - 2 + s->mb_x*2;
2098     s->block_index[3]= s->b8_stride*(s->mb_y*2 + 1) - 1 + s->mb_x*2;
2099     s->block_index[4]= s->mb_stride*(s->mb_y + 1)                + s->b8_stride*s->mb_height*2 + s->mb_x - 1;
2100     s->block_index[5]= s->mb_stride*(s->mb_y + s->mb_height + 2) + s->b8_stride*s->mb_height*2 + s->mb_x - 1;
2101     //block_index is not used by mpeg2, so it is not affected by chroma_format
2102
2103     s->dest[0] = s->current_picture.data[0] + ((s->mb_x - 1) << mb_size);
2104     s->dest[1] = s->current_picture.data[1] + ((s->mb_x - 1) << (mb_size - s->chroma_x_shift));
2105     s->dest[2] = s->current_picture.data[2] + ((s->mb_x - 1) << (mb_size - s->chroma_x_shift));
2106
2107     if(!(s->pict_type==FF_B_TYPE && s->avctx->draw_horiz_band && s->picture_structure==PICT_FRAME))
2108     {
2109         if(s->picture_structure==PICT_FRAME){
2110         s->dest[0] += s->mb_y *   linesize << mb_size;
2111         s->dest[1] += s->mb_y * uvlinesize << (mb_size - s->chroma_y_shift);
2112         s->dest[2] += s->mb_y * uvlinesize << (mb_size - s->chroma_y_shift);
2113         }else{
2114             s->dest[0] += (s->mb_y>>1) *   linesize << mb_size;
2115             s->dest[1] += (s->mb_y>>1) * uvlinesize << (mb_size - s->chroma_y_shift);
2116             s->dest[2] += (s->mb_y>>1) * uvlinesize << (mb_size - s->chroma_y_shift);
2117             assert((s->mb_y&1) == (s->picture_structure == PICT_BOTTOM_FIELD));
2118         }
2119     }
2120 }
2121
2122 void ff_mpeg_flush(AVCodecContext *avctx){
2123     int i;
2124     MpegEncContext *s = avctx->priv_data;
2125
2126     if(s==NULL || s->picture==NULL)
2127         return;
2128
2129     for(i=0; i<MAX_PICTURE_COUNT; i++){
2130        if(s->picture[i].data[0] && (   s->picture[i].type == FF_BUFFER_TYPE_INTERNAL
2131                                     || s->picture[i].type == FF_BUFFER_TYPE_USER))
2132         free_frame_buffer(s, &s->picture[i]);
2133     }
2134     s->current_picture_ptr = s->last_picture_ptr = s->next_picture_ptr = NULL;
2135
2136     s->mb_x= s->mb_y= 0;
2137     s->closed_gop= 0;
2138
2139     s->parse_context.state= -1;
2140     s->parse_context.frame_start_found= 0;
2141     s->parse_context.overread= 0;
2142     s->parse_context.overread_index= 0;
2143     s->parse_context.index= 0;
2144     s->parse_context.last_index= 0;
2145     s->bitstream_buffer_size=0;
2146     s->pp_time=0;
2147 }
2148
2149 static void dct_unquantize_mpeg1_intra_c(MpegEncContext *s,
2150                                    DCTELEM *block, int n, int qscale)
2151 {
2152     int i, level, nCoeffs;
2153     const uint16_t *quant_matrix;
2154
2155     nCoeffs= s->block_last_index[n];
2156
2157     if (n < 4)
2158         block[0] = block[0] * s->y_dc_scale;
2159     else
2160         block[0] = block[0] * s->c_dc_scale;
2161     /* XXX: only mpeg1 */
2162     quant_matrix = s->intra_matrix;
2163     for(i=1;i<=nCoeffs;i++) {
2164         int j= s->intra_scantable.permutated[i];
2165         level = block[j];
2166         if (level) {
2167             if (level < 0) {
2168                 level = -level;
2169                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
2170                 level = (level - 1) | 1;
2171                 level = -level;
2172             } else {
2173                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
2174                 level = (level - 1) | 1;
2175             }
2176             block[j] = level;
2177         }
2178     }
2179 }
2180
2181 static void dct_unquantize_mpeg1_inter_c(MpegEncContext *s,
2182                                    DCTELEM *block, int n, int qscale)
2183 {
2184     int i, level, nCoeffs;
2185     const uint16_t *quant_matrix;
2186
2187     nCoeffs= s->block_last_index[n];
2188
2189     quant_matrix = s->inter_matrix;
2190     for(i=0; i<=nCoeffs; i++) {
2191         int j= s->intra_scantable.permutated[i];
2192         level = block[j];
2193         if (level) {
2194             if (level < 0) {
2195                 level = -level;
2196                 level = (((level << 1) + 1) * qscale *
2197                          ((int) (quant_matrix[j]))) >> 4;
2198                 level = (level - 1) | 1;
2199                 level = -level;
2200             } else {
2201                 level = (((level << 1) + 1) * qscale *
2202                          ((int) (quant_matrix[j]))) >> 4;
2203                 level = (level - 1) | 1;
2204             }
2205             block[j] = level;
2206         }
2207     }
2208 }
2209
2210 static void dct_unquantize_mpeg2_intra_c(MpegEncContext *s,
2211                                    DCTELEM *block, int n, int qscale)
2212 {
2213     int i, level, nCoeffs;
2214     const uint16_t *quant_matrix;
2215
2216     if(s->alternate_scan) nCoeffs= 63;
2217     else nCoeffs= s->block_last_index[n];
2218
2219     if (n < 4)
2220         block[0] = block[0] * s->y_dc_scale;
2221     else
2222         block[0] = block[0] * s->c_dc_scale;
2223     quant_matrix = s->intra_matrix;
2224     for(i=1;i<=nCoeffs;i++) {
2225         int j= s->intra_scantable.permutated[i];
2226         level = block[j];
2227         if (level) {
2228             if (level < 0) {
2229                 level = -level;
2230                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
2231                 level = -level;
2232             } else {
2233                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
2234             }
2235             block[j] = level;
2236         }
2237     }
2238 }
2239
2240 static void dct_unquantize_mpeg2_intra_bitexact(MpegEncContext *s,
2241                                    DCTELEM *block, int n, int qscale)
2242 {
2243     int i, level, nCoeffs;
2244     const uint16_t *quant_matrix;
2245     int sum=-1;
2246
2247     if(s->alternate_scan) nCoeffs= 63;
2248     else nCoeffs= s->block_last_index[n];
2249
2250     if (n < 4)
2251         block[0] = block[0] * s->y_dc_scale;
2252     else
2253         block[0] = block[0] * s->c_dc_scale;
2254     quant_matrix = s->intra_matrix;
2255     for(i=1;i<=nCoeffs;i++) {
2256         int j= s->intra_scantable.permutated[i];
2257         level = block[j];
2258         if (level) {
2259             if (level < 0) {
2260                 level = -level;
2261                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
2262                 level = -level;
2263             } else {
2264                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
2265             }
2266             block[j] = level;
2267             sum+=level;
2268         }
2269     }
2270     block[63]^=sum&1;
2271 }
2272
2273 static void dct_unquantize_mpeg2_inter_c(MpegEncContext *s,
2274                                    DCTELEM *block, int n, int qscale)
2275 {
2276     int i, level, nCoeffs;
2277     const uint16_t *quant_matrix;
2278     int sum=-1;
2279
2280     if(s->alternate_scan) nCoeffs= 63;
2281     else nCoeffs= s->block_last_index[n];
2282
2283     quant_matrix = s->inter_matrix;
2284     for(i=0; i<=nCoeffs; i++) {
2285         int j= s->intra_scantable.permutated[i];
2286         level = block[j];
2287         if (level) {
2288             if (level < 0) {
2289                 level = -level;
2290                 level = (((level << 1) + 1) * qscale *
2291                          ((int) (quant_matrix[j]))) >> 4;
2292                 level = -level;
2293             } else {
2294                 level = (((level << 1) + 1) * qscale *
2295                          ((int) (quant_matrix[j]))) >> 4;
2296             }
2297             block[j] = level;
2298             sum+=level;
2299         }
2300     }
2301     block[63]^=sum&1;
2302 }
2303
2304 static void dct_unquantize_h263_intra_c(MpegEncContext *s,
2305                                   DCTELEM *block, int n, int qscale)
2306 {
2307     int i, level, qmul, qadd;
2308     int nCoeffs;
2309
2310     assert(s->block_last_index[n]>=0);
2311
2312     qmul = qscale << 1;
2313
2314     if (!s->h263_aic) {
2315         if (n < 4)
2316             block[0] = block[0] * s->y_dc_scale;
2317         else
2318             block[0] = block[0] * s->c_dc_scale;
2319         qadd = (qscale - 1) | 1;
2320     }else{
2321         qadd = 0;
2322     }
2323     if(s->ac_pred)
2324         nCoeffs=63;
2325     else
2326         nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
2327
2328     for(i=1; i<=nCoeffs; i++) {
2329         level = block[i];
2330         if (level) {
2331             if (level < 0) {
2332                 level = level * qmul - qadd;
2333             } else {
2334                 level = level * qmul + qadd;
2335             }
2336             block[i] = level;
2337         }
2338     }
2339 }
2340
2341 static void dct_unquantize_h263_inter_c(MpegEncContext *s,
2342                                   DCTELEM *block, int n, int qscale)
2343 {
2344     int i, level, qmul, qadd;
2345     int nCoeffs;
2346
2347     assert(s->block_last_index[n]>=0);
2348
2349     qadd = (qscale - 1) | 1;
2350     qmul = qscale << 1;
2351
2352     nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
2353
2354     for(i=0; i<=nCoeffs; i++) {
2355         level = block[i];
2356         if (level) {
2357             if (level < 0) {
2358                 level = level * qmul - qadd;
2359             } else {
2360                 level = level * qmul + qadd;
2361             }
2362             block[i] = level;
2363         }
2364     }
2365 }
2366
2367 /**
2368  * set qscale and update qscale dependent variables.
2369  */
2370 void ff_set_qscale(MpegEncContext * s, int qscale)
2371 {
2372     if (qscale < 1)
2373         qscale = 1;
2374     else if (qscale > 31)
2375         qscale = 31;
2376
2377     s->qscale = qscale;
2378     s->chroma_qscale= s->chroma_qscale_table[qscale];
2379
2380     s->y_dc_scale= s->y_dc_scale_table[ qscale ];
2381     s->c_dc_scale= s->c_dc_scale_table[ s->chroma_qscale ];
2382 }