/*
 * Source: FFmpeg, libavcodec/mpegvideo.c (retrieved via git web view).
 * Commit context: "Replace HAVE_BFIN with ARCH_BFIN. Blackfin is a CPU
 * architecture, not a [feature test]."
 */
1 /*
2  * The simplest mpeg encoder (well, it was the simplest!)
3  * Copyright (c) 2000,2001 Fabrice Bellard.
4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5  *
6  * 4MV & hq & B-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
7  *
8  * This file is part of FFmpeg.
9  *
10  * FFmpeg is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * FFmpeg is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with FFmpeg; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23  */
24
25 /**
26  * @file mpegvideo.c
27  * The simplest mpeg encoder (well, it was the simplest!).
28  */
29
30 #include "avcodec.h"
31 #include "dsputil.h"
32 #include "mpegvideo.h"
33 #include "mpegvideo_common.h"
34 #include "mjpegenc.h"
35 #include "msmpeg4.h"
36 #include "faandct.h"
37 #include <limits.h>
38
39 //#undef NDEBUG
40 //#include <assert.h>
41
/* Forward declarations for the per-standard C dequantizers; DCT_common_init()
 * installs these into the MpegEncContext (arch-specific code may override
 * them afterwards). Each operates in place on one coded block. */
static void dct_unquantize_mpeg1_intra_c(MpegEncContext *s,
                                   DCTELEM *block, int n, int qscale);
static void dct_unquantize_mpeg1_inter_c(MpegEncContext *s,
                                   DCTELEM *block, int n, int qscale);
static void dct_unquantize_mpeg2_intra_c(MpegEncContext *s,
                                   DCTELEM *block, int n, int qscale);
static void dct_unquantize_mpeg2_intra_bitexact(MpegEncContext *s,
                                   DCTELEM *block, int n, int qscale);
static void dct_unquantize_mpeg2_inter_c(MpegEncContext *s,
                                   DCTELEM *block, int n, int qscale);
static void dct_unquantize_h263_intra_c(MpegEncContext *s,
                                  DCTELEM *block, int n, int qscale);
static void dct_unquantize_h263_inter_c(MpegEncContext *s,
                                  DCTELEM *block, int n, int qscale);
static void draw_edges_c(uint8_t *buf, int wrap, int width, int height, int w);
57
#ifdef HAVE_XVMC
/* XvMC hardware-acceleration hooks, implemented in a separate file. */
extern int  XVMC_field_start(MpegEncContext*s, AVCodecContext *avctx);
extern void XVMC_field_end(MpegEncContext *s);
extern void XVMC_decode_mb(MpegEncContext *s);
#endif

/* Global function pointer; defaults to the C implementation so that
 * arch-specific init code can replace it with an optimized version. */
void (*draw_edges)(uint8_t *buf, int wrap, int width, int height, int w)= draw_edges_c;
65
66
67 /* enable all paranoid tests for rounding, overflows, etc... */
68 //#define PARANOID
69
70 //#define DEBUG
71
72
/* Default chroma qscale table: the identity mapping, i.e. chroma uses the
 * same quantizer scale as luma unless a codec installs its own table. */
static const uint8_t ff_default_chroma_qscale_table[32]={
//  0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
    0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
};
77
78 void ff_init_scantable(uint8_t *permutation, ScanTable *st, const uint8_t *src_scantable){
79     int i;
80     int end;
81
82     st->scantable= src_scantable;
83
84     for(i=0; i<64; i++){
85         int j;
86         j = src_scantable[i];
87         st->permutated[i] = permutation[j];
88 #ifdef ARCH_POWERPC
89         st->inverse[j] = i;
90 #endif
91     }
92
93     end=-1;
94     for(i=0; i<64; i++){
95         int j;
96         j = st->permutated[i];
97         if(j>end) end=j;
98         st->raster_end[i]= end;
99     }
100 }
101
/**
 * Scans [p, end) for an MPEG-style start code (00 00 01 xx), carrying partial
 * match state across calls in *state (the last 4 bytes seen, big-endian).
 * Returns a pointer just past the start code, or end if none was found;
 * *state is updated so the search can resume with the next buffer.
 */
const uint8_t *ff_find_start_code(const uint8_t * restrict p, const uint8_t *end, uint32_t * restrict state){
    int i;

    assert(p<=end);
    if(p>=end)
        return end;

    /* First feed up to 3 bytes through *state to catch a start code that
     * straddles the previous buffer boundary. */
    for(i=0; i<3; i++){
        uint32_t tmp= *state << 8;
        *state= tmp + *(p++);
        if(tmp == 0x100 || p==end)
            return p;
    }

    /* Fast scan: examine p[-1] to skip ahead by up to 3 bytes at a time;
     * only byte patterns that could complete 00 00 01 force small steps. */
    while(p<end){
        if     (p[-1] > 1      ) p+= 3;
        else if(p[-2]          ) p+= 2;
        else if(p[-3]|(p[-1]-1)) p++;
        else{
            /* p[-3..-1] == 00 00 01: start code found */
            p++;
            break;
        }
    }

    /* Reload *state from the last 4 bytes before the return position.
     * NOTE(review): assumes the buffer has at least 4 readable bytes before
     * FFMIN(p, end) — holds once the loop above has consumed >= 4 bytes. */
    p= FFMIN(p, end)-4;
    *state= AV_RB32(p);

    return p+4;
}
131
/* init common dct for both encoder and decoder */
static int DCT_common_init(MpegEncContext *s)
{
    /* Install the portable C dequantizers first; the arch-specific init
     * below may replace any of them with optimized versions. */
    s->dct_unquantize_h263_intra = dct_unquantize_h263_intra_c;
    s->dct_unquantize_h263_inter = dct_unquantize_h263_inter_c;
    s->dct_unquantize_mpeg1_intra = dct_unquantize_mpeg1_intra_c;
    s->dct_unquantize_mpeg1_inter = dct_unquantize_mpeg1_inter_c;
    s->dct_unquantize_mpeg2_intra = dct_unquantize_mpeg2_intra_c;
    if(s->flags & CODEC_FLAG_BITEXACT)
        s->dct_unquantize_mpeg2_intra = dct_unquantize_mpeg2_intra_bitexact;
    s->dct_unquantize_mpeg2_inter = dct_unquantize_mpeg2_inter_c;

    /* Exactly one arch-specific initializer is compiled in. */
#if defined(HAVE_MMX)
    MPV_common_init_mmx(s);
#elif defined(ARCH_ALPHA)
    MPV_common_init_axp(s);
#elif defined(HAVE_MLIB)
    MPV_common_init_mlib(s);
#elif defined(HAVE_MMI)
    MPV_common_init_mmi(s);
#elif defined(ARCH_ARMV4L)
    MPV_common_init_armv4l(s);
#elif defined(ARCH_POWERPC)
    MPV_common_init_ppc(s);
#elif defined(ARCH_BFIN)
    MPV_common_init_bfin(s);
#endif

    /* load & permutate scantables
       note: only wmv uses different ones
    */
    if(s->alternate_scan){
        ff_init_scantable(s->dsp.idct_permutation, &s->inter_scantable  , ff_alternate_vertical_scan);
        ff_init_scantable(s->dsp.idct_permutation, &s->intra_scantable  , ff_alternate_vertical_scan);
    }else{
        ff_init_scantable(s->dsp.idct_permutation, &s->inter_scantable  , ff_zigzag_direct);
        ff_init_scantable(s->dsp.idct_permutation, &s->intra_scantable  , ff_zigzag_direct);
    }
    ff_init_scantable(s->dsp.idct_permutation, &s->intra_h_scantable, ff_alternate_horizontal_scan);
    ff_init_scantable(s->dsp.idct_permutation, &s->intra_v_scantable, ff_alternate_vertical_scan);

    return 0;
}
175
176 void copy_picture(Picture *dst, Picture *src){
177     *dst = *src;
178     dst->type= FF_BUFFER_TYPE_COPY;
179 }
180
/**
 * allocates a Picture
 * The pixels are allocated/set by calling get_buffer() if shared=0
 * @param shared  nonzero if pic->data[] is externally owned (no get_buffer())
 * @return 0 on success, -1 on failure (partially allocated side tables are
 *         released later by free_picture() / MPV_common_end())
 */
int alloc_picture(MpegEncContext *s, Picture *pic, int shared){
    const int big_mb_num= s->mb_stride*(s->mb_height+1) + 1; //the +1 is needed so memset(,,stride*height) does not sig11
    const int mb_array_size= s->mb_stride*s->mb_height;
    const int b8_array_size= s->b8_stride*s->mb_height*2;
    const int b4_array_size= s->b4_stride*s->mb_height*4;
    int i;
    int r= -1;  // get_buffer() result; also tells the fail path whether to release

    if(shared){
        assert(pic->data[0]);
        assert(pic->type == 0 || pic->type == FF_BUFFER_TYPE_SHARED);
        pic->type= FF_BUFFER_TYPE_SHARED;
    }else{
        assert(!pic->data[0]);

        r= s->avctx->get_buffer(s->avctx, (AVFrame*)pic);

        if(r<0 || !pic->age || !pic->type || !pic->data[0]){
            av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed (%d %d %d %p)\n", r, pic->age, pic->type, pic->data[0]);
            return -1;
        }

        /* Strides must stay stable across frames once established. */
        if(s->linesize && (s->linesize != pic->linesize[0] || s->uvlinesize != pic->linesize[1])){
            av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed (stride changed)\n");
            s->avctx->release_buffer(s->avctx, (AVFrame*)pic);
            return -1;
        }

        if(pic->linesize[1] != pic->linesize[2]){
            av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed (uv stride mismatch)\n");
            s->avctx->release_buffer(s->avctx, (AVFrame*)pic);
            return -1;
        }

        s->linesize  = pic->linesize[0];
        s->uvlinesize= pic->linesize[1];
    }

    /* Side tables are allocated once per Picture and kept across reuse;
     * qscale_table doubles as the "already allocated" flag. */
    if(pic->qscale_table==NULL){
        if (s->encoding) {
            CHECKED_ALLOCZ(pic->mb_var   , mb_array_size * sizeof(int16_t))
            CHECKED_ALLOCZ(pic->mc_mb_var, mb_array_size * sizeof(int16_t))
            CHECKED_ALLOCZ(pic->mb_mean  , mb_array_size * sizeof(int8_t))
        }

        CHECKED_ALLOCZ(pic->mbskip_table , mb_array_size * sizeof(uint8_t)+2) //the +2 is for the slice end check
        CHECKED_ALLOCZ(pic->qscale_table , mb_array_size * sizeof(uint8_t))
        CHECKED_ALLOCZ(pic->mb_type_base , big_mb_num    * sizeof(uint32_t))
        pic->mb_type= pic->mb_type_base + s->mb_stride+1;
        if(s->out_format == FMT_H264){
            /* H.264 stores motion vectors at 4x4 granularity */
            for(i=0; i<2; i++){
                CHECKED_ALLOCZ(pic->motion_val_base[i], 2 * (b4_array_size+4)  * sizeof(int16_t))
                pic->motion_val[i]= pic->motion_val_base[i]+4;
                CHECKED_ALLOCZ(pic->ref_index[i], b8_array_size * sizeof(uint8_t))
            }
            pic->motion_subsample_log2= 2;
        }else if(s->out_format == FMT_H263 || s->encoding || (s->avctx->debug&FF_DEBUG_MV) || (s->avctx->debug_mv)){
            /* 8x8 granularity for the other codecs / debug visualization */
            for(i=0; i<2; i++){
                CHECKED_ALLOCZ(pic->motion_val_base[i], 2 * (b8_array_size+4) * sizeof(int16_t))
                pic->motion_val[i]= pic->motion_val_base[i]+4;
                CHECKED_ALLOCZ(pic->ref_index[i], b8_array_size * sizeof(uint8_t))
            }
            pic->motion_subsample_log2= 3;
        }
        if(s->avctx->debug&FF_DEBUG_DCT_COEFF) {
            CHECKED_ALLOCZ(pic->dct_coeff, 64 * mb_array_size * sizeof(DCTELEM)*6)
        }
        pic->qstride= s->mb_stride;
        CHECKED_ALLOCZ(pic->pan_scan , 1 * sizeof(AVPanScan))
    }

    /* It might be nicer if the application would keep track of these
     * but it would require an API change. */
    memmove(s->prev_pict_types+1, s->prev_pict_types, PREV_PICT_TYPES_BUFFER_SIZE-1);
    s->prev_pict_types[0]= s->pict_type;
    if(pic->age < PREV_PICT_TYPES_BUFFER_SIZE && s->prev_pict_types[pic->age] == B_TYPE)
        pic->age= INT_MAX; // Skipped MBs in B-frames are quite rare in MPEG-1/2 and it is a bit tricky to skip them anyway.

    return 0;
fail: //for the CHECKED_ALLOCZ macro
    if(r>=0)
        s->avctx->release_buffer(s->avctx, (AVFrame*)pic);
    return -1;
}
269
/**
 * deallocates a picture
 * Releases the pixel buffers (unless shared) and all per-picture side tables;
 * safe to call on a partially allocated or already-freed Picture since
 * av_freep() tolerates NULL and NULLs the pointers.
 */
static void free_picture(MpegEncContext *s, Picture *pic){
    int i;

    if(pic->data[0] && pic->type!=FF_BUFFER_TYPE_SHARED){
        s->avctx->release_buffer(s->avctx, (AVFrame*)pic);
    }

    av_freep(&pic->mb_var);
    av_freep(&pic->mc_mb_var);
    av_freep(&pic->mb_mean);
    av_freep(&pic->mbskip_table);
    av_freep(&pic->qscale_table);
    av_freep(&pic->mb_type_base);
    av_freep(&pic->dct_coeff);
    av_freep(&pic->pan_scan);
    pic->mb_type= NULL;  // pointed into mb_type_base, which was just freed
    for(i=0; i<2; i++){
        av_freep(&pic->motion_val_base[i]);
        av_freep(&pic->ref_index[i]);
    }

    /* Shared pictures do not own data[]; just detach the pointers. */
    if(pic->type == FF_BUFFER_TYPE_SHARED){
        for(i=0; i<4; i++){
            pic->base[i]=
            pic->data[i]= NULL;
        }
        pic->type= 0;
    }
}
302
/**
 * Allocates the per-thread scratch buffers of a (possibly duplicated)
 * context: edge emulation buffer, ME scratchpads/maps and DCT blocks.
 * @param base  presumably the main context this one duplicates; unused here
 * @return 0 on success, -1 on allocation failure
 */
static int init_duplicate_context(MpegEncContext *s, MpegEncContext *base){
    int i;

    // edge emu needs blocksize + filter length - 1 (=17x17 for halfpel / 21x21 for h264)
    CHECKED_ALLOCZ(s->allocated_edge_emu_buffer, (s->width+64)*2*21*2); //(width + edge + align)*interlaced*MBsize*tolerance
    s->edge_emu_buffer= s->allocated_edge_emu_buffer + (s->width+64)*2*21;

     //FIXME should be linesize instead of s->width*2 but that is not known before get_buffer()
    CHECKED_ALLOCZ(s->me.scratchpad,  (s->width+64)*4*16*2*sizeof(uint8_t))
    /* All three scratchpads alias the same allocation (used at different times). */
    s->rd_scratchpad=   s->me.scratchpad;
    s->b_scratchpad=    s->me.scratchpad;
    s->obmc_scratchpad= s->me.scratchpad + 16;
    if (s->encoding) {
        CHECKED_ALLOCZ(s->me.map      , ME_MAP_SIZE*sizeof(uint32_t))
        CHECKED_ALLOCZ(s->me.score_map, ME_MAP_SIZE*sizeof(uint32_t))
        if(s->avctx->noise_reduction){
            CHECKED_ALLOCZ(s->dct_error_sum, 2 * 64 * sizeof(int))
        }
    }
    CHECKED_ALLOCZ(s->blocks, 64*12*2 * sizeof(DCTELEM))
    s->block= s->blocks[0];

    for(i=0;i<12;i++){
        s->pblocks[i] = (short *)(&s->block[i]);
    }
    return 0;
fail:
    return -1; //free() through MPV_common_end()
}
332
333 static void free_duplicate_context(MpegEncContext *s){
334     if(s==NULL) return;
335
336     av_freep(&s->allocated_edge_emu_buffer); s->edge_emu_buffer= NULL;
337     av_freep(&s->me.scratchpad);
338     s->rd_scratchpad=
339     s->b_scratchpad=
340     s->obmc_scratchpad= NULL;
341
342     av_freep(&s->dct_error_sum);
343     av_freep(&s->me.map);
344     av_freep(&s->me.score_map);
345     av_freep(&s->blocks);
346     s->block= NULL;
347 }
348
349 static void backup_duplicate_context(MpegEncContext *bak, MpegEncContext *src){
350 #define COPY(a) bak->a= src->a
351     COPY(allocated_edge_emu_buffer);
352     COPY(edge_emu_buffer);
353     COPY(me.scratchpad);
354     COPY(rd_scratchpad);
355     COPY(b_scratchpad);
356     COPY(obmc_scratchpad);
357     COPY(me.map);
358     COPY(me.score_map);
359     COPY(blocks);
360     COPY(block);
361     COPY(start_mb_y);
362     COPY(end_mb_y);
363     COPY(me.map_generation);
364     COPY(pb);
365     COPY(dct_error_sum);
366     COPY(dct_count[0]);
367     COPY(dct_count[1]);
368 #undef COPY
369 }
370
/**
 * Refreshes a per-thread context from the master context: copies the whole
 * struct, then restores dst's own scratch/slice fields and re-derives the
 * pblocks pointers so they point into dst's (not src's) block storage.
 */
void ff_update_duplicate_context(MpegEncContext *dst, MpegEncContext *src){
    MpegEncContext bak;
    int i;
    //FIXME copy only needed parts
//START_TIMER
    backup_duplicate_context(&bak, dst);
    memcpy(dst, src, sizeof(MpegEncContext));
    backup_duplicate_context(dst, &bak);
    for(i=0;i<12;i++){
        dst->pblocks[i] = (short *)(&dst->block[i]);
    }
//STOP_TIMER("update_duplicate_context") //about 10k cycles / 0.01 sec for 1000frames on 1ghz with 2 threads
}
384
385 /**
386  * sets the given MpegEncContext to common defaults (same for encoding and decoding).
387  * the changed fields will not depend upon the prior state of the MpegEncContext.
388  */
389 void MPV_common_defaults(MpegEncContext *s){
390     s->y_dc_scale_table=
391     s->c_dc_scale_table= ff_mpeg1_dc_scale_table;
392     s->chroma_qscale_table= ff_default_chroma_qscale_table;
393     s->progressive_frame= 1;
394     s->progressive_sequence= 1;
395     s->picture_structure= PICT_FRAME;
396
397     s->coded_picture_number = 0;
398     s->picture_number = 0;
399     s->input_picture_number = 0;
400
401     s->picture_in_gop_number = 0;
402
403     s->f_code = 1;
404     s->b_code = 1;
405 }
406
/**
 * sets the given MpegEncContext to defaults for decoding.
 * the changed fields will not depend upon the prior state of the MpegEncContext.
 * Currently identical to the common defaults; exists as a hook for
 * decoder-only defaults.
 */
void MPV_decode_defaults(MpegEncContext *s){
    MPV_common_defaults(s);
}
414
/**
 * init common structure for both encoder and decoder.
 * this assumes that some variables like width/height are already set
 * Allocates all per-context tables and the per-thread duplicate contexts.
 * @return 0 on success, -1 on error (everything allocated so far is freed
 *         via MPV_common_end())
 */
int MPV_common_init(MpegEncContext *s)
{
    int y_size, c_size, yc_size, i, mb_array_size, mv_table_size, x, y, threads;

    s->mb_height = (s->height + 15) / 16;

    /* slice threading cannot use more threads than macroblock rows */
    if(s->avctx->thread_count > MAX_THREADS || (s->avctx->thread_count > s->mb_height && s->mb_height)){
        av_log(s->avctx, AV_LOG_ERROR, "too many threads\n");
        return -1;
    }

    if((s->width || s->height) && avcodec_check_dimensions(s->avctx, s->width, s->height))
        return -1;

    dsputil_init(&s->dsp, s->avctx);
    DCT_common_init(s);

    s->flags= s->avctx->flags;
    s->flags2= s->avctx->flags2;

    /* derive macroblock/block grid dimensions; strides have one extra
     * column so edge macroblocks have valid neighbours */
    s->mb_width  = (s->width  + 15) / 16;
    s->mb_stride = s->mb_width + 1;
    s->b8_stride = s->mb_width*2 + 1;
    s->b4_stride = s->mb_width*4 + 1;
    mb_array_size= s->mb_height * s->mb_stride;
    mv_table_size= (s->mb_height+2) * s->mb_stride + 1;

    /* set chroma shifts */
    avcodec_get_chroma_sub_sample(s->avctx->pix_fmt,&(s->chroma_x_shift),
                                                    &(s->chroma_y_shift) );

    /* set default edge pos, will be overriden in decode_header if needed */
    s->h_edge_pos= s->mb_width*16;
    s->v_edge_pos= s->mb_height*16;

    s->mb_num = s->mb_width * s->mb_height;

    /* wrap (row stride) for each of the 6 blocks: 4 luma at 8x8, 2 chroma */
    s->block_wrap[0]=
    s->block_wrap[1]=
    s->block_wrap[2]=
    s->block_wrap[3]= s->b8_stride;
    s->block_wrap[4]=
    s->block_wrap[5]= s->mb_stride;

    y_size = s->b8_stride * (2 * s->mb_height + 1);
    c_size = s->mb_stride * (s->mb_height + 1);
    yc_size = y_size + 2 * c_size;

    /* convert fourcc to upper case */
    s->codec_tag=          toupper( s->avctx->codec_tag     &0xFF)
                        + (toupper((s->avctx->codec_tag>>8 )&0xFF)<<8 )
                        + (toupper((s->avctx->codec_tag>>16)&0xFF)<<16)
                        + (toupper((s->avctx->codec_tag>>24)&0xFF)<<24);

    s->stream_codec_tag=          toupper( s->avctx->stream_codec_tag     &0xFF)
                               + (toupper((s->avctx->stream_codec_tag>>8 )&0xFF)<<8 )
                               + (toupper((s->avctx->stream_codec_tag>>16)&0xFF)<<16)
                               + (toupper((s->avctx->stream_codec_tag>>24)&0xFF)<<24);

    s->avctx->coded_frame= (AVFrame*)&s->current_picture;

    CHECKED_ALLOCZ(s->mb_index2xy, (s->mb_num+1)*sizeof(int)) //error resilience code looks cleaner with this
    for(y=0; y<s->mb_height; y++){
        for(x=0; x<s->mb_width; x++){
            s->mb_index2xy[ x + y*s->mb_width ] = x + y*s->mb_stride;
        }
    }
    s->mb_index2xy[ s->mb_height*s->mb_width ] = (s->mb_height-1)*s->mb_stride + s->mb_width; //FIXME really needed?

    if (s->encoding) {
        /* Allocate MV tables */
        CHECKED_ALLOCZ(s->p_mv_table_base            , mv_table_size * 2 * sizeof(int16_t))
        CHECKED_ALLOCZ(s->b_forw_mv_table_base       , mv_table_size * 2 * sizeof(int16_t))
        CHECKED_ALLOCZ(s->b_back_mv_table_base       , mv_table_size * 2 * sizeof(int16_t))
        CHECKED_ALLOCZ(s->b_bidir_forw_mv_table_base , mv_table_size * 2 * sizeof(int16_t))
        CHECKED_ALLOCZ(s->b_bidir_back_mv_table_base , mv_table_size * 2 * sizeof(int16_t))
        CHECKED_ALLOCZ(s->b_direct_mv_table_base     , mv_table_size * 2 * sizeof(int16_t))
        /* working pointers are offset past the guard row/column */
        s->p_mv_table           = s->p_mv_table_base            + s->mb_stride + 1;
        s->b_forw_mv_table      = s->b_forw_mv_table_base       + s->mb_stride + 1;
        s->b_back_mv_table      = s->b_back_mv_table_base       + s->mb_stride + 1;
        s->b_bidir_forw_mv_table= s->b_bidir_forw_mv_table_base + s->mb_stride + 1;
        s->b_bidir_back_mv_table= s->b_bidir_back_mv_table_base + s->mb_stride + 1;
        s->b_direct_mv_table    = s->b_direct_mv_table_base     + s->mb_stride + 1;

        if(s->msmpeg4_version){
            CHECKED_ALLOCZ(s->ac_stats, 2*2*(MAX_LEVEL+1)*(MAX_RUN+1)*2*sizeof(int));
        }
        CHECKED_ALLOCZ(s->avctx->stats_out, 256);

        /* Allocate MB type table */
        CHECKED_ALLOCZ(s->mb_type  , mb_array_size * sizeof(uint16_t)) //needed for encoding

        CHECKED_ALLOCZ(s->lambda_table, mb_array_size * sizeof(int))

        /* quantizer matrices, one set per qscale (32) */
        CHECKED_ALLOCZ(s->q_intra_matrix, 64*32 * sizeof(int))
        CHECKED_ALLOCZ(s->q_inter_matrix, 64*32 * sizeof(int))
        CHECKED_ALLOCZ(s->q_intra_matrix16, 64*32*2 * sizeof(uint16_t))
        CHECKED_ALLOCZ(s->q_inter_matrix16, 64*32*2 * sizeof(uint16_t))
        CHECKED_ALLOCZ(s->input_picture, MAX_PICTURE_COUNT * sizeof(Picture*))
        CHECKED_ALLOCZ(s->reordered_input_picture, MAX_PICTURE_COUNT * sizeof(Picture*))

        if(s->avctx->noise_reduction){
            CHECKED_ALLOCZ(s->dct_offset, 2 * 64 * sizeof(uint16_t))
        }
    }
    CHECKED_ALLOCZ(s->picture, MAX_PICTURE_COUNT * sizeof(Picture))

    CHECKED_ALLOCZ(s->error_status_table, mb_array_size*sizeof(uint8_t))

    if(s->codec_id==CODEC_ID_MPEG4 || (s->flags & CODEC_FLAG_INTERLACED_ME)){
        /* interlaced direct mode decoding tables */
            for(i=0; i<2; i++){
                int j, k;
                for(j=0; j<2; j++){
                    for(k=0; k<2; k++){
                        CHECKED_ALLOCZ(s->b_field_mv_table_base[i][j][k]     , mv_table_size * 2 * sizeof(int16_t))
                        s->b_field_mv_table[i][j][k]    = s->b_field_mv_table_base[i][j][k]     + s->mb_stride + 1;
                    }
                    CHECKED_ALLOCZ(s->b_field_select_table[i][j]     , mb_array_size * 2 * sizeof(uint8_t))
                    CHECKED_ALLOCZ(s->p_field_mv_table_base[i][j]     , mv_table_size * 2 * sizeof(int16_t))
                    s->p_field_mv_table[i][j]    = s->p_field_mv_table_base[i][j]     + s->mb_stride + 1;
                }
                CHECKED_ALLOCZ(s->p_field_select_table[i]      , mb_array_size * 2 * sizeof(uint8_t))
            }
    }
    if (s->out_format == FMT_H263) {
        /* ac values */
        CHECKED_ALLOCZ(s->ac_val_base, yc_size * sizeof(int16_t) * 16);
        s->ac_val[0] = s->ac_val_base + s->b8_stride + 1;
        s->ac_val[1] = s->ac_val_base + y_size + s->mb_stride + 1;
        s->ac_val[2] = s->ac_val[1] + c_size;

        /* cbp values */
        CHECKED_ALLOCZ(s->coded_block_base, y_size);
        s->coded_block= s->coded_block_base + s->b8_stride + 1;

        /* cbp, ac_pred, pred_dir */
        CHECKED_ALLOCZ(s->cbp_table  , mb_array_size * sizeof(uint8_t))
        CHECKED_ALLOCZ(s->pred_dir_table, mb_array_size * sizeof(uint8_t))
    }

    if (s->h263_pred || s->h263_plus || !s->encoding) {
        /* dc values */
        //MN: we need these for error resilience of intra-frames
        CHECKED_ALLOCZ(s->dc_val_base, yc_size * sizeof(int16_t));
        s->dc_val[0] = s->dc_val_base + s->b8_stride + 1;
        s->dc_val[1] = s->dc_val_base + y_size + s->mb_stride + 1;
        s->dc_val[2] = s->dc_val[1] + c_size;
        for(i=0;i<yc_size;i++)
            s->dc_val_base[i] = 1024;  // neutral DC predictor value
    }

    /* which mb is a intra block */
    CHECKED_ALLOCZ(s->mbintra_table, mb_array_size);
    memset(s->mbintra_table, 1, mb_array_size);

    /* init macroblock skip table */
    CHECKED_ALLOCZ(s->mbskip_table, mb_array_size+2);
    //Note the +1 is for a quicker mpeg4 slice_end detection
    CHECKED_ALLOCZ(s->prev_pict_types, PREV_PICT_TYPES_BUFFER_SIZE);

    s->parse_context.state= -1;
    if((s->avctx->debug&(FF_DEBUG_VIS_QP|FF_DEBUG_VIS_MB_TYPE)) || (s->avctx->debug_mv)){
       s->visualization_buffer[0] = av_malloc((s->mb_width*16 + 2*EDGE_WIDTH) * s->mb_height*16 + 2*EDGE_WIDTH);
       s->visualization_buffer[1] = av_malloc((s->mb_width*8 + EDGE_WIDTH) * s->mb_height*8 + EDGE_WIDTH);
       s->visualization_buffer[2] = av_malloc((s->mb_width*8 + EDGE_WIDTH) * s->mb_height*8 + EDGE_WIDTH);
    }

    s->context_initialized = 1;

    s->thread_context[0]= s;
    /* h264 does thread context setup itself, but it needs context[0]
     * to be fully initialized for the error resilience code */
    threads = s->codec_id == CODEC_ID_H264 ? 1 : s->avctx->thread_count;

    for(i=1; i<threads; i++){
        s->thread_context[i]= av_malloc(sizeof(MpegEncContext));
        memcpy(s->thread_context[i], s, sizeof(MpegEncContext));
    }

    for(i=0; i<threads; i++){
        if(init_duplicate_context(s->thread_context[i], s) < 0)
           goto fail;
        /* divide the mb rows evenly among the threads (rounded) */
        s->thread_context[i]->start_mb_y= (s->mb_height*(i  ) + s->avctx->thread_count/2) / s->avctx->thread_count;
        s->thread_context[i]->end_mb_y  = (s->mb_height*(i+1) + s->avctx->thread_count/2) / s->avctx->thread_count;
    }

    return 0;
 fail:
    MPV_common_end(s);
    return -1;
}
611
/* free everything allocated by MPV_common_init() (and the per-thread
 * duplicate contexts); safe after a partial/failed init since av_freep()
 * tolerates NULL pointers */
void MPV_common_end(MpegEncContext *s)
{
    int i, j, k;

    /* thread_context[0] is s itself, so free its buffers but not the struct */
    for(i=0; i<s->avctx->thread_count; i++){
        free_duplicate_context(s->thread_context[i]);
    }
    for(i=1; i<s->avctx->thread_count; i++){
        av_freep(&s->thread_context[i]);
    }

    av_freep(&s->parse_context.buffer);
    s->parse_context.buffer_size=0;

    /* MV tables: free the bases, NULL the offset working pointers */
    av_freep(&s->mb_type);
    av_freep(&s->p_mv_table_base);
    av_freep(&s->b_forw_mv_table_base);
    av_freep(&s->b_back_mv_table_base);
    av_freep(&s->b_bidir_forw_mv_table_base);
    av_freep(&s->b_bidir_back_mv_table_base);
    av_freep(&s->b_direct_mv_table_base);
    s->p_mv_table= NULL;
    s->b_forw_mv_table= NULL;
    s->b_back_mv_table= NULL;
    s->b_bidir_forw_mv_table= NULL;
    s->b_bidir_back_mv_table= NULL;
    s->b_direct_mv_table= NULL;
    for(i=0; i<2; i++){
        for(j=0; j<2; j++){
            for(k=0; k<2; k++){
                av_freep(&s->b_field_mv_table_base[i][j][k]);
                s->b_field_mv_table[i][j][k]=NULL;
            }
            av_freep(&s->b_field_select_table[i][j]);
            av_freep(&s->p_field_mv_table_base[i][j]);
            s->p_field_mv_table[i][j]=NULL;
        }
        av_freep(&s->p_field_select_table[i]);
    }

    av_freep(&s->dc_val_base);
    av_freep(&s->ac_val_base);
    av_freep(&s->coded_block_base);
    av_freep(&s->mbintra_table);
    av_freep(&s->cbp_table);
    av_freep(&s->pred_dir_table);

    av_freep(&s->mbskip_table);
    av_freep(&s->prev_pict_types);
    av_freep(&s->bitstream_buffer);
    s->allocated_bitstream_buffer_size=0;

    av_freep(&s->avctx->stats_out);
    av_freep(&s->ac_stats);
    av_freep(&s->error_status_table);
    av_freep(&s->mb_index2xy);
    av_freep(&s->lambda_table);
    av_freep(&s->q_intra_matrix);
    av_freep(&s->q_inter_matrix);
    av_freep(&s->q_intra_matrix16);
    av_freep(&s->q_inter_matrix16);
    av_freep(&s->input_picture);
    av_freep(&s->reordered_input_picture);
    av_freep(&s->dct_offset);

    /* release every picture's buffers before freeing the picture array */
    if(s->picture){
        for(i=0; i<MAX_PICTURE_COUNT; i++){
            free_picture(s, &s->picture[i]);
        }
    }
    av_freep(&s->picture);
    s->context_initialized = 0;
    s->last_picture_ptr=
    s->next_picture_ptr=
    s->current_picture_ptr= NULL;
    s->linesize= s->uvlinesize= 0;

    for(i=0; i<3; i++)
        av_freep(&s->visualization_buffer[i]);

    avcodec_default_free_buffers(s->avctx);
}
695
/**
 * Initializes the derived tables of a run-level (RL) VLC table:
 * max_level[last][run], max_run[last][level] and index_run[last][run],
 * computed separately for the "not last" and "last" coefficient halves.
 * @param static_store  if non-NULL, the tables live in this static storage
 *                      (packed as [max_level | max_run | index_run]) and the
 *                      function is a no-op when already initialized;
 *                      otherwise each table is av_malloc()ed.
 */
void init_rl(RLTable *rl, uint8_t static_store[2][2*MAX_RUN + MAX_LEVEL + 3])
{
    int8_t max_level[MAX_RUN+1], max_run[MAX_LEVEL+1];
    uint8_t index_run[MAX_RUN+1];
    int last, run, level, start, end, i;

    /* If table is static, we can quit if rl->max_level[0] is not NULL */
    if(static_store && rl->max_level[0])
        return;

    /* compute max_level[], max_run[] and index_run[] */
    for(last=0;last<2;last++) {
        /* entries [0, rl->last) are "not last", [rl->last, rl->n) are "last" */
        if (last == 0) {
            start = 0;
            end = rl->last;
        } else {
            start = rl->last;
            end = rl->n;
        }

        memset(max_level, 0, MAX_RUN + 1);
        memset(max_run, 0, MAX_LEVEL + 1);
        memset(index_run, rl->n, MAX_RUN + 1);  // rl->n marks "no entry for this run"
        for(i=start;i<end;i++) {
            run = rl->table_run[i];
            level = rl->table_level[i];
            if (index_run[run] == rl->n)   // keep the first index for each run
                index_run[run] = i;
            if (level > max_level[run])
                max_level[run] = level;
            if (run > max_run[level])
                max_run[level] = run;
        }
        if(static_store)
            rl->max_level[last] = static_store[last];
        else
            rl->max_level[last] = av_malloc(MAX_RUN + 1);
        memcpy(rl->max_level[last], max_level, MAX_RUN + 1);
        if(static_store)
            rl->max_run[last] = static_store[last] + MAX_RUN + 1;
        else
            rl->max_run[last] = av_malloc(MAX_LEVEL + 1);
        memcpy(rl->max_run[last], max_run, MAX_LEVEL + 1);
        if(static_store)
            rl->index_run[last] = static_store[last] + MAX_RUN + MAX_LEVEL + 2;
        else
            rl->index_run[last] = av_malloc(MAX_RUN + 1);
        memcpy(rl->index_run[last], index_run, MAX_RUN + 1);
    }
}
746
/**
 * Builds the per-qscale RL_VLC decode tables for an RL table: for each of the
 * 32 quantizer scales, every VLC table entry is pre-expanded into
 * (len, run, level) with the level already dequantized (level*qmul + qadd).
 * @param use_static  nonzero to allocate from static storage and skip the
 *                    work if already initialized
 */
void init_vlc_rl(RLTable *rl, int use_static)
{
    int i, q;

    /* Return if static table is already initialized */
    if(use_static && rl->rl_vlc[0])
        return;

    init_vlc(&rl->vlc, 9, rl->n + 1,
             &rl->table_vlc[0][1], 4, 2,
             &rl->table_vlc[0][0], 4, 2, use_static);


    for(q=0; q<32; q++){
        int qmul= q*2;
        int qadd= (q-1)|1;

        /* q==0 means "no dequantization": identity multiplier, no rounding */
        if(q==0){
            qmul=1;
            qadd=0;
        }
        if(use_static)
            rl->rl_vlc[q]= av_mallocz_static(rl->vlc.table_size*sizeof(RL_VLC_ELEM));
        else
            rl->rl_vlc[q]= av_malloc(rl->vlc.table_size*sizeof(RL_VLC_ELEM));
        for(i=0; i<rl->vlc.table_size; i++){
            int code= rl->vlc.table[i][0];
            int len = rl->vlc.table[i][1];
            int level, run;

            if(len==0){ // illegal code
                run= 66;
                level= MAX_LEVEL;
            }else if(len<0){ //more bits needed
                run= 0;
                level= code;
            }else{
                if(code==rl->n){ //esc
                    run= 66;
                    level= 0;
                }else{
                    run=   rl->table_run  [code] + 1;
                    level= rl->table_level[code] * qmul + qadd;
                    if(code >= rl->last) run+=192;  // flag "last" entries via the run value
                }
            }
            rl->rl_vlc[q][i].len= len;
            rl->rl_vlc[q][i].level= level;
            rl->rl_vlc[q][i].run= run;
        }
    }
}
799
/* draw the edges of width 'w' of an image of size width, height
 * by replicating the border pixels outward (rows, columns, then corners) */
//FIXME check that this is ok for mpeg4 interlaced
static void draw_edges_c(uint8_t *buf, int wrap, int width, int height, int w)
{
    uint8_t *bottom = buf + (height - 1) * wrap;
    uint8_t *row;
    int i;

    /* replicate the first and last rows into the top and bottom borders */
    for (i = 1; i <= w; i++) {
        memcpy(buf - i * wrap, buf, width);
        memcpy(bottom + i * wrap, bottom, width);
    }

    /* replicate the first and last pixels of every row sideways */
    row = buf;
    for (i = 0; i < height; i++) {
        memset(row - w, row[0], w);
        memset(row + width, row[width - 1], w);
        row += wrap;
    }

    /* fill the four corner areas from the image's corner pixels */
    for (i = 1; i <= w; i++) {
        memset(buf - i * wrap - w,        buf[0],          w); /* top left */
        memset(buf - i * wrap + width,    buf[width-1],    w); /* top right */
        memset(bottom + i * wrap - w,     bottom[0],       w); /* bottom left */
        memset(bottom + i * wrap + width, bottom[width-1], w); /* bottom right */
    }
}
828
829 int ff_find_unused_picture(MpegEncContext *s, int shared){
830     int i;
831
832     if(shared){
833         for(i=0; i<MAX_PICTURE_COUNT; i++){
834             if(s->picture[i].data[0]==NULL && s->picture[i].type==0) return i;
835         }
836     }else{
837         for(i=0; i<MAX_PICTURE_COUNT; i++){
838             if(s->picture[i].data[0]==NULL && s->picture[i].type!=0) return i; //FIXME
839         }
840         for(i=0; i<MAX_PICTURE_COUNT; i++){
841             if(s->picture[i].data[0]==NULL) return i;
842         }
843     }
844
845     assert(0);
846     return -1;
847 }
848
849 static void update_noise_reduction(MpegEncContext *s){
850     int intra, i;
851
852     for(intra=0; intra<2; intra++){
853         if(s->dct_count[intra] > (1<<16)){
854             for(i=0; i<64; i++){
855                 s->dct_error_sum[intra][i] >>=1;
856             }
857             s->dct_count[intra] >>= 1;
858         }
859
860         for(i=0; i<64; i++){
861             s->dct_offset[intra][i]= (s->avctx->noise_reduction * s->dct_count[intra] + s->dct_error_sum[intra][i]/2) / (s->dct_error_sum[intra][i]+1);
862         }
863     }
864 }
865
/**
 * Generic function for encode/decode called after coding/decoding the header
 * and before a frame is coded/decoded.
 * Releases stale reference pictures, allocates (or re-uses) the current
 * picture, rotates the last/next picture pointers and selects the
 * dequantizer matching the codec in use.
 * @return 0 on success, -1 if picture allocation failed
 */
int MPV_frame_start(MpegEncContext *s, AVCodecContext *avctx)
{
    int i;
    AVFrame *pic;
    s->mb_skipped = 0;

    assert(s->last_picture_ptr==NULL || s->out_format != FMT_H264 || s->codec_id == CODEC_ID_SVQ3);

    /* mark&release old frames */
    if (s->pict_type != B_TYPE && s->last_picture_ptr && s->last_picture_ptr != s->next_picture_ptr && s->last_picture_ptr->data[0]) {
      if(s->out_format != FMT_H264 || s->codec_id == CODEC_ID_SVQ3){
        avctx->release_buffer(avctx, (AVFrame*)s->last_picture_ptr);

        /* release forgotten pictures */
        /* if(mpeg124/h263) */
        if(!s->encoding){
            for(i=0; i<MAX_PICTURE_COUNT; i++){
                /* a referenced picture that is neither last nor next should not exist */
                if(s->picture[i].data[0] && &s->picture[i] != s->next_picture_ptr && s->picture[i].reference){
                    av_log(avctx, AV_LOG_ERROR, "releasing zombie picture\n");
                    avctx->release_buffer(avctx, (AVFrame*)&s->picture[i]);
                }
            }
        }
      }
    }
/* re-entered via goto from below when a reference picture is missing */
alloc:
    if(!s->encoding){
        /* release non reference frames */
        for(i=0; i<MAX_PICTURE_COUNT; i++){
            if(s->picture[i].data[0] && !s->picture[i].reference /*&& s->picture[i].type!=FF_BUFFER_TYPE_SHARED*/){
                s->avctx->release_buffer(s->avctx, (AVFrame*)&s->picture[i]);
            }
        }

        if(s->current_picture_ptr && s->current_picture_ptr->data[0]==NULL)
            pic= (AVFrame*)s->current_picture_ptr; //we already have an unused image (maybe it was set before reading the header)
        else{
            i= ff_find_unused_picture(s, 0);
            pic= (AVFrame*)&s->picture[i];
        }

        pic->reference= (s->pict_type != B_TYPE || s->codec_id == CODEC_ID_H264)
                        && !s->dropable ? 3 : 0;

        pic->coded_picture_number= s->coded_picture_number++;

        if( alloc_picture(s, (Picture*)pic, 0) < 0)
            return -1;

        s->current_picture_ptr= (Picture*)pic;
        s->current_picture_ptr->top_field_first= s->top_field_first; //FIXME use only the vars from current_pic
        s->current_picture_ptr->interlaced_frame= !s->progressive_frame && !s->progressive_sequence;
    }

    s->current_picture_ptr->pict_type= s->pict_type;
//    if(s->flags && CODEC_FLAG_QSCALE)
  //      s->current_picture_ptr->quality= s->new_picture_ptr->quality;
    s->current_picture_ptr->key_frame= s->pict_type == I_TYPE;

    copy_picture(&s->current_picture, s->current_picture_ptr);

    /* rotate references: the current picture becomes the next reference
       unless it is a dropable frame */
    if (s->pict_type != B_TYPE) {
        s->last_picture_ptr= s->next_picture_ptr;
        if(!s->dropable)
            s->next_picture_ptr= s->current_picture_ptr;
    }
/*    av_log(s->avctx, AV_LOG_DEBUG, "L%p N%p C%p L%p N%p C%p type:%d drop:%d\n", s->last_picture_ptr, s->next_picture_ptr,s->current_picture_ptr,
        s->last_picture_ptr    ? s->last_picture_ptr->data[0] : NULL,
        s->next_picture_ptr    ? s->next_picture_ptr->data[0] : NULL,
        s->current_picture_ptr ? s->current_picture_ptr->data[0] : NULL,
        s->pict_type, s->dropable);*/

    if(s->last_picture_ptr) copy_picture(&s->last_picture, s->last_picture_ptr);
    if(s->next_picture_ptr) copy_picture(&s->next_picture, s->next_picture_ptr);

    /* a non-keyframe without a reference: complain and retry allocation */
    if(s->pict_type != I_TYPE && (s->last_picture_ptr==NULL || s->last_picture_ptr->data[0]==NULL) && !s->dropable){
        av_log(avctx, AV_LOG_ERROR, "warning: first frame is no keyframe\n");
        assert(s->pict_type != B_TYPE); //these should have been dropped if we don't have a reference
        goto alloc;
    }

    assert(s->pict_type == I_TYPE || (s->last_picture_ptr && s->last_picture_ptr->data[0]));

    /* field pictures: address the selected field and double the strides */
    if(s->picture_structure!=PICT_FRAME){
        int i;
        for(i=0; i<4; i++){
            if(s->picture_structure == PICT_BOTTOM_FIELD){
                 s->current_picture.data[i] += s->current_picture.linesize[i];
            }
            s->current_picture.linesize[i] *= 2;
            s->last_picture.linesize[i] *=2;
            s->next_picture.linesize[i] *=2;
        }
    }

    s->hurry_up= s->avctx->hurry_up;
    s->error_resilience= avctx->error_resilience;

    /* set dequantizer, we can't do it during init as it might change for mpeg4
       and we can't do it in the header decode as init is not called for mpeg4 there yet */
    if(s->mpeg_quant || s->codec_id == CODEC_ID_MPEG2VIDEO){
        s->dct_unquantize_intra = s->dct_unquantize_mpeg2_intra;
        s->dct_unquantize_inter = s->dct_unquantize_mpeg2_inter;
    }else if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
        s->dct_unquantize_intra = s->dct_unquantize_h263_intra;
        s->dct_unquantize_inter = s->dct_unquantize_h263_inter;
    }else{
        s->dct_unquantize_intra = s->dct_unquantize_mpeg1_intra;
        s->dct_unquantize_inter = s->dct_unquantize_mpeg1_inter;
    }

    if(s->dct_error_sum){
        assert(s->avctx->noise_reduction && s->encoding);

        update_noise_reduction(s);
    }

#ifdef HAVE_XVMC
    if(s->avctx->xvmc_acceleration)
        return XVMC_field_start(s, avctx);
#endif
    return 0;
}
992
/**
 * Generic function for encode/decode called after a frame has been
 * coded/decoded.  Draws the replicated edges needed for unrestricted motion
 * vectors, records last-picture statistics and, when encoding, releases
 * non-reference frames.
 */
void MPV_frame_end(MpegEncContext *s)
{
    int i;
    /* draw edge for correct motion prediction if outside */
#ifdef HAVE_XVMC
//just to make sure that all data is rendered.
    if(s->avctx->xvmc_acceleration){
        XVMC_field_end(s);
    }else
#endif
    if(s->unrestricted_mv && s->current_picture.reference && !s->intra_only && !(s->flags&CODEC_FLAG_EMU_EDGE)) {
            draw_edges(s->current_picture.data[0], s->linesize  , s->h_edge_pos   , s->v_edge_pos   , EDGE_WIDTH  );
            draw_edges(s->current_picture.data[1], s->uvlinesize, s->h_edge_pos>>1, s->v_edge_pos>>1, EDGE_WIDTH/2);
            draw_edges(s->current_picture.data[2], s->uvlinesize, s->h_edge_pos>>1, s->v_edge_pos>>1, EDGE_WIDTH/2);
    }
    emms_c();

    /* remember type and quality of this picture for future rate control */
    s->last_pict_type    = s->pict_type;
    s->last_lambda_for[s->pict_type]= s->current_picture_ptr->quality;
    if(s->pict_type!=B_TYPE){
        s->last_non_b_pict_type= s->pict_type;
    }
#if 0
        /* copy back current_picture variables */
    for(i=0; i<MAX_PICTURE_COUNT; i++){
        if(s->picture[i].data[0] == s->current_picture.data[0]){
            s->picture[i]= s->current_picture;
            break;
        }
    }
    assert(i<MAX_PICTURE_COUNT);
#endif

    if(s->encoding){
        /* release non-reference frames */
        for(i=0; i<MAX_PICTURE_COUNT; i++){
            if(s->picture[i].data[0] && !s->picture[i].reference /*&& s->picture[i].type!=FF_BUFFER_TYPE_SHARED*/){
                s->avctx->release_buffer(s->avctx, (AVFrame*)&s->picture[i]);
            }
        }
    }
    // clear copies, to avoid confusion
#if 0
    memset(&s->last_picture, 0, sizeof(Picture));
    memset(&s->next_picture, 0, sizeof(Picture));
    memset(&s->current_picture, 0, sizeof(Picture));
#endif
    /* hand the finished picture to the caller via coded_frame */
    s->avctx->coded_frame= (AVFrame*)s->current_picture_ptr;
}
1043
1044 /**
1045  * draws an line from (ex, ey) -> (sx, sy).
1046  * @param w width of the image
1047  * @param h height of the image
1048  * @param stride stride/linesize of the image
1049  * @param color color of the arrow
1050  */
1051 static void draw_line(uint8_t *buf, int sx, int sy, int ex, int ey, int w, int h, int stride, int color){
1052     int x, y, fr, f;
1053
1054     sx= av_clip(sx, 0, w-1);
1055     sy= av_clip(sy, 0, h-1);
1056     ex= av_clip(ex, 0, w-1);
1057     ey= av_clip(ey, 0, h-1);
1058
1059     buf[sy*stride + sx]+= color;
1060
1061     if(FFABS(ex - sx) > FFABS(ey - sy)){
1062         if(sx > ex){
1063             FFSWAP(int, sx, ex);
1064             FFSWAP(int, sy, ey);
1065         }
1066         buf+= sx + sy*stride;
1067         ex-= sx;
1068         f= ((ey-sy)<<16)/ex;
1069         for(x= 0; x <= ex; x++){
1070             y = (x*f)>>16;
1071             fr= (x*f)&0xFFFF;
1072             buf[ y   *stride + x]+= (color*(0x10000-fr))>>16;
1073             buf[(y+1)*stride + x]+= (color*         fr )>>16;
1074         }
1075     }else{
1076         if(sy > ey){
1077             FFSWAP(int, sx, ex);
1078             FFSWAP(int, sy, ey);
1079         }
1080         buf+= sx + sy*stride;
1081         ey-= sy;
1082         if(ey) f= ((ex-sx)<<16)/ey;
1083         else   f= 0;
1084         for(y= 0; y <= ey; y++){
1085             x = (y*f)>>16;
1086             fr= (y*f)&0xFFFF;
1087             buf[y*stride + x  ]+= (color*(0x10000-fr))>>16;;
1088             buf[y*stride + x+1]+= (color*         fr )>>16;;
1089         }
1090     }
1091 }
1092
/**
 * Draws an arrow from (ex, ey) -> (sx, sy), with the two-segment head
 * placed at (sx, sy).
 * @param w width of the image
 * @param h height of the image
 * @param stride stride/linesize of the image
 * @param color color of the arrow
 */
static void draw_arrow(uint8_t *buf, int sx, int sy, int ex, int ey, int w, int h, int stride, int color){
    int shaft_x, shaft_y;

    /* endpoints may lie somewhat outside the image; draw_line clips exactly */
    sx= av_clip(sx, -100, w+100);
    sy= av_clip(sy, -100, h+100);
    ex= av_clip(ex, -100, w+100);
    ey= av_clip(ey, -100, h+100);

    shaft_x= ex - sx;
    shaft_y= ey - sy;

    /* draw the head only when the shaft is longer than 3 pixels */
    if(shaft_x*shaft_x + shaft_y*shaft_y > 3*3){
        int rx=  shaft_x + shaft_y; /* shaft direction rotated by 45 degrees */
        int ry= -shaft_x + shaft_y;
        int length= ff_sqrt((rx*rx + ry*ry)<<8);

        //FIXME subpixel accuracy
        rx= ROUNDED_DIV(rx*3<<4, length); /* normalize to a fixed head size */
        ry= ROUNDED_DIV(ry*3<<4, length);

        draw_line(buf, sx, sy, sx + rx, sy + ry, w, h, stride, color);
        draw_line(buf, sx, sy, sx - ry, sy + rx, w, h, stride, color);
    }
    draw_line(buf, sx, sy, ex, ey, w, h, stride, color);
}
1125
/**
 * Prints debugging info for the given picture.  Depending on avctx->debug
 * this writes a textual per-macroblock dump (skip count, qscale, MB type)
 * to the log; depending on avctx->debug / avctx->debug_mv it also draws
 * motion vectors, QP and MB-type visualizations into a copy of the picture.
 */
void ff_print_debug_info(MpegEncContext *s, AVFrame *pict){

    if(!pict || !pict->mb_type) return;

    if(s->avctx->debug&(FF_DEBUG_SKIP | FF_DEBUG_QP | FF_DEBUG_MB_TYPE)){
        int x,y;

        av_log(s->avctx,AV_LOG_DEBUG,"New frame, type: ");
        switch (pict->pict_type) {
            case FF_I_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"I\n"); break;
            case FF_P_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"P\n"); break;
            case FF_B_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"B\n"); break;
            case FF_S_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"S\n"); break;
            case FF_SI_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"SI\n"); break;
            case FF_SP_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"SP\n"); break;
        }
        /* one line per MB row; up to three characters per MB */
        for(y=0; y<s->mb_height; y++){
            for(x=0; x<s->mb_width; x++){
                if(s->avctx->debug&FF_DEBUG_SKIP){
                    int count= s->mbskip_table[x + y*s->mb_stride];
                    if(count>9) count=9;
                    av_log(s->avctx, AV_LOG_DEBUG, "%1d", count);
                }
                if(s->avctx->debug&FF_DEBUG_QP){
                    av_log(s->avctx, AV_LOG_DEBUG, "%2d", pict->qscale_table[x + y*s->mb_stride]);
                }
                if(s->avctx->debug&FF_DEBUG_MB_TYPE){
                    int mb_type= pict->mb_type[x + y*s->mb_stride];
                    //Type & MV direction
                    if(IS_PCM(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "P");
                    else if(IS_INTRA(mb_type) && IS_ACPRED(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "A");
                    else if(IS_INTRA4x4(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "i");
                    else if(IS_INTRA16x16(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "I");
                    else if(IS_DIRECT(mb_type) && IS_SKIP(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "d");
                    else if(IS_DIRECT(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "D");
                    else if(IS_GMC(mb_type) && IS_SKIP(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "g");
                    else if(IS_GMC(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "G");
                    else if(IS_SKIP(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "S");
                    else if(!USES_LIST(mb_type, 1))
                        av_log(s->avctx, AV_LOG_DEBUG, ">");
                    else if(!USES_LIST(mb_type, 0))
                        av_log(s->avctx, AV_LOG_DEBUG, "<");
                    else{
                        assert(USES_LIST(mb_type, 0) && USES_LIST(mb_type, 1));
                        av_log(s->avctx, AV_LOG_DEBUG, "X");
                    }

                    //segmentation
                    if(IS_8X8(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "+");
                    else if(IS_16X8(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "-");
                    else if(IS_8X16(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "|");
                    else if(IS_INTRA(mb_type) || IS_16X16(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, " ");
                    else
                        av_log(s->avctx, AV_LOG_DEBUG, "?");


                    if(IS_INTERLACED(mb_type) && s->codec_id == CODEC_ID_H264)
                        av_log(s->avctx, AV_LOG_DEBUG, "=");
                    else
                        av_log(s->avctx, AV_LOG_DEBUG, " ");
                }
//                av_log(s->avctx, AV_LOG_DEBUG, " ");
            }
            av_log(s->avctx, AV_LOG_DEBUG, "\n");
        }
    }

    /* visual debug: draw motion vectors / QP / MB type into the picture */
    if((s->avctx->debug&(FF_DEBUG_VIS_QP|FF_DEBUG_VIS_MB_TYPE)) || (s->avctx->debug_mv)){
        const int shift= 1 + s->quarter_sample;
        int mb_y;
        uint8_t *ptr;
        int i;
        int h_chroma_shift, v_chroma_shift;
        const int width = s->avctx->width;
        const int height= s->avctx->height;
        const int mv_sample_log2= 4 - pict->motion_subsample_log2;
        const int mv_stride= (s->mb_width << mv_sample_log2) + (s->codec_id == CODEC_ID_H264 ? 0 : 1);
        s->low_delay=0; //needed to see the vectors without trashing the buffers

        /* work on a private copy so reference frames are not modified */
        avcodec_get_chroma_sub_sample(s->avctx->pix_fmt, &h_chroma_shift, &v_chroma_shift);
        for(i=0; i<3; i++){
            memcpy(s->visualization_buffer[i], pict->data[i], (i==0) ? pict->linesize[i]*height:pict->linesize[i]*height >> v_chroma_shift);
            pict->data[i]= s->visualization_buffer[i];
        }
        pict->type= FF_BUFFER_TYPE_COPY;
        ptr= pict->data[0];

        for(mb_y=0; mb_y<s->mb_height; mb_y++){
            int mb_x;
            for(mb_x=0; mb_x<s->mb_width; mb_x++){
                const int mb_index= mb_x + mb_y*s->mb_stride;
                if((s->avctx->debug_mv) && pict->motion_val){
                  int type;
                  /* type 0: P forward, 1: B forward, 2: B backward */
                  for(type=0; type<3; type++){
                    int direction = 0;
                    switch (type) {
                      case 0: if ((!(s->avctx->debug_mv&FF_DEBUG_VIS_MV_P_FOR)) || (pict->pict_type!=FF_P_TYPE))
                                continue;
                              direction = 0;
                              break;
                      case 1: if ((!(s->avctx->debug_mv&FF_DEBUG_VIS_MV_B_FOR)) || (pict->pict_type!=FF_B_TYPE))
                                continue;
                              direction = 0;
                              break;
                      case 2: if ((!(s->avctx->debug_mv&FF_DEBUG_VIS_MV_B_BACK)) || (pict->pict_type!=FF_B_TYPE))
                                continue;
                              direction = 1;
                              break;
                    }
                    if(!USES_LIST(pict->mb_type[mb_index], direction))
                        continue;

                    if(IS_8X8(pict->mb_type[mb_index])){
                      int i;
                      for(i=0; i<4; i++){
                        int sx= mb_x*16 + 4 + 8*(i&1);
                        int sy= mb_y*16 + 4 + 8*(i>>1);
                        int xy= (mb_x*2 + (i&1) + (mb_y*2 + (i>>1))*mv_stride) << (mv_sample_log2-1);
                        int mx= (pict->motion_val[direction][xy][0]>>shift) + sx;
                        int my= (pict->motion_val[direction][xy][1]>>shift) + sy;
                        draw_arrow(ptr, sx, sy, mx, my, width, height, s->linesize, 100);
                      }
                    }else if(IS_16X8(pict->mb_type[mb_index])){
                      int i;
                      for(i=0; i<2; i++){
                        int sx=mb_x*16 + 8;
                        int sy=mb_y*16 + 4 + 8*i;
                        int xy= (mb_x*2 + (mb_y*2 + i)*mv_stride) << (mv_sample_log2-1);
                        int mx=(pict->motion_val[direction][xy][0]>>shift);
                        int my=(pict->motion_val[direction][xy][1]>>shift);

                        if(IS_INTERLACED(pict->mb_type[mb_index]))
                            my*=2;

                        draw_arrow(ptr, sx, sy, mx+sx, my+sy, width, height, s->linesize, 100);
                      }
                    }else if(IS_8X16(pict->mb_type[mb_index])){
                      int i;
                      for(i=0; i<2; i++){
                        int sx=mb_x*16 + 4 + 8*i;
                        int sy=mb_y*16 + 8;
                        int xy= (mb_x*2 + i + mb_y*2*mv_stride) << (mv_sample_log2-1);
                        int mx=(pict->motion_val[direction][xy][0]>>shift);
                        int my=(pict->motion_val[direction][xy][1]>>shift);

                        if(IS_INTERLACED(pict->mb_type[mb_index]))
                            my*=2;

                        draw_arrow(ptr, sx, sy, mx+sx, my+sy, width, height, s->linesize, 100);
                      }
                    }else{
                      int sx= mb_x*16 + 8;
                      int sy= mb_y*16 + 8;
                      int xy= (mb_x + mb_y*mv_stride) << mv_sample_log2;
                      int mx= (pict->motion_val[direction][xy][0]>>shift) + sx;
                      int my= (pict->motion_val[direction][xy][1]>>shift) + sy;
                      draw_arrow(ptr, sx, sy, mx, my, width, height, s->linesize, 100);
                    }
                  }
                }
                if((s->avctx->debug&FF_DEBUG_VIS_QP) && pict->motion_val){
                    uint64_t c= (pict->qscale_table[mb_index]*128/31) * 0x0101010101010101ULL;
                    int y;
                    for(y=0; y<8; y++){
                        *(uint64_t*)(pict->data[1] + 8*mb_x + (8*mb_y + y)*pict->linesize[1])= c;
                        *(uint64_t*)(pict->data[2] + 8*mb_x + (8*mb_y + y)*pict->linesize[2])= c;
                    }
                }
                if((s->avctx->debug&FF_DEBUG_VIS_MB_TYPE) && pict->motion_val){
                    int mb_type= pict->mb_type[mb_index];
                    uint64_t u,v;
                    int y;
#define COLOR(theta, r)\
u= (int)(128 + r*cos(theta*3.141592/180));\
v= (int)(128 + r*sin(theta*3.141592/180));


                    u=v=128;
                    if(IS_PCM(mb_type)){
                        COLOR(120,48)
                    }else if((IS_INTRA(mb_type) && IS_ACPRED(mb_type)) || IS_INTRA16x16(mb_type)){
                        COLOR(30,48)
                    }else if(IS_INTRA4x4(mb_type)){
                        COLOR(90,48)
                    }else if(IS_DIRECT(mb_type) && IS_SKIP(mb_type)){
//                        COLOR(120,48)
                    }else if(IS_DIRECT(mb_type)){
                        COLOR(150,48)
                    }else if(IS_GMC(mb_type) && IS_SKIP(mb_type)){
                        COLOR(170,48)
                    }else if(IS_GMC(mb_type)){
                        COLOR(190,48)
                    }else if(IS_SKIP(mb_type)){
//                        COLOR(180,48)
                    }else if(!USES_LIST(mb_type, 1)){
                        COLOR(240,48)
                    }else if(!USES_LIST(mb_type, 0)){
                        COLOR(0,48)
                    }else{
                        assert(USES_LIST(mb_type, 0) && USES_LIST(mb_type, 1));
                        COLOR(300,48)
                    }

                    u*= 0x0101010101010101ULL;
                    v*= 0x0101010101010101ULL;
                    for(y=0; y<8; y++){
                        *(uint64_t*)(pict->data[1] + 8*mb_x + (8*mb_y + y)*pict->linesize[1])= u;
                        *(uint64_t*)(pict->data[2] + 8*mb_x + (8*mb_y + y)*pict->linesize[2])= v;
                    }

                    //segmentation
                    if(IS_8X8(mb_type) || IS_16X8(mb_type)){
                        *(uint64_t*)(pict->data[0] + 16*mb_x + 0 + (16*mb_y + 8)*pict->linesize[0])^= 0x8080808080808080ULL;
                        *(uint64_t*)(pict->data[0] + 16*mb_x + 8 + (16*mb_y + 8)*pict->linesize[0])^= 0x8080808080808080ULL;
                    }
                    if(IS_8X8(mb_type) || IS_8X16(mb_type)){
                        for(y=0; y<16; y++)
                            pict->data[0][16*mb_x + 8 + (16*mb_y + y)*pict->linesize[0]]^= 0x80;
                    }
                    if(IS_8X8(mb_type) && mv_sample_log2 >= 2){
                        int dm= 1 << (mv_sample_log2-2);
                        for(i=0; i<4; i++){
                            int sx= mb_x*16 + 8*(i&1);
                            int sy= mb_y*16 + 8*(i>>1);
                            int xy= (mb_x*2 + (i&1) + (mb_y*2 + (i>>1))*mv_stride) << (mv_sample_log2-1);
                            //FIXME bidir
                            int32_t *mv = (int32_t*)&pict->motion_val[0][xy];
                            if(mv[0] != mv[dm] || mv[dm*mv_stride] != mv[dm*(mv_stride+1)])
                                for(y=0; y<8; y++)
                                    pict->data[0][sx + 4 + (sy + y)*pict->linesize[0]]^= 0x80;
                            if(mv[0] != mv[dm*mv_stride] || mv[dm] != mv[dm*(mv_stride+1)])
                                *(uint64_t*)(pict->data[0] + sx + (sy + 4)*pict->linesize[0])^= 0x8080808080808080ULL;
                        }
                    }

                    if(IS_INTERLACED(mb_type) && s->codec_id == CODEC_ID_H264){
                        // hmm
                    }
                }
                s->mbskip_table[mb_index]=0;
            }
        }
    }
}
1386
1387 /**
1388  * Copies a rectangular area of samples to a temporary buffer and replicates the boarder samples.
1389  * @param buf destination buffer
1390  * @param src source buffer
1391  * @param linesize number of bytes between 2 vertically adjacent samples in both the source and destination buffers
1392  * @param block_w width of block
1393  * @param block_h height of block
1394  * @param src_x x coordinate of the top left sample of the block in the source buffer
1395  * @param src_y y coordinate of the top left sample of the block in the source buffer
1396  * @param w width of the source buffer
1397  * @param h height of the source buffer
1398  */
1399 void ff_emulated_edge_mc(uint8_t *buf, uint8_t *src, int linesize, int block_w, int block_h,
1400                                     int src_x, int src_y, int w, int h){
1401     int x, y;
1402     int start_y, start_x, end_y, end_x;
1403
1404     if(src_y>= h){
1405         src+= (h-1-src_y)*linesize;
1406         src_y=h-1;
1407     }else if(src_y<=-block_h){
1408         src+= (1-block_h-src_y)*linesize;
1409         src_y=1-block_h;
1410     }
1411     if(src_x>= w){
1412         src+= (w-1-src_x);
1413         src_x=w-1;
1414     }else if(src_x<=-block_w){
1415         src+= (1-block_w-src_x);
1416         src_x=1-block_w;
1417     }
1418
1419     start_y= FFMAX(0, -src_y);
1420     start_x= FFMAX(0, -src_x);
1421     end_y= FFMIN(block_h, h-src_y);
1422     end_x= FFMIN(block_w, w-src_x);
1423
1424     // copy existing part
1425     for(y=start_y; y<end_y; y++){
1426         for(x=start_x; x<end_x; x++){
1427             buf[x + y*linesize]= src[x + y*linesize];
1428         }
1429     }
1430
1431     //top
1432     for(y=0; y<start_y; y++){
1433         for(x=start_x; x<end_x; x++){
1434             buf[x + y*linesize]= buf[x + start_y*linesize];
1435         }
1436     }
1437
1438     //bottom
1439     for(y=end_y; y<block_h; y++){
1440         for(x=start_x; x<end_x; x++){
1441             buf[x + y*linesize]= buf[x + (end_y-1)*linesize];
1442         }
1443     }
1444
1445     for(y=0; y<block_h; y++){
1446        //left
1447         for(x=0; x<start_x; x++){
1448             buf[x + y*linesize]= buf[start_x + y*linesize];
1449         }
1450
1451        //right
1452         for(x=end_x; x<block_w; x++){
1453             buf[x + y*linesize]= buf[end_x - 1 + y*linesize];
1454         }
1455     }
1456 }
1457
1458 static inline int hpel_motion_lowres(MpegEncContext *s,
1459                                   uint8_t *dest, uint8_t *src,
1460                                   int field_based, int field_select,
1461                                   int src_x, int src_y,
1462                                   int width, int height, int stride,
1463                                   int h_edge_pos, int v_edge_pos,
1464                                   int w, int h, h264_chroma_mc_func *pix_op,
1465                                   int motion_x, int motion_y)
1466 {
1467     const int lowres= s->avctx->lowres;
1468     const int s_mask= (2<<lowres)-1;
1469     int emu=0;
1470     int sx, sy;
1471
1472     if(s->quarter_sample){
1473         motion_x/=2;
1474         motion_y/=2;
1475     }
1476
1477     sx= motion_x & s_mask;
1478     sy= motion_y & s_mask;
1479     src_x += motion_x >> (lowres+1);
1480     src_y += motion_y >> (lowres+1);
1481
1482     src += src_y * stride + src_x;
1483
1484     if(   (unsigned)src_x > h_edge_pos                 - (!!sx) - w
1485        || (unsigned)src_y >(v_edge_pos >> field_based) - (!!sy) - h){
1486         ff_emulated_edge_mc(s->edge_emu_buffer, src, s->linesize, w+1, (h+1)<<field_based,
1487                             src_x, src_y<<field_based, h_edge_pos, v_edge_pos);
1488         src= s->edge_emu_buffer;
1489         emu=1;
1490     }
1491
1492     sx <<= 2 - lowres;
1493     sy <<= 2 - lowres;
1494     if(field_select)
1495         src += s->linesize;
1496     pix_op[lowres](dest, src, stride, h, sx, sy);
1497     return emu;
1498 }
1499
/**
 * Apply one MPEG motion vector to the three components (Y, Cb, Cr) of one
 * macroblock at reduced ("lowres") resolution.  The h264-style chroma MC
 * functions are used for all three planes since they support arbitrary
 * subpel positions.
 *
 * @param field_based   1 for field-based prediction (doubles the effective
 *                      line size), 0 for frame prediction
 * @param bottom_field  1 to write into the bottom field of dest
 * @param field_select  1 to read from the bottom field of the reference
 * @param ref_picture   array[3] of pointers to the Y/Cb/Cr reference planes
 * @param pix_op        MC function table indexed by the downscale amount
 * @param motion_x      horizontal MV component (half- or quarter-pel units)
 * @param motion_y      vertical MV component
 * @param h             height of the predicted area in luma lines
 */
static av_always_inline void mpeg_motion_lowres(MpegEncContext *s,
                               uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                               int field_based, int bottom_field, int field_select,
                               uint8_t **ref_picture, h264_chroma_mc_func *pix_op,
                               int motion_x, int motion_y, int h)
{
    uint8_t *ptr_y, *ptr_cb, *ptr_cr;
    int mx, my, src_x, src_y, uvsrc_x, uvsrc_y, uvlinesize, linesize, sx, sy, uvsx, uvsy;
    const int lowres= s->avctx->lowres;
    const int block_s= 8>>lowres;       // luma block size after downscaling
    const int s_mask= (2<<lowres)-1;    // mask extracting the subpel fraction
    const int h_edge_pos = s->h_edge_pos >> lowres;
    const int v_edge_pos = s->v_edge_pos >> lowres;
    linesize   = s->current_picture.linesize[0] << field_based;
    uvlinesize = s->current_picture.linesize[1] << field_based;

    if(s->quarter_sample){ //FIXME obviously not perfect but qpel wont work in lowres anyway
        motion_x/=2;
        motion_y/=2;
    }

    if(field_based){
        // compensate for the vertical offset between the selected source
        // field and the destination field
        motion_y += (bottom_field - field_select)*((1<<lowres)-1);
    }

    sx= motion_x & s_mask;
    sy= motion_y & s_mask;
    src_x = s->mb_x*2*block_s               + (motion_x >> (lowres+1));
    src_y =(s->mb_y*2*block_s>>field_based) + (motion_y >> (lowres+1));

    if (s->out_format == FMT_H263) {
        // H.263: chroma subpel is derived from the luma subpel bits
        uvsx = ((motion_x>>1) & s_mask) | (sx&1);
        uvsy = ((motion_y>>1) & s_mask) | (sy&1);
        uvsrc_x = src_x>>1;
        uvsrc_y = src_y>>1;
    }else if(s->out_format == FMT_H261){//even chroma mv's are full pel in H261
        mx = motion_x / 4;
        my = motion_y / 4;
        uvsx = (2*mx) & s_mask;
        uvsy = (2*my) & s_mask;
        uvsrc_x = s->mb_x*block_s               + (mx >> lowres);
        uvsrc_y = s->mb_y*block_s               + (my >> lowres);
    } else {
        // MPEG-1/2 style: the chroma MV is half the luma MV
        mx = motion_x / 2;
        my = motion_y / 2;
        uvsx = mx & s_mask;
        uvsy = my & s_mask;
        uvsrc_x = s->mb_x*block_s               + (mx >> (lowres+1));
        uvsrc_y =(s->mb_y*block_s>>field_based) + (my >> (lowres+1));
    }

    ptr_y  = ref_picture[0] + src_y * linesize + src_x;
    ptr_cb = ref_picture[1] + uvsrc_y * uvlinesize + uvsrc_x;
    ptr_cr = ref_picture[2] + uvsrc_y * uvlinesize + uvsrc_x;

    // if the MV points (partially) outside the picture, predict from a
    // temporary buffer filled with replicated edge pixels instead
    if(   (unsigned)src_x > h_edge_pos                 - (!!sx) - 2*block_s
       || (unsigned)src_y >(v_edge_pos >> field_based) - (!!sy) - h){
            ff_emulated_edge_mc(s->edge_emu_buffer, ptr_y, s->linesize, 17, 17+field_based,
                             src_x, src_y<<field_based, h_edge_pos, v_edge_pos);
            ptr_y = s->edge_emu_buffer;
            if(!ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
                uint8_t *uvbuf= s->edge_emu_buffer+18*s->linesize;
                ff_emulated_edge_mc(uvbuf  , ptr_cb, s->uvlinesize, 9, 9+field_based,
                                 uvsrc_x, uvsrc_y<<field_based, h_edge_pos>>1, v_edge_pos>>1);
                ff_emulated_edge_mc(uvbuf+16, ptr_cr, s->uvlinesize, 9, 9+field_based,
                                 uvsrc_x, uvsrc_y<<field_based, h_edge_pos>>1, v_edge_pos>>1);
                ptr_cb= uvbuf;
                ptr_cr= uvbuf+16;
            }
    }

    if(bottom_field){ //FIXME use this for field pix too instead of the obnoxious hack which changes picture.data
        dest_y += s->linesize;
        dest_cb+= s->uvlinesize;
        dest_cr+= s->uvlinesize;
    }

    if(field_select){
        // read from the bottom field of the reference
        ptr_y += s->linesize;
        ptr_cb+= s->uvlinesize;
        ptr_cr+= s->uvlinesize;
    }

    // rescale the subpel fraction to the fixed range expected by pix_op
    sx <<= 2 - lowres;
    sy <<= 2 - lowres;
    pix_op[lowres-1](dest_y, ptr_y, linesize, h, sx, sy);

    if(!ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
        uvsx <<= 2 - lowres;
        uvsy <<= 2 - lowres;
        pix_op[lowres](dest_cb, ptr_cb, uvlinesize, h >> s->chroma_y_shift, uvsx, uvsy);
        pix_op[lowres](dest_cr, ptr_cr, uvlinesize, h >> s->chroma_y_shift, uvsx, uvsy);
    }
    //FIXME h261 lowres loop filter
}
1596
/**
 * Chroma motion compensation for a 4MV (8x8) macroblock at reduced
 * resolution: the four luma MVs are combined into a single chroma MV
 * with the H.263 rounding rule and one chroma block is predicted per
 * component.
 *
 * @param mx sum of the four luma horizontal MV components
 * @param my sum of the four luma vertical MV components
 */
static inline void chroma_4mv_motion_lowres(MpegEncContext *s,
                                     uint8_t *dest_cb, uint8_t *dest_cr,
                                     uint8_t **ref_picture,
                                     h264_chroma_mc_func *pix_op,
                                     int mx, int my){
    const int lowres= s->avctx->lowres;
    const int block_s= 8>>lowres;            // chroma block size after downscaling
    const int s_mask= (2<<lowres)-1;         // mask extracting the subpel fraction
    const int h_edge_pos = s->h_edge_pos >> (lowres+1);
    const int v_edge_pos = s->v_edge_pos >> (lowres+1);
    int emu=0, src_x, src_y, offset, sx, sy;
    uint8_t *ptr;

    if(s->quarter_sample){
        mx/=2;
        my/=2;
    }

    /* In case of 8X8, we construct a single chroma motion vector
       with a special rounding */
    mx= ff_h263_round_chroma(mx);
    my= ff_h263_round_chroma(my);

    sx= mx & s_mask;
    sy= my & s_mask;
    src_x = s->mb_x*block_s + (mx >> (lowres+1));
    src_y = s->mb_y*block_s + (my >> (lowres+1));

    offset = src_y * s->uvlinesize + src_x;
    ptr = ref_picture[1] + offset;
    if(s->flags&CODEC_FLAG_EMU_EDGE){
        // MV may point outside the padded picture: use edge emulation
        if(   (unsigned)src_x > h_edge_pos - (!!sx) - block_s
           || (unsigned)src_y > v_edge_pos - (!!sy) - block_s){
            ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, src_x, src_y, h_edge_pos, v_edge_pos);
            ptr= s->edge_emu_buffer;
            emu=1;
        }
    }
    // rescale the subpel fraction to the fixed range expected by pix_op
    sx <<= 2 - lowres;
    sy <<= 2 - lowres;
    pix_op[lowres](dest_cb, ptr, s->uvlinesize, block_s, sx, sy);

    ptr = ref_picture[2] + offset;
    if(emu){
        // Cr needs edge emulation iff Cb did (same position/size)
        ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, src_x, src_y, h_edge_pos, v_edge_pos);
        ptr= s->edge_emu_buffer;
    }
    pix_op[lowres](dest_cr, ptr, s->uvlinesize, block_s, sx, sy);
}
1646
/**
 * Motion compensation of a single macroblock at reduced ("lowres")
 * resolution.  The motion vectors are taken from s->mv and the MV type
 * from s->mv_type.
 *
 * @param s context
 * @param dest_y luma destination pointer
 * @param dest_cb chroma cb/u destination pointer
 * @param dest_cr chroma cr/v destination pointer
 * @param dir direction (0->forward, 1->backward)
 * @param ref_picture array[3] of pointers to the 3 planes of the reference picture
 * @param pix_op halfpel motion compensation function (average or put normally)
 */
static inline void MPV_motion_lowres(MpegEncContext *s,
                              uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                              int dir, uint8_t **ref_picture,
                              h264_chroma_mc_func *pix_op)
{
    int mx, my;
    int mb_x, mb_y, i;
    const int lowres= s->avctx->lowres;
    const int block_s= 8>>lowres;   // 8x8 block size after downscaling

    mb_x = s->mb_x;
    mb_y = s->mb_y;

    switch(s->mv_type) {
    case MV_TYPE_16X16:
        // one MV for the whole macroblock
        mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
                    0, 0, 0,
                    ref_picture, pix_op,
                    s->mv[dir][0][0], s->mv[dir][0][1], 2*block_s);
        break;
    case MV_TYPE_8X8:
        // four luma MVs; chroma uses their rounded average
        mx = 0;
        my = 0;
            for(i=0;i<4;i++) {
                hpel_motion_lowres(s, dest_y + ((i & 1) + (i >> 1) * s->linesize)*block_s,
                            ref_picture[0], 0, 0,
                            (2*mb_x + (i & 1))*block_s, (2*mb_y + (i >>1))*block_s,
                            s->width, s->height, s->linesize,
                            s->h_edge_pos >> lowres, s->v_edge_pos >> lowres,
                            block_s, block_s, pix_op,
                            s->mv[dir][i][0], s->mv[dir][i][1]);

                mx += s->mv[dir][i][0];
                my += s->mv[dir][i][1];
            }

        if(!ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY))
            chroma_4mv_motion_lowres(s, dest_cb, dest_cr, ref_picture, pix_op, mx, my);
        break;
    case MV_TYPE_FIELD:
        if (s->picture_structure == PICT_FRAME) {
            /* top field */
            mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
                        1, 0, s->field_select[dir][0],
                        ref_picture, pix_op,
                        s->mv[dir][0][0], s->mv[dir][0][1], block_s);
            /* bottom field */
            mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
                        1, 1, s->field_select[dir][1],
                        ref_picture, pix_op,
                        s->mv[dir][1][0], s->mv[dir][1][1], block_s);
        } else {
            // field picture: a same-parity reference in the second field of
            // a non-B frame lives in the current picture, not the reference
            if(s->picture_structure != s->field_select[dir][0] + 1 && s->pict_type != B_TYPE && !s->first_field){
                ref_picture= s->current_picture_ptr->data;
            }

            mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
                        0, 0, s->field_select[dir][0],
                        ref_picture, pix_op,
                        s->mv[dir][0][0], s->mv[dir][0][1], 2*block_s);
        }
        break;
    case MV_TYPE_16X8:
        for(i=0; i<2; i++){
            uint8_t ** ref2picture;

            // same parity-selection logic as the MV_TYPE_FIELD case above
            if(s->picture_structure == s->field_select[dir][i] + 1 || s->pict_type == B_TYPE || s->first_field){
                ref2picture= ref_picture;
            }else{
                ref2picture= s->current_picture_ptr->data;
            }

            mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
                        0, 0, s->field_select[dir][i],
                        ref2picture, pix_op,
                        s->mv[dir][i][0], s->mv[dir][i][1] + 2*block_s*i, block_s);

            dest_y += 2*block_s*s->linesize;
            dest_cb+= (2*block_s>>s->chroma_y_shift)*s->uvlinesize;
            dest_cr+= (2*block_s>>s->chroma_y_shift)*s->uvlinesize;
        }
        break;
    case MV_TYPE_DMV:
        if(s->picture_structure == PICT_FRAME){
            for(i=0; i<2; i++){
                int j;
                for(j=0; j<2; j++){
                    mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
                                1, j, j^i,
                                ref_picture, pix_op,
                                s->mv[dir][2*i + j][0], s->mv[dir][2*i + j][1], block_s);
                }
                // after put we make avg of the same block
                pix_op = s->dsp.avg_h264_chroma_pixels_tab;
            }
        }else{
            for(i=0; i<2; i++){
                mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
                            0, 0, s->picture_structure != i+1,
                            ref_picture, pix_op,
                            s->mv[dir][2*i][0],s->mv[dir][2*i][1],2*block_s);

                // after put we make avg of the same block
                pix_op = s->dsp.avg_h264_chroma_pixels_tab;

                //opposite parity is always in the same frame if this is second field
                if(!s->first_field){
                    ref_picture = s->current_picture_ptr->data;
                }
            }
        }
    break;
    default: assert(0);
    }
}
1772
/* put block[] to dest[]: dequantize the intra block in place, then run the
 * inverse DCT and store (not add) the result over dest[] */
static inline void put_dct(MpegEncContext *s,
                           DCTELEM *block, int i, uint8_t *dest, int line_size, int qscale)
{
    s->dct_unquantize_intra(s, block, i, qscale);
    s->dsp.idct_put (dest, line_size, block);
}
1780
1781 /* add block[] to dest[] */
1782 static inline void add_dct(MpegEncContext *s,
1783                            DCTELEM *block, int i, uint8_t *dest, int line_size)
1784 {
1785     if (s->block_last_index[i] >= 0) {
1786         s->dsp.idct_add (dest, line_size, block);
1787     }
1788 }
1789
1790 static inline void add_dequant_dct(MpegEncContext *s,
1791                            DCTELEM *block, int i, uint8_t *dest, int line_size, int qscale)
1792 {
1793     if (s->block_last_index[i] >= 0) {
1794         s->dct_unquantize_inter(s, block, i, qscale);
1795
1796         s->dsp.idct_add (dest, line_size, block);
1797     }
1798 }
1799
/**
 * Cleans dc, ac, coded_block for the current non intra MB: resets the
 * DC/AC prediction state so later intra MBs do not predict from this one.
 */
void ff_clean_intra_table_entries(MpegEncContext *s)
{
    int wrap = s->b8_stride;
    int xy = s->block_index[0];

    /* reset the luma DC predictors of all four 8x8 blocks */
    s->dc_val[0][xy           ] =
    s->dc_val[0][xy + 1       ] =
    s->dc_val[0][xy     + wrap] =
    s->dc_val[0][xy + 1 + wrap] = 1024;
    /* ac pred */
    memset(s->ac_val[0][xy       ], 0, 32 * sizeof(int16_t));
    memset(s->ac_val[0][xy + wrap], 0, 32 * sizeof(int16_t));
    if (s->msmpeg4_version>=3) {
        s->coded_block[xy           ] =
        s->coded_block[xy + 1       ] =
        s->coded_block[xy     + wrap] =
        s->coded_block[xy + 1 + wrap] = 0;
    }
    /* chroma */
    wrap = s->mb_stride;
    xy = s->mb_x + s->mb_y * wrap;
    s->dc_val[1][xy] =
    s->dc_val[2][xy] = 1024;
    /* ac pred */
    memset(s->ac_val[1][xy], 0, 16 * sizeof(int16_t));
    memset(s->ac_val[2][xy], 0, 16 * sizeof(int16_t));

    s->mbintra_table[xy]= 0;  /* mark this MB as non-intra */
}
1832
/* generic function called after a macroblock has been parsed by the
   decoder or after it has been encoded by the encoder.

   It performs (for decoding) the back end of macroblock reconstruction:
   motion compensation, dequantization/IDCT, and writing of the result
   into the current picture.

   Important variables used:
   s->mb_intra : true if intra macroblock
   s->mv_dir   : motion vector direction
   s->mv_type  : motion vector type
   s->mv       : motion vector
   s->interlaced_dct : true if interlaced dct used (mpeg2)

   lowres_flag is a compile-time constant at each call site (the function
   is always inlined) selecting the reduced-resolution code paths.
 */
static av_always_inline void MPV_decode_mb_internal(MpegEncContext *s, DCTELEM block[12][64], int lowres_flag)
{
    int mb_x, mb_y;
    const int mb_xy = s->mb_y * s->mb_stride + s->mb_x;
#ifdef HAVE_XVMC
    if(s->avctx->xvmc_acceleration){
        XVMC_decode_mb(s);//xvmc uses pblocks
        return;
    }
#endif

    mb_x = s->mb_x;
    mb_y = s->mb_y;

    if(s->avctx->debug&FF_DEBUG_DCT_COEFF) {
       /* save DCT coefficients */
       int i,j;
       DCTELEM *dct = &s->current_picture.dct_coeff[mb_xy*64*6];
       for(i=0; i<6; i++)
           for(j=0; j<64; j++)
               *dct++ = block[i][s->dsp.idct_permutation[j]];
    }

    s->current_picture.qscale_table[mb_xy]= s->qscale;

    /* update DC predictors for P macroblocks */
    if (!s->mb_intra) {
        if (s->h263_pred || s->h263_aic) {
            if(s->mbintra_table[mb_xy])
                ff_clean_intra_table_entries(s);
        } else {
            s->last_dc[0] =
            s->last_dc[1] =
            s->last_dc[2] = 128 << s->intra_dc_precision;
        }
    }
    else if (s->h263_pred || s->h263_aic)
        s->mbintra_table[mb_xy]=1;

    if ((s->flags&CODEC_FLAG_PSNR) || !(s->encoding && (s->intra_only || s->pict_type==B_TYPE) && s->avctx->mb_decision != FF_MB_DECISION_RD)) { //FIXME precalc
        uint8_t *dest_y, *dest_cb, *dest_cr;
        int dct_linesize, dct_offset;
        op_pixels_func (*op_pix)[4];
        qpel_mc_func (*op_qpix)[16];
        const int linesize= s->current_picture.linesize[0]; //not s->linesize as this would be wrong for field pics
        const int uvlinesize= s->current_picture.linesize[1];
        const int readable= s->pict_type != B_TYPE || s->encoding || s->avctx->draw_horiz_band || lowres_flag;
        const int block_size= lowres_flag ? 8>>s->avctx->lowres : 8;

        /* avoid copy if macroblock skipped in last frame too */
        /* skip only during decoding as we might trash the buffers during encoding a bit */
        if(!s->encoding){
            uint8_t *mbskip_ptr = &s->mbskip_table[mb_xy];
            const int age= s->current_picture.age;

            assert(age);

            if (s->mb_skipped) {
                s->mb_skipped= 0;
                assert(s->pict_type!=I_TYPE);

                (*mbskip_ptr) ++; /* indicate that this time we skipped it */
                if(*mbskip_ptr >99) *mbskip_ptr= 99;

                /* if previous was skipped too, then nothing to do !  */
                if (*mbskip_ptr >= age && s->current_picture.reference){
                    return;
                }
            } else if(!s->current_picture.reference){
                (*mbskip_ptr) ++; /* increase counter so the age can be compared cleanly */
                if(*mbskip_ptr >99) *mbskip_ptr= 99;
            } else{
                *mbskip_ptr = 0; /* not skipped */
            }
        }

        /* with interlaced DCT the two fields interleave, so double the stride */
        dct_linesize = linesize << s->interlaced_dct;
        dct_offset =(s->interlaced_dct)? linesize : linesize*block_size;

        if(readable){
            dest_y=  s->dest[0];
            dest_cb= s->dest[1];
            dest_cr= s->dest[2];
        }else{
            /* not directly readable: reconstruct into a scratchpad first */
            dest_y = s->b_scratchpad;
            dest_cb= s->b_scratchpad+16*linesize;
            dest_cr= s->b_scratchpad+32*linesize;
        }

        if (!s->mb_intra) {
            /* motion handling */
            /* decoding or more than one mb_type (MC was already done otherwise) */
            if(!s->encoding){
                if(lowres_flag){
                    h264_chroma_mc_func *op_pix = s->dsp.put_h264_chroma_pixels_tab;

                    if (s->mv_dir & MV_DIR_FORWARD) {
                        MPV_motion_lowres(s, dest_y, dest_cb, dest_cr, 0, s->last_picture.data, op_pix);
                        op_pix = s->dsp.avg_h264_chroma_pixels_tab;
                    }
                    if (s->mv_dir & MV_DIR_BACKWARD) {
                        MPV_motion_lowres(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.data, op_pix);
                    }
                }else{
                    op_qpix= s->me.qpel_put;
                    if ((!s->no_rounding) || s->pict_type==B_TYPE){
                        op_pix = s->dsp.put_pixels_tab;
                    }else{
                        op_pix = s->dsp.put_no_rnd_pixels_tab;
                    }
                    if (s->mv_dir & MV_DIR_FORWARD) {
                        MPV_motion(s, dest_y, dest_cb, dest_cr, 0, s->last_picture.data, op_pix, op_qpix);
                        op_pix = s->dsp.avg_pixels_tab;
                        op_qpix= s->me.qpel_avg;
                    }
                    if (s->mv_dir & MV_DIR_BACKWARD) {
                        MPV_motion(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.data, op_pix, op_qpix);
                    }
                }
            }

            /* skip dequant / idct if we are really late ;) */
            if(s->hurry_up>1) goto skip_idct;
            if(s->avctx->skip_idct){
                if(  (s->avctx->skip_idct >= AVDISCARD_NONREF && s->pict_type == B_TYPE)
                   ||(s->avctx->skip_idct >= AVDISCARD_NONKEY && s->pict_type != I_TYPE)
                   || s->avctx->skip_idct >= AVDISCARD_ALL)
                    goto skip_idct;
            }

            /* add dct residue */
            if(s->encoding || !(   s->h263_msmpeg4 || s->codec_id==CODEC_ID_MPEG1VIDEO || s->codec_id==CODEC_ID_MPEG2VIDEO
                                || (s->codec_id==CODEC_ID_MPEG4 && !s->mpeg_quant))){
                add_dequant_dct(s, block[0], 0, dest_y                          , dct_linesize, s->qscale);
                add_dequant_dct(s, block[1], 1, dest_y              + block_size, dct_linesize, s->qscale);
                add_dequant_dct(s, block[2], 2, dest_y + dct_offset             , dct_linesize, s->qscale);
                add_dequant_dct(s, block[3], 3, dest_y + dct_offset + block_size, dct_linesize, s->qscale);

                if(!ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
                    if (s->chroma_y_shift){
                        add_dequant_dct(s, block[4], 4, dest_cb, uvlinesize, s->chroma_qscale);
                        add_dequant_dct(s, block[5], 5, dest_cr, uvlinesize, s->chroma_qscale);
                    }else{
                        dct_linesize >>= 1;
                        dct_offset >>=1;
                        add_dequant_dct(s, block[4], 4, dest_cb,              dct_linesize, s->chroma_qscale);
                        add_dequant_dct(s, block[5], 5, dest_cr,              dct_linesize, s->chroma_qscale);
                        add_dequant_dct(s, block[6], 6, dest_cb + dct_offset, dct_linesize, s->chroma_qscale);
                        add_dequant_dct(s, block[7], 7, dest_cr + dct_offset, dct_linesize, s->chroma_qscale);
                    }
                }
            } else if(s->codec_id != CODEC_ID_WMV2){
                /* blocks are already dequantized: only the IDCT+add is needed */
                add_dct(s, block[0], 0, dest_y                          , dct_linesize);
                add_dct(s, block[1], 1, dest_y              + block_size, dct_linesize);
                add_dct(s, block[2], 2, dest_y + dct_offset             , dct_linesize);
                add_dct(s, block[3], 3, dest_y + dct_offset + block_size, dct_linesize);

                if(!ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
                    if(s->chroma_y_shift){//Chroma420
                        add_dct(s, block[4], 4, dest_cb, uvlinesize);
                        add_dct(s, block[5], 5, dest_cr, uvlinesize);
                    }else{
                        //chroma422
                        dct_linesize = uvlinesize << s->interlaced_dct;
                        dct_offset =(s->interlaced_dct)? uvlinesize : uvlinesize*8;

                        add_dct(s, block[4], 4, dest_cb, dct_linesize);
                        add_dct(s, block[5], 5, dest_cr, dct_linesize);
                        add_dct(s, block[6], 6, dest_cb+dct_offset, dct_linesize);
                        add_dct(s, block[7], 7, dest_cr+dct_offset, dct_linesize);
                        if(!s->chroma_x_shift){//Chroma444
                            add_dct(s, block[8], 8, dest_cb+8, dct_linesize);
                            add_dct(s, block[9], 9, dest_cr+8, dct_linesize);
                            add_dct(s, block[10], 10, dest_cb+8+dct_offset, dct_linesize);
                            add_dct(s, block[11], 11, dest_cr+8+dct_offset, dct_linesize);
                        }
                    }
                }//fi gray
            }
            else if (ENABLE_WMV2) {
                ff_wmv2_add_mb(s, block, dest_y, dest_cb, dest_cr);
            }
        } else {
            /* dct only in intra block */
            if(s->encoding || !(s->codec_id==CODEC_ID_MPEG1VIDEO || s->codec_id==CODEC_ID_MPEG2VIDEO)){
                put_dct(s, block[0], 0, dest_y                          , dct_linesize, s->qscale);
                put_dct(s, block[1], 1, dest_y              + block_size, dct_linesize, s->qscale);
                put_dct(s, block[2], 2, dest_y + dct_offset             , dct_linesize, s->qscale);
                put_dct(s, block[3], 3, dest_y + dct_offset + block_size, dct_linesize, s->qscale);

                if(!ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
                    if(s->chroma_y_shift){
                        put_dct(s, block[4], 4, dest_cb, uvlinesize, s->chroma_qscale);
                        put_dct(s, block[5], 5, dest_cr, uvlinesize, s->chroma_qscale);
                    }else{
                        dct_offset >>=1;
                        dct_linesize >>=1;
                        put_dct(s, block[4], 4, dest_cb,              dct_linesize, s->chroma_qscale);
                        put_dct(s, block[5], 5, dest_cr,              dct_linesize, s->chroma_qscale);
                        put_dct(s, block[6], 6, dest_cb + dct_offset, dct_linesize, s->chroma_qscale);
                        put_dct(s, block[7], 7, dest_cr + dct_offset, dct_linesize, s->chroma_qscale);
                    }
                }
            }else{
                /* intra blocks are already dequantized here */
                s->dsp.idct_put(dest_y                          , dct_linesize, block[0]);
                s->dsp.idct_put(dest_y              + block_size, dct_linesize, block[1]);
                s->dsp.idct_put(dest_y + dct_offset             , dct_linesize, block[2]);
                s->dsp.idct_put(dest_y + dct_offset + block_size, dct_linesize, block[3]);

                if(!ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
                    if(s->chroma_y_shift){
                        s->dsp.idct_put(dest_cb, uvlinesize, block[4]);
                        s->dsp.idct_put(dest_cr, uvlinesize, block[5]);
                    }else{

                        dct_linesize = uvlinesize << s->interlaced_dct;
                        dct_offset =(s->interlaced_dct)? uvlinesize : uvlinesize*8;

                        s->dsp.idct_put(dest_cb,              dct_linesize, block[4]);
                        s->dsp.idct_put(dest_cr,              dct_linesize, block[5]);
                        s->dsp.idct_put(dest_cb + dct_offset, dct_linesize, block[6]);
                        s->dsp.idct_put(dest_cr + dct_offset, dct_linesize, block[7]);
                        if(!s->chroma_x_shift){//Chroma444
                            s->dsp.idct_put(dest_cb + 8,              dct_linesize, block[8]);
                            s->dsp.idct_put(dest_cr + 8,              dct_linesize, block[9]);
                            s->dsp.idct_put(dest_cb + 8 + dct_offset, dct_linesize, block[10]);
                            s->dsp.idct_put(dest_cr + 8 + dct_offset, dct_linesize, block[11]);
                        }
                    }
                }//gray
            }
        }
skip_idct:
        if(!readable){
            /* copy the scratchpad reconstruction into the real destination */
            s->dsp.put_pixels_tab[0][0](s->dest[0], dest_y ,   linesize,16);
            s->dsp.put_pixels_tab[s->chroma_x_shift][0](s->dest[1], dest_cb, uvlinesize,16 >> s->chroma_y_shift);
            s->dsp.put_pixels_tab[s->chroma_x_shift][0](s->dest[2], dest_cr, uvlinesize,16 >> s->chroma_y_shift);
        }
    }
}
2083
2084 void MPV_decode_mb(MpegEncContext *s, DCTELEM block[12][64]){
2085     if(s->avctx->lowres) MPV_decode_mb_internal(s, block, 1);
2086     else                  MPV_decode_mb_internal(s, block, 0);
2087 }
2088
2089 /**
2090  *
2091  * @param h is the normal height, this will be reduced automatically if needed for the last row
2092  */
2093 void ff_draw_horiz_band(MpegEncContext *s, int y, int h){
2094     if (s->avctx->draw_horiz_band) {
2095         AVFrame *src;
2096         int offset[4];
2097
2098         if(s->picture_structure != PICT_FRAME){
2099             h <<= 1;
2100             y <<= 1;
2101             if(s->first_field  && !(s->avctx->slice_flags&SLICE_FLAG_ALLOW_FIELD)) return;
2102         }
2103
2104         h= FFMIN(h, s->avctx->height - y);
2105
2106         if(s->pict_type==B_TYPE || s->low_delay || (s->avctx->slice_flags&SLICE_FLAG_CODED_ORDER))
2107             src= (AVFrame*)s->current_picture_ptr;
2108         else if(s->last_picture_ptr)
2109             src= (AVFrame*)s->last_picture_ptr;
2110         else
2111             return;
2112
2113         if(s->pict_type==B_TYPE && s->picture_structure == PICT_FRAME && s->out_format != FMT_H264){
2114             offset[0]=
2115             offset[1]=
2116             offset[2]=
2117             offset[3]= 0;
2118         }else{
2119             offset[0]= y * s->linesize;;
2120             offset[1]=
2121             offset[2]= (y >> s->chroma_y_shift) * s->uvlinesize;
2122             offset[3]= 0;
2123         }
2124
2125         emms_c();
2126
2127         s->avctx->draw_horiz_band(s->avctx, src, offset,
2128                                   y, s->picture_structure, h);
2129     }
2130 }
2131
/**
 * Set up s->block_index[] (indices of the current macroblock's six blocks
 * into the per-block prediction tables) and s->dest[] (output pointers into
 * the current picture) for the macroblock at (s->mb_x, s->mb_y).
 */
void ff_init_block_index(MpegEncContext *s){ //FIXME maybe rename
    const int linesize= s->current_picture.linesize[0]; //not s->linesize as this would be wrong for field pics
    const int uvlinesize= s->current_picture.linesize[1];
    const int mb_size= 4 - s->avctx->lowres;  // log2 of the macroblock size in pixels

    /* the four luma 8x8 blocks, two per row in b8_stride units */
    s->block_index[0]= s->b8_stride*(s->mb_y*2    ) - 2 + s->mb_x*2;
    s->block_index[1]= s->b8_stride*(s->mb_y*2    ) - 1 + s->mb_x*2;
    s->block_index[2]= s->b8_stride*(s->mb_y*2 + 1) - 2 + s->mb_x*2;
    s->block_index[3]= s->b8_stride*(s->mb_y*2 + 1) - 1 + s->mb_x*2;
    /* the two chroma blocks, stored after the luma area */
    s->block_index[4]= s->mb_stride*(s->mb_y + 1)                + s->b8_stride*s->mb_height*2 + s->mb_x - 1;
    s->block_index[5]= s->mb_stride*(s->mb_y + s->mb_height + 2) + s->b8_stride*s->mb_height*2 + s->mb_x - 1;
    //block_index is not used by mpeg2, so it is not affected by chroma_format

    s->dest[0] = s->current_picture.data[0] + ((s->mb_x - 1) << mb_size);
    s->dest[1] = s->current_picture.data[1] + ((s->mb_x - 1) << (mb_size - s->chroma_x_shift));
    s->dest[2] = s->current_picture.data[2] + ((s->mb_x - 1) << (mb_size - s->chroma_x_shift));

    if(!(s->pict_type==B_TYPE && s->avctx->draw_horiz_band && s->picture_structure==PICT_FRAME))
    {
        /* advance to the current macroblock row */
        s->dest[0] += s->mb_y *   linesize << mb_size;
        s->dest[1] += s->mb_y * uvlinesize << (mb_size - s->chroma_y_shift);
        s->dest[2] += s->mb_y * uvlinesize << (mb_size - s->chroma_y_shift);
    }
}
2156
2157 void ff_mpeg_flush(AVCodecContext *avctx){
2158     int i;
2159     MpegEncContext *s = avctx->priv_data;
2160
2161     if(s==NULL || s->picture==NULL)
2162         return;
2163
2164     for(i=0; i<MAX_PICTURE_COUNT; i++){
2165        if(s->picture[i].data[0] && (   s->picture[i].type == FF_BUFFER_TYPE_INTERNAL
2166                                     || s->picture[i].type == FF_BUFFER_TYPE_USER))
2167         avctx->release_buffer(avctx, (AVFrame*)&s->picture[i]);
2168     }
2169     s->current_picture_ptr = s->last_picture_ptr = s->next_picture_ptr = NULL;
2170
2171     s->mb_x= s->mb_y= 0;
2172
2173     s->parse_context.state= -1;
2174     s->parse_context.frame_start_found= 0;
2175     s->parse_context.overread= 0;
2176     s->parse_context.overread_index= 0;
2177     s->parse_context.index= 0;
2178     s->parse_context.last_index= 0;
2179     s->bitstream_buffer_size=0;
2180     s->pp_time=0;
2181 }
2182
2183 static void dct_unquantize_mpeg1_intra_c(MpegEncContext *s,
2184                                    DCTELEM *block, int n, int qscale)
2185 {
2186     int i, level, nCoeffs;
2187     const uint16_t *quant_matrix;
2188
2189     nCoeffs= s->block_last_index[n];
2190
2191     if (n < 4)
2192         block[0] = block[0] * s->y_dc_scale;
2193     else
2194         block[0] = block[0] * s->c_dc_scale;
2195     /* XXX: only mpeg1 */
2196     quant_matrix = s->intra_matrix;
2197     for(i=1;i<=nCoeffs;i++) {
2198         int j= s->intra_scantable.permutated[i];
2199         level = block[j];
2200         if (level) {
2201             if (level < 0) {
2202                 level = -level;
2203                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
2204                 level = (level - 1) | 1;
2205                 level = -level;
2206             } else {
2207                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
2208                 level = (level - 1) | 1;
2209             }
2210             block[j] = level;
2211         }
2212     }
2213 }
2214
2215 static void dct_unquantize_mpeg1_inter_c(MpegEncContext *s,
2216                                    DCTELEM *block, int n, int qscale)
2217 {
2218     int i, level, nCoeffs;
2219     const uint16_t *quant_matrix;
2220
2221     nCoeffs= s->block_last_index[n];
2222
2223     quant_matrix = s->inter_matrix;
2224     for(i=0; i<=nCoeffs; i++) {
2225         int j= s->intra_scantable.permutated[i];
2226         level = block[j];
2227         if (level) {
2228             if (level < 0) {
2229                 level = -level;
2230                 level = (((level << 1) + 1) * qscale *
2231                          ((int) (quant_matrix[j]))) >> 4;
2232                 level = (level - 1) | 1;
2233                 level = -level;
2234             } else {
2235                 level = (((level << 1) + 1) * qscale *
2236                          ((int) (quant_matrix[j]))) >> 4;
2237                 level = (level - 1) | 1;
2238             }
2239             block[j] = level;
2240         }
2241     }
2242 }
2243
2244 static void dct_unquantize_mpeg2_intra_c(MpegEncContext *s,
2245                                    DCTELEM *block, int n, int qscale)
2246 {
2247     int i, level, nCoeffs;
2248     const uint16_t *quant_matrix;
2249
2250     if(s->alternate_scan) nCoeffs= 63;
2251     else nCoeffs= s->block_last_index[n];
2252
2253     if (n < 4)
2254         block[0] = block[0] * s->y_dc_scale;
2255     else
2256         block[0] = block[0] * s->c_dc_scale;
2257     quant_matrix = s->intra_matrix;
2258     for(i=1;i<=nCoeffs;i++) {
2259         int j= s->intra_scantable.permutated[i];
2260         level = block[j];
2261         if (level) {
2262             if (level < 0) {
2263                 level = -level;
2264                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
2265                 level = -level;
2266             } else {
2267                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
2268             }
2269             block[j] = level;
2270         }
2271     }
2272 }
2273
2274 static void dct_unquantize_mpeg2_intra_bitexact(MpegEncContext *s,
2275                                    DCTELEM *block, int n, int qscale)
2276 {
2277     int i, level, nCoeffs;
2278     const uint16_t *quant_matrix;
2279     int sum=-1;
2280
2281     if(s->alternate_scan) nCoeffs= 63;
2282     else nCoeffs= s->block_last_index[n];
2283
2284     if (n < 4)
2285         block[0] = block[0] * s->y_dc_scale;
2286     else
2287         block[0] = block[0] * s->c_dc_scale;
2288     quant_matrix = s->intra_matrix;
2289     for(i=1;i<=nCoeffs;i++) {
2290         int j= s->intra_scantable.permutated[i];
2291         level = block[j];
2292         if (level) {
2293             if (level < 0) {
2294                 level = -level;
2295                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
2296                 level = -level;
2297             } else {
2298                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
2299             }
2300             block[j] = level;
2301             sum+=level;
2302         }
2303     }
2304     block[63]^=sum&1;
2305 }
2306
2307 static void dct_unquantize_mpeg2_inter_c(MpegEncContext *s,
2308                                    DCTELEM *block, int n, int qscale)
2309 {
2310     int i, level, nCoeffs;
2311     const uint16_t *quant_matrix;
2312     int sum=-1;
2313
2314     if(s->alternate_scan) nCoeffs= 63;
2315     else nCoeffs= s->block_last_index[n];
2316
2317     quant_matrix = s->inter_matrix;
2318     for(i=0; i<=nCoeffs; i++) {
2319         int j= s->intra_scantable.permutated[i];
2320         level = block[j];
2321         if (level) {
2322             if (level < 0) {
2323                 level = -level;
2324                 level = (((level << 1) + 1) * qscale *
2325                          ((int) (quant_matrix[j]))) >> 4;
2326                 level = -level;
2327             } else {
2328                 level = (((level << 1) + 1) * qscale *
2329                          ((int) (quant_matrix[j]))) >> 4;
2330             }
2331             block[j] = level;
2332             sum+=level;
2333         }
2334     }
2335     block[63]^=sum&1;
2336 }
2337
2338 static void dct_unquantize_h263_intra_c(MpegEncContext *s,
2339                                   DCTELEM *block, int n, int qscale)
2340 {
2341     int i, level, qmul, qadd;
2342     int nCoeffs;
2343
2344     assert(s->block_last_index[n]>=0);
2345
2346     qmul = qscale << 1;
2347
2348     if (!s->h263_aic) {
2349         if (n < 4)
2350             block[0] = block[0] * s->y_dc_scale;
2351         else
2352             block[0] = block[0] * s->c_dc_scale;
2353         qadd = (qscale - 1) | 1;
2354     }else{
2355         qadd = 0;
2356     }
2357     if(s->ac_pred)
2358         nCoeffs=63;
2359     else
2360         nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
2361
2362     for(i=1; i<=nCoeffs; i++) {
2363         level = block[i];
2364         if (level) {
2365             if (level < 0) {
2366                 level = level * qmul - qadd;
2367             } else {
2368                 level = level * qmul + qadd;
2369             }
2370             block[i] = level;
2371         }
2372     }
2373 }
2374
2375 static void dct_unquantize_h263_inter_c(MpegEncContext *s,
2376                                   DCTELEM *block, int n, int qscale)
2377 {
2378     int i, level, qmul, qadd;
2379     int nCoeffs;
2380
2381     assert(s->block_last_index[n]>=0);
2382
2383     qadd = (qscale - 1) | 1;
2384     qmul = qscale << 1;
2385
2386     nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
2387
2388     for(i=0; i<=nCoeffs; i++) {
2389         level = block[i];
2390         if (level) {
2391             if (level < 0) {
2392                 level = level * qmul - qadd;
2393             } else {
2394                 level = level * qmul + qadd;
2395             }
2396             block[i] = level;
2397         }
2398     }
2399 }
2400
2401 /**
2402  * set qscale and update qscale dependent variables.
2403  */
2404 void ff_set_qscale(MpegEncContext * s, int qscale)
2405 {
2406     if (qscale < 1)
2407         qscale = 1;
2408     else if (qscale > 31)
2409         qscale = 31;
2410
2411     s->qscale = qscale;
2412     s->chroma_qscale= s->chroma_qscale_table[qscale];
2413
2414     s->y_dc_scale= s->y_dc_scale_table[ qscale ];
2415     s->c_dc_scale= s->c_dc_scale_table[ s->chroma_qscale ];
2416 }