/* libavcodec/mpegvideo.c (web-viewer page header removed) */
1 /*
2  * The simplest mpeg encoder (well, it was the simplest!)
3  * Copyright (c) 2000,2001 Fabrice Bellard.
4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5  *
6  * 4MV & hq & B-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
7  *
8  * This file is part of FFmpeg.
9  *
10  * FFmpeg is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * FFmpeg is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with FFmpeg; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23  */
24
25 /**
26  * @file mpegvideo.c
27  * The simplest mpeg encoder (well, it was the simplest!).
28  */
29
30 #include "avcodec.h"
31 #include "dsputil.h"
32 #include "mpegvideo.h"
33 #include "mpegvideo_common.h"
34 #include "mjpegenc.h"
35 #include "msmpeg4.h"
36 #include "faandct.h"
37 #include <limits.h>
38
39 //#undef NDEBUG
40 //#include <assert.h>
41
/* C implementations of the per-standard dequantizers; ff_dct_common_init()
 * installs these into the MpegEncContext function pointers (arch-specific
 * init may replace them afterwards). */
static void dct_unquantize_mpeg1_intra_c(MpegEncContext *s,
                                   DCTELEM *block, int n, int qscale);
static void dct_unquantize_mpeg1_inter_c(MpegEncContext *s,
                                   DCTELEM *block, int n, int qscale);
static void dct_unquantize_mpeg2_intra_c(MpegEncContext *s,
                                   DCTELEM *block, int n, int qscale);
static void dct_unquantize_mpeg2_intra_bitexact(MpegEncContext *s,
                                   DCTELEM *block, int n, int qscale);
static void dct_unquantize_mpeg2_inter_c(MpegEncContext *s,
                                   DCTELEM *block, int n, int qscale);
static void dct_unquantize_h263_intra_c(MpegEncContext *s,
                                  DCTELEM *block, int n, int qscale);
static void dct_unquantize_h263_inter_c(MpegEncContext *s,
                                  DCTELEM *block, int n, int qscale);
static void draw_edges_c(uint8_t *buf, int wrap, int width, int height, int w);

#ifdef HAVE_XVMC
/* XvMC hardware-accelerated decoding hooks, implemented elsewhere */
extern int  XVMC_field_start(MpegEncContext*s, AVCodecContext *avctx);
extern void XVMC_field_end(MpegEncContext *s);
extern void XVMC_decode_mb(MpegEncContext *s);
#endif

/* global function pointer so arch-specific code can override the C version */
void (*draw_edges)(uint8_t *buf, int wrap, int width, int height, int w)= draw_edges_c;
65
66
67 /* enable all paranoid tests for rounding, overflows, etc... */
68 //#define PARANOID
69
70 //#define DEBUG
71
72
/* identity mapping: by default the chroma qscale equals the luma qscale */
static const uint8_t ff_default_chroma_qscale_table[32]={
//  0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
    0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
};
77
78 void ff_init_scantable(uint8_t *permutation, ScanTable *st, const uint8_t *src_scantable){
79     int i;
80     int end;
81
82     st->scantable= src_scantable;
83
84     for(i=0; i<64; i++){
85         int j;
86         j = src_scantable[i];
87         st->permutated[i] = permutation[j];
88 #ifdef ARCH_POWERPC
89         st->inverse[j] = i;
90 #endif
91     }
92
93     end=-1;
94     for(i=0; i<64; i++){
95         int j;
96         j = st->permutated[i];
97         if(j>end) end=j;
98         st->raster_end[i]= end;
99     }
100 }
101
/**
 * finds the next 00 00 01 xx start code in [p, end).
 * *state carries the previously seen bytes across calls, so start codes
 * split between consecutive buffers are still detected. Returns a pointer
 * just past the start code, or end if none was found in this buffer.
 */
const uint8_t *ff_find_start_code(const uint8_t * restrict p, const uint8_t *end, uint32_t * restrict state){
    int i;

    assert(p<=end);
    if(p>=end)
        return end;

    /* check whether a start code completes within the first 3 bytes,
       using the history carried over in *state */
    for(i=0; i<3; i++){
        uint32_t tmp= *state << 8;
        *state= tmp + *(p++);
        if(tmp == 0x100 || p==end)
            return p;
    }

    /* fast scan: single byte values rule out most positions, letting us
       step 2 or 3 bytes at a time */
    while(p<end){
        if     (p[-1] > 1      ) p+= 3; // p[-1] cannot be part of 00 00 01
        else if(p[-2]          ) p+= 2; // p[-2] != 0, no code can end at p
        else if(p[-3]|(p[-1]-1)) p++;   // not exactly 00 00 01, step by one
        else{
            p++;
            break;
        }
    }

    /* save the last 4 bytes in *state so the search can resume next call */
    p= FFMIN(p, end)-4;
    *state= AV_RB32(p);

    return p+4;
}
131
/* init common dct for both encoder and decoder */
int ff_dct_common_init(MpegEncContext *s)
{
    /* install the default C dequantizers; the arch-specific init calls
       below may replace them with optimized versions */
    s->dct_unquantize_h263_intra = dct_unquantize_h263_intra_c;
    s->dct_unquantize_h263_inter = dct_unquantize_h263_inter_c;
    s->dct_unquantize_mpeg1_intra = dct_unquantize_mpeg1_intra_c;
    s->dct_unquantize_mpeg1_inter = dct_unquantize_mpeg1_inter_c;
    s->dct_unquantize_mpeg2_intra = dct_unquantize_mpeg2_intra_c;
    if(s->flags & CODEC_FLAG_BITEXACT)
        s->dct_unquantize_mpeg2_intra = dct_unquantize_mpeg2_intra_bitexact;
    s->dct_unquantize_mpeg2_inter = dct_unquantize_mpeg2_inter_c;

#if defined(HAVE_MMX)
    MPV_common_init_mmx(s);
#elif defined(ARCH_ALPHA)
    MPV_common_init_axp(s);
#elif defined(HAVE_MLIB)
    MPV_common_init_mlib(s);
#elif defined(HAVE_MMI)
    MPV_common_init_mmi(s);
#elif defined(ARCH_ARMV4L)
    MPV_common_init_armv4l(s);
#elif defined(HAVE_ALTIVEC)
    MPV_common_init_altivec(s);
#elif defined(ARCH_BFIN)
    MPV_common_init_bfin(s);
#endif

    /* load & permutate scantables
       note: only wmv uses different ones
    */
    if(s->alternate_scan){
        ff_init_scantable(s->dsp.idct_permutation, &s->inter_scantable  , ff_alternate_vertical_scan);
        ff_init_scantable(s->dsp.idct_permutation, &s->intra_scantable  , ff_alternate_vertical_scan);
    }else{
        ff_init_scantable(s->dsp.idct_permutation, &s->inter_scantable  , ff_zigzag_direct);
        ff_init_scantable(s->dsp.idct_permutation, &s->intra_scantable  , ff_zigzag_direct);
    }
    ff_init_scantable(s->dsp.idct_permutation, &s->intra_h_scantable, ff_alternate_horizontal_scan);
    ff_init_scantable(s->dsp.idct_permutation, &s->intra_v_scantable, ff_alternate_vertical_scan);

    return 0;
}
175
/* shallow-copies src into dst and marks dst as a copy;
 * dst does not take ownership of the underlying buffers */
void copy_picture(Picture *dst, Picture *src){
    *dst = *src;
    dst->type= FF_BUFFER_TYPE_COPY;
}
180
/**
 * allocates a Picture
 * The pixels are allocated/set by calling get_buffer() if shared=0.
 * @return 0 on success, -1 on failure (any buffer obtained from
 *         get_buffer() is released again via the fail label)
 */
int alloc_picture(MpegEncContext *s, Picture *pic, int shared){
    const int big_mb_num= s->mb_stride*(s->mb_height+1) + 1; //the +1 is needed so memset(,,stride*height) does not sig11
    const int mb_array_size= s->mb_stride*s->mb_height;
    const int b8_array_size= s->b8_stride*s->mb_height*2;
    const int b4_array_size= s->b4_stride*s->mb_height*4;
    int i;
    int r= -1;

    if(shared){
        /* the caller provides the pixel buffers; only mark the type */
        assert(pic->data[0]);
        assert(pic->type == 0 || pic->type == FF_BUFFER_TYPE_SHARED);
        pic->type= FF_BUFFER_TYPE_SHARED;
    }else{
        assert(!pic->data[0]);

        r= s->avctx->get_buffer(s->avctx, (AVFrame*)pic);

        if(r<0 || !pic->age || !pic->type || !pic->data[0]){
            av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed (%d %d %d %p)\n", r, pic->age, pic->type, pic->data[0]);
            return -1;
        }

        /* the rest of the code assumes linesizes stay constant across buffers */
        if(s->linesize && (s->linesize != pic->linesize[0] || s->uvlinesize != pic->linesize[1])){
            av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed (stride changed)\n");
            s->avctx->release_buffer(s->avctx, (AVFrame*)pic);
            return -1;
        }

        if(pic->linesize[1] != pic->linesize[2]){
            av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed (uv stride mismatch)\n");
            s->avctx->release_buffer(s->avctx, (AVFrame*)pic);
            return -1;
        }

        s->linesize  = pic->linesize[0];
        s->uvlinesize= pic->linesize[1];
    }

    /* per-picture side tables are allocated only on first use of this slot */
    if(pic->qscale_table==NULL){
        if (s->encoding) {
            CHECKED_ALLOCZ(pic->mb_var   , mb_array_size * sizeof(int16_t))
            CHECKED_ALLOCZ(pic->mc_mb_var, mb_array_size * sizeof(int16_t))
            CHECKED_ALLOCZ(pic->mb_mean  , mb_array_size * sizeof(int8_t))
        }

        CHECKED_ALLOCZ(pic->mbskip_table , mb_array_size * sizeof(uint8_t)+2) //the +2 is for the slice end check
        CHECKED_ALLOCZ(pic->qscale_table , mb_array_size * sizeof(uint8_t))
        CHECKED_ALLOCZ(pic->mb_type_base , (big_mb_num + s->mb_stride) * sizeof(uint32_t))
        pic->mb_type= pic->mb_type_base + 2*s->mb_stride+1;
        if(s->out_format == FMT_H264){
            for(i=0; i<2; i++){
                CHECKED_ALLOCZ(pic->motion_val_base[i], 2 * (b4_array_size+4)  * sizeof(int16_t))
                pic->motion_val[i]= pic->motion_val_base[i]+4;
                CHECKED_ALLOCZ(pic->ref_index[i], b8_array_size * sizeof(uint8_t))
            }
            pic->motion_subsample_log2= 2; // b4-sized motion grid
        }else if(s->out_format == FMT_H263 || s->encoding || (s->avctx->debug&FF_DEBUG_MV) || (s->avctx->debug_mv)){
            for(i=0; i<2; i++){
                CHECKED_ALLOCZ(pic->motion_val_base[i], 2 * (b8_array_size+4) * sizeof(int16_t))
                pic->motion_val[i]= pic->motion_val_base[i]+4;
                CHECKED_ALLOCZ(pic->ref_index[i], b8_array_size * sizeof(uint8_t))
            }
            pic->motion_subsample_log2= 3; // b8-sized motion grid
        }
        if(s->avctx->debug&FF_DEBUG_DCT_COEFF) {
            CHECKED_ALLOCZ(pic->dct_coeff, 64 * mb_array_size * sizeof(DCTELEM)*6)
        }
        pic->qstride= s->mb_stride;
        CHECKED_ALLOCZ(pic->pan_scan , 1 * sizeof(AVPanScan))
    }

    /* It might be nicer if the application would keep track of these
     * but it would require an API change. */
    memmove(s->prev_pict_types+1, s->prev_pict_types, PREV_PICT_TYPES_BUFFER_SIZE-1);
    s->prev_pict_types[0]= s->pict_type;
    if(pic->age < PREV_PICT_TYPES_BUFFER_SIZE && s->prev_pict_types[pic->age] == B_TYPE)
        pic->age= INT_MAX; // Skipped MBs in B-frames are quite rare in MPEG-1/2 and it is a bit tricky to skip them anyway.

    return 0;
fail: //for the CHECKED_ALLOCZ macro
    if(r>=0)
        s->avctx->release_buffer(s->avctx, (AVFrame*)pic);
    return -1;
}
269
/**
 * deallocates a picture
 * releases the pixel buffers (unless they are shared, i.e. owned by the
 * caller) and frees all per-picture side tables.
 */
static void free_picture(MpegEncContext *s, Picture *pic){
    int i;

    if(pic->data[0] && pic->type!=FF_BUFFER_TYPE_SHARED){
        s->avctx->release_buffer(s->avctx, (AVFrame*)pic);
    }

    av_freep(&pic->mb_var);
    av_freep(&pic->mc_mb_var);
    av_freep(&pic->mb_mean);
    av_freep(&pic->mbskip_table);
    av_freep(&pic->qscale_table);
    av_freep(&pic->mb_type_base);
    av_freep(&pic->dct_coeff);
    av_freep(&pic->pan_scan);
    pic->mb_type= NULL; // pointed into mb_type_base, which was just freed
    for(i=0; i<2; i++){
        av_freep(&pic->motion_val_base[i]);
        av_freep(&pic->ref_index[i]);
    }

    if(pic->type == FF_BUFFER_TYPE_SHARED){
        /* shared pixel data is owned by the caller; just forget the pointers */
        for(i=0; i<4; i++){
            pic->base[i]=
            pic->data[i]= NULL;
        }
        pic->type= 0;
    }
}
302
/* allocates the per-thread scratch buffers of a (possibly duplicated)
 * context; base is currently unused. Returns 0 on success, -1 on failure
 * (partially allocated buffers are freed later by MPV_common_end()). */
static int init_duplicate_context(MpegEncContext *s, MpegEncContext *base){
    int i;

    // edge emu needs blocksize + filter length - 1 (=17x17 for halfpel / 21x21 for h264)
    CHECKED_ALLOCZ(s->allocated_edge_emu_buffer, (s->width+64)*2*21*2); //(width + edge + align)*interlaced*MBsize*tolerance
    s->edge_emu_buffer= s->allocated_edge_emu_buffer + (s->width+64)*2*21;

     //FIXME should be linesize instead of s->width*2 but that is not known before get_buffer()
    CHECKED_ALLOCZ(s->me.scratchpad,  (s->width+64)*4*16*2*sizeof(uint8_t))
    s->rd_scratchpad=   s->me.scratchpad;  // the scratchpads alias one allocation
    s->b_scratchpad=    s->me.scratchpad;
    s->obmc_scratchpad= s->me.scratchpad + 16;
    if (s->encoding) {
        CHECKED_ALLOCZ(s->me.map      , ME_MAP_SIZE*sizeof(uint32_t))
        CHECKED_ALLOCZ(s->me.score_map, ME_MAP_SIZE*sizeof(uint32_t))
        if(s->avctx->noise_reduction){
            CHECKED_ALLOCZ(s->dct_error_sum, 2 * 64 * sizeof(int))
        }
    }
    CHECKED_ALLOCZ(s->blocks, 64*12*2 * sizeof(DCTELEM))
    s->block= s->blocks[0];

    for(i=0;i<12;i++){
        s->pblocks[i] = (short *)(&s->block[i]);
    }
    return 0;
fail:
    return -1; //free() through MPV_common_end()
}
332
333 static void free_duplicate_context(MpegEncContext *s){
334     if(s==NULL) return;
335
336     av_freep(&s->allocated_edge_emu_buffer); s->edge_emu_buffer= NULL;
337     av_freep(&s->me.scratchpad);
338     s->rd_scratchpad=
339     s->b_scratchpad=
340     s->obmc_scratchpad= NULL;
341
342     av_freep(&s->dct_error_sum);
343     av_freep(&s->me.map);
344     av_freep(&s->me.score_map);
345     av_freep(&s->blocks);
346     s->block= NULL;
347 }
348
349 static void backup_duplicate_context(MpegEncContext *bak, MpegEncContext *src){
350 #define COPY(a) bak->a= src->a
351     COPY(allocated_edge_emu_buffer);
352     COPY(edge_emu_buffer);
353     COPY(me.scratchpad);
354     COPY(rd_scratchpad);
355     COPY(b_scratchpad);
356     COPY(obmc_scratchpad);
357     COPY(me.map);
358     COPY(me.score_map);
359     COPY(blocks);
360     COPY(block);
361     COPY(start_mb_y);
362     COPY(end_mb_y);
363     COPY(me.map_generation);
364     COPY(pb);
365     COPY(dct_error_sum);
366     COPY(dct_count[0]);
367     COPY(dct_count[1]);
368 #undef COPY
369 }
370
371 void ff_update_duplicate_context(MpegEncContext *dst, MpegEncContext *src){
372     MpegEncContext bak;
373     int i;
374     //FIXME copy only needed parts
375 //START_TIMER
376     backup_duplicate_context(&bak, dst);
377     memcpy(dst, src, sizeof(MpegEncContext));
378     backup_duplicate_context(dst, &bak);
379     for(i=0;i<12;i++){
380         dst->pblocks[i] = (short *)(&dst->block[i]);
381     }
382 //STOP_TIMER("update_duplicate_context") //about 10k cycles / 0.01 sec for 1000frames on 1ghz with 2 threads
383 }
384
385 /**
386  * sets the given MpegEncContext to common defaults (same for encoding and decoding).
387  * the changed fields will not depend upon the prior state of the MpegEncContext.
388  */
389 void MPV_common_defaults(MpegEncContext *s){
390     s->y_dc_scale_table=
391     s->c_dc_scale_table= ff_mpeg1_dc_scale_table;
392     s->chroma_qscale_table= ff_default_chroma_qscale_table;
393     s->progressive_frame= 1;
394     s->progressive_sequence= 1;
395     s->picture_structure= PICT_FRAME;
396
397     s->coded_picture_number = 0;
398     s->picture_number = 0;
399     s->input_picture_number = 0;
400
401     s->picture_in_gop_number = 0;
402
403     s->f_code = 1;
404     s->b_code = 1;
405 }
406
/**
 * sets the given MpegEncContext to defaults for decoding.
 * the changed fields will not depend upon the prior state of the MpegEncContext.
 */
void MPV_decode_defaults(MpegEncContext *s){
    /* decoding currently needs no extra defaults beyond the common ones */
    MPV_common_defaults(s);
}
414
/**
 * init common structure for both encoder and decoder.
 * this assumes that some variables like width/height are already set.
 * @return 0 on success, -1 on failure (all partial allocations are
 *         released through MPV_common_end())
 */
int MPV_common_init(MpegEncContext *s)
{
    int y_size, c_size, yc_size, i, mb_array_size, mv_table_size, x, y, threads;

    s->mb_height = (s->height + 15) / 16;

    if(s->avctx->thread_count > MAX_THREADS || (s->avctx->thread_count > s->mb_height && s->mb_height)){
        av_log(s->avctx, AV_LOG_ERROR, "too many threads\n");
        return -1;
    }

    if((s->width || s->height) && avcodec_check_dimensions(s->avctx, s->width, s->height))
        return -1;

    dsputil_init(&s->dsp, s->avctx);
    ff_dct_common_init(s);

    s->flags= s->avctx->flags;
    s->flags2= s->avctx->flags2;

    /* macroblock-level geometry; the strides include one column of padding */
    s->mb_width  = (s->width  + 15) / 16;
    s->mb_stride = s->mb_width + 1;
    s->b8_stride = s->mb_width*2 + 1;
    s->b4_stride = s->mb_width*4 + 1;
    mb_array_size= s->mb_height * s->mb_stride;
    mv_table_size= (s->mb_height+2) * s->mb_stride + 1;

    /* set chroma shifts */
    avcodec_get_chroma_sub_sample(s->avctx->pix_fmt,&(s->chroma_x_shift),
                                                    &(s->chroma_y_shift) );

    /* set default edge pos, will be overriden in decode_header if needed */
    s->h_edge_pos= s->mb_width*16;
    s->v_edge_pos= s->mb_height*16;

    s->mb_num = s->mb_width * s->mb_height;

    /* wrap values for the 4 luma + 2 chroma blocks of a macroblock */
    s->block_wrap[0]=
    s->block_wrap[1]=
    s->block_wrap[2]=
    s->block_wrap[3]= s->b8_stride;
    s->block_wrap[4]=
    s->block_wrap[5]= s->mb_stride;

    y_size = s->b8_stride * (2 * s->mb_height + 1);
    c_size = s->mb_stride * (s->mb_height + 1);
    yc_size = y_size + 2 * c_size;

    /* convert fourcc to upper case */
    s->codec_tag=          toupper( s->avctx->codec_tag     &0xFF)
                        + (toupper((s->avctx->codec_tag>>8 )&0xFF)<<8 )
                        + (toupper((s->avctx->codec_tag>>16)&0xFF)<<16)
                        + (toupper((s->avctx->codec_tag>>24)&0xFF)<<24);

    s->stream_codec_tag=          toupper( s->avctx->stream_codec_tag     &0xFF)
                               + (toupper((s->avctx->stream_codec_tag>>8 )&0xFF)<<8 )
                               + (toupper((s->avctx->stream_codec_tag>>16)&0xFF)<<16)
                               + (toupper((s->avctx->stream_codec_tag>>24)&0xFF)<<24);

    s->avctx->coded_frame= (AVFrame*)&s->current_picture;

    CHECKED_ALLOCZ(s->mb_index2xy, (s->mb_num+1)*sizeof(int)) //error ressilience code looks cleaner with this
    for(y=0; y<s->mb_height; y++){
        for(x=0; x<s->mb_width; x++){
            s->mb_index2xy[ x + y*s->mb_width ] = x + y*s->mb_stride;
        }
    }
    s->mb_index2xy[ s->mb_height*s->mb_width ] = (s->mb_height-1)*s->mb_stride + s->mb_width; //FIXME really needed?

    if (s->encoding) {
        /* Allocate MV tables */
        CHECKED_ALLOCZ(s->p_mv_table_base            , mv_table_size * 2 * sizeof(int16_t))
        CHECKED_ALLOCZ(s->b_forw_mv_table_base       , mv_table_size * 2 * sizeof(int16_t))
        CHECKED_ALLOCZ(s->b_back_mv_table_base       , mv_table_size * 2 * sizeof(int16_t))
        CHECKED_ALLOCZ(s->b_bidir_forw_mv_table_base , mv_table_size * 2 * sizeof(int16_t))
        CHECKED_ALLOCZ(s->b_bidir_back_mv_table_base , mv_table_size * 2 * sizeof(int16_t))
        CHECKED_ALLOCZ(s->b_direct_mv_table_base     , mv_table_size * 2 * sizeof(int16_t))
        /* the working pointers skip one row + one column of padding */
        s->p_mv_table           = s->p_mv_table_base            + s->mb_stride + 1;
        s->b_forw_mv_table      = s->b_forw_mv_table_base       + s->mb_stride + 1;
        s->b_back_mv_table      = s->b_back_mv_table_base       + s->mb_stride + 1;
        s->b_bidir_forw_mv_table= s->b_bidir_forw_mv_table_base + s->mb_stride + 1;
        s->b_bidir_back_mv_table= s->b_bidir_back_mv_table_base + s->mb_stride + 1;
        s->b_direct_mv_table    = s->b_direct_mv_table_base     + s->mb_stride + 1;

        if(s->msmpeg4_version){
            CHECKED_ALLOCZ(s->ac_stats, 2*2*(MAX_LEVEL+1)*(MAX_RUN+1)*2*sizeof(int));
        }
        CHECKED_ALLOCZ(s->avctx->stats_out, 256);

        /* Allocate MB type table */
        CHECKED_ALLOCZ(s->mb_type  , mb_array_size * sizeof(uint16_t)) //needed for encoding

        CHECKED_ALLOCZ(s->lambda_table, mb_array_size * sizeof(int))

        CHECKED_ALLOCZ(s->q_intra_matrix, 64*32 * sizeof(int))
        CHECKED_ALLOCZ(s->q_inter_matrix, 64*32 * sizeof(int))
        CHECKED_ALLOCZ(s->q_intra_matrix16, 64*32*2 * sizeof(uint16_t))
        CHECKED_ALLOCZ(s->q_inter_matrix16, 64*32*2 * sizeof(uint16_t))
        CHECKED_ALLOCZ(s->input_picture, MAX_PICTURE_COUNT * sizeof(Picture*))
        CHECKED_ALLOCZ(s->reordered_input_picture, MAX_PICTURE_COUNT * sizeof(Picture*))

        if(s->avctx->noise_reduction){
            CHECKED_ALLOCZ(s->dct_offset, 2 * 64 * sizeof(uint16_t))
        }
    }
    CHECKED_ALLOCZ(s->picture, MAX_PICTURE_COUNT * sizeof(Picture))

    CHECKED_ALLOCZ(s->error_status_table, mb_array_size*sizeof(uint8_t))

    if(s->codec_id==CODEC_ID_MPEG4 || (s->flags & CODEC_FLAG_INTERLACED_ME)){
        /* interlaced direct mode decoding tables */
            for(i=0; i<2; i++){
                int j, k;
                for(j=0; j<2; j++){
                    for(k=0; k<2; k++){
                        CHECKED_ALLOCZ(s->b_field_mv_table_base[i][j][k]     , mv_table_size * 2 * sizeof(int16_t))
                        s->b_field_mv_table[i][j][k]    = s->b_field_mv_table_base[i][j][k]     + s->mb_stride + 1;
                    }
                    CHECKED_ALLOCZ(s->b_field_select_table[i][j]     , mb_array_size * 2 * sizeof(uint8_t))
                    CHECKED_ALLOCZ(s->p_field_mv_table_base[i][j]     , mv_table_size * 2 * sizeof(int16_t))
                    s->p_field_mv_table[i][j]    = s->p_field_mv_table_base[i][j]     + s->mb_stride + 1;
                }
                CHECKED_ALLOCZ(s->p_field_select_table[i]      , mb_array_size * 2 * sizeof(uint8_t))
            }
    }
    if (s->out_format == FMT_H263) {
        /* ac values */
        CHECKED_ALLOCZ(s->ac_val_base, yc_size * sizeof(int16_t) * 16);
        s->ac_val[0] = s->ac_val_base + s->b8_stride + 1;
        s->ac_val[1] = s->ac_val_base + y_size + s->mb_stride + 1;
        s->ac_val[2] = s->ac_val[1] + c_size;

        /* cbp values */
        CHECKED_ALLOCZ(s->coded_block_base, y_size);
        s->coded_block= s->coded_block_base + s->b8_stride + 1;

        /* cbp, ac_pred, pred_dir */
        CHECKED_ALLOCZ(s->cbp_table  , mb_array_size * sizeof(uint8_t))
        CHECKED_ALLOCZ(s->pred_dir_table, mb_array_size * sizeof(uint8_t))
    }

    if (s->h263_pred || s->h263_plus || !s->encoding) {
        /* dc values */
        //MN: we need these for error resilience of intra-frames
        CHECKED_ALLOCZ(s->dc_val_base, yc_size * sizeof(int16_t));
        s->dc_val[0] = s->dc_val_base + s->b8_stride + 1;
        s->dc_val[1] = s->dc_val_base + y_size + s->mb_stride + 1;
        s->dc_val[2] = s->dc_val[1] + c_size;
        for(i=0;i<yc_size;i++)
            s->dc_val_base[i] = 1024; // neutral DC predictor value
    }

    /* which mb is a intra block */
    CHECKED_ALLOCZ(s->mbintra_table, mb_array_size);
    memset(s->mbintra_table, 1, mb_array_size);

    /* init macroblock skip table */
    CHECKED_ALLOCZ(s->mbskip_table, mb_array_size+2);
    //Note the extra padding is for a quicker mpeg4 slice_end detection
    CHECKED_ALLOCZ(s->prev_pict_types, PREV_PICT_TYPES_BUFFER_SIZE);

    s->parse_context.state= -1;
    if((s->avctx->debug&(FF_DEBUG_VIS_QP|FF_DEBUG_VIS_MB_TYPE)) || (s->avctx->debug_mv)){
       s->visualization_buffer[0] = av_malloc((s->mb_width*16 + 2*EDGE_WIDTH) * s->mb_height*16 + 2*EDGE_WIDTH);
       s->visualization_buffer[1] = av_malloc((s->mb_width*8 + EDGE_WIDTH) * s->mb_height*8 + EDGE_WIDTH);
       s->visualization_buffer[2] = av_malloc((s->mb_width*8 + EDGE_WIDTH) * s->mb_height*8 + EDGE_WIDTH);
    }

    s->context_initialized = 1;

    /* duplicate the context for each additional thread and assign every
       thread a contiguous band of macroblock rows */
    s->thread_context[0]= s;
    threads = s->avctx->thread_count;

    for(i=1; i<threads; i++){
        s->thread_context[i]= av_malloc(sizeof(MpegEncContext));
        memcpy(s->thread_context[i], s, sizeof(MpegEncContext));
    }

    for(i=0; i<threads; i++){
        if(init_duplicate_context(s->thread_context[i], s) < 0)
           goto fail;
        s->thread_context[i]->start_mb_y= (s->mb_height*(i  ) + s->avctx->thread_count/2) / s->avctx->thread_count;
        s->thread_context[i]->end_mb_y  = (s->mb_height*(i+1) + s->avctx->thread_count/2) / s->avctx->thread_count;
    }

    return 0;
 fail:
    MPV_common_end(s);
    return -1;
}
609
/* frees everything allocated by MPV_common_init() (common to encoder and decoder) */
void MPV_common_end(MpegEncContext *s)
{
    int i, j, k;

    for(i=0; i<s->avctx->thread_count; i++){
        free_duplicate_context(s->thread_context[i]);
    }
    for(i=1; i<s->avctx->thread_count; i++){ // [0] is s itself, not freed here
        av_freep(&s->thread_context[i]);
    }

    av_freep(&s->parse_context.buffer);
    s->parse_context.buffer_size=0;

    /* encoder MV tables; the working pointers are offsets into the bases */
    av_freep(&s->mb_type);
    av_freep(&s->p_mv_table_base);
    av_freep(&s->b_forw_mv_table_base);
    av_freep(&s->b_back_mv_table_base);
    av_freep(&s->b_bidir_forw_mv_table_base);
    av_freep(&s->b_bidir_back_mv_table_base);
    av_freep(&s->b_direct_mv_table_base);
    s->p_mv_table= NULL;
    s->b_forw_mv_table= NULL;
    s->b_back_mv_table= NULL;
    s->b_bidir_forw_mv_table= NULL;
    s->b_bidir_back_mv_table= NULL;
    s->b_direct_mv_table= NULL;
    for(i=0; i<2; i++){
        for(j=0; j<2; j++){
            for(k=0; k<2; k++){
                av_freep(&s->b_field_mv_table_base[i][j][k]);
                s->b_field_mv_table[i][j][k]=NULL;
            }
            av_freep(&s->b_field_select_table[i][j]);
            av_freep(&s->p_field_mv_table_base[i][j]);
            s->p_field_mv_table[i][j]=NULL;
        }
        av_freep(&s->p_field_select_table[i]);
    }

    av_freep(&s->dc_val_base);
    av_freep(&s->ac_val_base);
    av_freep(&s->coded_block_base);
    av_freep(&s->mbintra_table);
    av_freep(&s->cbp_table);
    av_freep(&s->pred_dir_table);

    av_freep(&s->mbskip_table);
    av_freep(&s->prev_pict_types);
    av_freep(&s->bitstream_buffer);
    s->allocated_bitstream_buffer_size=0;

    av_freep(&s->avctx->stats_out);
    av_freep(&s->ac_stats);
    av_freep(&s->error_status_table);
    av_freep(&s->mb_index2xy);
    av_freep(&s->lambda_table);
    av_freep(&s->q_intra_matrix);
    av_freep(&s->q_inter_matrix);
    av_freep(&s->q_intra_matrix16);
    av_freep(&s->q_inter_matrix16);
    av_freep(&s->input_picture);
    av_freep(&s->reordered_input_picture);
    av_freep(&s->dct_offset);

    if(s->picture){
        for(i=0; i<MAX_PICTURE_COUNT; i++){
            free_picture(s, &s->picture[i]);
        }
    }
    av_freep(&s->picture);
    s->context_initialized = 0;
    s->last_picture_ptr=
    s->next_picture_ptr=
    s->current_picture_ptr= NULL;
    s->linesize= s->uvlinesize= 0;

    for(i=0; i<3; i++)
        av_freep(&s->visualization_buffer[i]);

    avcodec_default_free_buffers(s->avctx);
}
693
/**
 * initializes the max_level[], max_run[] and index_run[] tables of rl.
 * If static_store is non-NULL the tables are placed there (per-"last" slab
 * layout: max_level at offset 0, max_run at MAX_RUN+1, index_run at
 * MAX_RUN+MAX_LEVEL+2) and the call becomes a no-op once initialized;
 * otherwise each table is allocated with av_malloc().
 */
void init_rl(RLTable *rl, uint8_t static_store[2][2*MAX_RUN + MAX_LEVEL + 3])
{
    int8_t max_level[MAX_RUN+1], max_run[MAX_LEVEL+1];
    uint8_t index_run[MAX_RUN+1];
    int last, run, level, start, end, i;

    /* If table is static, we can quit if rl->max_level[0] is not NULL */
    if(static_store && rl->max_level[0])
        return;

    /* compute max_level[], max_run[] and index_run[] */
    for(last=0;last<2;last++) {
        /* entries [0, rl->last) are non-last codes, [rl->last, rl->n) are last */
        if (last == 0) {
            start = 0;
            end = rl->last;
        } else {
            start = rl->last;
            end = rl->n;
        }

        memset(max_level, 0, MAX_RUN + 1);
        memset(max_run, 0, MAX_LEVEL + 1);
        memset(index_run, rl->n, MAX_RUN + 1); // rl->n marks "unset"
        for(i=start;i<end;i++) {
            run = rl->table_run[i];
            level = rl->table_level[i];
            if (index_run[run] == rl->n) // first entry with this run value
                index_run[run] = i;
            if (level > max_level[run])
                max_level[run] = level;
            if (run > max_run[level])
                max_run[level] = run;
        }
        if(static_store)
            rl->max_level[last] = static_store[last];
        else
            rl->max_level[last] = av_malloc(MAX_RUN + 1);
        memcpy(rl->max_level[last], max_level, MAX_RUN + 1);
        if(static_store)
            rl->max_run[last] = static_store[last] + MAX_RUN + 1;
        else
            rl->max_run[last] = av_malloc(MAX_LEVEL + 1);
        memcpy(rl->max_run[last], max_run, MAX_LEVEL + 1);
        if(static_store)
            rl->index_run[last] = static_store[last] + MAX_RUN + MAX_LEVEL + 2;
        else
            rl->index_run[last] = av_malloc(MAX_RUN + 1);
        memcpy(rl->index_run[last], index_run, MAX_RUN + 1);
    }
}
744
/**
 * initializes rl->vlc and builds one pre-dequantized RL_VLC_ELEM table per
 * qscale (0..31), so the decoder can look up run, dequantized level and
 * code length in a single table access.
 */
void init_vlc_rl(RLTable *rl, int use_static)
{
    int i, q;

    /* Return if static table is already initialized */
    if(use_static && rl->rl_vlc[0])
        return;

    init_vlc(&rl->vlc, 9, rl->n + 1,
             &rl->table_vlc[0][1], 4, 2,
             &rl->table_vlc[0][0], 4, 2, use_static);


    for(q=0; q<32; q++){
        int qmul= q*2;
        int qadd= (q-1)|1;

        if(q==0){ // qscale 0: keep the level unscaled
            qmul=1;
            qadd=0;
        }
        if(use_static)
            rl->rl_vlc[q]= av_mallocz_static(rl->vlc.table_size*sizeof(RL_VLC_ELEM));
        else
            rl->rl_vlc[q]= av_malloc(rl->vlc.table_size*sizeof(RL_VLC_ELEM));
        for(i=0; i<rl->vlc.table_size; i++){
            int code= rl->vlc.table[i][0];
            int len = rl->vlc.table[i][1];
            int level, run;

            if(len==0){ // illegal code
                run= 66;
                level= MAX_LEVEL;
            }else if(len<0){ //more bits needed
                run= 0;
                level= code;
            }else{
                if(code==rl->n){ //esc
                    run= 66;
                    level= 0;
                }else{
                    run=   rl->table_run  [code] + 1;
                    level= rl->table_level[code] * qmul + qadd; // dequantize at build time
                    if(code >= rl->last) run+=192; // flags a "last" coefficient
                }
            }
            rl->rl_vlc[q][i].len= len;
            rl->rl_vlc[q][i].level= level;
            rl->rl_vlc[q][i].run= run;
        }
    }
}
797
/* draw the edges of width 'w' of an image of size width x height:
 * replicates the border pixels outward into the w-pixel frame around
 * the image (rows, columns, then the four corner areas) */
//FIXME check that this is ok for mpeg4 interlaced
static void draw_edges_c(uint8_t *buf, int wrap, int width, int height, int w)
{
    uint8_t *bottom = buf + (height - 1) * wrap;
    uint8_t *row;
    int i;

    /* replicate the first and last rows into the w rows above/below */
    for (i = 1; i <= w; i++) {
        memcpy(buf    - i * wrap, buf,    width);
        memcpy(bottom + i * wrap, bottom, width);
    }

    /* replicate the first and last pixel of each row sideways */
    row = buf;
    for (i = 0; i < height; i++) {
        memset(row - w,     row[0],         w);
        memset(row + width, row[width - 1], w);
        row += wrap;
    }

    /* fill the four corner areas from the corner pixels */
    for (i = 1; i <= w; i++) {
        memset(buf    - i * wrap - w,     buf[0],            w); /* top left */
        memset(buf    - i * wrap + width, buf[width - 1],    w); /* top right */
        memset(bottom + i * wrap - w,     bottom[0],         w); /* bottom left */
        memset(bottom + i * wrap + width, bottom[width - 1], w); /* bottom right */
    }
}
826
827 int ff_find_unused_picture(MpegEncContext *s, int shared){
828     int i;
829
830     if(shared){
831         for(i=0; i<MAX_PICTURE_COUNT; i++){
832             if(s->picture[i].data[0]==NULL && s->picture[i].type==0) return i;
833         }
834     }else{
835         for(i=0; i<MAX_PICTURE_COUNT; i++){
836             if(s->picture[i].data[0]==NULL && s->picture[i].type!=0) return i; //FIXME
837         }
838         for(i=0; i<MAX_PICTURE_COUNT; i++){
839             if(s->picture[i].data[0]==NULL) return i;
840         }
841     }
842
843     av_log(s->avctx, AV_LOG_FATAL, "Internal error, picture buffer overflow\n");
844     /* We could return -1, but the codec would crash trying to draw into a
845      * non-existing frame anyway. This is safer than waiting for a random crash.
846      * Also the return of this is never useful, an encoder must only allocate
847      * as much as allowed in the specification. This has no relationship to how
848      * much libavcodec could allocate (and MAX_PICTURE_COUNT is always large
849      * enough for such valid streams).
850      * Plus, a decoder has to check stream validity and remove frames if too
851      * many reference frames are around. Waiting for "OOM" is not correct at
852      * all. Similarly, missing reference frames have to be replaced by
853      * interpolated/MC frames, anything else is a bug in the codec ...
854      */
855     abort();
856     return -1;
857 }
858
859 static void update_noise_reduction(MpegEncContext *s){
860     int intra, i;
861
862     for(intra=0; intra<2; intra++){
863         if(s->dct_count[intra] > (1<<16)){
864             for(i=0; i<64; i++){
865                 s->dct_error_sum[intra][i] >>=1;
866             }
867             s->dct_count[intra] >>= 1;
868         }
869
870         for(i=0; i<64; i++){
871             s->dct_offset[intra][i]= (s->avctx->noise_reduction * s->dct_count[intra] + s->dct_error_sum[intra][i]/2) / (s->dct_error_sum[intra][i]+1);
872         }
873     }
874 }
875
/**
 * generic function for encode/decode called after coding/decoding the header and before a frame is coded/decoded.
 * Releases stale reference pictures, (re)allocates the current picture,
 * and rotates the last/next picture pointers.
 * @param s the context whose picture state is updated
 * @param avctx the codec context used for buffer release and logging
 * @return 0 on success, -1 if allocating the current picture failed
 *         (with XVMC acceleration, the result of XVMC_field_start() instead)
 */
int MPV_frame_start(MpegEncContext *s, AVCodecContext *avctx)
{
    int i;
    AVFrame *pic;
    s->mb_skipped = 0;

    assert(s->last_picture_ptr==NULL || s->out_format != FMT_H264 || s->codec_id == CODEC_ID_SVQ3);

    /* mark&release old frames */
    if (s->pict_type != B_TYPE && s->last_picture_ptr && s->last_picture_ptr != s->next_picture_ptr && s->last_picture_ptr->data[0]) {
      if(s->out_format != FMT_H264 || s->codec_id == CODEC_ID_SVQ3){
        avctx->release_buffer(avctx, (AVFrame*)s->last_picture_ptr);

        /* release forgotten pictures */
        /* if(mpeg124/h263) */
        if(!s->encoding){
            for(i=0; i<MAX_PICTURE_COUNT; i++){
                if(s->picture[i].data[0] && &s->picture[i] != s->next_picture_ptr && s->picture[i].reference){
                    av_log(avctx, AV_LOG_ERROR, "releasing zombie picture\n");
                    avctx->release_buffer(avctx, (AVFrame*)&s->picture[i]);
                }
            }
        }
      }
    }
alloc:
    if(!s->encoding){
        /* release non reference frames */
        for(i=0; i<MAX_PICTURE_COUNT; i++){
            if(s->picture[i].data[0] && !s->picture[i].reference /*&& s->picture[i].type!=FF_BUFFER_TYPE_SHARED*/){
                s->avctx->release_buffer(s->avctx, (AVFrame*)&s->picture[i]);
            }
        }

        if(s->current_picture_ptr && s->current_picture_ptr->data[0]==NULL)
            pic= (AVFrame*)s->current_picture_ptr; //we already have an unused image (maybe it was set before reading the header)
        else{
            i= ff_find_unused_picture(s, 0);
            pic= (AVFrame*)&s->picture[i];
        }

        /* reference flag: 0 for B frames / dropable frames; for H.264 the
           picture structure encodes which field(s) are referenced */
        pic->reference= 0;
        if (!s->dropable){
            if (s->codec_id == CODEC_ID_H264)
                pic->reference = s->picture_structure;
            else if (s->pict_type != B_TYPE)
                pic->reference = 3;
        }

        pic->coded_picture_number= s->coded_picture_number++;

        if( alloc_picture(s, (Picture*)pic, 0) < 0)
            return -1;

        s->current_picture_ptr= (Picture*)pic;
        s->current_picture_ptr->top_field_first= s->top_field_first; //FIXME use only the vars from current_pic
        s->current_picture_ptr->interlaced_frame= !s->progressive_frame && !s->progressive_sequence;
    }

    s->current_picture_ptr->pict_type= s->pict_type;
//    if(s->flags && CODEC_FLAG_QSCALE)
  //      s->current_picture_ptr->quality= s->new_picture_ptr->quality;
    s->current_picture_ptr->key_frame= s->pict_type == I_TYPE;

    copy_picture(&s->current_picture, s->current_picture_ptr);

    /* rotate the reference pointers: the previous 'next' picture becomes
       'last'; non-dropable frames become the new 'next' reference */
    if (s->pict_type != B_TYPE) {
        s->last_picture_ptr= s->next_picture_ptr;
        if(!s->dropable)
            s->next_picture_ptr= s->current_picture_ptr;
    }
/*    av_log(s->avctx, AV_LOG_DEBUG, "L%p N%p C%p L%p N%p C%p type:%d drop:%d\n", s->last_picture_ptr, s->next_picture_ptr,s->current_picture_ptr,
        s->last_picture_ptr    ? s->last_picture_ptr->data[0] : NULL,
        s->next_picture_ptr    ? s->next_picture_ptr->data[0] : NULL,
        s->current_picture_ptr ? s->current_picture_ptr->data[0] : NULL,
        s->pict_type, s->dropable);*/

    if(s->last_picture_ptr) copy_picture(&s->last_picture, s->last_picture_ptr);
    if(s->next_picture_ptr) copy_picture(&s->next_picture, s->next_picture_ptr);

    /* no usable reference for a non-intra frame: warn and retry allocation */
    if(s->pict_type != I_TYPE && (s->last_picture_ptr==NULL || s->last_picture_ptr->data[0]==NULL) && !s->dropable){
        av_log(avctx, AV_LOG_ERROR, "warning: first frame is no keyframe\n");
        assert(s->pict_type != B_TYPE); //these should have been dropped if we don't have a reference
        goto alloc;
    }

    assert(s->pict_type == I_TYPE || (s->last_picture_ptr && s->last_picture_ptr->data[0]));

    /* field pictures: address a single field by doubling the linesizes and,
       for the bottom field, starting one line into the frame */
    if(s->picture_structure!=PICT_FRAME && s->out_format != FMT_H264){
        int i;
        for(i=0; i<4; i++){
            if(s->picture_structure == PICT_BOTTOM_FIELD){
                 s->current_picture.data[i] += s->current_picture.linesize[i];
            }
            s->current_picture.linesize[i] *= 2;
            s->last_picture.linesize[i] *=2;
            s->next_picture.linesize[i] *=2;
        }
    }

    s->hurry_up= s->avctx->hurry_up;
    s->error_resilience= avctx->error_resilience;

    /* set dequantizer, we can't do it during init as it might change for mpeg4
       and we can't do it in the header decode as init is not called for mpeg4 there yet */
    if(s->mpeg_quant || s->codec_id == CODEC_ID_MPEG2VIDEO){
        s->dct_unquantize_intra = s->dct_unquantize_mpeg2_intra;
        s->dct_unquantize_inter = s->dct_unquantize_mpeg2_inter;
    }else if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
        s->dct_unquantize_intra = s->dct_unquantize_h263_intra;
        s->dct_unquantize_inter = s->dct_unquantize_h263_inter;
    }else{
        s->dct_unquantize_intra = s->dct_unquantize_mpeg1_intra;
        s->dct_unquantize_inter = s->dct_unquantize_mpeg1_inter;
    }

    if(s->dct_error_sum){
        assert(s->avctx->noise_reduction && s->encoding);

        update_noise_reduction(s);
    }

#ifdef HAVE_XVMC
    if(s->avctx->xvmc_acceleration)
        return XVMC_field_start(s, avctx);
#endif
    return 0;
}
1007
/* generic function for encode/decode called after a frame has been coded/decoded */
void MPV_frame_end(MpegEncContext *s)
{
    int i;
    /* draw edge for correct motion prediction if outside */
#ifdef HAVE_XVMC
//just to make sure that all data is rendered.
    if(s->avctx->xvmc_acceleration){
        XVMC_field_end(s);
    }else
#endif
    if(s->unrestricted_mv && s->current_picture.reference && !s->intra_only && !(s->flags&CODEC_FLAG_EMU_EDGE)) {
            draw_edges(s->current_picture.data[0], s->linesize  , s->h_edge_pos   , s->v_edge_pos   , EDGE_WIDTH  );
            draw_edges(s->current_picture.data[1], s->uvlinesize, s->h_edge_pos>>1, s->v_edge_pos>>1, EDGE_WIDTH/2);
            draw_edges(s->current_picture.data[2], s->uvlinesize, s->h_edge_pos>>1, s->v_edge_pos>>1, EDGE_WIDTH/2);
    }
    emms_c();

    /* store the type and quality of this frame; NOTE(review): presumably
       consumed when coding/deciding subsequent frames — confirm at the users
       of last_pict_type/last_lambda_for */
    s->last_pict_type    = s->pict_type;
    s->last_lambda_for[s->pict_type]= s->current_picture_ptr->quality;
    if(s->pict_type!=B_TYPE){
        s->last_non_b_pict_type= s->pict_type;
    }
#if 0
        /* copy back current_picture variables */
    for(i=0; i<MAX_PICTURE_COUNT; i++){
        if(s->picture[i].data[0] == s->current_picture.data[0]){
            s->picture[i]= s->current_picture;
            break;
        }
    }
    assert(i<MAX_PICTURE_COUNT);
#endif

    if(s->encoding){
        /* release non-reference frames */
        for(i=0; i<MAX_PICTURE_COUNT; i++){
            if(s->picture[i].data[0] && !s->picture[i].reference /*&& s->picture[i].type!=FF_BUFFER_TYPE_SHARED*/){
                s->avctx->release_buffer(s->avctx, (AVFrame*)&s->picture[i]);
            }
        }
    }
    // clear copies, to avoid confusion
#if 0
    memset(&s->last_picture, 0, sizeof(Picture));
    memset(&s->next_picture, 0, sizeof(Picture));
    memset(&s->current_picture, 0, sizeof(Picture));
#endif
    s->avctx->coded_frame= (AVFrame*)s->current_picture_ptr;
}
1058
1059 /**
1060  * draws an line from (ex, ey) -> (sx, sy).
1061  * @param w width of the image
1062  * @param h height of the image
1063  * @param stride stride/linesize of the image
1064  * @param color color of the arrow
1065  */
1066 static void draw_line(uint8_t *buf, int sx, int sy, int ex, int ey, int w, int h, int stride, int color){
1067     int x, y, fr, f;
1068
1069     sx= av_clip(sx, 0, w-1);
1070     sy= av_clip(sy, 0, h-1);
1071     ex= av_clip(ex, 0, w-1);
1072     ey= av_clip(ey, 0, h-1);
1073
1074     buf[sy*stride + sx]+= color;
1075
1076     if(FFABS(ex - sx) > FFABS(ey - sy)){
1077         if(sx > ex){
1078             FFSWAP(int, sx, ex);
1079             FFSWAP(int, sy, ey);
1080         }
1081         buf+= sx + sy*stride;
1082         ex-= sx;
1083         f= ((ey-sy)<<16)/ex;
1084         for(x= 0; x <= ex; x++){
1085             y = (x*f)>>16;
1086             fr= (x*f)&0xFFFF;
1087             buf[ y   *stride + x]+= (color*(0x10000-fr))>>16;
1088             buf[(y+1)*stride + x]+= (color*         fr )>>16;
1089         }
1090     }else{
1091         if(sy > ey){
1092             FFSWAP(int, sx, ex);
1093             FFSWAP(int, sy, ey);
1094         }
1095         buf+= sx + sy*stride;
1096         ey-= sy;
1097         if(ey) f= ((ex-sx)<<16)/ey;
1098         else   f= 0;
1099         for(y= 0; y <= ey; y++){
1100             x = (y*f)>>16;
1101             fr= (y*f)&0xFFFF;
1102             buf[y*stride + x  ]+= (color*(0x10000-fr))>>16;;
1103             buf[y*stride + x+1]+= (color*         fr )>>16;;
1104         }
1105     }
1106 }
1107
/**
 * Draws an arrow from (ex, ey) -> (sx, sy).
 * @param w width of the image
 * @param h height of the image
 * @param stride stride/linesize of the image
 * @param color color of the arrow
 */
static void draw_arrow(uint8_t *buf, int sx, int sy, int ex, int ey, int w, int h, int stride, int color){
    int dx, dy;

    /* endpoints may lie a little outside the picture; draw_line clips exactly */
    sx = av_clip(sx, -100, w + 100);
    sy = av_clip(sy, -100, h + 100);
    ex = av_clip(ex, -100, w + 100);
    ey = av_clip(ey, -100, h + 100);

    dx = ex - sx;
    dy = ey - sy;

    /* only arrows longer than 3 pixels get a head; shorter ones stay plain lines */
    if (dx*dx + dy*dy > 3*3) {
        int rx =  dx + dy;
        int ry = -dx + dy;
        int length = ff_sqrt((rx*rx + ry*ry) << 8);

        //FIXME subpixel accuracy
        rx = ROUNDED_DIV(rx*3<<4, length);
        ry = ROUNDED_DIV(ry*3<<4, length);

        /* the two barbs of the head, rotated +-45 degrees from the shaft */
        draw_line(buf, sx, sy, sx + rx, sy + ry, w, h, stride, color);
        draw_line(buf, sx, sy, sx - ry, sy + rx, w, h, stride, color);
    }
    /* the shaft itself */
    draw_line(buf, sx, sy, ex, ey, w, h, stride, color);
}
1140
/**
 * Prints debugging info for the given picture.
 * Depending on the avctx->debug / avctx->debug_mv flags this emits a
 * textual per-macroblock map (skip counts, qscale, MB type) to the log
 * and/or draws motion vectors and macroblock visualizations into a copy
 * of the picture (s->visualization_buffer).
 */
void ff_print_debug_info(MpegEncContext *s, AVFrame *pict){

    if(!pict || !pict->mb_type) return;

    /* textual per-macroblock map */
    if(s->avctx->debug&(FF_DEBUG_SKIP | FF_DEBUG_QP | FF_DEBUG_MB_TYPE)){
        int x,y;

        av_log(s->avctx,AV_LOG_DEBUG,"New frame, type: ");
        switch (pict->pict_type) {
            case FF_I_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"I\n"); break;
            case FF_P_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"P\n"); break;
            case FF_B_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"B\n"); break;
            case FF_S_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"S\n"); break;
            case FF_SI_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"SI\n"); break;
            case FF_SP_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"SP\n"); break;
        }
        for(y=0; y<s->mb_height; y++){
            for(x=0; x<s->mb_width; x++){
                if(s->avctx->debug&FF_DEBUG_SKIP){
                    int count= s->mbskip_table[x + y*s->mb_stride];
                    if(count>9) count=9;
                    av_log(s->avctx, AV_LOG_DEBUG, "%1d", count);
                }
                if(s->avctx->debug&FF_DEBUG_QP){
                    av_log(s->avctx, AV_LOG_DEBUG, "%2d", pict->qscale_table[x + y*s->mb_stride]);
                }
                if(s->avctx->debug&FF_DEBUG_MB_TYPE){
                    int mb_type= pict->mb_type[x + y*s->mb_stride];
                    //Type & MV direction
                    if(IS_PCM(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "P");
                    else if(IS_INTRA(mb_type) && IS_ACPRED(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "A");
                    else if(IS_INTRA4x4(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "i");
                    else if(IS_INTRA16x16(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "I");
                    else if(IS_DIRECT(mb_type) && IS_SKIP(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "d");
                    else if(IS_DIRECT(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "D");
                    else if(IS_GMC(mb_type) && IS_SKIP(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "g");
                    else if(IS_GMC(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "G");
                    else if(IS_SKIP(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "S");
                    else if(!USES_LIST(mb_type, 1))
                        av_log(s->avctx, AV_LOG_DEBUG, ">");
                    else if(!USES_LIST(mb_type, 0))
                        av_log(s->avctx, AV_LOG_DEBUG, "<");
                    else{
                        assert(USES_LIST(mb_type, 0) && USES_LIST(mb_type, 1));
                        av_log(s->avctx, AV_LOG_DEBUG, "X");
                    }

                    //segmentation
                    if(IS_8X8(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "+");
                    else if(IS_16X8(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "-");
                    else if(IS_8X16(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "|");
                    else if(IS_INTRA(mb_type) || IS_16X16(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, " ");
                    else
                        av_log(s->avctx, AV_LOG_DEBUG, "?");


                    if(IS_INTERLACED(mb_type) && s->codec_id == CODEC_ID_H264)
                        av_log(s->avctx, AV_LOG_DEBUG, "=");
                    else
                        av_log(s->avctx, AV_LOG_DEBUG, " ");
                }
//                av_log(s->avctx, AV_LOG_DEBUG, " ");
            }
            av_log(s->avctx, AV_LOG_DEBUG, "\n");
        }
    }

    /* visualizations drawn into a private copy of the picture */
    if((s->avctx->debug&(FF_DEBUG_VIS_QP|FF_DEBUG_VIS_MB_TYPE)) || (s->avctx->debug_mv)){
        const int shift= 1 + s->quarter_sample;
        int mb_y;
        uint8_t *ptr;
        int i;
        int h_chroma_shift, v_chroma_shift;
        const int width = s->avctx->width;
        const int height= s->avctx->height;
        const int mv_sample_log2= 4 - pict->motion_subsample_log2;
        const int mv_stride= (s->mb_width << mv_sample_log2) + (s->codec_id == CODEC_ID_H264 ? 0 : 1);
        s->low_delay=0; //needed to see the vectors without trashing the buffers

        avcodec_get_chroma_sub_sample(s->avctx->pix_fmt, &h_chroma_shift, &v_chroma_shift);
        for(i=0; i<3; i++){
            memcpy(s->visualization_buffer[i], pict->data[i], (i==0) ? pict->linesize[i]*height:pict->linesize[i]*height >> v_chroma_shift);
            pict->data[i]= s->visualization_buffer[i];
        }
        pict->type= FF_BUFFER_TYPE_COPY;
        ptr= pict->data[0];

        for(mb_y=0; mb_y<s->mb_height; mb_y++){
            int mb_x;
            for(mb_x=0; mb_x<s->mb_width; mb_x++){
                const int mb_index= mb_x + mb_y*s->mb_stride;
                /* motion vector visualization: type 0 = P forward,
                   1 = B forward, 2 = B backward */
                if((s->avctx->debug_mv) && pict->motion_val){
                  int type;
                  for(type=0; type<3; type++){
                    int direction = 0;
                    switch (type) {
                      case 0: if ((!(s->avctx->debug_mv&FF_DEBUG_VIS_MV_P_FOR)) || (pict->pict_type!=FF_P_TYPE))
                                continue;
                              direction = 0;
                              break;
                      case 1: if ((!(s->avctx->debug_mv&FF_DEBUG_VIS_MV_B_FOR)) || (pict->pict_type!=FF_B_TYPE))
                                continue;
                              direction = 0;
                              break;
                      case 2: if ((!(s->avctx->debug_mv&FF_DEBUG_VIS_MV_B_BACK)) || (pict->pict_type!=FF_B_TYPE))
                                continue;
                              direction = 1;
                              break;
                    }
                    if(!USES_LIST(pict->mb_type[mb_index], direction))
                        continue;

                    /* one arrow per partition, anchored at the partition center */
                    if(IS_8X8(pict->mb_type[mb_index])){
                      int i;
                      for(i=0; i<4; i++){
                        int sx= mb_x*16 + 4 + 8*(i&1);
                        int sy= mb_y*16 + 4 + 8*(i>>1);
                        int xy= (mb_x*2 + (i&1) + (mb_y*2 + (i>>1))*mv_stride) << (mv_sample_log2-1);
                        int mx= (pict->motion_val[direction][xy][0]>>shift) + sx;
                        int my= (pict->motion_val[direction][xy][1]>>shift) + sy;
                        draw_arrow(ptr, sx, sy, mx, my, width, height, s->linesize, 100);
                      }
                    }else if(IS_16X8(pict->mb_type[mb_index])){
                      int i;
                      for(i=0; i<2; i++){
                        int sx=mb_x*16 + 8;
                        int sy=mb_y*16 + 4 + 8*i;
                        int xy= (mb_x*2 + (mb_y*2 + i)*mv_stride) << (mv_sample_log2-1);
                        int mx=(pict->motion_val[direction][xy][0]>>shift);
                        int my=(pict->motion_val[direction][xy][1]>>shift);

                        if(IS_INTERLACED(pict->mb_type[mb_index]))
                            my*=2;

                        draw_arrow(ptr, sx, sy, mx+sx, my+sy, width, height, s->linesize, 100);
                      }
                    }else if(IS_8X16(pict->mb_type[mb_index])){
                      int i;
                      for(i=0; i<2; i++){
                        int sx=mb_x*16 + 4 + 8*i;
                        int sy=mb_y*16 + 8;
                        int xy= (mb_x*2 + i + mb_y*2*mv_stride) << (mv_sample_log2-1);
                        int mx=(pict->motion_val[direction][xy][0]>>shift);
                        int my=(pict->motion_val[direction][xy][1]>>shift);

                        if(IS_INTERLACED(pict->mb_type[mb_index]))
                            my*=2;

                        draw_arrow(ptr, sx, sy, mx+sx, my+sy, width, height, s->linesize, 100);
                      }
                    }else{
                      int sx= mb_x*16 + 8;
                      int sy= mb_y*16 + 8;
                      int xy= (mb_x + mb_y*mv_stride) << mv_sample_log2;
                      int mx= (pict->motion_val[direction][xy][0]>>shift) + sx;
                      int my= (pict->motion_val[direction][xy][1]>>shift) + sy;
                      draw_arrow(ptr, sx, sy, mx, my, width, height, s->linesize, 100);
                    }
                  }
                }
                /* qscale visualization: paint both chroma planes with a
                   gray level proportional to the macroblock qscale */
                if((s->avctx->debug&FF_DEBUG_VIS_QP) && pict->motion_val){
                    uint64_t c= (pict->qscale_table[mb_index]*128/31) * 0x0101010101010101ULL;
                    int y;
                    for(y=0; y<8; y++){
                        *(uint64_t*)(pict->data[1] + 8*mb_x + (8*mb_y + y)*pict->linesize[1])= c;
                        *(uint64_t*)(pict->data[2] + 8*mb_x + (8*mb_y + y)*pict->linesize[2])= c;
                    }
                }
                /* MB type visualization: color-code each macroblock via its chroma */
                if((s->avctx->debug&FF_DEBUG_VIS_MB_TYPE) && pict->motion_val){
                    int mb_type= pict->mb_type[mb_index];
                    uint64_t u,v;
                    int y;
/* convert a hue angle (degrees) and saturation radius into U/V values */
#define COLOR(theta, r)\
u= (int)(128 + r*cos(theta*3.141592/180));\
v= (int)(128 + r*sin(theta*3.141592/180));


                    u=v=128;
                    if(IS_PCM(mb_type)){
                        COLOR(120,48)
                    }else if((IS_INTRA(mb_type) && IS_ACPRED(mb_type)) || IS_INTRA16x16(mb_type)){
                        COLOR(30,48)
                    }else if(IS_INTRA4x4(mb_type)){
                        COLOR(90,48)
                    }else if(IS_DIRECT(mb_type) && IS_SKIP(mb_type)){
//                        COLOR(120,48)
                    }else if(IS_DIRECT(mb_type)){
                        COLOR(150,48)
                    }else if(IS_GMC(mb_type) && IS_SKIP(mb_type)){
                        COLOR(170,48)
                    }else if(IS_GMC(mb_type)){
                        COLOR(190,48)
                    }else if(IS_SKIP(mb_type)){
//                        COLOR(180,48)
                    }else if(!USES_LIST(mb_type, 1)){
                        COLOR(240,48)
                    }else if(!USES_LIST(mb_type, 0)){
                        COLOR(0,48)
                    }else{
                        assert(USES_LIST(mb_type, 0) && USES_LIST(mb_type, 1));
                        COLOR(300,48)
                    }

                    u*= 0x0101010101010101ULL;
                    v*= 0x0101010101010101ULL;
                    for(y=0; y<8; y++){
                        *(uint64_t*)(pict->data[1] + 8*mb_x + (8*mb_y + y)*pict->linesize[1])= u;
                        *(uint64_t*)(pict->data[2] + 8*mb_x + (8*mb_y + y)*pict->linesize[2])= v;
                    }

                    //segmentation
                    if(IS_8X8(mb_type) || IS_16X8(mb_type)){
                        *(uint64_t*)(pict->data[0] + 16*mb_x + 0 + (16*mb_y + 8)*pict->linesize[0])^= 0x8080808080808080ULL;
                        *(uint64_t*)(pict->data[0] + 16*mb_x + 8 + (16*mb_y + 8)*pict->linesize[0])^= 0x8080808080808080ULL;
                    }
                    if(IS_8X8(mb_type) || IS_8X16(mb_type)){
                        for(y=0; y<16; y++)
                            pict->data[0][16*mb_x + 8 + (16*mb_y + y)*pict->linesize[0]]^= 0x80;
                    }
                    if(IS_8X8(mb_type) && mv_sample_log2 >= 2){
                        int dm= 1 << (mv_sample_log2-2);
                        for(i=0; i<4; i++){
                            int sx= mb_x*16 + 8*(i&1);
                            int sy= mb_y*16 + 8*(i>>1);
                            int xy= (mb_x*2 + (i&1) + (mb_y*2 + (i>>1))*mv_stride) << (mv_sample_log2-1);
                            //FIXME bidir
                            int32_t *mv = (int32_t*)&pict->motion_val[0][xy];
                            if(mv[0] != mv[dm] || mv[dm*mv_stride] != mv[dm*(mv_stride+1)])
                                for(y=0; y<8; y++)
                                    pict->data[0][sx + 4 + (sy + y)*pict->linesize[0]]^= 0x80;
                            if(mv[0] != mv[dm*mv_stride] || mv[dm] != mv[dm*(mv_stride+1)])
                                *(uint64_t*)(pict->data[0] + sx + (sy + 4)*pict->linesize[0])^= 0x8080808080808080ULL;
                        }
                    }

                    if(IS_INTERLACED(mb_type) && s->codec_id == CODEC_ID_H264){
                        // hmm
                    }
                }
                // NOTE(review): the skip count is cleared here after display — confirm no other reader depends on it surviving this call
                s->mbskip_table[mb_index]=0;
            }
        }
    }
}
1401
/**
 * Copies a rectangular area of samples to a temporary buffer and replicates the border samples.
 * @param buf destination buffer
 * @param src source buffer
 * @param linesize number of bytes between 2 vertically adjacent samples in both the source and destination buffers
 * @param block_w width of block
 * @param block_h height of block
 * @param src_x x coordinate of the top left sample of the block in the source buffer
 * @param src_y y coordinate of the top left sample of the block in the source buffer
 * @param w width of the source buffer
 * @param h height of the source buffer
 */
void ff_emulated_edge_mc(uint8_t *buf, uint8_t *src, int linesize, int block_w, int block_h,
                                    int src_x, int src_y, int w, int h){
    int x, y;
    int start_y, start_x, end_y, end_x;

    /* clamp the block position (and src pointer) so that at least one
       sample of the block overlaps the source image */
    if(src_y>= h){
        src+= (h-1-src_y)*linesize;
        src_y=h-1;
    }else if(src_y<=-block_h){
        src+= (1-block_h-src_y)*linesize;
        src_y=1-block_h;
    }
    if(src_x>= w){
        src+= (w-1-src_x);
        src_x=w-1;
    }else if(src_x<=-block_w){
        src+= (1-block_w-src_x);
        src_x=1-block_w;
    }

    /* the sub-rectangle of the block that lies inside the source image */
    start_y= FFMAX(0, -src_y);
    start_x= FFMAX(0, -src_x);
    end_y= FFMIN(block_h, h-src_y);
    end_x= FFMIN(block_w, w-src_x);

    // copy existing part
    for(y=start_y; y<end_y; y++){
        for(x=start_x; x<end_x; x++){
            buf[x + y*linesize]= src[x + y*linesize];
        }
    }

    //top: replicate the first valid row upwards
    for(y=0; y<start_y; y++){
        for(x=start_x; x<end_x; x++){
            buf[x + y*linesize]= buf[x + start_y*linesize];
        }
    }

    //bottom: replicate the last valid row downwards
    for(y=end_y; y<block_h; y++){
        for(x=start_x; x<end_x; x++){
            buf[x + y*linesize]= buf[x + (end_y-1)*linesize];
        }
    }

    /* finally replicate the (now complete) columns sideways */
    for(y=0; y<block_h; y++){
       //left
        for(x=0; x<start_x; x++){
            buf[x + y*linesize]= buf[start_x + y*linesize];
        }

       //right
        for(x=end_x; x<block_w; x++){
            buf[x + y*linesize]= buf[end_x - 1 + y*linesize];
        }
    }
}
1472
/**
 * Motion-compensates a single block at reduced (lowres) resolution,
 * using edge emulation when the referenced area reaches outside the
 * valid picture and applying the given sub-pel MC function.
 * @return 1 if the edge emulation buffer was used, 0 otherwise
 */
static inline int hpel_motion_lowres(MpegEncContext *s,
                                  uint8_t *dest, uint8_t *src,
                                  int field_based, int field_select,
                                  int src_x, int src_y,
                                  int width, int height, int stride,
                                  int h_edge_pos, int v_edge_pos,
                                  int w, int h, h264_chroma_mc_func *pix_op,
                                  int motion_x, int motion_y)
{
    const int lowres= s->avctx->lowres;
    const int s_mask= (2<<lowres)-1; // mask selecting the sub-sample fraction bits at this lowres level

    int emu=0;
    int sx, sy;

    if(s->quarter_sample){ // halve qpel vectors; full qpel precision is not supported in lowres
        motion_x/=2;
        motion_y/=2;
    }

    /* split the motion vector into integer sample position and fraction */
    sx= motion_x & s_mask;
    sy= motion_y & s_mask;
    src_x += motion_x >> (lowres+1);
    src_y += motion_y >> (lowres+1);

    src += src_y * stride + src_x;

    /* block reaches outside the edge area: build a padded copy first */
    if(   (unsigned)src_x > h_edge_pos                 - (!!sx) - w
       || (unsigned)src_y >(v_edge_pos >> field_based) - (!!sy) - h){
        ff_emulated_edge_mc(s->edge_emu_buffer, src, s->linesize, w+1, (h+1)<<field_based,
                            src_x, src_y<<field_based, h_edge_pos, v_edge_pos);
        src= s->edge_emu_buffer;
        emu=1;
    }

    // scale the fraction into the range the MC functions expect -- NOTE(review): confirm against h264_chroma_mc_func contract
    sx <<= 2 - lowres;
    sy <<= 2 - lowres;
    if(field_select)
        src += s->linesize; // advance one line to select the other field
    pix_op[lowres](dest, src, stride, h, sx, sy);
    return emu;
}
1514
/**
 * Apply one MPEG motion vector to the three components (luma and both
 * chroma planes) at reduced "lowres" resolution.
 *
 * @param field_based   1 when motion compensating a single field
 * @param bottom_field  1 when the destination is the bottom field
 * @param field_select  which field of the reference to read from
 * @param ref_picture   array[3] of pointers to the reference planes
 * @param pix_op        MC function table indexed by lowres level
 * @param motion_x,motion_y motion vector (halved first if quarter_sample)
 * @param h             height of the area to compensate, in (field) lines
 */
static av_always_inline void mpeg_motion_lowres(MpegEncContext *s,
                               uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                               int field_based, int bottom_field, int field_select,
                               uint8_t **ref_picture, h264_chroma_mc_func *pix_op,
                               int motion_x, int motion_y, int h)
{
    uint8_t *ptr_y, *ptr_cb, *ptr_cr;
    int mx, my, src_x, src_y, uvsrc_x, uvsrc_y, uvlinesize, linesize, sx, sy, uvsx, uvsy;
    const int lowres= s->avctx->lowres;
    const int block_s= 8>>lowres;     /* block size at this lowres level */
    const int s_mask= (2<<lowres)-1;  /* mask selecting the sub-pel fraction of the MV */
    const int h_edge_pos = s->h_edge_pos >> lowres;
    const int v_edge_pos = s->v_edge_pos >> lowres;
    linesize   = s->current_picture.linesize[0] << field_based;
    uvlinesize = s->current_picture.linesize[1] << field_based;

    if(s->quarter_sample){ //FIXME obviously not perfect but qpel won't work in lowres anyway
        motion_x/=2;
        motion_y/=2;
    }

    if(field_based){
        motion_y += (bottom_field - field_select)*((1<<lowres)-1);
    }

    /* split the MV into an integer source position and a sub-pel fraction */
    sx= motion_x & s_mask;
    sy= motion_y & s_mask;
    src_x = s->mb_x*2*block_s               + (motion_x >> (lowres+1));
    src_y =(s->mb_y*2*block_s>>field_based) + (motion_y >> (lowres+1));

    if (s->out_format == FMT_H263) {
        uvsx = ((motion_x>>1) & s_mask) | (sx&1);
        uvsy = ((motion_y>>1) & s_mask) | (sy&1);
        uvsrc_x = src_x>>1;
        uvsrc_y = src_y>>1;
    }else if(s->out_format == FMT_H261){//even chroma mv's are full pel in H261
        mx = motion_x / 4;
        my = motion_y / 4;
        uvsx = (2*mx) & s_mask;
        uvsy = (2*my) & s_mask;
        uvsrc_x = s->mb_x*block_s               + (mx >> lowres);
        uvsrc_y = s->mb_y*block_s               + (my >> lowres);
    } else {
        /* MPEG-1/2 style: chroma MV is half the luma MV */
        mx = motion_x / 2;
        my = motion_y / 2;
        uvsx = mx & s_mask;
        uvsy = my & s_mask;
        uvsrc_x = s->mb_x*block_s               + (mx >> (lowres+1));
        uvsrc_y =(s->mb_y*block_s>>field_based) + (my >> (lowres+1));
    }

    ptr_y  = ref_picture[0] + src_y * linesize + src_x;
    ptr_cb = ref_picture[1] + uvsrc_y * uvlinesize + uvsrc_x;
    ptr_cr = ref_picture[2] + uvsrc_y * uvlinesize + uvsrc_x;

    /* if the block reads outside the padded reference, MC from an
       edge-emulation buffer instead */
    if(   (unsigned)src_x > h_edge_pos                 - (!!sx) - 2*block_s
       || (unsigned)src_y >(v_edge_pos >> field_based) - (!!sy) - h){
            /* NOTE(review): the fixed 17x17 luma / 9x9 chroma emulation sizes
               and the use of s->linesize (not the field-adjusted 'linesize')
               look suspicious for field-based lowres MC -- confirm upstream */
            ff_emulated_edge_mc(s->edge_emu_buffer, ptr_y, s->linesize, 17, 17+field_based,
                             src_x, src_y<<field_based, h_edge_pos, v_edge_pos);
            ptr_y = s->edge_emu_buffer;
            if(!ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
                uint8_t *uvbuf= s->edge_emu_buffer+18*s->linesize;
                ff_emulated_edge_mc(uvbuf  , ptr_cb, s->uvlinesize, 9, 9+field_based,
                                 uvsrc_x, uvsrc_y<<field_based, h_edge_pos>>1, v_edge_pos>>1);
                ff_emulated_edge_mc(uvbuf+16, ptr_cr, s->uvlinesize, 9, 9+field_based,
                                 uvsrc_x, uvsrc_y<<field_based, h_edge_pos>>1, v_edge_pos>>1);
                ptr_cb= uvbuf;
                ptr_cr= uvbuf+16;
            }
    }

    if(bottom_field){ //FIXME use this for field pix too instead of the obnoxious hack which changes picture.data
        dest_y += s->linesize;
        dest_cb+= s->uvlinesize;
        dest_cr+= s->uvlinesize;
    }

    if(field_select){
        ptr_y += s->linesize;
        ptr_cb+= s->uvlinesize;
        ptr_cr+= s->uvlinesize;
    }

    /* rescale the sub-pel fraction to the units expected by pix_op */
    sx <<= 2 - lowres;
    sy <<= 2 - lowres;
    pix_op[lowres-1](dest_y, ptr_y, linesize, h, sx, sy);

    if(!ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
        uvsx <<= 2 - lowres;
        uvsy <<= 2 - lowres;
        pix_op[lowres](dest_cb, ptr_cb, uvlinesize, h >> s->chroma_y_shift, uvsx, uvsy);
        pix_op[lowres](dest_cr, ptr_cr, uvlinesize, h >> s->chroma_y_shift, uvsx, uvsy);
    }
    //FIXME h261 lowres loop filter
}
1611
/**
 * Chroma motion compensation for 4MV macroblocks at lowres resolution.
 * The caller passes the sum of the four luma vectors in mx/my; a single
 * chroma vector is derived from it with the H.263 special rounding.
 */
static inline void chroma_4mv_motion_lowres(MpegEncContext *s,
                                     uint8_t *dest_cb, uint8_t *dest_cr,
                                     uint8_t **ref_picture,
                                     h264_chroma_mc_func *pix_op,
                                     int mx, int my){
    const int lowres= s->avctx->lowres;
    const int block_s= 8>>lowres;    /* chroma block size at this lowres level */
    const int s_mask= (2<<lowres)-1; /* mask selecting the sub-pel fraction */
    const int h_edge_pos = s->h_edge_pos >> (lowres+1);
    const int v_edge_pos = s->v_edge_pos >> (lowres+1);
    int emu=0, src_x, src_y, offset, sx, sy;
    uint8_t *ptr;

    if(s->quarter_sample){
        mx/=2;
        my/=2;
    }

    /* In case of 8X8, we construct a single chroma motion vector
       with a special rounding */
    mx= ff_h263_round_chroma(mx);
    my= ff_h263_round_chroma(my);

    /* integer source position plus sub-pel fraction */
    sx= mx & s_mask;
    sy= my & s_mask;
    src_x = s->mb_x*block_s + (mx >> (lowres+1));
    src_y = s->mb_y*block_s + (my >> (lowres+1));

    offset = src_y * s->uvlinesize + src_x;
    ptr = ref_picture[1] + offset;
    if(s->flags&CODEC_FLAG_EMU_EDGE){
        if(   (unsigned)src_x > h_edge_pos - (!!sx) - block_s
           || (unsigned)src_y > v_edge_pos - (!!sy) - block_s){
            ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, src_x, src_y, h_edge_pos, v_edge_pos);
            ptr= s->edge_emu_buffer;
            emu=1; /* remember so the Cr plane below is emulated too */
        }
    }
    sx <<= 2 - lowres;
    sy <<= 2 - lowres;
    pix_op[lowres](dest_cb, ptr, s->uvlinesize, block_s, sx, sy);

    /* Cr uses the same offset and (if needed) the same emulation buffer */
    ptr = ref_picture[2] + offset;
    if(emu){
        ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, src_x, src_y, h_edge_pos, v_edge_pos);
        ptr= s->edge_emu_buffer;
    }
    pix_op[lowres](dest_cr, ptr, s->uvlinesize, block_s, sx, sy);
}
1661
/**
 * motion compensation of a single macroblock at lowres resolution
 * @param s context
 * @param dest_y luma destination pointer
 * @param dest_cb chroma cb/u destination pointer
 * @param dest_cr chroma cr/v destination pointer
 * @param dir direction (0->forward, 1->backward)
 * @param ref_picture array[3] of pointers to the 3 planes of the reference picture
 * @param pix_op motion compensation function table (average or put normally)
 * the motion vectors are taken from s->mv and the MV type from s->mv_type
 */
static inline void MPV_motion_lowres(MpegEncContext *s,
                              uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                              int dir, uint8_t **ref_picture,
                              h264_chroma_mc_func *pix_op)
{
    int mx, my;
    int mb_x, mb_y, i;
    const int lowres= s->avctx->lowres;
    const int block_s= 8>>lowres; /* block size at this lowres level */

    mb_x = s->mb_x;
    mb_y = s->mb_y;

    switch(s->mv_type) {
    case MV_TYPE_16X16:
        mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
                    0, 0, 0,
                    ref_picture, pix_op,
                    s->mv[dir][0][0], s->mv[dir][0][1], 2*block_s);
        break;
    case MV_TYPE_8X8:
        /* four luma vectors; their sum is turned into one chroma vector below */
        mx = 0;
        my = 0;
            for(i=0;i<4;i++) {
                hpel_motion_lowres(s, dest_y + ((i & 1) + (i >> 1) * s->linesize)*block_s,
                            ref_picture[0], 0, 0,
                            (2*mb_x + (i & 1))*block_s, (2*mb_y + (i >>1))*block_s,
                            s->width, s->height, s->linesize,
                            s->h_edge_pos >> lowres, s->v_edge_pos >> lowres,
                            block_s, block_s, pix_op,
                            s->mv[dir][i][0], s->mv[dir][i][1]);

                mx += s->mv[dir][i][0];
                my += s->mv[dir][i][1];
            }

        if(!ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY))
            chroma_4mv_motion_lowres(s, dest_cb, dest_cr, ref_picture, pix_op, mx, my);
        break;
    case MV_TYPE_FIELD:
        if (s->picture_structure == PICT_FRAME) {
            /* top field */
            mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
                        1, 0, s->field_select[dir][0],
                        ref_picture, pix_op,
                        s->mv[dir][0][0], s->mv[dir][0][1], block_s);
            /* bottom field */
            mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
                        1, 1, s->field_select[dir][1],
                        ref_picture, pix_op,
                        s->mv[dir][1][0], s->mv[dir][1][1], block_s);
        } else {
            /* field picture: reference may be the current picture's other field */
            if(s->picture_structure != s->field_select[dir][0] + 1 && s->pict_type != B_TYPE && !s->first_field){
                ref_picture= s->current_picture_ptr->data;
            }

            mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
                        0, 0, s->field_select[dir][0],
                        ref_picture, pix_op,
                        s->mv[dir][0][0], s->mv[dir][0][1], 2*block_s);
        }
        break;
    case MV_TYPE_16X8:
        for(i=0; i<2; i++){
            uint8_t ** ref2picture;

            /* pick the reference holding the selected field */
            if(s->picture_structure == s->field_select[dir][i] + 1 || s->pict_type == B_TYPE || s->first_field){
                ref2picture= ref_picture;
            }else{
                ref2picture= s->current_picture_ptr->data;
            }

            mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
                        0, 0, s->field_select[dir][i],
                        ref2picture, pix_op,
                        s->mv[dir][i][0], s->mv[dir][i][1] + 2*block_s*i, block_s);

            dest_y += 2*block_s*s->linesize;
            dest_cb+= (2*block_s>>s->chroma_y_shift)*s->uvlinesize;
            dest_cr+= (2*block_s>>s->chroma_y_shift)*s->uvlinesize;
        }
        break;
    case MV_TYPE_DMV:
        if(s->picture_structure == PICT_FRAME){
            for(i=0; i<2; i++){
                int j;
                for(j=0; j<2; j++){
                    mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
                                1, j, j^i,
                                ref_picture, pix_op,
                                s->mv[dir][2*i + j][0], s->mv[dir][2*i + j][1], block_s);
                }
                pix_op = s->dsp.avg_h264_chroma_pixels_tab;
            }
        }else{
            for(i=0; i<2; i++){
                mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
                            0, 0, s->picture_structure != i+1,
                            ref_picture, pix_op,
                            s->mv[dir][2*i][0],s->mv[dir][2*i][1],2*block_s);

                // after put we make avg of the same block
                pix_op = s->dsp.avg_h264_chroma_pixels_tab;

                //opposite parity is always in the same frame if this is second field
                if(!s->first_field){
                    ref_picture = s->current_picture_ptr->data;
                }
            }
        }
    break;
    default: assert(0);
    }
}
1787
1788 /* put block[] to dest[] */
1789 static inline void put_dct(MpegEncContext *s,
1790                            DCTELEM *block, int i, uint8_t *dest, int line_size, int qscale)
1791 {
1792     s->dct_unquantize_intra(s, block, i, qscale);
1793     s->dsp.idct_put (dest, line_size, block);
1794 }
1795
1796 /* add block[] to dest[] */
1797 static inline void add_dct(MpegEncContext *s,
1798                            DCTELEM *block, int i, uint8_t *dest, int line_size)
1799 {
1800     if (s->block_last_index[i] >= 0) {
1801         s->dsp.idct_add (dest, line_size, block);
1802     }
1803 }
1804
1805 static inline void add_dequant_dct(MpegEncContext *s,
1806                            DCTELEM *block, int i, uint8_t *dest, int line_size, int qscale)
1807 {
1808     if (s->block_last_index[i] >= 0) {
1809         s->dct_unquantize_inter(s, block, i, qscale);
1810
1811         s->dsp.idct_add (dest, line_size, block);
1812     }
1813 }
1814
1815 /**
1816  * cleans dc, ac, coded_block for the current non intra MB
1817  */
1818 void ff_clean_intra_table_entries(MpegEncContext *s)
1819 {
1820     int wrap = s->b8_stride;
1821     int xy = s->block_index[0];
1822
1823     s->dc_val[0][xy           ] =
1824     s->dc_val[0][xy + 1       ] =
1825     s->dc_val[0][xy     + wrap] =
1826     s->dc_val[0][xy + 1 + wrap] = 1024;
1827     /* ac pred */
1828     memset(s->ac_val[0][xy       ], 0, 32 * sizeof(int16_t));
1829     memset(s->ac_val[0][xy + wrap], 0, 32 * sizeof(int16_t));
1830     if (s->msmpeg4_version>=3) {
1831         s->coded_block[xy           ] =
1832         s->coded_block[xy + 1       ] =
1833         s->coded_block[xy     + wrap] =
1834         s->coded_block[xy + 1 + wrap] = 0;
1835     }
1836     /* chroma */
1837     wrap = s->mb_stride;
1838     xy = s->mb_x + s->mb_y * wrap;
1839     s->dc_val[1][xy] =
1840     s->dc_val[2][xy] = 1024;
1841     /* ac pred */
1842     memset(s->ac_val[1][xy], 0, 16 * sizeof(int16_t));
1843     memset(s->ac_val[2][xy], 0, 16 * sizeof(int16_t));
1844
1845     s->mbintra_table[xy]= 0;
1846 }
1847
/* generic function called after a macroblock has been parsed by the
   decoder or after it has been encoded by the encoder.

   Important variables used:
   s->mb_intra : true if intra macroblock
   s->mv_dir   : motion vector direction
   s->mv_type  : motion vector type
   s->mv       : motion vector
   s->interlaced_dct : true if interlaced dct used (mpeg2)

   Performs motion compensation (when decoding a non-intra MB) and then
   adds/puts the dequantized IDCT residue; lowres_flag selects the
   reduced-resolution code paths. */
static av_always_inline void MPV_decode_mb_internal(MpegEncContext *s, DCTELEM block[12][64], int lowres_flag)
{
    int mb_x, mb_y;
    const int mb_xy = s->mb_y * s->mb_stride + s->mb_x;
#ifdef HAVE_XVMC
    if(s->avctx->xvmc_acceleration){
        XVMC_decode_mb(s);//xvmc uses pblocks
        return;
    }
#endif

    mb_x = s->mb_x;
    mb_y = s->mb_y;

    if(s->avctx->debug&FF_DEBUG_DCT_COEFF) {
       /* save DCT coefficients */
       int i,j;
       DCTELEM *dct = &s->current_picture.dct_coeff[mb_xy*64*6];
       for(i=0; i<6; i++)
           for(j=0; j<64; j++)
               *dct++ = block[i][s->dsp.idct_permutation[j]];
    }

    s->current_picture.qscale_table[mb_xy]= s->qscale;

    /* update DC predictors for P macroblocks */
    if (!s->mb_intra) {
        if (s->h263_pred || s->h263_aic) {
            if(s->mbintra_table[mb_xy])
                ff_clean_intra_table_entries(s);
        } else {
            s->last_dc[0] =
            s->last_dc[1] =
            s->last_dc[2] = 128 << s->intra_dc_precision;
        }
    }
    else if (s->h263_pred || s->h263_aic)
        s->mbintra_table[mb_xy]=1;

    /* skip the pixel reconstruction entirely when encoding a frame whose
       pixels are never read back (unless PSNR stats are requested) */
    if ((s->flags&CODEC_FLAG_PSNR) || !(s->encoding && (s->intra_only || s->pict_type==B_TYPE) && s->avctx->mb_decision != FF_MB_DECISION_RD)) { //FIXME precalc
        uint8_t *dest_y, *dest_cb, *dest_cr;
        int dct_linesize, dct_offset;
        op_pixels_func (*op_pix)[4];
        qpel_mc_func (*op_qpix)[16];
        const int linesize= s->current_picture.linesize[0]; //not s->linesize as this would be wrong for field pics
        const int uvlinesize= s->current_picture.linesize[1];
        const int readable= s->pict_type != B_TYPE || s->encoding || s->avctx->draw_horiz_band || lowres_flag;
        const int block_size= lowres_flag ? 8>>s->avctx->lowres : 8;

        /* avoid copy if macroblock skipped in last frame too */
        /* skip only during decoding as we might trash the buffers during encoding a bit */
        if(!s->encoding){
            uint8_t *mbskip_ptr = &s->mbskip_table[mb_xy];
            const int age= s->current_picture.age;

            assert(age);

            if (s->mb_skipped) {
                s->mb_skipped= 0;
                assert(s->pict_type!=I_TYPE);

                (*mbskip_ptr) ++; /* indicate that this time we skipped it */
                if(*mbskip_ptr >99) *mbskip_ptr= 99;

                /* if previous was skipped too, then nothing to do !  */
                if (*mbskip_ptr >= age && s->current_picture.reference){
                    return;
                }
            } else if(!s->current_picture.reference){
                (*mbskip_ptr) ++; /* increase counter so the age can be compared cleanly */
                if(*mbskip_ptr >99) *mbskip_ptr= 99;
            } else{
                *mbskip_ptr = 0; /* not skipped */
            }
        }

        /* for interlaced DCT the two fields of a block are interleaved:
           double the stride and offset the second block by one line */
        dct_linesize = linesize << s->interlaced_dct;
        dct_offset =(s->interlaced_dct)? linesize : linesize*block_size;

        if(readable){
            dest_y=  s->dest[0];
            dest_cb= s->dest[1];
            dest_cr= s->dest[2];
        }else{
            /* reconstruct into a scratchpad; copied to s->dest at the end */
            dest_y = s->b_scratchpad;
            dest_cb= s->b_scratchpad+16*linesize;
            dest_cr= s->b_scratchpad+32*linesize;
        }

        if (!s->mb_intra) {
            /* motion handling */
            /* decoding or more than one mb_type (MC was already done otherwise) */
            if(!s->encoding){
                if(lowres_flag){
                    /* note: shadows the outer op_pix with the lowres MC table */
                    h264_chroma_mc_func *op_pix = s->dsp.put_h264_chroma_pixels_tab;

                    if (s->mv_dir & MV_DIR_FORWARD) {
                        MPV_motion_lowres(s, dest_y, dest_cb, dest_cr, 0, s->last_picture.data, op_pix);
                        op_pix = s->dsp.avg_h264_chroma_pixels_tab;
                    }
                    if (s->mv_dir & MV_DIR_BACKWARD) {
                        MPV_motion_lowres(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.data, op_pix);
                    }
                }else{
                    op_qpix= s->me.qpel_put;
                    if ((!s->no_rounding) || s->pict_type==B_TYPE){
                        op_pix = s->dsp.put_pixels_tab;
                    }else{
                        op_pix = s->dsp.put_no_rnd_pixels_tab;
                    }
                    if (s->mv_dir & MV_DIR_FORWARD) {
                        MPV_motion(s, dest_y, dest_cb, dest_cr, 0, s->last_picture.data, op_pix, op_qpix);
                        op_pix = s->dsp.avg_pixels_tab;
                        op_qpix= s->me.qpel_avg;
                    }
                    if (s->mv_dir & MV_DIR_BACKWARD) {
                        MPV_motion(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.data, op_pix, op_qpix);
                    }
                }
            }

            /* skip dequant / idct if we are really late ;) */
            if(s->hurry_up>1) goto skip_idct;
            if(s->avctx->skip_idct){
                if(  (s->avctx->skip_idct >= AVDISCARD_NONREF && s->pict_type == B_TYPE)
                   ||(s->avctx->skip_idct >= AVDISCARD_NONKEY && s->pict_type != I_TYPE)
                   || s->avctx->skip_idct >= AVDISCARD_ALL)
                    goto skip_idct;
            }

            /* add dct residue */
            if(s->encoding || !(   s->h263_msmpeg4 || s->codec_id==CODEC_ID_MPEG1VIDEO || s->codec_id==CODEC_ID_MPEG2VIDEO
                                || (s->codec_id==CODEC_ID_MPEG4 && !s->mpeg_quant))){
                /* codecs whose blocks still need dequantization here */
                add_dequant_dct(s, block[0], 0, dest_y                          , dct_linesize, s->qscale);
                add_dequant_dct(s, block[1], 1, dest_y              + block_size, dct_linesize, s->qscale);
                add_dequant_dct(s, block[2], 2, dest_y + dct_offset             , dct_linesize, s->qscale);
                add_dequant_dct(s, block[3], 3, dest_y + dct_offset + block_size, dct_linesize, s->qscale);

                if(!ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
                    if (s->chroma_y_shift){
                        add_dequant_dct(s, block[4], 4, dest_cb, uvlinesize, s->chroma_qscale);
                        add_dequant_dct(s, block[5], 5, dest_cr, uvlinesize, s->chroma_qscale);
                    }else{
                        dct_linesize >>= 1;
                        dct_offset >>=1;
                        add_dequant_dct(s, block[4], 4, dest_cb,              dct_linesize, s->chroma_qscale);
                        add_dequant_dct(s, block[5], 5, dest_cr,              dct_linesize, s->chroma_qscale);
                        add_dequant_dct(s, block[6], 6, dest_cb + dct_offset, dct_linesize, s->chroma_qscale);
                        add_dequant_dct(s, block[7], 7, dest_cr + dct_offset, dct_linesize, s->chroma_qscale);
                    }
                }
            } else if(s->codec_id != CODEC_ID_WMV2){
                /* blocks already dequantized by the decoder */
                add_dct(s, block[0], 0, dest_y                          , dct_linesize);
                add_dct(s, block[1], 1, dest_y              + block_size, dct_linesize);
                add_dct(s, block[2], 2, dest_y + dct_offset             , dct_linesize);
                add_dct(s, block[3], 3, dest_y + dct_offset + block_size, dct_linesize);

                if(!ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
                    if(s->chroma_y_shift){//Chroma420
                        add_dct(s, block[4], 4, dest_cb, uvlinesize);
                        add_dct(s, block[5], 5, dest_cr, uvlinesize);
                    }else{
                        //chroma422
                        dct_linesize = uvlinesize << s->interlaced_dct;
                        dct_offset =(s->interlaced_dct)? uvlinesize : uvlinesize*8;

                        add_dct(s, block[4], 4, dest_cb, dct_linesize);
                        add_dct(s, block[5], 5, dest_cr, dct_linesize);
                        add_dct(s, block[6], 6, dest_cb+dct_offset, dct_linesize);
                        add_dct(s, block[7], 7, dest_cr+dct_offset, dct_linesize);
                        if(!s->chroma_x_shift){//Chroma444
                            add_dct(s, block[8], 8, dest_cb+8, dct_linesize);
                            add_dct(s, block[9], 9, dest_cr+8, dct_linesize);
                            add_dct(s, block[10], 10, dest_cb+8+dct_offset, dct_linesize);
                            add_dct(s, block[11], 11, dest_cr+8+dct_offset, dct_linesize);
                        }
                    }
                }//fi gray
            }
            else if (ENABLE_WMV2) {
                ff_wmv2_add_mb(s, block, dest_y, dest_cb, dest_cr);
            }
        } else {
            /* dct only in intra block */
            if(s->encoding || !(s->codec_id==CODEC_ID_MPEG1VIDEO || s->codec_id==CODEC_ID_MPEG2VIDEO)){
                put_dct(s, block[0], 0, dest_y                          , dct_linesize, s->qscale);
                put_dct(s, block[1], 1, dest_y              + block_size, dct_linesize, s->qscale);
                put_dct(s, block[2], 2, dest_y + dct_offset             , dct_linesize, s->qscale);
                put_dct(s, block[3], 3, dest_y + dct_offset + block_size, dct_linesize, s->qscale);

                if(!ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
                    if(s->chroma_y_shift){
                        put_dct(s, block[4], 4, dest_cb, uvlinesize, s->chroma_qscale);
                        put_dct(s, block[5], 5, dest_cr, uvlinesize, s->chroma_qscale);
                    }else{
                        dct_offset >>=1;
                        dct_linesize >>=1;
                        put_dct(s, block[4], 4, dest_cb,              dct_linesize, s->chroma_qscale);
                        put_dct(s, block[5], 5, dest_cr,              dct_linesize, s->chroma_qscale);
                        put_dct(s, block[6], 6, dest_cb + dct_offset, dct_linesize, s->chroma_qscale);
                        put_dct(s, block[7], 7, dest_cr + dct_offset, dct_linesize, s->chroma_qscale);
                    }
                }
            }else{
                /* intra blocks already dequantized: just IDCT into place */
                s->dsp.idct_put(dest_y                          , dct_linesize, block[0]);
                s->dsp.idct_put(dest_y              + block_size, dct_linesize, block[1]);
                s->dsp.idct_put(dest_y + dct_offset             , dct_linesize, block[2]);
                s->dsp.idct_put(dest_y + dct_offset + block_size, dct_linesize, block[3]);

                if(!ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
                    if(s->chroma_y_shift){
                        s->dsp.idct_put(dest_cb, uvlinesize, block[4]);
                        s->dsp.idct_put(dest_cr, uvlinesize, block[5]);
                    }else{

                        dct_linesize = uvlinesize << s->interlaced_dct;
                        dct_offset =(s->interlaced_dct)? uvlinesize : uvlinesize*8;

                        s->dsp.idct_put(dest_cb,              dct_linesize, block[4]);
                        s->dsp.idct_put(dest_cr,              dct_linesize, block[5]);
                        s->dsp.idct_put(dest_cb + dct_offset, dct_linesize, block[6]);
                        s->dsp.idct_put(dest_cr + dct_offset, dct_linesize, block[7]);
                        if(!s->chroma_x_shift){//Chroma444
                            s->dsp.idct_put(dest_cb + 8,              dct_linesize, block[8]);
                            s->dsp.idct_put(dest_cr + 8,              dct_linesize, block[9]);
                            s->dsp.idct_put(dest_cb + 8 + dct_offset, dct_linesize, block[10]);
                            s->dsp.idct_put(dest_cr + 8 + dct_offset, dct_linesize, block[11]);
                        }
                    }
                }//gray
            }
        }
skip_idct:
        /* copy scratchpad reconstruction back to the real destination */
        if(!readable){
            s->dsp.put_pixels_tab[0][0](s->dest[0], dest_y ,   linesize,16);
            s->dsp.put_pixels_tab[s->chroma_x_shift][0](s->dest[1], dest_cb, uvlinesize,16 >> s->chroma_y_shift);
            s->dsp.put_pixels_tab[s->chroma_x_shift][0](s->dest[2], dest_cr, uvlinesize,16 >> s->chroma_y_shift);
        }
    }
}
2098
2099 void MPV_decode_mb(MpegEncContext *s, DCTELEM block[12][64]){
2100     if(s->avctx->lowres) MPV_decode_mb_internal(s, block, 1);
2101     else                  MPV_decode_mb_internal(s, block, 0);
2102 }
2103
2104 /**
2105  *
2106  * @param h is the normal height, this will be reduced automatically if needed for the last row
2107  */
2108 void ff_draw_horiz_band(MpegEncContext *s, int y, int h){
2109     if (s->avctx->draw_horiz_band) {
2110         AVFrame *src;
2111         int offset[4];
2112
2113         if(s->picture_structure != PICT_FRAME){
2114             h <<= 1;
2115             y <<= 1;
2116             if(s->first_field  && !(s->avctx->slice_flags&SLICE_FLAG_ALLOW_FIELD)) return;
2117         }
2118
2119         h= FFMIN(h, s->avctx->height - y);
2120
2121         if(s->pict_type==B_TYPE || s->low_delay || (s->avctx->slice_flags&SLICE_FLAG_CODED_ORDER))
2122             src= (AVFrame*)s->current_picture_ptr;
2123         else if(s->last_picture_ptr)
2124             src= (AVFrame*)s->last_picture_ptr;
2125         else
2126             return;
2127
2128         if(s->pict_type==B_TYPE && s->picture_structure == PICT_FRAME && s->out_format != FMT_H264){
2129             offset[0]=
2130             offset[1]=
2131             offset[2]=
2132             offset[3]= 0;
2133         }else{
2134             offset[0]= y * s->linesize;;
2135             offset[1]=
2136             offset[2]= (y >> s->chroma_y_shift) * s->uvlinesize;
2137             offset[3]= 0;
2138         }
2139
2140         emms_c();
2141
2142         s->avctx->draw_horiz_band(s->avctx, src, offset,
2143                                   y, s->picture_structure, h);
2144     }
2145 }
2146
2147 void ff_init_block_index(MpegEncContext *s){ //FIXME maybe rename
2148     const int linesize= s->current_picture.linesize[0]; //not s->linesize as this would be wrong for field pics
2149     const int uvlinesize= s->current_picture.linesize[1];
2150     const int mb_size= 4 - s->avctx->lowres;
2151
2152     s->block_index[0]= s->b8_stride*(s->mb_y*2    ) - 2 + s->mb_x*2;
2153     s->block_index[1]= s->b8_stride*(s->mb_y*2    ) - 1 + s->mb_x*2;
2154     s->block_index[2]= s->b8_stride*(s->mb_y*2 + 1) - 2 + s->mb_x*2;
2155     s->block_index[3]= s->b8_stride*(s->mb_y*2 + 1) - 1 + s->mb_x*2;
2156     s->block_index[4]= s->mb_stride*(s->mb_y + 1)                + s->b8_stride*s->mb_height*2 + s->mb_x - 1;
2157     s->block_index[5]= s->mb_stride*(s->mb_y + s->mb_height + 2) + s->b8_stride*s->mb_height*2 + s->mb_x - 1;
2158     //block_index is not used by mpeg2, so it is not affected by chroma_format
2159
2160     s->dest[0] = s->current_picture.data[0] + ((s->mb_x - 1) << mb_size);
2161     s->dest[1] = s->current_picture.data[1] + ((s->mb_x - 1) << (mb_size - s->chroma_x_shift));
2162     s->dest[2] = s->current_picture.data[2] + ((s->mb_x - 1) << (mb_size - s->chroma_x_shift));
2163
2164     if(!(s->pict_type==B_TYPE && s->avctx->draw_horiz_band && s->picture_structure==PICT_FRAME))
2165     {
2166         s->dest[0] += s->mb_y *   linesize << mb_size;
2167         s->dest[1] += s->mb_y * uvlinesize << (mb_size - s->chroma_y_shift);
2168         s->dest[2] += s->mb_y * uvlinesize << (mb_size - s->chroma_y_shift);
2169     }
2170 }
2171
2172 void ff_mpeg_flush(AVCodecContext *avctx){
2173     int i;
2174     MpegEncContext *s = avctx->priv_data;
2175
2176     if(s==NULL || s->picture==NULL)
2177         return;
2178
2179     for(i=0; i<MAX_PICTURE_COUNT; i++){
2180        if(s->picture[i].data[0] && (   s->picture[i].type == FF_BUFFER_TYPE_INTERNAL
2181                                     || s->picture[i].type == FF_BUFFER_TYPE_USER))
2182         avctx->release_buffer(avctx, (AVFrame*)&s->picture[i]);
2183     }
2184     s->current_picture_ptr = s->last_picture_ptr = s->next_picture_ptr = NULL;
2185
2186     s->mb_x= s->mb_y= 0;
2187
2188     s->parse_context.state= -1;
2189     s->parse_context.frame_start_found= 0;
2190     s->parse_context.overread= 0;
2191     s->parse_context.overread_index= 0;
2192     s->parse_context.index= 0;
2193     s->parse_context.last_index= 0;
2194     s->bitstream_buffer_size=0;
2195     s->pp_time=0;
2196 }
2197
2198 static void dct_unquantize_mpeg1_intra_c(MpegEncContext *s,
2199                                    DCTELEM *block, int n, int qscale)
2200 {
2201     int i, level, nCoeffs;
2202     const uint16_t *quant_matrix;
2203
2204     nCoeffs= s->block_last_index[n];
2205
2206     if (n < 4)
2207         block[0] = block[0] * s->y_dc_scale;
2208     else
2209         block[0] = block[0] * s->c_dc_scale;
2210     /* XXX: only mpeg1 */
2211     quant_matrix = s->intra_matrix;
2212     for(i=1;i<=nCoeffs;i++) {
2213         int j= s->intra_scantable.permutated[i];
2214         level = block[j];
2215         if (level) {
2216             if (level < 0) {
2217                 level = -level;
2218                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
2219                 level = (level - 1) | 1;
2220                 level = -level;
2221             } else {
2222                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
2223                 level = (level - 1) | 1;
2224             }
2225             block[j] = level;
2226         }
2227     }
2228 }
2229
2230 static void dct_unquantize_mpeg1_inter_c(MpegEncContext *s,
2231                                    DCTELEM *block, int n, int qscale)
2232 {
2233     int i, level, nCoeffs;
2234     const uint16_t *quant_matrix;
2235
2236     nCoeffs= s->block_last_index[n];
2237
2238     quant_matrix = s->inter_matrix;
2239     for(i=0; i<=nCoeffs; i++) {
2240         int j= s->intra_scantable.permutated[i];
2241         level = block[j];
2242         if (level) {
2243             if (level < 0) {
2244                 level = -level;
2245                 level = (((level << 1) + 1) * qscale *
2246                          ((int) (quant_matrix[j]))) >> 4;
2247                 level = (level - 1) | 1;
2248                 level = -level;
2249             } else {
2250                 level = (((level << 1) + 1) * qscale *
2251                          ((int) (quant_matrix[j]))) >> 4;
2252                 level = (level - 1) | 1;
2253             }
2254             block[j] = level;
2255         }
2256     }
2257 }
2258
2259 static void dct_unquantize_mpeg2_intra_c(MpegEncContext *s,
2260                                    DCTELEM *block, int n, int qscale)
2261 {
2262     int i, level, nCoeffs;
2263     const uint16_t *quant_matrix;
2264
2265     if(s->alternate_scan) nCoeffs= 63;
2266     else nCoeffs= s->block_last_index[n];
2267
2268     if (n < 4)
2269         block[0] = block[0] * s->y_dc_scale;
2270     else
2271         block[0] = block[0] * s->c_dc_scale;
2272     quant_matrix = s->intra_matrix;
2273     for(i=1;i<=nCoeffs;i++) {
2274         int j= s->intra_scantable.permutated[i];
2275         level = block[j];
2276         if (level) {
2277             if (level < 0) {
2278                 level = -level;
2279                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
2280                 level = -level;
2281             } else {
2282                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
2283             }
2284             block[j] = level;
2285         }
2286     }
2287 }
2288
2289 static void dct_unquantize_mpeg2_intra_bitexact(MpegEncContext *s,
2290                                    DCTELEM *block, int n, int qscale)
2291 {
2292     int i, level, nCoeffs;
2293     const uint16_t *quant_matrix;
2294     int sum=-1;
2295
2296     if(s->alternate_scan) nCoeffs= 63;
2297     else nCoeffs= s->block_last_index[n];
2298
2299     if (n < 4)
2300         block[0] = block[0] * s->y_dc_scale;
2301     else
2302         block[0] = block[0] * s->c_dc_scale;
2303     quant_matrix = s->intra_matrix;
2304     for(i=1;i<=nCoeffs;i++) {
2305         int j= s->intra_scantable.permutated[i];
2306         level = block[j];
2307         if (level) {
2308             if (level < 0) {
2309                 level = -level;
2310                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
2311                 level = -level;
2312             } else {
2313                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
2314             }
2315             block[j] = level;
2316             sum+=level;
2317         }
2318     }
2319     block[63]^=sum&1;
2320 }
2321
2322 static void dct_unquantize_mpeg2_inter_c(MpegEncContext *s,
2323                                    DCTELEM *block, int n, int qscale)
2324 {
2325     int i, level, nCoeffs;
2326     const uint16_t *quant_matrix;
2327     int sum=-1;
2328
2329     if(s->alternate_scan) nCoeffs= 63;
2330     else nCoeffs= s->block_last_index[n];
2331
2332     quant_matrix = s->inter_matrix;
2333     for(i=0; i<=nCoeffs; i++) {
2334         int j= s->intra_scantable.permutated[i];
2335         level = block[j];
2336         if (level) {
2337             if (level < 0) {
2338                 level = -level;
2339                 level = (((level << 1) + 1) * qscale *
2340                          ((int) (quant_matrix[j]))) >> 4;
2341                 level = -level;
2342             } else {
2343                 level = (((level << 1) + 1) * qscale *
2344                          ((int) (quant_matrix[j]))) >> 4;
2345             }
2346             block[j] = level;
2347             sum+=level;
2348         }
2349     }
2350     block[63]^=sum&1;
2351 }
2352
2353 static void dct_unquantize_h263_intra_c(MpegEncContext *s,
2354                                   DCTELEM *block, int n, int qscale)
2355 {
2356     int i, level, qmul, qadd;
2357     int nCoeffs;
2358
2359     assert(s->block_last_index[n]>=0);
2360
2361     qmul = qscale << 1;
2362
2363     if (!s->h263_aic) {
2364         if (n < 4)
2365             block[0] = block[0] * s->y_dc_scale;
2366         else
2367             block[0] = block[0] * s->c_dc_scale;
2368         qadd = (qscale - 1) | 1;
2369     }else{
2370         qadd = 0;
2371     }
2372     if(s->ac_pred)
2373         nCoeffs=63;
2374     else
2375         nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
2376
2377     for(i=1; i<=nCoeffs; i++) {
2378         level = block[i];
2379         if (level) {
2380             if (level < 0) {
2381                 level = level * qmul - qadd;
2382             } else {
2383                 level = level * qmul + qadd;
2384             }
2385             block[i] = level;
2386         }
2387     }
2388 }
2389
2390 static void dct_unquantize_h263_inter_c(MpegEncContext *s,
2391                                   DCTELEM *block, int n, int qscale)
2392 {
2393     int i, level, qmul, qadd;
2394     int nCoeffs;
2395
2396     assert(s->block_last_index[n]>=0);
2397
2398     qadd = (qscale - 1) | 1;
2399     qmul = qscale << 1;
2400
2401     nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
2402
2403     for(i=0; i<=nCoeffs; i++) {
2404         level = block[i];
2405         if (level) {
2406             if (level < 0) {
2407                 level = level * qmul - qadd;
2408             } else {
2409                 level = level * qmul + qadd;
2410             }
2411             block[i] = level;
2412         }
2413     }
2414 }
2415
2416 /**
2417  * set qscale and update qscale dependent variables.
2418  */
2419 void ff_set_qscale(MpegEncContext * s, int qscale)
2420 {
2421     if (qscale < 1)
2422         qscale = 1;
2423     else if (qscale > 31)
2424         qscale = 31;
2425
2426     s->qscale = qscale;
2427     s->chroma_qscale= s->chroma_qscale_table[qscale];
2428
2429     s->y_dc_scale= s->y_dc_scale_table[ qscale ];
2430     s->c_dc_scale= s->c_dc_scale_table[ s->chroma_qscale ];
2431 }