1 /*
2  * The simplest mpeg encoder (well, it was the simplest!)
3  * Copyright (c) 2000,2001 Fabrice Bellard.
4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5  *
6  * This library is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2 of the License, or (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with this library; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  *
20  * 4MV & hq & b-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
21  */
22
23 /**
24  * @file mpegvideo.c
25  * The simplest mpeg encoder (well, it was the simplest!).
26  */
27
28 #include "avcodec.h"
29 #include "dsputil.h"
30 #include "mpegvideo.h"
31 #include "faandct.h"
32 #include <limits.h>
33
34 #ifdef USE_FASTMEMCPY
35 #include "fastmemcpy.h"
36 #endif
37
38 //#undef NDEBUG
39 //#include <assert.h>
40
41 #ifdef CONFIG_ENCODERS
42 static void encode_picture(MpegEncContext *s, int picture_number);
43 #endif //CONFIG_ENCODERS
44 static void dct_unquantize_mpeg1_intra_c(MpegEncContext *s,
45                                    DCTELEM *block, int n, int qscale);
46 static void dct_unquantize_mpeg1_inter_c(MpegEncContext *s,
47                                    DCTELEM *block, int n, int qscale);
48 static void dct_unquantize_mpeg2_intra_c(MpegEncContext *s,
49                                    DCTELEM *block, int n, int qscale);
50 static void dct_unquantize_mpeg2_intra_bitexact(MpegEncContext *s,
51                                    DCTELEM *block, int n, int qscale);
52 static void dct_unquantize_mpeg2_inter_c(MpegEncContext *s,
53                                    DCTELEM *block, int n, int qscale);
54 static void dct_unquantize_h263_intra_c(MpegEncContext *s,
55                                   DCTELEM *block, int n, int qscale);
56 static void dct_unquantize_h263_inter_c(MpegEncContext *s,
57                                   DCTELEM *block, int n, int qscale);
58 static void draw_edges_c(uint8_t *buf, int wrap, int width, int height, int w);
59 #ifdef CONFIG_ENCODERS
60 static int dct_quantize_c(MpegEncContext *s, DCTELEM *block, int n, int qscale, int *overflow);
61 static int dct_quantize_trellis_c(MpegEncContext *s, DCTELEM *block, int n, int qscale, int *overflow);
62 static int dct_quantize_refine(MpegEncContext *s, DCTELEM *block, int16_t *weight, DCTELEM *orig, int n, int qscale);
63 static int sse_mb(MpegEncContext *s);
64 static void  denoise_dct_c(MpegEncContext *s, DCTELEM *block);
65 #endif //CONFIG_ENCODERS
66
67 #ifdef HAVE_XVMC
68 extern int  XVMC_field_start(MpegEncContext*s, AVCodecContext *avctx);
69 extern void XVMC_field_end(MpegEncContext *s);
70 extern void XVMC_decode_mb(MpegEncContext *s);
71 #endif
72
73 void (*draw_edges)(uint8_t *buf, int wrap, int width, int height, int w)= draw_edges_c;
74
75
76 /* enable all paranoid tests for rounding, overflows, etc... */
77 //#define PARANOID
78
79 //#define DEBUG
80
81
82 /* for jpeg fast DCT */
83 #define CONST_BITS 14
84
85 static const uint16_t aanscales[64] = {
86     /* precomputed values scaled up by 14 bits */
87     16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
88     22725, 31521, 29692, 26722, 22725, 17855, 12299,  6270,
89     21407, 29692, 27969, 25172, 21407, 16819, 11585,  5906,
90     19266, 26722, 25172, 22654, 19266, 15137, 10426,  5315,
91     16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
92     12873, 17855, 16819, 15137, 12873, 10114,  6967,  3552,
93     8867 , 12299, 11585, 10426,  8867,  6967,  4799,  2446,
94     4520 ,  6270,  5906,  5315,  4520,  3552,  2446,  1247
95 };
96
97 static const uint8_t h263_chroma_roundtab[16] = {
98 //  0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15
99     0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2,
100 };
101
102 static const uint8_t ff_default_chroma_qscale_table[32]={
103 //  0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
104     0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
105 };
106
107 #ifdef CONFIG_ENCODERS
108 static uint8_t (*default_mv_penalty)[MAX_MV*2+1]=NULL;
109 static uint8_t default_fcode_tab[MAX_MV*2+1];
110
111 enum PixelFormat ff_yuv420p_list[2]= {PIX_FMT_YUV420P, -1};
112
113 static void convert_matrix(DSPContext *dsp, int (*qmat)[64], uint16_t (*qmat16)[2][64],
114                            const uint16_t *quant_matrix, int bias, int qmin, int qmax, int intra)
115 {
116     int qscale;
117     int shift=0;
118
119     for(qscale=qmin; qscale<=qmax; qscale++){
120         int i;
121         if (dsp->fdct == ff_jpeg_fdct_islow
122 #ifdef FAAN_POSTSCALE
123             || dsp->fdct == ff_faandct
124 #endif
125             ) {
126             for(i=0;i<64;i++) {
127                 const int j= dsp->idct_permutation[i];
128                 /* 16 <= qscale * quant_matrix[i] <= 7905 */
129                 /* 19952         <= aanscales[i] * qscale * quant_matrix[i]           <= 249205026 */
130                 /* (1<<36)/19952 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= (1<<36)/249205026 */
131                 /* 3444240       >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= 275 */
132
133                 qmat[qscale][i] = (int)((uint64_t_C(1) << QMAT_SHIFT) /
134                                 (qscale * quant_matrix[j]));
135             }
136         } else if (dsp->fdct == fdct_ifast
137 #ifndef FAAN_POSTSCALE
138                    || dsp->fdct == ff_faandct
139 #endif
140                    ) {
141             for(i=0;i<64;i++) {
142                 const int j= dsp->idct_permutation[i];
143                 /* 16 <= qscale * quant_matrix[i] <= 7905 */
144                 /* 19952         <= aanscales[i] * qscale * quant_matrix[i]           <= 249205026 */
145                 /* (1<<36)/19952 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= (1<<36)/249205026 */
146                 /* 3444240       >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= 275 */
147
148                 qmat[qscale][i] = (int)((uint64_t_C(1) << (QMAT_SHIFT + 14)) /
149                                 (aanscales[i] * qscale * quant_matrix[j]));
150             }
151         } else {
152             for(i=0;i<64;i++) {
153                 const int j= dsp->idct_permutation[i];
154                 /* We can safely suppose that 16 <= quant_matrix[i] <= 255
155                    So 16           <= qscale * quant_matrix[i]             <= 7905
156                    so (1<<19) / 16 >= (1<<19) / (qscale * quant_matrix[i]) >= (1<<19) / 7905
157                    so 32768        >= (1<<19) / (qscale * quant_matrix[i]) >= 67
158                 */
159                 qmat[qscale][i] = (int)((uint64_t_C(1) << QMAT_SHIFT) / (qscale * quant_matrix[j]));
160 //                qmat  [qscale][i] = (1 << QMAT_SHIFT_MMX) / (qscale * quant_matrix[i]);
161                 qmat16[qscale][0][i] = (1 << QMAT_SHIFT_MMX) / (qscale * quant_matrix[j]);
162
163                 if(qmat16[qscale][0][i]==0 || qmat16[qscale][0][i]==128*256) qmat16[qscale][0][i]=128*256-1;
164                 qmat16[qscale][1][i]= ROUNDED_DIV(bias<<(16-QUANT_BIAS_SHIFT), qmat16[qscale][0][i]);
165             }
166         }
167
168         for(i=intra; i<64; i++){
169             int64_t max= 8191;
170             if (dsp->fdct == fdct_ifast
171 #ifndef FAAN_POSTSCALE
172                    || dsp->fdct == ff_faandct
173 #endif
174                    ) {
175                 max= (8191LL*aanscales[i]) >> 14;
176             }
177             while(((max * qmat[qscale][i]) >> shift) > INT_MAX){
178                 shift++;
179             }
180         }
181     }
182     if(shift){
183         av_log(NULL, AV_LOG_INFO, "Warning, QMAT_SHIFT is larger than %d, overflows possible\n", QMAT_SHIFT - shift);
184     }
185 }
186
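/**
 * derives the quantizer from the current rate-distortion lambda:
 * qscale is roughly lambda/FF_QP2LAMBDA (with rounding), clipped to the
 * user's [qmin, qmax] range, and lambda2 (lambda^2 rescaled by
 * FF_LAMBDA_SCALE) is refreshed for the squared-error based decisions.
 */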
187 static inline void update_qscale(MpegEncContext *s){
188     s->qscale= (s->lambda*139 + FF_LAMBDA_SCALE*64) >> (FF_LAMBDA_SHIFT + 7);
189     s->qscale= clip(s->qscale, s->avctx->qmin, s->avctx->qmax);
190
191     s->lambda2= (s->lambda*s->lambda + FF_LAMBDA_SCALE/2) >> FF_LAMBDA_SHIFT;
192 }
193 #endif //CONFIG_ENCODERS
194
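/**
 * builds a ScanTable from a raster scan order: permutated[] applies the
 * IDCT permutation of the current DSP implementation to each scan position,
 * inverse[] (PowerPC only) maps a coefficient's raster position back to its
 * scan position, and raster_end[] records, for every scan position, the
 * highest permuted index seen so far.
 */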
195 void ff_init_scantable(uint8_t *permutation, ScanTable *st, const uint8_t *src_scantable){
196     int i;
197     int end;
198
199     st->scantable= src_scantable;
200
201     for(i=0; i<64; i++){
202         int j;
203         j = src_scantable[i];
204         st->permutated[i] = permutation[j];
205 #ifdef ARCH_POWERPC
206         st->inverse[j] = i;
207 #endif
208     }
209
210     end=-1;
211     for(i=0; i<64; i++){
212         int j;
213         j = st->permutated[i];
214         if(j>end) end=j;
215         st->raster_end[i]= end;
216     }
217 }
218
219 #ifdef CONFIG_ENCODERS
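/**
 * writes a custom quantization matrix to the bitstream: a single flag bit
 * followed, when a matrix is present, by its 64 entries in zigzag order
 * (8 bits each); otherwise only a 0 flag bit is written.
 */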
220 void ff_write_quant_matrix(PutBitContext *pb, int16_t *matrix){
221     int i;
222
223     if(matrix){
224         put_bits(pb, 1, 1);
225         for(i=0;i<64;i++) {
226             put_bits(pb, 8, matrix[ ff_zigzag_direct[i] ]);
227         }
228     }else
229         put_bits(pb, 1, 0);
230 }
231 #endif //CONFIG_ENCODERS
232
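/**
 * scans the buffer for an MPEG-style start code (00 00 01 xx). The last four
 * bytes seen are carried between calls in *state, so codes that straddle
 * buffer boundaries are still found. The main loop skips up to 3 bytes per
 * iteration based on the trailing byte values. Returns a pointer just past
 * the start code (with *state holding its four bytes), or end if none found.
 */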
233 const uint8_t *ff_find_start_code(const uint8_t * restrict p, const uint8_t *end, uint32_t * restrict state){
234     int i;
235
236     assert(p<=end);
237     if(p>=end)
238         return end;
239
240     for(i=0; i<3; i++){
241         uint32_t tmp= *state << 8;
242         *state= tmp + *(p++);
243         if(tmp == 0x100 || p==end)
244             return p;
245     }
246
247     while(p<end){
248         if     (p[-1] > 1      ) p+= 3;
249         else if(p[-2]          ) p+= 2;
250         else if(p[-3]|(p[-1]-1)) p++;
251         else{
252             p++;
253             break;
254         }
255     }
256
257     p= FFMIN(p, end)-4;
258     *state=  be2me_32(unaligned32(p));
259
260     return p+4;
261 }
262
263 /* init common dct for both encoder and decoder */
264 int DCT_common_init(MpegEncContext *s)
265 {
266     s->dct_unquantize_h263_intra = dct_unquantize_h263_intra_c;
267     s->dct_unquantize_h263_inter = dct_unquantize_h263_inter_c;
268     s->dct_unquantize_mpeg1_intra = dct_unquantize_mpeg1_intra_c;
269     s->dct_unquantize_mpeg1_inter = dct_unquantize_mpeg1_inter_c;
270     s->dct_unquantize_mpeg2_intra = dct_unquantize_mpeg2_intra_c;
271     if(s->flags & CODEC_FLAG_BITEXACT)
272         s->dct_unquantize_mpeg2_intra = dct_unquantize_mpeg2_intra_bitexact;
273     s->dct_unquantize_mpeg2_inter = dct_unquantize_mpeg2_inter_c;
274
275 #ifdef CONFIG_ENCODERS
276     s->dct_quantize= dct_quantize_c;
277     s->denoise_dct= denoise_dct_c;
278 #endif //CONFIG_ENCODERS
279
280 #ifdef HAVE_MMX
281     MPV_common_init_mmx(s);
282 #endif
283 #ifdef ARCH_ALPHA
284     MPV_common_init_axp(s);
285 #endif
286 #ifdef HAVE_MLIB
287     MPV_common_init_mlib(s);
288 #endif
289 #ifdef HAVE_MMI
290     MPV_common_init_mmi(s);
291 #endif
292 #ifdef ARCH_ARMV4L
293     MPV_common_init_armv4l(s);
294 #endif
295 #ifdef ARCH_POWERPC
296     MPV_common_init_ppc(s);
297 #endif
298
299 #ifdef CONFIG_ENCODERS
300     s->fast_dct_quantize= s->dct_quantize;
301
302     if(s->flags&CODEC_FLAG_TRELLIS_QUANT){
303         s->dct_quantize= dct_quantize_trellis_c; //move before MPV_common_init_*
304     }
305
306 #endif //CONFIG_ENCODERS
307
308     /* load & permutate scantables
309        note: only wmv uses different ones
310     */
311     if(s->alternate_scan){
312         ff_init_scantable(s->dsp.idct_permutation, &s->inter_scantable  , ff_alternate_vertical_scan);
313         ff_init_scantable(s->dsp.idct_permutation, &s->intra_scantable  , ff_alternate_vertical_scan);
314     }else{
315         ff_init_scantable(s->dsp.idct_permutation, &s->inter_scantable  , ff_zigzag_direct);
316         ff_init_scantable(s->dsp.idct_permutation, &s->intra_scantable  , ff_zigzag_direct);
317     }
318     ff_init_scantable(s->dsp.idct_permutation, &s->intra_h_scantable, ff_alternate_horizontal_scan);
319     ff_init_scantable(s->dsp.idct_permutation, &s->intra_v_scantable, ff_alternate_vertical_scan);
320
321     return 0;
322 }
323
324 static void copy_picture(Picture *dst, Picture *src){
325     *dst = *src;
326     dst->type= FF_BUFFER_TYPE_COPY;
327 }
328
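/**
 * copies the encoder-visible frame attributes (picture type, quality, pts,
 * picture numbers, interlacing flags) from src to dst. When me_threshold is
 * set the caller is expected to supply motion information, so mb_type,
 * motion_val and ref_index are copied as well (with sanity checks) to let
 * the encoder reuse them.
 */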
329 static void copy_picture_attributes(MpegEncContext *s, AVFrame *dst, AVFrame *src){
330     int i;
331
332     dst->pict_type              = src->pict_type;
333     dst->quality                = src->quality;
334     dst->coded_picture_number   = src->coded_picture_number;
335     dst->display_picture_number = src->display_picture_number;
336 //    dst->reference              = src->reference;
337     dst->pts                    = src->pts;
338     dst->interlaced_frame       = src->interlaced_frame;
339     dst->top_field_first        = src->top_field_first;
340
341     if(s->avctx->me_threshold){
342         if(!src->motion_val[0])
343             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.motion_val not set!\n");
344         if(!src->mb_type)
345             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.mb_type not set!\n");
346         if(!src->ref_index[0])
347             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.ref_index not set!\n");
348         if(src->motion_subsample_log2 != dst->motion_subsample_log2)
349             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.motion_subsample_log2 doesn't match! (%d!=%d)\n",
350             src->motion_subsample_log2, dst->motion_subsample_log2);
351
352         memcpy(dst->mb_type, src->mb_type, s->mb_stride * s->mb_height * sizeof(dst->mb_type[0]));
353
354         for(i=0; i<2; i++){
355             int stride= ((16*s->mb_width )>>src->motion_subsample_log2) + 1;
356             int height= ((16*s->mb_height)>>src->motion_subsample_log2);
357
358             if(src->motion_val[i] && src->motion_val[i] != dst->motion_val[i]){
359                 memcpy(dst->motion_val[i], src->motion_val[i], 2*stride*height*sizeof(int16_t));
360             }
361             if(src->ref_index[i] && src->ref_index[i] != dst->ref_index[i]){
362                 memcpy(dst->ref_index[i], src->ref_index[i], s->b8_stride*2*s->mb_height*sizeof(int8_t));
363             }
364         }
365     }
366 }
367
368 /**
369  * allocates a Picture
370  * The pixels are allocated/set by calling get_buffer() if shared=0
371  */
372 static int alloc_picture(MpegEncContext *s, Picture *pic, int shared){
373     const int big_mb_num= s->mb_stride*(s->mb_height+1) + 1; //the +1 is needed so memset(,,stride*height) doesn't sig11
374     const int mb_array_size= s->mb_stride*s->mb_height;
375     const int b8_array_size= s->b8_stride*s->mb_height*2;
376     const int b4_array_size= s->b4_stride*s->mb_height*4;
377     int i;
378
379     if(shared){
380         assert(pic->data[0]);
381         assert(pic->type == 0 || pic->type == FF_BUFFER_TYPE_SHARED);
382         pic->type= FF_BUFFER_TYPE_SHARED;
383     }else{
384         int r;
385
386         assert(!pic->data[0]);
387
388         r= s->avctx->get_buffer(s->avctx, (AVFrame*)pic);
389
390         if(r<0 || !pic->age || !pic->type || !pic->data[0]){
391             av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed (%d %d %d %p)\n", r, pic->age, pic->type, pic->data[0]);
392             return -1;
393         }
394
395         if(s->linesize && (s->linesize != pic->linesize[0] || s->uvlinesize != pic->linesize[1])){
396             av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed (stride changed)\n");
397             return -1;
398         }
399
400         if(pic->linesize[1] != pic->linesize[2]){
401             av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed (uv stride mismatch)\n");
402             return -1;
403         }
404
405         s->linesize  = pic->linesize[0];
406         s->uvlinesize= pic->linesize[1];
407     }
408
409     if(pic->qscale_table==NULL){
410         if (s->encoding) {
411             CHECKED_ALLOCZ(pic->mb_var   , mb_array_size * sizeof(int16_t))
412             CHECKED_ALLOCZ(pic->mc_mb_var, mb_array_size * sizeof(int16_t))
413             CHECKED_ALLOCZ(pic->mb_mean  , mb_array_size * sizeof(int8_t))
414         }
415
416         CHECKED_ALLOCZ(pic->mbskip_table , mb_array_size * sizeof(uint8_t)+2) //the +2 is for the slice end check
417         CHECKED_ALLOCZ(pic->qscale_table , mb_array_size * sizeof(uint8_t))
418         CHECKED_ALLOCZ(pic->mb_type_base , big_mb_num    * sizeof(uint32_t))
419         pic->mb_type= pic->mb_type_base + s->mb_stride+1;
420         if(s->out_format == FMT_H264){
421             for(i=0; i<2; i++){
422                 CHECKED_ALLOCZ(pic->motion_val_base[i], 2 * (b4_array_size+4)  * sizeof(int16_t))
423                 pic->motion_val[i]= pic->motion_val_base[i]+4;
424                 CHECKED_ALLOCZ(pic->ref_index[i], b8_array_size * sizeof(uint8_t))
425             }
426             pic->motion_subsample_log2= 2;
427         }else if(s->out_format == FMT_H263 || s->encoding || (s->avctx->debug&FF_DEBUG_MV) || (s->avctx->debug_mv)){
428             for(i=0; i<2; i++){
429                 CHECKED_ALLOCZ(pic->motion_val_base[i], 2 * (b8_array_size+4) * sizeof(int16_t))
430                 pic->motion_val[i]= pic->motion_val_base[i]+4;
431                 CHECKED_ALLOCZ(pic->ref_index[i], b8_array_size * sizeof(uint8_t))
432             }
433             pic->motion_subsample_log2= 3;
434         }
435         if(s->avctx->debug&FF_DEBUG_DCT_COEFF) {
436             CHECKED_ALLOCZ(pic->dct_coeff, 64 * mb_array_size * sizeof(DCTELEM)*6)
437         }
438         pic->qstride= s->mb_stride;
439         CHECKED_ALLOCZ(pic->pan_scan , 1 * sizeof(AVPanScan))
440     }
441
442     //it might be nicer if the application would keep track of these but it would require an API change
443     memmove(s->prev_pict_types+1, s->prev_pict_types, PREV_PICT_TYPES_BUFFER_SIZE-1);
444     s->prev_pict_types[0]= s->pict_type;
445     if(pic->age < PREV_PICT_TYPES_BUFFER_SIZE && s->prev_pict_types[pic->age] == B_TYPE)
446         pic->age= INT_MAX; // skipped MBs in b frames are quite rare in mpeg1/2 and it's a bit tricky to skip them anyway
447
448     return 0;
449 fail: //for the CHECKED_ALLOCZ macro
450     return -1;
451 }
452
453 /**
454  * deallocates a picture
455  */
456 static void free_picture(MpegEncContext *s, Picture *pic){
457     int i;
458
459     if(pic->data[0] && pic->type!=FF_BUFFER_TYPE_SHARED){
460         s->avctx->release_buffer(s->avctx, (AVFrame*)pic);
461     }
462
463     av_freep(&pic->mb_var);
464     av_freep(&pic->mc_mb_var);
465     av_freep(&pic->mb_mean);
466     av_freep(&pic->mbskip_table);
467     av_freep(&pic->qscale_table);
468     av_freep(&pic->mb_type_base);
469     av_freep(&pic->dct_coeff);
470     av_freep(&pic->pan_scan);
471     pic->mb_type= NULL;
472     for(i=0; i<2; i++){
473         av_freep(&pic->motion_val_base[i]);
474         av_freep(&pic->ref_index[i]);
475     }
476
477     if(pic->type == FF_BUFFER_TYPE_SHARED){
478         for(i=0; i<4; i++){
479             pic->base[i]=
480             pic->data[i]= NULL;
481         }
482         pic->type= 0;
483     }
484 }
485
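/**
 * allocates the per-thread (slice context) scratch buffers: the edge
 * emulation buffer, the ME/RD/OBMC scratchpads, the motion-estimation maps,
 * the optional noise-reduction error sums and the DCT block storage.
 * Everything allocated here is released by free_duplicate_context().
 */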
486 static int init_duplicate_context(MpegEncContext *s, MpegEncContext *base){
487     int i;
488
489     // edge emu needs blocksize + filter length - 1 (=17x17 for halfpel / 21x21 for h264)
490     CHECKED_ALLOCZ(s->allocated_edge_emu_buffer, (s->width+64)*2*21*2); //(width + edge + align)*interlaced*MBsize*tolerance
491     s->edge_emu_buffer= s->allocated_edge_emu_buffer + (s->width+64)*2*21;
492
493      //FIXME should be linesize instead of s->width*2 but that isn't known before get_buffer()
494     CHECKED_ALLOCZ(s->me.scratchpad,  (s->width+64)*4*16*2*sizeof(uint8_t))
495     s->rd_scratchpad=   s->me.scratchpad;
496     s->b_scratchpad=    s->me.scratchpad;
497     s->obmc_scratchpad= s->me.scratchpad + 16;
498     if (s->encoding) {
499         CHECKED_ALLOCZ(s->me.map      , ME_MAP_SIZE*sizeof(uint32_t))
500         CHECKED_ALLOCZ(s->me.score_map, ME_MAP_SIZE*sizeof(uint32_t))
501         if(s->avctx->noise_reduction){
502             CHECKED_ALLOCZ(s->dct_error_sum, 2 * 64 * sizeof(int))
503         }
504     }
505     CHECKED_ALLOCZ(s->blocks, 64*12*2 * sizeof(DCTELEM))
506     s->block= s->blocks[0];
507
508     for(i=0;i<12;i++){
509         s->pblocks[i] = (short *)(&s->block[i]);
510     }
511     return 0;
512 fail:
513     return -1; //free() through MPV_common_end()
514 }
515
516 static void free_duplicate_context(MpegEncContext *s){
517     if(s==NULL) return;
518
519     av_freep(&s->allocated_edge_emu_buffer); s->edge_emu_buffer= NULL;
520     av_freep(&s->me.scratchpad);
521     s->rd_scratchpad=
522     s->b_scratchpad=
523     s->obmc_scratchpad= NULL;
524
525     av_freep(&s->dct_error_sum);
526     av_freep(&s->me.map);
527     av_freep(&s->me.score_map);
528     av_freep(&s->blocks);
529     s->block= NULL;
530 }
531
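/**
 * saves the pointers and per-thread state listed below so that
 * ff_update_duplicate_context() can memcpy the whole shared context from the
 * master thread and then restore these thread-local fields afterwards.
 */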
532 static void backup_duplicate_context(MpegEncContext *bak, MpegEncContext *src){
533 #define COPY(a) bak->a= src->a
534     COPY(allocated_edge_emu_buffer);
535     COPY(edge_emu_buffer);
536     COPY(me.scratchpad);
537     COPY(rd_scratchpad);
538     COPY(b_scratchpad);
539     COPY(obmc_scratchpad);
540     COPY(me.map);
541     COPY(me.score_map);
542     COPY(blocks);
543     COPY(block);
544     COPY(start_mb_y);
545     COPY(end_mb_y);
546     COPY(me.map_generation);
547     COPY(pb);
548     COPY(dct_error_sum);
549     COPY(dct_count[0]);
550     COPY(dct_count[1]);
551 #undef COPY
552 }
553
554 void ff_update_duplicate_context(MpegEncContext *dst, MpegEncContext *src){
555     MpegEncContext bak;
556     int i;
557     //FIXME copy only needed parts
558 //START_TIMER
559     backup_duplicate_context(&bak, dst);
560     memcpy(dst, src, sizeof(MpegEncContext));
561     backup_duplicate_context(dst, &bak);
562     for(i=0;i<12;i++){
563         dst->pblocks[i] = (short *)(&dst->block[i]);
564     }
565 //STOP_TIMER("update_duplicate_context") //about 10k cycles / 0.01 sec for 1000frames on 1ghz with 2 threads
566 }
567
568 static void update_duplicate_context_after_me(MpegEncContext *dst, MpegEncContext *src){
569 #define COPY(a) dst->a= src->a
570     COPY(pict_type);
571     COPY(current_picture);
572     COPY(f_code);
573     COPY(b_code);
574     COPY(qscale);
575     COPY(lambda);
576     COPY(lambda2);
577     COPY(picture_in_gop_number);
578     COPY(gop_picture_number);
579     COPY(frame_pred_frame_dct); //FIXME don't set in encode_header
580     COPY(progressive_frame); //FIXME don't set in encode_header
581     COPY(partitioned_frame); //FIXME don't set in encode_header
582 #undef COPY
583 }
584
585 /**
586  * sets the given MpegEncContext to common defaults (same for encoding and decoding).
587  * the changed fields will not depend upon the prior state of the MpegEncContext.
588  */
589 static void MPV_common_defaults(MpegEncContext *s){
590     s->y_dc_scale_table=
591     s->c_dc_scale_table= ff_mpeg1_dc_scale_table;
592     s->chroma_qscale_table= ff_default_chroma_qscale_table;
593     s->progressive_frame= 1;
594     s->progressive_sequence= 1;
595     s->picture_structure= PICT_FRAME;
596
597     s->coded_picture_number = 0;
598     s->picture_number = 0;
599     s->input_picture_number = 0;
600
601     s->picture_in_gop_number = 0;
602
603     s->f_code = 1;
604     s->b_code = 1;
605 }
606
607 /**
608  * sets the given MpegEncContext to defaults for decoding.
609  * the changed fields will not depend upon the prior state of the MpegEncContext.
610  */
611 void MPV_decode_defaults(MpegEncContext *s){
612     MPV_common_defaults(s);
613 }
614
615 /**
616  * sets the given MpegEncContext to defaults for encoding.
617  * the changed fields will not depend upon the prior state of the MpegEncContext.
618  */
619
620 #ifdef CONFIG_ENCODERS
621 static void MPV_encode_defaults(MpegEncContext *s){
622     static int done=0;
623
624     MPV_common_defaults(s);
625
626     if(!done){
627         int i;
628         done=1;
629
630         default_mv_penalty= av_mallocz( sizeof(uint8_t)*(MAX_FCODE+1)*(2*MAX_MV+1) );
631         memset(default_fcode_tab , 0, sizeof(uint8_t)*(2*MAX_MV+1));
632
633         for(i=-16; i<16; i++){
634             default_fcode_tab[i + MAX_MV]= 1;
635         }
636     }
637     s->me.mv_penalty= default_mv_penalty;
638     s->fcode_tab= default_fcode_tab;
639 }
640 #endif //CONFIG_ENCODERS
641
642 /**
643  * init common structure for both encoder and decoder.
644  * this assumes that some variables like width/height are already set
645  */
646 int MPV_common_init(MpegEncContext *s)
647 {
648     int y_size, c_size, yc_size, i, mb_array_size, mv_table_size, x, y;
649
650     s->mb_height = (s->height + 15) / 16;
651
652     if(s->avctx->thread_count > MAX_THREADS || (s->avctx->thread_count > s->mb_height && s->mb_height)){
653         av_log(s->avctx, AV_LOG_ERROR, "too many threads\n");
654         return -1;
655     }
656
657     if((s->width || s->height) && avcodec_check_dimensions(s->avctx, s->width, s->height))
658         return -1;
659
660     dsputil_init(&s->dsp, s->avctx);
661     DCT_common_init(s);
662
663     s->flags= s->avctx->flags;
664     s->flags2= s->avctx->flags2;
665
666     s->mb_width  = (s->width  + 15) / 16;
667     s->mb_stride = s->mb_width + 1;
668     s->b8_stride = s->mb_width*2 + 1;
669     s->b4_stride = s->mb_width*4 + 1;
670     mb_array_size= s->mb_height * s->mb_stride;
671     mv_table_size= (s->mb_height+2) * s->mb_stride + 1;
672
673     /* set chroma shifts */
674     avcodec_get_chroma_sub_sample(s->avctx->pix_fmt,&(s->chroma_x_shift),
675                                                     &(s->chroma_y_shift) );
676
677     /* set default edge pos, will be overridden in decode_header if needed */
678     s->h_edge_pos= s->mb_width*16;
679     s->v_edge_pos= s->mb_height*16;
680
681     s->mb_num = s->mb_width * s->mb_height;
682
683     s->block_wrap[0]=
684     s->block_wrap[1]=
685     s->block_wrap[2]=
686     s->block_wrap[3]= s->b8_stride;
687     s->block_wrap[4]=
688     s->block_wrap[5]= s->mb_stride;
689
690     y_size = s->b8_stride * (2 * s->mb_height + 1);
691     c_size = s->mb_stride * (s->mb_height + 1);
692     yc_size = y_size + 2 * c_size;
693
694     /* convert fourcc to upper case */
695     s->avctx->codec_tag=   toupper( s->avctx->codec_tag     &0xFF)
696                         + (toupper((s->avctx->codec_tag>>8 )&0xFF)<<8 )
697                         + (toupper((s->avctx->codec_tag>>16)&0xFF)<<16)
698                         + (toupper((s->avctx->codec_tag>>24)&0xFF)<<24);
699
700     s->avctx->stream_codec_tag=   toupper( s->avctx->stream_codec_tag     &0xFF)
701                                + (toupper((s->avctx->stream_codec_tag>>8 )&0xFF)<<8 )
702                                + (toupper((s->avctx->stream_codec_tag>>16)&0xFF)<<16)
703                                + (toupper((s->avctx->stream_codec_tag>>24)&0xFF)<<24);
704
705     s->avctx->coded_frame= (AVFrame*)&s->current_picture;
706
707     CHECKED_ALLOCZ(s->mb_index2xy, (s->mb_num+1)*sizeof(int)) //error resilience code looks cleaner with this
708     for(y=0; y<s->mb_height; y++){
709         for(x=0; x<s->mb_width; x++){
710             s->mb_index2xy[ x + y*s->mb_width ] = x + y*s->mb_stride;
711         }
712     }
713     s->mb_index2xy[ s->mb_height*s->mb_width ] = (s->mb_height-1)*s->mb_stride + s->mb_width; //FIXME really needed?
714
715     if (s->encoding) {
716         /* Allocate MV tables */
717         CHECKED_ALLOCZ(s->p_mv_table_base            , mv_table_size * 2 * sizeof(int16_t))
718         CHECKED_ALLOCZ(s->b_forw_mv_table_base       , mv_table_size * 2 * sizeof(int16_t))
719         CHECKED_ALLOCZ(s->b_back_mv_table_base       , mv_table_size * 2 * sizeof(int16_t))
720         CHECKED_ALLOCZ(s->b_bidir_forw_mv_table_base , mv_table_size * 2 * sizeof(int16_t))
721         CHECKED_ALLOCZ(s->b_bidir_back_mv_table_base , mv_table_size * 2 * sizeof(int16_t))
722         CHECKED_ALLOCZ(s->b_direct_mv_table_base     , mv_table_size * 2 * sizeof(int16_t))
723         s->p_mv_table           = s->p_mv_table_base            + s->mb_stride + 1;
724         s->b_forw_mv_table      = s->b_forw_mv_table_base       + s->mb_stride + 1;
725         s->b_back_mv_table      = s->b_back_mv_table_base       + s->mb_stride + 1;
726         s->b_bidir_forw_mv_table= s->b_bidir_forw_mv_table_base + s->mb_stride + 1;
727         s->b_bidir_back_mv_table= s->b_bidir_back_mv_table_base + s->mb_stride + 1;
728         s->b_direct_mv_table    = s->b_direct_mv_table_base     + s->mb_stride + 1;
729
730         if(s->msmpeg4_version){
731             CHECKED_ALLOCZ(s->ac_stats, 2*2*(MAX_LEVEL+1)*(MAX_RUN+1)*2*sizeof(int));
732         }
733         CHECKED_ALLOCZ(s->avctx->stats_out, 256);
734
735         /* Allocate MB type table */
736         CHECKED_ALLOCZ(s->mb_type  , mb_array_size * sizeof(uint16_t)) //needed for encoding
737
738         CHECKED_ALLOCZ(s->lambda_table, mb_array_size * sizeof(int))
739
740         CHECKED_ALLOCZ(s->q_intra_matrix, 64*32 * sizeof(int))
741         CHECKED_ALLOCZ(s->q_inter_matrix, 64*32 * sizeof(int))
742         CHECKED_ALLOCZ(s->q_intra_matrix16, 64*32*2 * sizeof(uint16_t))
743         CHECKED_ALLOCZ(s->q_inter_matrix16, 64*32*2 * sizeof(uint16_t))
744         CHECKED_ALLOCZ(s->input_picture, MAX_PICTURE_COUNT * sizeof(Picture*))
745         CHECKED_ALLOCZ(s->reordered_input_picture, MAX_PICTURE_COUNT * sizeof(Picture*))
746
747         if(s->avctx->noise_reduction){
748             CHECKED_ALLOCZ(s->dct_offset, 2 * 64 * sizeof(uint16_t))
749         }
750     }
751     CHECKED_ALLOCZ(s->picture, MAX_PICTURE_COUNT * sizeof(Picture))
752
753     CHECKED_ALLOCZ(s->error_status_table, mb_array_size*sizeof(uint8_t))
754
755     if(s->codec_id==CODEC_ID_MPEG4 || (s->flags & CODEC_FLAG_INTERLACED_ME)){
756         /* interlaced direct mode decoding tables */
757             for(i=0; i<2; i++){
758                 int j, k;
759                 for(j=0; j<2; j++){
760                     for(k=0; k<2; k++){
761                         CHECKED_ALLOCZ(s->b_field_mv_table_base[i][j][k]     , mv_table_size * 2 * sizeof(int16_t))
762                         s->b_field_mv_table[i][j][k]    = s->b_field_mv_table_base[i][j][k]     + s->mb_stride + 1;
763                     }
764                     CHECKED_ALLOCZ(s->b_field_select_table[i][j]     , mb_array_size * 2 * sizeof(uint8_t))
765                     CHECKED_ALLOCZ(s->p_field_mv_table_base[i][j]     , mv_table_size * 2 * sizeof(int16_t))
766                     s->p_field_mv_table[i][j]    = s->p_field_mv_table_base[i][j]     + s->mb_stride + 1;
767                 }
768                 CHECKED_ALLOCZ(s->p_field_select_table[i]      , mb_array_size * 2 * sizeof(uint8_t))
769             }
770     }
771     if (s->out_format == FMT_H263) {
772         /* ac values */
773         CHECKED_ALLOCZ(s->ac_val_base, yc_size * sizeof(int16_t) * 16);
774         s->ac_val[0] = s->ac_val_base + s->b8_stride + 1;
775         s->ac_val[1] = s->ac_val_base + y_size + s->mb_stride + 1;
776         s->ac_val[2] = s->ac_val[1] + c_size;
777
778         /* cbp values */
779         CHECKED_ALLOCZ(s->coded_block_base, y_size);
780         s->coded_block= s->coded_block_base + s->b8_stride + 1;
781
782         /* cbp, ac_pred, pred_dir */
783         CHECKED_ALLOCZ(s->cbp_table  , mb_array_size * sizeof(uint8_t))
784         CHECKED_ALLOCZ(s->pred_dir_table, mb_array_size * sizeof(uint8_t))
785     }
786
787     if (s->h263_pred || s->h263_plus || !s->encoding) {
788         /* dc values */
789         //MN: we need these for error resilience of intra-frames
790         CHECKED_ALLOCZ(s->dc_val_base, yc_size * sizeof(int16_t));
791         s->dc_val[0] = s->dc_val_base + s->b8_stride + 1;
792         s->dc_val[1] = s->dc_val_base + y_size + s->mb_stride + 1;
793         s->dc_val[2] = s->dc_val[1] + c_size;
794         for(i=0;i<yc_size;i++)
795             s->dc_val_base[i] = 1024;
796     }
797
798     /* which mb is an intra block */
799     CHECKED_ALLOCZ(s->mbintra_table, mb_array_size);
800     memset(s->mbintra_table, 1, mb_array_size);
801
802     /* init macroblock skip table */
803     CHECKED_ALLOCZ(s->mbskip_table, mb_array_size+2);
804     //Note the +1 is for a quicker mpeg4 slice_end detection
805     CHECKED_ALLOCZ(s->prev_pict_types, PREV_PICT_TYPES_BUFFER_SIZE);
806
807     s->parse_context.state= -1;
808     if((s->avctx->debug&(FF_DEBUG_VIS_QP|FF_DEBUG_VIS_MB_TYPE)) || (s->avctx->debug_mv)){
809        s->visualization_buffer[0] = av_malloc((s->mb_width*16 + 2*EDGE_WIDTH) * s->mb_height*16 + 2*EDGE_WIDTH);
810        s->visualization_buffer[1] = av_malloc((s->mb_width*8 + EDGE_WIDTH) * s->mb_height*8 + EDGE_WIDTH);
811        s->visualization_buffer[2] = av_malloc((s->mb_width*8 + EDGE_WIDTH) * s->mb_height*8 + EDGE_WIDTH);
812     }
813
814     s->context_initialized = 1;
815
816     s->thread_context[0]= s;
817     for(i=1; i<s->avctx->thread_count; i++){
818         s->thread_context[i]= av_malloc(sizeof(MpegEncContext));
819         memcpy(s->thread_context[i], s, sizeof(MpegEncContext));
820     }
821
822     for(i=0; i<s->avctx->thread_count; i++){
823         if(init_duplicate_context(s->thread_context[i], s) < 0)
824            goto fail;
825         s->thread_context[i]->start_mb_y= (s->mb_height*(i  ) + s->avctx->thread_count/2) / s->avctx->thread_count;
826         s->thread_context[i]->end_mb_y  = (s->mb_height*(i+1) + s->avctx->thread_count/2) / s->avctx->thread_count;
827     }
828
829     return 0;
830  fail:
831     MPV_common_end(s);
832     return -1;
833 }
834
835 /* free common structure for both encoder and decoder */
836 void MPV_common_end(MpegEncContext *s)
837 {
838     int i, j, k;
839
840     for(i=0; i<s->avctx->thread_count; i++){
841         free_duplicate_context(s->thread_context[i]);
842     }
843     for(i=1; i<s->avctx->thread_count; i++){
844         av_freep(&s->thread_context[i]);
845     }
846
847     av_freep(&s->parse_context.buffer);
848     s->parse_context.buffer_size=0;
849
850     av_freep(&s->mb_type);
851     av_freep(&s->p_mv_table_base);
852     av_freep(&s->b_forw_mv_table_base);
853     av_freep(&s->b_back_mv_table_base);
854     av_freep(&s->b_bidir_forw_mv_table_base);
855     av_freep(&s->b_bidir_back_mv_table_base);
856     av_freep(&s->b_direct_mv_table_base);
857     s->p_mv_table= NULL;
858     s->b_forw_mv_table= NULL;
859     s->b_back_mv_table= NULL;
860     s->b_bidir_forw_mv_table= NULL;
861     s->b_bidir_back_mv_table= NULL;
862     s->b_direct_mv_table= NULL;
863     for(i=0; i<2; i++){
864         for(j=0; j<2; j++){
865             for(k=0; k<2; k++){
866                 av_freep(&s->b_field_mv_table_base[i][j][k]);
867                 s->b_field_mv_table[i][j][k]=NULL;
868             }
869             av_freep(&s->b_field_select_table[i][j]);
870             av_freep(&s->p_field_mv_table_base[i][j]);
871             s->p_field_mv_table[i][j]=NULL;
872         }
873         av_freep(&s->p_field_select_table[i]);
874     }
875
876     av_freep(&s->dc_val_base);
877     av_freep(&s->ac_val_base);
878     av_freep(&s->coded_block_base);
879     av_freep(&s->mbintra_table);
880     av_freep(&s->cbp_table);
881     av_freep(&s->pred_dir_table);
882
883     av_freep(&s->mbskip_table);
884     av_freep(&s->prev_pict_types);
885     av_freep(&s->bitstream_buffer);
886     s->allocated_bitstream_buffer_size=0;
887
888     av_freep(&s->avctx->stats_out);
889     av_freep(&s->ac_stats);
890     av_freep(&s->error_status_table);
891     av_freep(&s->mb_index2xy);
892     av_freep(&s->lambda_table);
893     av_freep(&s->q_intra_matrix);
894     av_freep(&s->q_inter_matrix);
895     av_freep(&s->q_intra_matrix16);
896     av_freep(&s->q_inter_matrix16);
897     av_freep(&s->input_picture);
898     av_freep(&s->reordered_input_picture);
899     av_freep(&s->dct_offset);
900
901     if(s->picture){
902         for(i=0; i<MAX_PICTURE_COUNT; i++){
903             free_picture(s, &s->picture[i]);
904         }
905     }
906     av_freep(&s->picture);
907     s->context_initialized = 0;
908     s->last_picture_ptr=
909     s->next_picture_ptr=
910     s->current_picture_ptr= NULL;
911     s->linesize= s->uvlinesize= 0;
912
913     for(i=0; i<3; i++)
914         av_freep(&s->visualization_buffer[i]);
915
916     avcodec_default_free_buffers(s->avctx);
917 }
918
919 #ifdef CONFIG_ENCODERS
920
921 /* init video encoder */
922 int MPV_encode_init(AVCodecContext *avctx)
923 {
924     MpegEncContext *s = avctx->priv_data;
925     int i;
926     int chroma_h_shift, chroma_v_shift;
927
928     MPV_encode_defaults(s);
929
930     switch (avctx->codec_id) {
931     case CODEC_ID_MPEG2VIDEO:
932         if(avctx->pix_fmt != PIX_FMT_YUV420P && avctx->pix_fmt != PIX_FMT_YUV422P){
933             av_log(avctx, AV_LOG_ERROR, "only YUV420 and YUV422 are supported\n");
934             return -1;
935         }
936         break;
937     case CODEC_ID_LJPEG:
938     case CODEC_ID_MJPEG:
939         if(avctx->pix_fmt != PIX_FMT_YUVJ420P && (avctx->pix_fmt != PIX_FMT_YUV420P || avctx->strict_std_compliance>FF_COMPLIANCE_INOFFICIAL)){
940             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in jpeg\n");
941             return -1;
942         }
943         break;
944     default:
945         if(avctx->pix_fmt != PIX_FMT_YUV420P){
946             av_log(avctx, AV_LOG_ERROR, "only YUV420 is supported\n");
947             return -1;
948         }
949     }
950
951     switch (avctx->pix_fmt) {
952     case PIX_FMT_YUVJ422P:
953     case PIX_FMT_YUV422P:
954         s->chroma_format = CHROMA_422;
955         break;
956     case PIX_FMT_YUVJ420P:
957     case PIX_FMT_YUV420P:
958     default:
959         s->chroma_format = CHROMA_420;
960         break;
961     }
962
963     s->bit_rate = avctx->bit_rate;
964     s->width = avctx->width;
965     s->height = avctx->height;
966     if(avctx->gop_size > 600 && avctx->strict_std_compliance>FF_COMPLIANCE_EXPERIMENTAL){
967         av_log(avctx, AV_LOG_ERROR, "Warning keyframe interval too large! reducing it ...\n");
968         avctx->gop_size=600;
969     }
970     s->gop_size = avctx->gop_size;
971     s->avctx = avctx;
972     s->flags= avctx->flags;
973     s->flags2= avctx->flags2;
974     s->max_b_frames= avctx->max_b_frames;
975     s->codec_id= avctx->codec->id;
976     s->luma_elim_threshold  = avctx->luma_elim_threshold;
977     s->chroma_elim_threshold= avctx->chroma_elim_threshold;
978     s->strict_std_compliance= avctx->strict_std_compliance;
979     s->data_partitioning= avctx->flags & CODEC_FLAG_PART;
980     s->quarter_sample= (avctx->flags & CODEC_FLAG_QPEL)!=0;
981     s->mpeg_quant= avctx->mpeg_quant;
982     s->rtp_mode= !!avctx->rtp_payload_size;
983     s->intra_dc_precision= avctx->intra_dc_precision;
984     s->user_specified_pts = AV_NOPTS_VALUE;
985
986     if (s->gop_size <= 1) {
987         s->intra_only = 1;
988         s->gop_size = 12;
989     } else {
990         s->intra_only = 0;
991     }
992
993     s->me_method = avctx->me_method;
994
995     /* Fixed QSCALE */
996     s->fixed_qscale = !!(avctx->flags & CODEC_FLAG_QSCALE);
997
998     s->adaptive_quant= (   s->avctx->lumi_masking
999                         || s->avctx->dark_masking
1000                         || s->avctx->temporal_cplx_masking
1001                         || s->avctx->spatial_cplx_masking
1002                         || s->avctx->p_masking
1003                         || s->avctx->border_masking
1004                         || (s->flags&CODEC_FLAG_QP_RD))
1005                        && !s->fixed_qscale;
1006
1007     s->obmc= !!(s->flags & CODEC_FLAG_OBMC);
1008     s->loop_filter= !!(s->flags & CODEC_FLAG_LOOP_FILTER);
1009     s->alternate_scan= !!(s->flags & CODEC_FLAG_ALT_SCAN);
1010     s->intra_vlc_format= !!(s->flags2 & CODEC_FLAG2_INTRA_VLC);
1011
1012     if(avctx->rc_max_rate && !avctx->rc_buffer_size){
1013         av_log(avctx, AV_LOG_ERROR, "a vbv buffer size is needed for encoding with a maximum bitrate\n");
1014         return -1;
1015     }
1016
1017     if(avctx->rc_min_rate && avctx->rc_max_rate != avctx->rc_min_rate){
1018         av_log(avctx, AV_LOG_INFO, "Warning min_rate > 0 but min_rate != max_rate isn't recommended!\n");
1019     }
1020
1021     if(avctx->rc_min_rate && avctx->rc_min_rate > avctx->bit_rate){
1022         av_log(avctx, AV_LOG_INFO, "bitrate below min bitrate\n");
1023         return -1;
1024     }
1025
1026     if(avctx->rc_max_rate && avctx->rc_max_rate < avctx->bit_rate){
1027         av_log(avctx, AV_LOG_INFO, "bitrate above max bitrate\n");
1028         return -1;
1029     }
1030
1031     if(   s->avctx->rc_max_rate && s->avctx->rc_min_rate == s->avctx->rc_max_rate
1032        && (s->codec_id == CODEC_ID_MPEG1VIDEO || s->codec_id == CODEC_ID_MPEG2VIDEO)
1033        && 90000LL * (avctx->rc_buffer_size-1) > s->avctx->rc_max_rate*0xFFFFLL){
1034
1035         av_log(avctx, AV_LOG_INFO, "Warning vbv_delay will be set to 0xFFFF (=VBR) as the specified vbv buffer is too large for the given bitrate!\n");
1036     }
1037
1038     if((s->flags & CODEC_FLAG_4MV) && s->codec_id != CODEC_ID_MPEG4
1039        && s->codec_id != CODEC_ID_H263 && s->codec_id != CODEC_ID_H263P && s->codec_id != CODEC_ID_FLV1){
1040         av_log(avctx, AV_LOG_ERROR, "4MV not supported by codec\n");
1041         return -1;
1042     }
1043
1044     if(s->obmc && s->avctx->mb_decision != FF_MB_DECISION_SIMPLE){
1045         av_log(avctx, AV_LOG_ERROR, "OBMC is only supported with simple mb decision\n");
1046         return -1;
1047     }
1048
1049     if(s->obmc && s->codec_id != CODEC_ID_H263 && s->codec_id != CODEC_ID_H263P){
1050         av_log(avctx, AV_LOG_ERROR, "OBMC is only supported with H263(+)\n");
1051         return -1;
1052     }
1053
1054     if(s->quarter_sample && s->codec_id != CODEC_ID_MPEG4){
1055         av_log(avctx, AV_LOG_ERROR, "qpel not supported by codec\n");
1056         return -1;
1057     }
1058
1059     if(s->data_partitioning && s->codec_id != CODEC_ID_MPEG4){
1060         av_log(avctx, AV_LOG_ERROR, "data partitioning not supported by codec\n");
1061         return -1;
1062     }
1063
1064     if(s->max_b_frames && s->codec_id != CODEC_ID_MPEG4 && s->codec_id != CODEC_ID_MPEG1VIDEO && s->codec_id != CODEC_ID_MPEG2VIDEO){
1065         av_log(avctx, AV_LOG_ERROR, "b frames not supported by codec\n");
1066         return -1;
1067     }
1068
1069     if((s->flags & (CODEC_FLAG_INTERLACED_DCT|CODEC_FLAG_INTERLACED_ME|CODEC_FLAG_ALT_SCAN))
1070        && s->codec_id != CODEC_ID_MPEG4 && s->codec_id != CODEC_ID_MPEG2VIDEO){
1071         av_log(avctx, AV_LOG_ERROR, "interlacing not supported by codec\n");
1072         return -1;
1073     }
1074
1075     if(s->mpeg_quant && s->codec_id != CODEC_ID_MPEG4){ //FIXME mpeg2 uses that too
1076         av_log(avctx, AV_LOG_ERROR, "mpeg2 style quantization not supported by codec\n");
1077         return -1;
1078     }
1079
1080     if((s->flags & CODEC_FLAG_CBP_RD) && !(s->flags & CODEC_FLAG_TRELLIS_QUANT)){
1081         av_log(avctx, AV_LOG_ERROR, "CBP RD needs trellis quant\n");
1082         return -1;
1083     }
1084
1085     if((s->flags & CODEC_FLAG_QP_RD) && s->avctx->mb_decision != FF_MB_DECISION_RD){
1086         av_log(avctx, AV_LOG_ERROR, "QP RD needs mbd=2\n");
1087         return -1;
1088     }
1089
1090     if(s->avctx->scenechange_threshold < 1000000000 && (s->flags & CODEC_FLAG_CLOSED_GOP)){
1091         av_log(avctx, AV_LOG_ERROR, "closed gop with scene change detection isn't supported yet\n");
1092         return -1;
1093     }
1094
1095     if((s->flags2 & CODEC_FLAG2_INTRA_VLC) && s->codec_id != CODEC_ID_MPEG2VIDEO){
1096         av_log(avctx, AV_LOG_ERROR, "intra vlc table not supported by codec\n");
1097         return -1;
1098     }
1099
1100     if(s->avctx->thread_count > 1 && s->codec_id != CODEC_ID_MPEG4
1101        && s->codec_id != CODEC_ID_MPEG1VIDEO && s->codec_id != CODEC_ID_MPEG2VIDEO
1102        && (s->codec_id != CODEC_ID_H263P || !(s->flags & CODEC_FLAG_H263P_SLICE_STRUCT))){
1103         av_log(avctx, AV_LOG_ERROR, "multi threaded encoding not supported by codec\n");
1104         return -1;
1105     }
1106
1107     if(s->avctx->thread_count > 1)
1108         s->rtp_mode= 1;
1109
1110     if(!avctx->time_base.den || !avctx->time_base.num){
1111         av_log(avctx, AV_LOG_ERROR, "framerate not set\n");
1112         return -1;
1113     }
1114
1115     i= (INT_MAX/2+128)>>8;
1116     if(avctx->me_threshold >= i){
1117         av_log(avctx, AV_LOG_ERROR, "me_threshold too large, max is %d\n", i - 1);
1118         return -1;
1119     }
1120     if(avctx->mb_threshold >= i){
1121         av_log(avctx, AV_LOG_ERROR, "mb_threshold too large, max is %d\n", i - 1);
1122         return -1;
1123     }
1124
1125     if(avctx->b_frame_strategy && (avctx->flags&CODEC_FLAG_PASS2)){
1126         av_log(avctx, AV_LOG_INFO, "notice: b_frame_strategy only affects the first pass\n");
1127         avctx->b_frame_strategy = 0;
1128     }
1129
1130     i= ff_gcd(avctx->time_base.den, avctx->time_base.num);
1131     if(i > 1){
1132         av_log(avctx, AV_LOG_INFO, "removing common factors from framerate\n");
1133         avctx->time_base.den /= i;
1134         avctx->time_base.num /= i;
1135 //        return -1;
1136     }
1137
1138     if(s->codec_id==CODEC_ID_MJPEG){
1139         s->intra_quant_bias= 1<<(QUANT_BIAS_SHIFT-1); //(a + x/2)/x
1140         s->inter_quant_bias= 0;
1141     }else if(s->mpeg_quant || s->codec_id==CODEC_ID_MPEG1VIDEO || s->codec_id==CODEC_ID_MPEG2VIDEO){
1142         s->intra_quant_bias= 3<<(QUANT_BIAS_SHIFT-3); //(a + x*3/8)/x
1143         s->inter_quant_bias= 0;
1144     }else{
1145         s->intra_quant_bias=0;
1146         s->inter_quant_bias=-(1<<(QUANT_BIAS_SHIFT-2)); //(a - x/4)/x
1147     }
1148
1149     if(avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
1150         s->intra_quant_bias= avctx->intra_quant_bias;
1151     if(avctx->inter_quant_bias != FF_DEFAULT_QUANT_BIAS)
1152         s->inter_quant_bias= avctx->inter_quant_bias;
1153
1154     avcodec_get_chroma_sub_sample(avctx->pix_fmt, &chroma_h_shift, &chroma_v_shift);
1155
1156     if(avctx->codec_id == CODEC_ID_MPEG4 && s->avctx->time_base.den > (1<<16)-1){
1157         av_log(avctx, AV_LOG_ERROR, "timebase not supported by mpeg 4 standard\n");
1158         return -1;
1159     }
1160     s->time_increment_bits = av_log2(s->avctx->time_base.den - 1) + 1;
1161
1162     switch(avctx->codec->id) {
1163     case CODEC_ID_MPEG1VIDEO:
1164         s->out_format = FMT_MPEG1;
1165         s->low_delay= 0; //s->max_b_frames ? 0 : 1;
1166         avctx->delay= s->low_delay ? 0 : (s->max_b_frames + 1);
1167         break;
1168     case CODEC_ID_MPEG2VIDEO:
1169         s->out_format = FMT_MPEG1;
1170         s->low_delay= 0; //s->max_b_frames ? 0 : 1;
1171         avctx->delay= s->low_delay ? 0 : (s->max_b_frames + 1);
1172         s->rtp_mode= 1;
1173         break;
1174     case CODEC_ID_LJPEG:
1175     case CODEC_ID_JPEGLS:
1176     case CODEC_ID_MJPEG:
1177         s->out_format = FMT_MJPEG;
1178         s->intra_only = 1; /* force intra only for jpeg */
1179         s->mjpeg_write_tables = avctx->codec->id != CODEC_ID_JPEGLS;
1180         s->mjpeg_data_only_frames = 0; /* write all the needed headers */
1181         s->mjpeg_vsample[0] = 1<<chroma_v_shift;
1182         s->mjpeg_vsample[1] = 1;
1183         s->mjpeg_vsample[2] = 1;
1184         s->mjpeg_hsample[0] = 1<<chroma_h_shift;
1185         s->mjpeg_hsample[1] = 1;
1186         s->mjpeg_hsample[2] = 1;
1187         if (mjpeg_init(s) < 0)
1188             return -1;
1189         avctx->delay=0;
1190         s->low_delay=1;
1191         break;
1192     case CODEC_ID_H261:
1193         s->out_format = FMT_H261;
1194         avctx->delay=0;
1195         s->low_delay=1;
1196         break;
1197     case CODEC_ID_H263:
1198         if (h263_get_picture_format(s->width, s->height) == 7) {
1199             av_log(avctx, AV_LOG_INFO, "The specified picture size of %dx%d is not valid for the H.263 codec.\nValid sizes are 128x96, 176x144, 352x288, 704x576, and 1408x1152. Try H.263+.\n", s->width, s->height);
1200             return -1;
1201         }
1202         s->out_format = FMT_H263;
1203         s->obmc= (avctx->flags & CODEC_FLAG_OBMC) ? 1:0;
1204         avctx->delay=0;
1205         s->low_delay=1;
1206         break;
1207     case CODEC_ID_H263P:
1208         s->out_format = FMT_H263;
1209         s->h263_plus = 1;
1210         /* Fx */
1211         s->umvplus = (avctx->flags & CODEC_FLAG_H263P_UMV) ? 1:0;
1212         s->h263_aic= (avctx->flags & CODEC_FLAG_H263P_AIC) ? 1:0;
1213         s->modified_quant= s->h263_aic;
1214         s->alt_inter_vlc= (avctx->flags & CODEC_FLAG_H263P_AIV) ? 1:0;
1215         s->obmc= (avctx->flags & CODEC_FLAG_OBMC) ? 1:0;
1216         s->loop_filter= (avctx->flags & CODEC_FLAG_LOOP_FILTER) ? 1:0;
1217         s->unrestricted_mv= s->obmc || s->loop_filter || s->umvplus;
1218         s->h263_slice_structured= (s->flags & CODEC_FLAG_H263P_SLICE_STRUCT) ? 1:0;
1219
1220         /* /Fx */
1221         /* These are just to be sure */
1222         avctx->delay=0;
1223         s->low_delay=1;
1224         break;
1225     case CODEC_ID_FLV1:
1226         s->out_format = FMT_H263;
1227         s->h263_flv = 2; /* format = 1; 11-bit codes */
1228         s->unrestricted_mv = 1;
1229         s->rtp_mode=0; /* don't allow GOB */
1230         avctx->delay=0;
1231         s->low_delay=1;
1232         break;
1233     case CODEC_ID_RV10:
1234         s->out_format = FMT_H263;
1235         avctx->delay=0;
1236         s->low_delay=1;
1237         break;
1238     case CODEC_ID_RV20:
1239         s->out_format = FMT_H263;
1240         avctx->delay=0;
1241         s->low_delay=1;
1242         s->modified_quant=1;
1243         s->h263_aic=1;
1244         s->h263_plus=1;
1245         s->loop_filter=1;
1246         s->unrestricted_mv= s->obmc || s->loop_filter || s->umvplus;
1247         break;
1248     case CODEC_ID_MPEG4:
1249         s->out_format = FMT_H263;
1250         s->h263_pred = 1;
1251         s->unrestricted_mv = 1;
1252         s->low_delay= s->max_b_frames ? 0 : 1;
1253         avctx->delay= s->low_delay ? 0 : (s->max_b_frames + 1);
1254         break;
1255     case CODEC_ID_MSMPEG4V1:
1256         s->out_format = FMT_H263;
1257         s->h263_msmpeg4 = 1;
1258         s->h263_pred = 1;
1259         s->unrestricted_mv = 1;
1260         s->msmpeg4_version= 1;
1261         avctx->delay=0;
1262         s->low_delay=1;
1263         break;
1264     case CODEC_ID_MSMPEG4V2:
1265         s->out_format = FMT_H263;
1266         s->h263_msmpeg4 = 1;
1267         s->h263_pred = 1;
1268         s->unrestricted_mv = 1;
1269         s->msmpeg4_version= 2;
1270         avctx->delay=0;
1271         s->low_delay=1;
1272         break;
1273     case CODEC_ID_MSMPEG4V3:
1274         s->out_format = FMT_H263;
1275         s->h263_msmpeg4 = 1;
1276         s->h263_pred = 1;
1277         s->unrestricted_mv = 1;
1278         s->msmpeg4_version= 3;
1279         s->flipflop_rounding=1;
1280         avctx->delay=0;
1281         s->low_delay=1;
1282         break;
1283     case CODEC_ID_WMV1:
1284         s->out_format = FMT_H263;
1285         s->h263_msmpeg4 = 1;
1286         s->h263_pred = 1;
1287         s->unrestricted_mv = 1;
1288         s->msmpeg4_version= 4;
1289         s->flipflop_rounding=1;
1290         avctx->delay=0;
1291         s->low_delay=1;
1292         break;
1293     case CODEC_ID_WMV2:
1294         s->out_format = FMT_H263;
1295         s->h263_msmpeg4 = 1;
1296         s->h263_pred = 1;
1297         s->unrestricted_mv = 1;
1298         s->msmpeg4_version= 5;
1299         s->flipflop_rounding=1;
1300         avctx->delay=0;
1301         s->low_delay=1;
1302         break;
1303     default:
1304         return -1;
1305     }
1306
1307     avctx->has_b_frames= !s->low_delay;
1308
1309     s->encoding = 1;
1310
1311     /* init */
1312     if (MPV_common_init(s) < 0)
1313         return -1;
1314
1315     if(s->modified_quant)
1316         s->chroma_qscale_table= ff_h263_chroma_qscale_table;
1317     s->progressive_frame=
1318     s->progressive_sequence= !(avctx->flags & (CODEC_FLAG_INTERLACED_DCT|CODEC_FLAG_INTERLACED_ME));
1319     s->quant_precision=5;
1320
1321     ff_set_cmp(&s->dsp, s->dsp.ildct_cmp, s->avctx->ildct_cmp);
1322     ff_set_cmp(&s->dsp, s->dsp.frame_skip_cmp, s->avctx->frame_skip_cmp);
1323
1324 #ifdef CONFIG_H261_ENCODER
1325     if (s->out_format == FMT_H261)
1326         ff_h261_encode_init(s);
1327 #endif
1328     if (s->out_format == FMT_H263)
1329         h263_encode_init(s);
1330     if(s->msmpeg4_version)
1331         ff_msmpeg4_encode_init(s);
1332     if (s->out_format == FMT_MPEG1)
1333         ff_mpeg1_encode_init(s);
1334
1335     /* init q matrix */
1336     for(i=0;i<64;i++) {
1337         int j= s->dsp.idct_permutation[i];
1338         if(s->codec_id==CODEC_ID_MPEG4 && s->mpeg_quant){
1339             s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
1340             s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
1341         }else if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
1342             s->intra_matrix[j] =
1343             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
1344         }else
1345         { /* mpeg1/2 */
1346             s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
1347             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
1348         }
1349         if(s->avctx->intra_matrix)
1350             s->intra_matrix[j] = s->avctx->intra_matrix[i];
1351         if(s->avctx->inter_matrix)
1352             s->inter_matrix[j] = s->avctx->inter_matrix[i];
1353     }
1354
1355     /* precompute matrix */
1356     /* for mjpeg, we do include qscale in the matrix */
1357     if (s->out_format != FMT_MJPEG) {
1358         convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
1359                        s->intra_matrix, s->intra_quant_bias, avctx->qmin, 31, 1);
1360         convert_matrix(&s->dsp, s->q_inter_matrix, s->q_inter_matrix16,
1361                        s->inter_matrix, s->inter_quant_bias, avctx->qmin, 31, 0);
1362     }
1363
1364     if(ff_rate_control_init(s) < 0)
1365         return -1;
1366
1367     return 0;
1368 }
1369
1370 int MPV_encode_end(AVCodecContext *avctx)
1371 {
1372     MpegEncContext *s = avctx->priv_data;
1373
1374 #ifdef STATS
1375     print_stats();
1376 #endif
1377
1378     ff_rate_control_uninit(s);
1379
1380     MPV_common_end(s);
1381     if (s->out_format == FMT_MJPEG)
1382         mjpeg_close(s);
1383
1384     av_freep(&avctx->extradata);
1385
1386     return 0;
1387 }
1388
1389 #endif //CONFIG_ENCODERS
1390
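/**
 * fills in the derived helper tables of an RLTable: max_level[run],
 * max_run[level] and index_run[run], computed separately for the "not last"
 * and "last" (end of block) halves of the table. With use_static the helpers
 * are allocated once from static storage and the call becomes a no-op on
 * later invocations.
 */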
1391 void init_rl(RLTable *rl, int use_static)
1392 {
1393     int8_t max_level[MAX_RUN+1], max_run[MAX_LEVEL+1];
1394     uint8_t index_run[MAX_RUN+1];
1395     int last, run, level, start, end, i;
1396
1397     /* If table is static, we can quit if rl->max_level[0] is not NULL */
1398     if(use_static && rl->max_level[0])
1399         return;
1400
1401     /* compute max_level[], max_run[] and index_run[] */
1402     for(last=0;last<2;last++) {
1403         if (last == 0) {
1404             start = 0;
1405             end = rl->last;
1406         } else {
1407             start = rl->last;
1408             end = rl->n;
1409         }
1410
1411         memset(max_level, 0, MAX_RUN + 1);
1412         memset(max_run, 0, MAX_LEVEL + 1);
1413         memset(index_run, rl->n, MAX_RUN + 1);
1414         for(i=start;i<end;i++) {
1415             run = rl->table_run[i];
1416             level = rl->table_level[i];
1417             if (index_run[run] == rl->n)
1418                 index_run[run] = i;
1419             if (level > max_level[run])
1420                 max_level[run] = level;
1421             if (run > max_run[level])
1422                 max_run[level] = run;
1423         }
1424         if(use_static)
1425             rl->max_level[last] = av_mallocz_static(MAX_RUN + 1);
1426         else
1427             rl->max_level[last] = av_malloc(MAX_RUN + 1);
1428         memcpy(rl->max_level[last], max_level, MAX_RUN + 1);
1429         if(use_static)
1430             rl->max_run[last] = av_mallocz_static(MAX_LEVEL + 1);
1431         else
1432             rl->max_run[last] = av_malloc(MAX_LEVEL + 1);
1433         memcpy(rl->max_run[last], max_run, MAX_LEVEL + 1);
1434         if(use_static)
1435             rl->index_run[last] = av_mallocz_static(MAX_RUN + 1);
1436         else
1437             rl->index_run[last] = av_malloc(MAX_RUN + 1);
1438         memcpy(rl->index_run[last], index_run, MAX_RUN + 1);
1439     }
1440 }
1441
1442 /* draw the edges of width 'w' of an image of size width, height */
1443 //FIXME check that this is ok for mpeg4 interlaced
1444 static void draw_edges_c(uint8_t *buf, int wrap, int width, int height, int w)
1445 {
1446     uint8_t *ptr, *last_line;
1447     int i;
1448
1449     last_line = buf + (height - 1) * wrap;
1450     for(i=0;i<w;i++) {
1451         /* top and bottom */
1452         memcpy(buf - (i + 1) * wrap, buf, width);
1453         memcpy(last_line + (i + 1) * wrap, last_line, width);
1454     }
1455     /* left and right */
1456     ptr = buf;
1457     for(i=0;i<height;i++) {
1458         memset(ptr - w, ptr[0], w);
1459         memset(ptr + width, ptr[width-1], w);
1460         ptr += wrap;
1461     }
1462     /* corners */
1463     for(i=0;i<w;i++) {
1464         memset(buf - (i + 1) * wrap - w, buf[0], w); /* top left */
1465         memset(buf - (i + 1) * wrap + width, buf[width-1], w); /* top right */
1466         memset(last_line + (i + 1) * wrap - w, last_line[0], w); /* bottom left */
1467         memset(last_line + (i + 1) * wrap + width, last_line[width-1], w); /* bottom right */
1468     }
1469 }
1470
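/**
 * Returns the index of an unused entry in s->picture[]. For shared buffers
 * only entries with neither data nor a type are accepted; otherwise entries
 * that already carry a buffer type are preferred over completely empty ones.
 */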
1471 int ff_find_unused_picture(MpegEncContext *s, int shared){
1472     int i;
1473
1474     if(shared){
1475         for(i=0; i<MAX_PICTURE_COUNT; i++){
1476             if(s->picture[i].data[0]==NULL && s->picture[i].type==0) return i;
1477         }
1478     }else{
1479         for(i=0; i<MAX_PICTURE_COUNT; i++){
1480             if(s->picture[i].data[0]==NULL && s->picture[i].type!=0) return i; //FIXME
1481         }
1482         for(i=0; i<MAX_PICTURE_COUNT; i++){
1483             if(s->picture[i].data[0]==NULL) return i;
1484         }
1485     }
1486
1487     assert(0);
1488     return -1;
1489 }
1490
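/**
 * Updates the per-coefficient noise reduction offsets from the accumulated
 * DCT error statistics; the error sums and counts are halved once the count
 * exceeds 2^16 so that they stay bounded.
 */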
1491 static void update_noise_reduction(MpegEncContext *s){
1492     int intra, i;
1493
1494     for(intra=0; intra<2; intra++){
1495         if(s->dct_count[intra] > (1<<16)){
1496             for(i=0; i<64; i++){
1497                 s->dct_error_sum[intra][i] >>=1;
1498             }
1499             s->dct_count[intra] >>= 1;
1500         }
1501
1502         for(i=0; i<64; i++){
1503             s->dct_offset[intra][i]= (s->avctx->noise_reduction * s->dct_count[intra] + s->dct_error_sum[intra][i]/2) / (s->dct_error_sum[intra][i]+1);
1504         }
1505     }
1506 }
1507
1508 /**
1509  * generic function for encode/decode called after coding/decoding the header and before a frame is coded/decoded
1510  */
1511 int MPV_frame_start(MpegEncContext *s, AVCodecContext *avctx)
1512 {
1513     int i;
1514     AVFrame *pic;
1515     s->mb_skipped = 0;
1516
1517     assert(s->last_picture_ptr==NULL || s->out_format != FMT_H264 || s->codec_id == CODEC_ID_SVQ3);
1518
1519     /* mark&release old frames */
1520     if (s->pict_type != B_TYPE && s->last_picture_ptr && s->last_picture_ptr != s->next_picture_ptr && s->last_picture_ptr->data[0]) {
1521         avctx->release_buffer(avctx, (AVFrame*)s->last_picture_ptr);
1522
1523         /* release forgotten pictures */
1524         /* if(mpeg124/h263) */
1525         if(!s->encoding){
1526             for(i=0; i<MAX_PICTURE_COUNT; i++){
1527                 if(s->picture[i].data[0] && &s->picture[i] != s->next_picture_ptr && s->picture[i].reference){
1528                     av_log(avctx, AV_LOG_ERROR, "releasing zombie picture\n");
1529                     avctx->release_buffer(avctx, (AVFrame*)&s->picture[i]);
1530                 }
1531             }
1532         }
1533     }
1534 alloc:
1535     if(!s->encoding){
1536         /* release non reference frames */
1537         for(i=0; i<MAX_PICTURE_COUNT; i++){
1538             if(s->picture[i].data[0] && !s->picture[i].reference /*&& s->picture[i].type!=FF_BUFFER_TYPE_SHARED*/){
1539                 s->avctx->release_buffer(s->avctx, (AVFrame*)&s->picture[i]);
1540             }
1541         }
1542
1543         if(s->current_picture_ptr && s->current_picture_ptr->data[0]==NULL)
1544             pic= (AVFrame*)s->current_picture_ptr; //we already have an unused image (maybe it was set before reading the header)
1545         else{
1546             i= ff_find_unused_picture(s, 0);
1547             pic= (AVFrame*)&s->picture[i];
1548         }
1549
1550         pic->reference= (s->pict_type != B_TYPE || s->codec_id == CODEC_ID_H264)
1551                         && !s->dropable ? 3 : 0;
1552
1553         pic->coded_picture_number= s->coded_picture_number++;
1554
1555         if( alloc_picture(s, (Picture*)pic, 0) < 0)
1556             return -1;
1557
1558         s->current_picture_ptr= (Picture*)pic;
1559         s->current_picture_ptr->top_field_first= s->top_field_first; //FIXME use only the vars from current_pic
1560         s->current_picture_ptr->interlaced_frame= !s->progressive_frame && !s->progressive_sequence;
1561     }
1562
1563     s->current_picture_ptr->pict_type= s->pict_type;
1564 //    if(s->flags && CODEC_FLAG_QSCALE)
1565   //      s->current_picture_ptr->quality= s->new_picture_ptr->quality;
1566     s->current_picture_ptr->key_frame= s->pict_type == I_TYPE;
1567
1568     copy_picture(&s->current_picture, s->current_picture_ptr);
1569
1570   if(s->out_format != FMT_H264 || s->codec_id == CODEC_ID_SVQ3){
1571     if (s->pict_type != B_TYPE) {
1572         s->last_picture_ptr= s->next_picture_ptr;
1573         if(!s->dropable)
1574             s->next_picture_ptr= s->current_picture_ptr;
1575     }
1576 /*    av_log(s->avctx, AV_LOG_DEBUG, "L%p N%p C%p L%p N%p C%p type:%d drop:%d\n", s->last_picture_ptr, s->next_picture_ptr,s->current_picture_ptr,
1577         s->last_picture_ptr    ? s->last_picture_ptr->data[0] : NULL,
1578         s->next_picture_ptr    ? s->next_picture_ptr->data[0] : NULL,
1579         s->current_picture_ptr ? s->current_picture_ptr->data[0] : NULL,
1580         s->pict_type, s->dropable);*/
1581
1582     if(s->last_picture_ptr) copy_picture(&s->last_picture, s->last_picture_ptr);
1583     if(s->next_picture_ptr) copy_picture(&s->next_picture, s->next_picture_ptr);
1584
1585     if(s->pict_type != I_TYPE && (s->last_picture_ptr==NULL || s->last_picture_ptr->data[0]==NULL)){
1586         av_log(avctx, AV_LOG_ERROR, "warning: first frame is no keyframe\n");
1587         assert(s->pict_type != B_TYPE); //these should have been dropped if we don't have a reference
1588         goto alloc;
1589     }
1590
1591     assert(s->pict_type == I_TYPE || (s->last_picture_ptr && s->last_picture_ptr->data[0]));
1592
1593     if(s->picture_structure!=PICT_FRAME){
1594         int i;
1595         for(i=0; i<4; i++){
1596             if(s->picture_structure == PICT_BOTTOM_FIELD){
1597                  s->current_picture.data[i] += s->current_picture.linesize[i];
1598             }
1599             s->current_picture.linesize[i] *= 2;
1600             s->last_picture.linesize[i] *=2;
1601             s->next_picture.linesize[i] *=2;
1602         }
1603     }
1604   }
1605
1606     s->hurry_up= s->avctx->hurry_up;
1607     s->error_resilience= avctx->error_resilience;
1608
1609     /* set dequantizer, we can't do it during init as it might change for mpeg4
1610        and we can't do it in the header decode as init isn't called for mpeg4 there yet */
1611     if(s->mpeg_quant || s->codec_id == CODEC_ID_MPEG2VIDEO){
1612         s->dct_unquantize_intra = s->dct_unquantize_mpeg2_intra;
1613         s->dct_unquantize_inter = s->dct_unquantize_mpeg2_inter;
1614     }else if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
1615         s->dct_unquantize_intra = s->dct_unquantize_h263_intra;
1616         s->dct_unquantize_inter = s->dct_unquantize_h263_inter;
1617     }else{
1618         s->dct_unquantize_intra = s->dct_unquantize_mpeg1_intra;
1619         s->dct_unquantize_inter = s->dct_unquantize_mpeg1_inter;
1620     }
1621
1622     if(s->dct_error_sum){
1623         assert(s->avctx->noise_reduction && s->encoding);
1624
1625         update_noise_reduction(s);
1626     }
1627
1628 #ifdef HAVE_XVMC
1629     if(s->avctx->xvmc_acceleration)
1630         return XVMC_field_start(s, avctx);
1631 #endif
1632     return 0;
1633 }
1634
1635 /* generic function for encode/decode called after a frame has been coded/decoded */
1636 void MPV_frame_end(MpegEncContext *s)
1637 {
1638     int i;
1639     /* draw edge for correct motion prediction if outside */
1640 #ifdef HAVE_XVMC
1641 //just to make sure that all data is rendered.
1642     if(s->avctx->xvmc_acceleration){
1643         XVMC_field_end(s);
1644     }else
1645 #endif
1646     if(s->unrestricted_mv && s->current_picture.reference && !s->intra_only && !(s->flags&CODEC_FLAG_EMU_EDGE)) {
1647             draw_edges(s->current_picture.data[0], s->linesize  , s->h_edge_pos   , s->v_edge_pos   , EDGE_WIDTH  );
1648             draw_edges(s->current_picture.data[1], s->uvlinesize, s->h_edge_pos>>1, s->v_edge_pos>>1, EDGE_WIDTH/2);
1649             draw_edges(s->current_picture.data[2], s->uvlinesize, s->h_edge_pos>>1, s->v_edge_pos>>1, EDGE_WIDTH/2);
1650     }
1651     emms_c();
1652
1653     s->last_pict_type    = s->pict_type;
1654     s->last_lambda_for[s->pict_type]= s->current_picture_ptr->quality;
1655     if(s->pict_type!=B_TYPE){
1656         s->last_non_b_pict_type= s->pict_type;
1657     }
1658 #if 0
1659         /* copy back current_picture variables */
1660     for(i=0; i<MAX_PICTURE_COUNT; i++){
1661         if(s->picture[i].data[0] == s->current_picture.data[0]){
1662             s->picture[i]= s->current_picture;
1663             break;
1664         }
1665     }
1666     assert(i<MAX_PICTURE_COUNT);
1667 #endif
1668
1669     if(s->encoding){
1670         /* release non-reference frames */
1671         for(i=0; i<MAX_PICTURE_COUNT; i++){
1672             if(s->picture[i].data[0] && !s->picture[i].reference /*&& s->picture[i].type!=FF_BUFFER_TYPE_SHARED*/){
1673                 s->avctx->release_buffer(s->avctx, (AVFrame*)&s->picture[i]);
1674             }
1675         }
1676     }
1677     // clear copies, to avoid confusion
1678 #if 0
1679     memset(&s->last_picture, 0, sizeof(Picture));
1680     memset(&s->next_picture, 0, sizeof(Picture));
1681     memset(&s->current_picture, 0, sizeof(Picture));
1682 #endif
1683     s->avctx->coded_frame= (AVFrame*)s->current_picture_ptr;
1684 }
1685
1686 /**
1687  * draws a line from (ex, ey) -> (sx, sy).
1688  * @param w width of the image
1689  * @param h height of the image
1690  * @param stride stride/linesize of the image
1691  * @param color color of the line
1692  */
1693 static void draw_line(uint8_t *buf, int sx, int sy, int ex, int ey, int w, int h, int stride, int color){
1694     int t, x, y, fr, f;
1695
1696     sx= clip(sx, 0, w-1);
1697     sy= clip(sy, 0, h-1);
1698     ex= clip(ex, 0, w-1);
1699     ey= clip(ey, 0, h-1);
1700
1701     buf[sy*stride + sx]+= color;
1702
1703     if(ABS(ex - sx) > ABS(ey - sy)){
1704         if(sx > ex){
1705             t=sx; sx=ex; ex=t;
1706             t=sy; sy=ey; ey=t;
1707         }
1708         buf+= sx + sy*stride;
1709         ex-= sx;
1710         f= ((ey-sy)<<16)/ex;
1711         for(x= 0; x <= ex; x++){
1712             y = (x*f)>>16;
1713             fr= (x*f)&0xFFFF;
1714             buf[ y   *stride + x]+= (color*(0x10000-fr))>>16;
1715             buf[(y+1)*stride + x]+= (color*         fr )>>16;
1716         }
1717     }else{
1718         if(sy > ey){
1719             t=sx; sx=ex; ex=t;
1720             t=sy; sy=ey; ey=t;
1721         }
1722         buf+= sx + sy*stride;
1723         ey-= sy;
1724         if(ey) f= ((ex-sx)<<16)/ey;
1725         else   f= 0;
1726         for(y= 0; y <= ey; y++){
1727             x = (y*f)>>16;
1728             fr= (y*f)&0xFFFF;
1729             buf[y*stride + x  ]+= (color*(0x10000-fr))>>16;
1730             buf[y*stride + x+1]+= (color*         fr )>>16;
1731         }
1732     }
1733 }
1734
1735 /**
1736  * draws an arrow from (ex, ey) -> (sx, sy).
1737  * @param w width of the image
1738  * @param h height of the image
1739  * @param stride stride/linesize of the image
1740  * @param color color of the arrow
1741  */
1742 static void draw_arrow(uint8_t *buf, int sx, int sy, int ex, int ey, int w, int h, int stride, int color){
1743     int dx,dy;
1744
1745     sx= clip(sx, -100, w+100);
1746     sy= clip(sy, -100, h+100);
1747     ex= clip(ex, -100, w+100);
1748     ey= clip(ey, -100, h+100);
1749
1750     dx= ex - sx;
1751     dy= ey - sy;
1752
1753     if(dx*dx + dy*dy > 3*3){
1754         int rx=  dx + dy;
1755         int ry= -dx + dy;
1756         int length= ff_sqrt((rx*rx + ry*ry)<<8);
1757
1758         //FIXME subpixel accuracy
1759         rx= ROUNDED_DIV(rx*3<<4, length);
1760         ry= ROUNDED_DIV(ry*3<<4, length);
1761
1762         draw_line(buf, sx, sy, sx + rx, sy + ry, w, h, stride, color);
1763         draw_line(buf, sx, sy, sx - ry, sy + rx, w, h, stride, color);
1764     }
1765     draw_line(buf, sx, sy, ex, ey, w, h, stride, color);
1766 }
1767
1768 /**
1769  * prints debugging info for the given picture.
1770  */
1771 void ff_print_debug_info(MpegEncContext *s, AVFrame *pict){
1772
1773     if(!pict || !pict->mb_type) return;
1774
1775     if(s->avctx->debug&(FF_DEBUG_SKIP | FF_DEBUG_QP | FF_DEBUG_MB_TYPE)){
1776         int x,y;
1777
1778         av_log(s->avctx,AV_LOG_DEBUG,"New frame, type: ");
1779         switch (pict->pict_type) {
1780             case FF_I_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"I\n"); break;
1781             case FF_P_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"P\n"); break;
1782             case FF_B_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"B\n"); break;
1783             case FF_S_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"S\n"); break;
1784             case FF_SI_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"SI\n"); break;
1785             case FF_SP_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"SP\n"); break;
1786         }
1787         for(y=0; y<s->mb_height; y++){
1788             for(x=0; x<s->mb_width; x++){
1789                 if(s->avctx->debug&FF_DEBUG_SKIP){
1790                     int count= s->mbskip_table[x + y*s->mb_stride];
1791                     if(count>9) count=9;
1792                     av_log(s->avctx, AV_LOG_DEBUG, "%1d", count);
1793                 }
1794                 if(s->avctx->debug&FF_DEBUG_QP){
1795                     av_log(s->avctx, AV_LOG_DEBUG, "%2d", pict->qscale_table[x + y*s->mb_stride]);
1796                 }
1797                 if(s->avctx->debug&FF_DEBUG_MB_TYPE){
1798                     int mb_type= pict->mb_type[x + y*s->mb_stride];
1799                     //Type & MV direction
1800                     if(IS_PCM(mb_type))
1801                         av_log(s->avctx, AV_LOG_DEBUG, "P");
1802                     else if(IS_INTRA(mb_type) && IS_ACPRED(mb_type))
1803                         av_log(s->avctx, AV_LOG_DEBUG, "A");
1804                     else if(IS_INTRA4x4(mb_type))
1805                         av_log(s->avctx, AV_LOG_DEBUG, "i");
1806                     else if(IS_INTRA16x16(mb_type))
1807                         av_log(s->avctx, AV_LOG_DEBUG, "I");
1808                     else if(IS_DIRECT(mb_type) && IS_SKIP(mb_type))
1809                         av_log(s->avctx, AV_LOG_DEBUG, "d");
1810                     else if(IS_DIRECT(mb_type))
1811                         av_log(s->avctx, AV_LOG_DEBUG, "D");
1812                     else if(IS_GMC(mb_type) && IS_SKIP(mb_type))
1813                         av_log(s->avctx, AV_LOG_DEBUG, "g");
1814                     else if(IS_GMC(mb_type))
1815                         av_log(s->avctx, AV_LOG_DEBUG, "G");
1816                     else if(IS_SKIP(mb_type))
1817                         av_log(s->avctx, AV_LOG_DEBUG, "S");
1818                     else if(!USES_LIST(mb_type, 1))
1819                         av_log(s->avctx, AV_LOG_DEBUG, ">");
1820                     else if(!USES_LIST(mb_type, 0))
1821                         av_log(s->avctx, AV_LOG_DEBUG, "<");
1822                     else{
1823                         assert(USES_LIST(mb_type, 0) && USES_LIST(mb_type, 1));
1824                         av_log(s->avctx, AV_LOG_DEBUG, "X");
1825                     }
1826
1827                     //segmentation
1828                     if(IS_8X8(mb_type))
1829                         av_log(s->avctx, AV_LOG_DEBUG, "+");
1830                     else if(IS_16X8(mb_type))
1831                         av_log(s->avctx, AV_LOG_DEBUG, "-");
1832                     else if(IS_8X16(mb_type))
1833                         av_log(s->avctx, AV_LOG_DEBUG, "|");
1834                     else if(IS_INTRA(mb_type) || IS_16X16(mb_type))
1835                         av_log(s->avctx, AV_LOG_DEBUG, " ");
1836                     else
1837                         av_log(s->avctx, AV_LOG_DEBUG, "?");
1838
1839
1840                     if(IS_INTERLACED(mb_type) && s->codec_id == CODEC_ID_H264)
1841                         av_log(s->avctx, AV_LOG_DEBUG, "=");
1842                     else
1843                         av_log(s->avctx, AV_LOG_DEBUG, " ");
1844                 }
1845 //                av_log(s->avctx, AV_LOG_DEBUG, " ");
1846             }
1847             av_log(s->avctx, AV_LOG_DEBUG, "\n");
1848         }
1849     }
1850
1851     if((s->avctx->debug&(FF_DEBUG_VIS_QP|FF_DEBUG_VIS_MB_TYPE)) || (s->avctx->debug_mv)){
1852         const int shift= 1 + s->quarter_sample;
1853         int mb_y;
1854         uint8_t *ptr;
1855         int i;
1856         int h_chroma_shift, v_chroma_shift;
1857         const int width = s->avctx->width;
1858         const int height= s->avctx->height;
1859         const int mv_sample_log2= 4 - pict->motion_subsample_log2;
1860         const int mv_stride= (s->mb_width << mv_sample_log2) + (s->codec_id == CODEC_ID_H264 ? 0 : 1);
1861         s->low_delay=0; //needed to see the vectors without trashing the buffers
1862
1863         avcodec_get_chroma_sub_sample(s->avctx->pix_fmt, &h_chroma_shift, &v_chroma_shift);
1864         for(i=0; i<3; i++){
1865             memcpy(s->visualization_buffer[i], pict->data[i], (i==0) ? pict->linesize[i]*height:pict->linesize[i]*height >> v_chroma_shift);
1866             pict->data[i]= s->visualization_buffer[i];
1867         }
1868         pict->type= FF_BUFFER_TYPE_COPY;
1869         ptr= pict->data[0];
1870
1871         for(mb_y=0; mb_y<s->mb_height; mb_y++){
1872             int mb_x;
1873             for(mb_x=0; mb_x<s->mb_width; mb_x++){
1874                 const int mb_index= mb_x + mb_y*s->mb_stride;
1875                 if((s->avctx->debug_mv) && pict->motion_val){
1876                   int type;
1877                   for(type=0; type<3; type++){
1878                     int direction = 0;
1879                     switch (type) {
1880                       case 0: if ((!(s->avctx->debug_mv&FF_DEBUG_VIS_MV_P_FOR)) || (pict->pict_type!=FF_P_TYPE))
1881                                 continue;
1882                               direction = 0;
1883                               break;
1884                       case 1: if ((!(s->avctx->debug_mv&FF_DEBUG_VIS_MV_B_FOR)) || (pict->pict_type!=FF_B_TYPE))
1885                                 continue;
1886                               direction = 0;
1887                               break;
1888                       case 2: if ((!(s->avctx->debug_mv&FF_DEBUG_VIS_MV_B_BACK)) || (pict->pict_type!=FF_B_TYPE))
1889                                 continue;
1890                               direction = 1;
1891                               break;
1892                     }
1893                     if(!USES_LIST(pict->mb_type[mb_index], direction))
1894                         continue;
1895
1896                     if(IS_8X8(pict->mb_type[mb_index])){
1897                       int i;
1898                       for(i=0; i<4; i++){
1899                         int sx= mb_x*16 + 4 + 8*(i&1);
1900                         int sy= mb_y*16 + 4 + 8*(i>>1);
1901                         int xy= (mb_x*2 + (i&1) + (mb_y*2 + (i>>1))*mv_stride) << (mv_sample_log2-1);
1902                         int mx= (pict->motion_val[direction][xy][0]>>shift) + sx;
1903                         int my= (pict->motion_val[direction][xy][1]>>shift) + sy;
1904                         draw_arrow(ptr, sx, sy, mx, my, width, height, s->linesize, 100);
1905                       }
1906                     }else if(IS_16X8(pict->mb_type[mb_index])){
1907                       int i;
1908                       for(i=0; i<2; i++){
1909                         int sx=mb_x*16 + 8;
1910                         int sy=mb_y*16 + 4 + 8*i;
1911                         int xy= (mb_x*2 + (mb_y*2 + i)*mv_stride) << (mv_sample_log2-1);
1912                         int mx=(pict->motion_val[direction][xy][0]>>shift);
1913                         int my=(pict->motion_val[direction][xy][1]>>shift);
1914
1915                         if(IS_INTERLACED(pict->mb_type[mb_index]))
1916                             my*=2;
1917
1918                         draw_arrow(ptr, sx, sy, mx+sx, my+sy, width, height, s->linesize, 100);
1919                       }
1920                     }else if(IS_8X16(pict->mb_type[mb_index])){
1921                       int i;
1922                       for(i=0; i<2; i++){
1923                         int sx=mb_x*16 + 4 + 8*i;
1924                         int sy=mb_y*16 + 8;
1925                         int xy= (mb_x*2 + i + mb_y*2*mv_stride) << (mv_sample_log2-1);
1926                         int mx=(pict->motion_val[direction][xy][0]>>shift);
1927                         int my=(pict->motion_val[direction][xy][1]>>shift);
1928
1929                         if(IS_INTERLACED(pict->mb_type[mb_index]))
1930                             my*=2;
1931
1932                         draw_arrow(ptr, sx, sy, mx+sx, my+sy, width, height, s->linesize, 100);
1933                       }
1934                     }else{
1935                       int sx= mb_x*16 + 8;
1936                       int sy= mb_y*16 + 8;
1937                       int xy= (mb_x + mb_y*mv_stride) << mv_sample_log2;
1938                       int mx= (pict->motion_val[direction][xy][0]>>shift) + sx;
1939                       int my= (pict->motion_val[direction][xy][1]>>shift) + sy;
1940                       draw_arrow(ptr, sx, sy, mx, my, width, height, s->linesize, 100);
1941                     }
1942                   }
1943                 }
1944                 if((s->avctx->debug&FF_DEBUG_VIS_QP) && pict->motion_val){
1945                     uint64_t c= (pict->qscale_table[mb_index]*128/31) * 0x0101010101010101ULL;
1946                     int y;
1947                     for(y=0; y<8; y++){
1948                         *(uint64_t*)(pict->data[1] + 8*mb_x + (8*mb_y + y)*pict->linesize[1])= c;
1949                         *(uint64_t*)(pict->data[2] + 8*mb_x + (8*mb_y + y)*pict->linesize[2])= c;
1950                     }
1951                 }
1952                 if((s->avctx->debug&FF_DEBUG_VIS_MB_TYPE) && pict->motion_val){
1953                     int mb_type= pict->mb_type[mb_index];
1954                     uint64_t u,v;
1955                     int y;
1956 #define COLOR(theta, r)\
1957 u= (int)(128 + r*cos(theta*3.141592/180));\
1958 v= (int)(128 + r*sin(theta*3.141592/180));
1959
1960
1961                     u=v=128;
1962                     if(IS_PCM(mb_type)){
1963                         COLOR(120,48)
1964                     }else if((IS_INTRA(mb_type) && IS_ACPRED(mb_type)) || IS_INTRA16x16(mb_type)){
1965                         COLOR(30,48)
1966                     }else if(IS_INTRA4x4(mb_type)){
1967                         COLOR(90,48)
1968                     }else if(IS_DIRECT(mb_type) && IS_SKIP(mb_type)){
1969 //                        COLOR(120,48)
1970                     }else if(IS_DIRECT(mb_type)){
1971                         COLOR(150,48)
1972                     }else if(IS_GMC(mb_type) && IS_SKIP(mb_type)){
1973                         COLOR(170,48)
1974                     }else if(IS_GMC(mb_type)){
1975                         COLOR(190,48)
1976                     }else if(IS_SKIP(mb_type)){
1977 //                        COLOR(180,48)
1978                     }else if(!USES_LIST(mb_type, 1)){
1979                         COLOR(240,48)
1980                     }else if(!USES_LIST(mb_type, 0)){
1981                         COLOR(0,48)
1982                     }else{
1983                         assert(USES_LIST(mb_type, 0) && USES_LIST(mb_type, 1));
1984                         COLOR(300,48)
1985                     }
1986
1987                     u*= 0x0101010101010101ULL;
1988                     v*= 0x0101010101010101ULL;
1989                     for(y=0; y<8; y++){
1990                         *(uint64_t*)(pict->data[1] + 8*mb_x + (8*mb_y + y)*pict->linesize[1])= u;
1991                         *(uint64_t*)(pict->data[2] + 8*mb_x + (8*mb_y + y)*pict->linesize[2])= v;
1992                     }
1993
1994                     //segmentation
1995                     if(IS_8X8(mb_type) || IS_16X8(mb_type)){
1996                         *(uint64_t*)(pict->data[0] + 16*mb_x + 0 + (16*mb_y + 8)*pict->linesize[0])^= 0x8080808080808080ULL;
1997                         *(uint64_t*)(pict->data[0] + 16*mb_x + 8 + (16*mb_y + 8)*pict->linesize[0])^= 0x8080808080808080ULL;
1998                     }
1999                     if(IS_8X8(mb_type) || IS_8X16(mb_type)){
2000                         for(y=0; y<16; y++)
2001                             pict->data[0][16*mb_x + 8 + (16*mb_y + y)*pict->linesize[0]]^= 0x80;
2002                     }
2003                     if(IS_8X8(mb_type) && mv_sample_log2 >= 2){
2004                         int dm= 1 << (mv_sample_log2-2);
2005                         for(i=0; i<4; i++){
2006                             int sx= mb_x*16 + 8*(i&1);
2007                             int sy= mb_y*16 + 8*(i>>1);
2008                             int xy= (mb_x*2 + (i&1) + (mb_y*2 + (i>>1))*mv_stride) << (mv_sample_log2-1);
2009                             //FIXME bidir
2010                             int32_t *mv = (int32_t*)&pict->motion_val[0][xy];
2011                             if(mv[0] != mv[dm] || mv[dm*mv_stride] != mv[dm*(mv_stride+1)])
2012                                 for(y=0; y<8; y++)
2013                                     pict->data[0][sx + 4 + (sy + y)*pict->linesize[0]]^= 0x80;
2014                             if(mv[0] != mv[dm*mv_stride] || mv[dm] != mv[dm*(mv_stride+1)])
2015                                 *(uint64_t*)(pict->data[0] + sx + (sy + 4)*pict->linesize[0])^= 0x8080808080808080ULL;
2016                         }
2017                     }
2018
2019                     if(IS_INTERLACED(mb_type) && s->codec_id == CODEC_ID_H264){
2020                         // hmm
2021                     }
2022                 }
2023                 s->mbskip_table[mb_index]=0;
2024             }
2025         }
2026     }
2027 }
2028
2029 #ifdef CONFIG_ENCODERS
2030
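/**
 * Returns the sum of absolute differences between a 16x16 block and a
 * constant reference value (the block mean in the caller below).
 */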
2031 static int get_sae(uint8_t *src, int ref, int stride){
2032     int x,y;
2033     int acc=0;
2034
2035     for(y=0; y<16; y++){
2036         for(x=0; x<16; x++){
2037             acc+= ABS(src[x+y*stride] - ref);
2038         }
2039     }
2040
2041     return acc;
2042 }
2043
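/**
 * Counts the 16x16 blocks of src that look cheaper to code as intra:
 * a block is counted when its deviation from its own mean, plus a small
 * bias, is below its SAD against the reference frame.
 */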
2044 static int get_intra_count(MpegEncContext *s, uint8_t *src, uint8_t *ref, int stride){
2045     int x, y, w, h;
2046     int acc=0;
2047
2048     w= s->width &~15;
2049     h= s->height&~15;
2050
2051     for(y=0; y<h; y+=16){
2052         for(x=0; x<w; x+=16){
2053             int offset= x + y*stride;
2054             int sad = s->dsp.sad[0](NULL, src + offset, ref + offset, stride, 16);
2055             int mean= (s->dsp.pix_sum(src + offset, stride) + 128)>>8;
2056             int sae = get_sae(src + offset, mean, stride);
2057
2058             acc+= sae + 500 < sad;
2059         }
2060     }
2061     return acc;
2062 }
2063
2064
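/**
 * Validates the timestamp of a user-supplied frame and stores it (directly
 * referenced where possible, otherwise copied) at the end of the
 * input_picture FIFO. Returns 0 on success, -1 on a non-monotone pts.
 */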
2065 static int load_input_picture(MpegEncContext *s, AVFrame *pic_arg){
2066     AVFrame *pic=NULL;
2067     int64_t pts;
2068     int i;
2069     const int encoding_delay= s->max_b_frames;
2070     int direct=1;
2071
2072     if(pic_arg){
2073         pts= pic_arg->pts;
2074         pic_arg->display_picture_number= s->input_picture_number++;
2075
2076         if(pts != AV_NOPTS_VALUE){
2077             if(s->user_specified_pts != AV_NOPTS_VALUE){
2078                 int64_t time= pts;
2079                 int64_t last= s->user_specified_pts;
2080
2081                 if(time <= last){
2082                     av_log(s->avctx, AV_LOG_ERROR, "Error, Invalid timestamp=%"PRId64", last=%"PRId64"\n", pts, s->user_specified_pts);
2083                     return -1;
2084                 }
2085             }
2086             s->user_specified_pts= pts;
2087         }else{
2088             if(s->user_specified_pts != AV_NOPTS_VALUE){
2089                 s->user_specified_pts=
2090                 pts= s->user_specified_pts + 1;
2091                 av_log(s->avctx, AV_LOG_INFO, "Warning: AVFrame.pts=? trying to guess (%"PRId64")\n", pts);
2092             }else{
2093                 pts= pic_arg->display_picture_number;
2094             }
2095         }
2096     }
2097
2098   if(pic_arg){
2099     if(encoding_delay && !(s->flags&CODEC_FLAG_INPUT_PRESERVED)) direct=0;
2100     if(pic_arg->linesize[0] != s->linesize) direct=0;
2101     if(pic_arg->linesize[1] != s->uvlinesize) direct=0;
2102     if(pic_arg->linesize[2] != s->uvlinesize) direct=0;
2103
2104 //    av_log(AV_LOG_DEBUG, "%d %d %d %d\n",pic_arg->linesize[0], pic_arg->linesize[1], s->linesize, s->uvlinesize);
2105
2106     if(direct){
2107         i= ff_find_unused_picture(s, 1);
2108
2109         pic= (AVFrame*)&s->picture[i];
2110         pic->reference= 3;
2111
2112         for(i=0; i<4; i++){
2113             pic->data[i]= pic_arg->data[i];
2114             pic->linesize[i]= pic_arg->linesize[i];
2115         }
2116         alloc_picture(s, (Picture*)pic, 1);
2117     }else{
2118         i= ff_find_unused_picture(s, 0);
2119
2120         pic= (AVFrame*)&s->picture[i];
2121         pic->reference= 3;
2122
2123         alloc_picture(s, (Picture*)pic, 0);
2124
2125         if(   pic->data[0] + INPLACE_OFFSET == pic_arg->data[0]
2126            && pic->data[1] + INPLACE_OFFSET == pic_arg->data[1]
2127            && pic->data[2] + INPLACE_OFFSET == pic_arg->data[2]){
2128        // empty
2129         }else{
2130             int h_chroma_shift, v_chroma_shift;
2131             avcodec_get_chroma_sub_sample(s->avctx->pix_fmt, &h_chroma_shift, &v_chroma_shift);
2132
2133             for(i=0; i<3; i++){
2134                 int src_stride= pic_arg->linesize[i];
2135                 int dst_stride= i ? s->uvlinesize : s->linesize;
2136                 int h_shift= i ? h_chroma_shift : 0;
2137                 int v_shift= i ? v_chroma_shift : 0;
2138                 int w= s->width >>h_shift;
2139                 int h= s->height>>v_shift;
2140                 uint8_t *src= pic_arg->data[i];
2141                 uint8_t *dst= pic->data[i] + INPLACE_OFFSET;
2142
2143                 if(src_stride==dst_stride)
2144                     memcpy(dst, src, src_stride*h);
2145                 else{
2146                     while(h--){
2147                         memcpy(dst, src, w);
2148                         dst += dst_stride;
2149                         src += src_stride;
2150                     }
2151                 }
2152             }
2153         }
2154     }
2155     copy_picture_attributes(s, pic, pic_arg);
2156     pic->pts= pts; //we set this here to avoid modifying pic_arg
2157   }
2158
2159     /* shift buffer entries */
2160     for(i=1; i<MAX_PICTURE_COUNT /*s->encoding_delay+1*/; i++)
2161         s->input_picture[i-1]= s->input_picture[i];
2162
2163     s->input_picture[encoding_delay]= (Picture*)pic;
2164
2165     return 0;
2166 }
2167
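/**
 * Decides whether the candidate frame p may be skipped: compares it against
 * the reference with frame_skip_cmp, accumulates the error according to
 * frame_skip_exp and tests the result against frame_skip_threshold and
 * frame_skip_factor. Returns 1 if the frame can be dropped.
 */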
2168 static int skip_check(MpegEncContext *s, Picture *p, Picture *ref){
2169     int x, y, plane;
2170     int score=0;
2171     int64_t score64=0;
2172
2173     for(plane=0; plane<3; plane++){
2174         const int stride= p->linesize[plane];
2175         const int bw= plane ? 1 : 2;
2176         for(y=0; y<s->mb_height*bw; y++){
2177             for(x=0; x<s->mb_width*bw; x++){
2178                 int off= p->type == FF_BUFFER_TYPE_SHARED ? 0: 16;
2179                 int v= s->dsp.frame_skip_cmp[1](s, p->data[plane] + 8*(x + y*stride)+off, ref->data[plane] + 8*(x + y*stride), stride, 8);
2180
2181                 switch(s->avctx->frame_skip_exp){
2182                     case 0: score= FFMAX(score, v); break;
2183                     case 1: score+= ABS(v);break;
2184                     case 2: score+= v*v;break;
2185                     case 3: score64+= ABS(v*v*(int64_t)v);break;
2186                     case 4: score64+= v*v*(int64_t)(v*v);break;
2187                 }
2188             }
2189         }
2190     }
2191
2192     if(score) score64= score;
2193
2194     if(score64 < s->avctx->frame_skip_threshold)
2195         return 1;
2196     if(score64 < ((s->avctx->frame_skip_factor * (int64_t)s->lambda)>>8))
2197         return 1;
2198     return 0;
2199 }
2200
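/**
 * Estimates the best number of consecutive B-frames (b_frame_strategy 2):
 * encodes downscaled (brd_scale) versions of the queued input pictures with
 * each candidate B-frame count and returns the count with the lowest
 * rate-distortion cost, or -1 if the helper encoder could not be opened.
 */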
2201 static int estimate_best_b_count(MpegEncContext *s){
2202     AVCodec *codec= avcodec_find_encoder(s->avctx->codec_id);
2203     AVCodecContext *c= avcodec_alloc_context();
2204     AVFrame input[FF_MAX_B_FRAMES+2];
2205     const int scale= s->avctx->brd_scale;
2206     int i, j, out_size, p_lambda, b_lambda, lambda2;
2207     int outbuf_size= s->width * s->height; //FIXME
2208     uint8_t *outbuf= av_malloc(outbuf_size);
2209     int64_t best_rd= INT64_MAX;
2210     int best_b_count= -1;
2211
2212     assert(scale>=0 && scale <=3);
2213
2214 //    emms_c();
2215     p_lambda= s->last_lambda_for[P_TYPE]; //s->next_picture_ptr->quality;
2216     b_lambda= s->last_lambda_for[B_TYPE]; //p_lambda *ABS(s->avctx->b_quant_factor) + s->avctx->b_quant_offset;
2217     if(!b_lambda) b_lambda= p_lambda; //FIXME we should do this somewhere else
2218     lambda2= (b_lambda*b_lambda + (1<<FF_LAMBDA_SHIFT)/2 ) >> FF_LAMBDA_SHIFT;
2219
2220     c->width = s->width >> scale;
2221     c->height= s->height>> scale;
2222     c->flags= CODEC_FLAG_QSCALE | CODEC_FLAG_PSNR | CODEC_FLAG_INPUT_PRESERVED /*| CODEC_FLAG_EMU_EDGE*/;
2223     c->flags|= s->avctx->flags & CODEC_FLAG_QPEL;
2224     c->mb_decision= s->avctx->mb_decision;
2225     c->me_cmp= s->avctx->me_cmp;
2226     c->mb_cmp= s->avctx->mb_cmp;
2227     c->me_sub_cmp= s->avctx->me_sub_cmp;
2228     c->pix_fmt = PIX_FMT_YUV420P;
2229     c->time_base= s->avctx->time_base;
2230     c->max_b_frames= s->max_b_frames;
2231
2232     if (avcodec_open(c, codec) < 0)
2233         return -1;
2234
2235     for(i=0; i<s->max_b_frames+2; i++){
2236         int ysize= c->width*c->height;
2237         int csize= (c->width/2)*(c->height/2);
2238         Picture pre_input, *pre_input_ptr= i ? s->input_picture[i-1] : s->next_picture_ptr;
2239
2240         if(pre_input_ptr)
2241             pre_input= *pre_input_ptr;
2242
2243         if(pre_input.type != FF_BUFFER_TYPE_SHARED && i){
2244             pre_input.data[0]+=INPLACE_OFFSET;
2245             pre_input.data[1]+=INPLACE_OFFSET;
2246             pre_input.data[2]+=INPLACE_OFFSET;
2247         }
2248
2249         avcodec_get_frame_defaults(&input[i]);
2250         input[i].data[0]= av_malloc(ysize + 2*csize);
2251         input[i].data[1]= input[i].data[0] + ysize;
2252         input[i].data[2]= input[i].data[1] + csize;
2253         input[i].linesize[0]= c->width;
2254         input[i].linesize[1]=
2255         input[i].linesize[2]= c->width/2;
2256
2257         if(!i || s->input_picture[i-1]){
2258             s->dsp.shrink[scale](input[i].data[0], input[i].linesize[0], pre_input.data[0], pre_input.linesize[0], c->width, c->height);
2259             s->dsp.shrink[scale](input[i].data[1], input[i].linesize[1], pre_input.data[1], pre_input.linesize[1], c->width>>1, c->height>>1);
2260             s->dsp.shrink[scale](input[i].data[2], input[i].linesize[2], pre_input.data[2], pre_input.linesize[2], c->width>>1, c->height>>1);
2261         }
2262     }
2263
2264     for(j=0; j<s->max_b_frames+1; j++){
2265         int64_t rd=0;
2266
2267         if(!s->input_picture[j])
2268             break;
2269
2270         c->error[0]= c->error[1]= c->error[2]= 0;
2271
2272         input[0].pict_type= I_TYPE;
2273         input[0].quality= 1 * FF_QP2LAMBDA;
2274         out_size = avcodec_encode_video(c, outbuf, outbuf_size, &input[0]);
2275 //        rd += (out_size * lambda2) >> FF_LAMBDA_SHIFT;
2276
2277         for(i=0; i<s->max_b_frames+1; i++){
2278             int is_p= i % (j+1) == j || i==s->max_b_frames;
2279
2280             input[i+1].pict_type= is_p ? P_TYPE : B_TYPE;
2281             input[i+1].quality= is_p ? p_lambda : b_lambda;
2282             out_size = avcodec_encode_video(c, outbuf, outbuf_size, &input[i+1]);
2283             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
2284         }
2285
2286         /* get the delayed frames */
2287         while(out_size){
2288             out_size = avcodec_encode_video(c, outbuf, outbuf_size, NULL);
2289             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
2290         }
2291
2292         rd += c->error[0] + c->error[1] + c->error[2];
2293
2294         if(rd < best_rd){
2295             best_rd= rd;
2296             best_b_count= j;
2297         }
2298     }
2299
2300     av_freep(&outbuf);
2301     avcodec_close(c);
2302     av_freep(&c);
2303
2304     for(i=0; i<s->max_b_frames+2; i++){
2305         av_freep(&input[i].data[0]);
2306     }
2307
2308     return best_b_count;
2309 }
2310
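/**
 * Picks the next picture to code and reorders the input queue accordingly:
 * handles frame skipping, two-pass picture types, the configured B-frame
 * strategy and GOP boundaries, then fills new_picture/current_picture.
 */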
2311 static void select_input_picture(MpegEncContext *s){
2312     int i;
2313
2314     for(i=1; i<MAX_PICTURE_COUNT; i++)
2315         s->reordered_input_picture[i-1]= s->reordered_input_picture[i];
2316     s->reordered_input_picture[MAX_PICTURE_COUNT-1]= NULL;
2317
2318     /* set next picture type & ordering */
2319     if(s->reordered_input_picture[0]==NULL && s->input_picture[0]){
2320         if(/*s->picture_in_gop_number >= s->gop_size ||*/ s->next_picture_ptr==NULL || s->intra_only){
2321             s->reordered_input_picture[0]= s->input_picture[0];
2322             s->reordered_input_picture[0]->pict_type= I_TYPE;
2323             s->reordered_input_picture[0]->coded_picture_number= s->coded_picture_number++;
2324         }else{
2325             int b_frames;
2326
2327             if(s->avctx->frame_skip_threshold || s->avctx->frame_skip_factor){
2328                 if(s->picture_in_gop_number < s->gop_size && skip_check(s, s->input_picture[0], s->next_picture_ptr)){
2329                 //FIXME check that the gop check above is +-1 correct
2330 //av_log(NULL, AV_LOG_DEBUG, "skip %p %Ld\n", s->input_picture[0]->data[0], s->input_picture[0]->pts);
2331
2332                     if(s->input_picture[0]->type == FF_BUFFER_TYPE_SHARED){
2333                         for(i=0; i<4; i++)
2334                             s->input_picture[0]->data[i]= NULL;
2335                         s->input_picture[0]->type= 0;
2336                     }else{
2337                         assert(   s->input_picture[0]->type==FF_BUFFER_TYPE_USER
2338                                || s->input_picture[0]->type==FF_BUFFER_TYPE_INTERNAL);
2339
2340                         s->avctx->release_buffer(s->avctx, (AVFrame*)s->input_picture[0]);
2341                     }
2342
2343                     emms_c();
2344                     ff_vbv_update(s, 0);
2345
2346                     goto no_output_pic;
2347                 }
2348             }
2349
2350             if(s->flags&CODEC_FLAG_PASS2){
2351                 for(i=0; i<s->max_b_frames+1; i++){
2352                     int pict_num= s->input_picture[0]->display_picture_number + i;
2353
2354                     if(pict_num >= s->rc_context.num_entries)
2355                         break;
2356                     if(!s->input_picture[i]){
2357                         s->rc_context.entry[pict_num-1].new_pict_type = P_TYPE;
2358                         break;
2359                     }
2360
2361                     s->input_picture[i]->pict_type=
2362                         s->rc_context.entry[pict_num].new_pict_type;
2363                 }
2364             }
2365
2366             if(s->avctx->b_frame_strategy==0){
2367                 b_frames= s->max_b_frames;
2368                 while(b_frames && !s->input_picture[b_frames]) b_frames--;
2369             }else if(s->avctx->b_frame_strategy==1){
2370                 for(i=1; i<s->max_b_frames+1; i++){
2371                     if(s->input_picture[i] && s->input_picture[i]->b_frame_score==0){
2372                         s->input_picture[i]->b_frame_score=
2373                             get_intra_count(s, s->input_picture[i  ]->data[0],
2374                                                s->input_picture[i-1]->data[0], s->linesize) + 1;
2375                     }
2376                 }
2377                 for(i=0; i<s->max_b_frames+1; i++){
2378                     if(s->input_picture[i]==NULL || s->input_picture[i]->b_frame_score - 1 > s->mb_num/40) break;
2379                 }
2380
2381                 b_frames= FFMAX(0, i-1);
2382
2383                 /* reset scores */
2384                 for(i=0; i<b_frames+1; i++){
2385                     s->input_picture[i]->b_frame_score=0;
2386                 }
2387             }else if(s->avctx->b_frame_strategy==2){
2388                 b_frames= estimate_best_b_count(s);
2389             }else{
2390                 av_log(s->avctx, AV_LOG_ERROR, "illegal b frame strategy\n");
2391                 b_frames=0;
2392             }
2393
2394             emms_c();
2395 //static int b_count=0;
2396 //b_count+= b_frames;
2397 //av_log(s->avctx, AV_LOG_DEBUG, "b_frames: %d\n", b_count);
2398
2399             for(i= b_frames - 1; i>=0; i--){
2400                 int type= s->input_picture[i]->pict_type;
2401                 if(type && type != B_TYPE)
2402                     b_frames= i;
2403             }
2404             if(s->input_picture[b_frames]->pict_type == B_TYPE && b_frames == s->max_b_frames){
2405                 av_log(s->avctx, AV_LOG_ERROR, "warning, too many b frames in a row\n");
2406             }
2407
2408             if(s->picture_in_gop_number + b_frames >= s->gop_size){
2409               if((s->flags2 & CODEC_FLAG2_STRICT_GOP) && s->gop_size > s->picture_in_gop_number){
2410                     b_frames= s->gop_size - s->picture_in_gop_number - 1;
2411               }else{
2412                 if(s->flags & CODEC_FLAG_CLOSED_GOP)
2413                     b_frames=0;
2414                 s->input_picture[b_frames]->pict_type= I_TYPE;
2415               }
2416             }
2417
2418             if(   (s->flags & CODEC_FLAG_CLOSED_GOP)
2419                && b_frames
2420                && s->input_picture[b_frames]->pict_type== I_TYPE)
2421                 b_frames--;
2422
2423             s->reordered_input_picture[0]= s->input_picture[b_frames];
2424             if(s->reordered_input_picture[0]->pict_type != I_TYPE)
2425                 s->reordered_input_picture[0]->pict_type= P_TYPE;
2426             s->reordered_input_picture[0]->coded_picture_number= s->coded_picture_number++;
2427             for(i=0; i<b_frames; i++){
2428                 s->reordered_input_picture[i+1]= s->input_picture[i];
2429                 s->reordered_input_picture[i+1]->pict_type= B_TYPE;
2430                 s->reordered_input_picture[i+1]->coded_picture_number= s->coded_picture_number++;
2431             }
2432         }
2433     }
2434 no_output_pic:
2435     if(s->reordered_input_picture[0]){
2436         s->reordered_input_picture[0]->reference= s->reordered_input_picture[0]->pict_type!=B_TYPE ? 3 : 0;
2437
2438         copy_picture(&s->new_picture, s->reordered_input_picture[0]);
2439
2440         if(s->reordered_input_picture[0]->type == FF_BUFFER_TYPE_SHARED){
2441             // input is a shared pix, so we can't modify it -> alloc a new one & ensure that the shared one is reusable
2442
2443             int i= ff_find_unused_picture(s, 0);
2444             Picture *pic= &s->picture[i];
2445
2446             /* mark us unused / free shared pic */
2447             for(i=0; i<4; i++)
2448                 s->reordered_input_picture[0]->data[i]= NULL;
2449             s->reordered_input_picture[0]->type= 0;
2450
2451             pic->reference              = s->reordered_input_picture[0]->reference;
2452
2453             alloc_picture(s, pic, 0);
2454
2455             copy_picture_attributes(s, (AVFrame*)pic, (AVFrame*)s->reordered_input_picture[0]);
2456
2457             s->current_picture_ptr= pic;
2458         }else{
2459             // input is not a shared pix -> reuse buffer for current_pix
2460
2461             assert(   s->reordered_input_picture[0]->type==FF_BUFFER_TYPE_USER
2462                    || s->reordered_input_picture[0]->type==FF_BUFFER_TYPE_INTERNAL);
2463
2464             s->current_picture_ptr= s->reordered_input_picture[0];
2465             for(i=0; i<4; i++){
2466                 s->new_picture.data[i]+= INPLACE_OFFSET;
2467             }
2468         }
2469         copy_picture(&s->current_picture, s->current_picture_ptr);
2470
2471         s->picture_number= s->new_picture.display_picture_number;
2472 //printf("dpn:%d\n", s->picture_number);
2473     }else{
2474        memset(&s->new_picture, 0, sizeof(Picture));
2475     }
2476 }
2477
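/**
 * Encodes one frame: splits the output buffer between the slice threads,
 * loads and reorders the input picture, runs encode_picture(), appends VBV
 * stuffing and updates the vbv_delay for CBR MPEG-1/2. Returns the number
 * of bytes written to buf.
 */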
2478 int MPV_encode_picture(AVCodecContext *avctx,
2479                        unsigned char *buf, int buf_size, void *data)
2480 {
2481     MpegEncContext *s = avctx->priv_data;
2482     AVFrame *pic_arg = data;
2483     int i, stuffing_count;
2484
2485     for(i=0; i<avctx->thread_count; i++){
2486         int start_y= s->thread_context[i]->start_mb_y;
2487         int   end_y= s->thread_context[i]->  end_mb_y;
2488         int h= s->mb_height;
2489         uint8_t *start= buf + (size_t)(((int64_t) buf_size)*start_y/h);
2490         uint8_t *end  = buf + (size_t)(((int64_t) buf_size)*  end_y/h);
2491
2492         init_put_bits(&s->thread_context[i]->pb, start, end - start);
2493     }
2494
2495     s->picture_in_gop_number++;
2496
2497     if(load_input_picture(s, pic_arg) < 0)
2498         return -1;
2499
2500     select_input_picture(s);
2501
2502     /* output? */
2503     if(s->new_picture.data[0]){
2504         s->pict_type= s->new_picture.pict_type;
2505 //emms_c();
2506 //printf("qs:%f %f %d\n", s->new_picture.quality, s->current_picture.quality, s->qscale);
2507         MPV_frame_start(s, avctx);
2508
2509         encode_picture(s, s->picture_number);
2510
2511         avctx->real_pict_num  = s->picture_number;
2512         avctx->header_bits = s->header_bits;
2513         avctx->mv_bits     = s->mv_bits;
2514         avctx->misc_bits   = s->misc_bits;
2515         avctx->i_tex_bits  = s->i_tex_bits;
2516         avctx->p_tex_bits  = s->p_tex_bits;
2517         avctx->i_count     = s->i_count;
2518         avctx->p_count     = s->mb_num - s->i_count - s->skip_count; //FIXME f/b_count in avctx
2519         avctx->skip_count  = s->skip_count;
2520
2521         MPV_frame_end(s);
2522
2523         if (s->out_format == FMT_MJPEG)
2524             mjpeg_picture_trailer(s);
2525
2526         if(s->flags&CODEC_FLAG_PASS1)
2527             ff_write_pass1_stats(s);
2528
2529         for(i=0; i<4; i++){
2530             s->current_picture_ptr->error[i]= s->current_picture.error[i];
2531             avctx->error[i] += s->current_picture_ptr->error[i];
2532         }
2533
2534         if(s->flags&CODEC_FLAG_PASS1)
2535             assert(avctx->header_bits + avctx->mv_bits + avctx->misc_bits + avctx->i_tex_bits + avctx->p_tex_bits == put_bits_count(&s->pb));
2536         flush_put_bits(&s->pb);
2537         s->frame_bits  = put_bits_count(&s->pb);
2538
2539         stuffing_count= ff_vbv_update(s, s->frame_bits);
2540         if(stuffing_count){
2541             if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < stuffing_count + 50){
2542                 av_log(s->avctx, AV_LOG_ERROR, "stuffing too large\n");
2543                 return -1;
2544             }
2545
2546             switch(s->codec_id){
2547             case CODEC_ID_MPEG1VIDEO:
2548             case CODEC_ID_MPEG2VIDEO:
2549                 while(stuffing_count--){
2550                     put_bits(&s->pb, 8, 0);
2551                 }
2552             break;
2553             case CODEC_ID_MPEG4:
2554                 put_bits(&s->pb, 16, 0);
2555                 put_bits(&s->pb, 16, 0x1C3);
2556                 stuffing_count -= 4;
2557                 while(stuffing_count--){
2558                     put_bits(&s->pb, 8, 0xFF);
2559                 }
2560             break;
2561             default:
2562                 av_log(s->avctx, AV_LOG_ERROR, "vbv buffer overflow\n");
2563             }
2564             flush_put_bits(&s->pb);
2565             s->frame_bits  = put_bits_count(&s->pb);
2566         }
2567
2568         /* update mpeg1/2 vbv_delay for CBR */
2569         if(s->avctx->rc_max_rate && s->avctx->rc_min_rate == s->avctx->rc_max_rate && s->out_format == FMT_MPEG1
2570            && 90000LL * (avctx->rc_buffer_size-1) <= s->avctx->rc_max_rate*0xFFFFLL){
2571             int vbv_delay;
2572
2573             assert(s->repeat_first_field==0);
2574
2575             vbv_delay= lrintf(90000 * s->rc_context.buffer_index / s->avctx->rc_max_rate);
2576             assert(vbv_delay < 0xFFFF);
2577
2578             s->vbv_delay_ptr[0] &= 0xF8;
2579             s->vbv_delay_ptr[0] |= vbv_delay>>13;
2580             s->vbv_delay_ptr[1]  = vbv_delay>>5;
2581             s->vbv_delay_ptr[2] &= 0x07;
2582             s->vbv_delay_ptr[2] |= vbv_delay<<3;
2583         }
2584         s->total_bits += s->frame_bits;
2585         avctx->frame_bits  = s->frame_bits;
2586     }else{
2587         assert((pbBufPtr(&s->pb) == s->pb.buf));
2588         s->frame_bits=0;
2589     }
2590     assert((s->frame_bits&7)==0);
2591
2592     return s->frame_bits/8;
2593 }
2594
2595 #endif //CONFIG_ENCODERS
2596
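/**
 * Global motion compensation for the purely translational case: applies the
 * single sprite offset (with a 1/16-pel fraction via dsp.gmc1) to the luma
 * block and, unless CODEC_FLAG_GRAY is set, to both chroma blocks, with edge
 * emulation where the source area leaves the picture.
 */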
2597 static inline void gmc1_motion(MpegEncContext *s,
2598                                uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2599                                uint8_t **ref_picture)
2600 {
2601     uint8_t *ptr;
2602     int offset, src_x, src_y, linesize, uvlinesize;
2603     int motion_x, motion_y;
2604     int emu=0;
2605
2606     motion_x= s->sprite_offset[0][0];
2607     motion_y= s->sprite_offset[0][1];
2608     src_x = s->mb_x * 16 + (motion_x >> (s->sprite_warping_accuracy+1));
2609     src_y = s->mb_y * 16 + (motion_y >> (s->sprite_warping_accuracy+1));
2610     motion_x<<=(3-s->sprite_warping_accuracy);
2611     motion_y<<=(3-s->sprite_warping_accuracy);
2612     src_x = clip(src_x, -16, s->width);
2613     if (src_x == s->width)
2614         motion_x =0;
2615     src_y = clip(src_y, -16, s->height);
2616     if (src_y == s->height)
2617         motion_y =0;
2618
2619     linesize = s->linesize;
2620     uvlinesize = s->uvlinesize;
2621
2622     ptr = ref_picture[0] + (src_y * linesize) + src_x;
2623
2624     if(s->flags&CODEC_FLAG_EMU_EDGE){
2625         if(   (unsigned)src_x >= s->h_edge_pos - 17
2626            || (unsigned)src_y >= s->v_edge_pos - 17){
2627             ff_emulated_edge_mc(s->edge_emu_buffer, ptr, linesize, 17, 17, src_x, src_y, s->h_edge_pos, s->v_edge_pos);
2628             ptr= s->edge_emu_buffer;
2629         }
2630     }
2631
2632     if((motion_x|motion_y)&7){
2633         s->dsp.gmc1(dest_y  , ptr  , linesize, 16, motion_x&15, motion_y&15, 128 - s->no_rounding);
2634         s->dsp.gmc1(dest_y+8, ptr+8, linesize, 16, motion_x&15, motion_y&15, 128 - s->no_rounding);
2635     }else{
2636         int dxy;
2637
2638         dxy= ((motion_x>>3)&1) | ((motion_y>>2)&2);
2639         if (s->no_rounding){
2640             s->dsp.put_no_rnd_pixels_tab[0][dxy](dest_y, ptr, linesize, 16);
2641         }else{
2642             s->dsp.put_pixels_tab       [0][dxy](dest_y, ptr, linesize, 16);
2643         }
2644     }
2645
2646     if(s->flags&CODEC_FLAG_GRAY) return;
2647
2648     motion_x= s->sprite_offset[1][0];
2649     motion_y= s->sprite_offset[1][1];
2650     src_x = s->mb_x * 8 + (motion_x >> (s->sprite_warping_accuracy+1));
2651     src_y = s->mb_y * 8 + (motion_y >> (s->sprite_warping_accuracy+1));
2652     motion_x<<=(3-s->sprite_warping_accuracy);
2653     motion_y<<=(3-s->sprite_warping_accuracy);
2654     src_x = clip(src_x, -8, s->width>>1);
2655     if (src_x == s->width>>1)
2656         motion_x =0;
2657     src_y = clip(src_y, -8, s->height>>1);
2658     if (src_y == s->height>>1)
2659         motion_y =0;
2660
2661     offset = (src_y * uvlinesize) + src_x;
2662     ptr = ref_picture[1] + offset;
2663     if(s->flags&CODEC_FLAG_EMU_EDGE){
2664         if(   (unsigned)src_x >= (s->h_edge_pos>>1) - 9
2665            || (unsigned)src_y >= (s->v_edge_pos>>1) - 9){
2666             ff_emulated_edge_mc(s->edge_emu_buffer, ptr, uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
2667             ptr= s->edge_emu_buffer;
2668             emu=1;
2669         }
2670     }
2671     s->dsp.gmc1(dest_cb, ptr, uvlinesize, 8, motion_x&15, motion_y&15, 128 - s->no_rounding);
2672
2673     ptr = ref_picture[2] + offset;
2674     if(emu){
2675         ff_emulated_edge_mc(s->edge_emu_buffer, ptr, uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
2676         ptr= s->edge_emu_buffer;
2677     }
2678     s->dsp.gmc1(dest_cr, ptr, uvlinesize, 8, motion_x&15, motion_y&15, 128 - s->no_rounding);
2679
2680     return;
2681 }
2682
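/**
 * Global motion compensation for the general warped case: applies the sprite
 * offset together with the affine delta matrix via dsp.gmc to the luma block
 * and, unless CODEC_FLAG_GRAY is set, to both chroma blocks.
 */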
2683 static inline void gmc_motion(MpegEncContext *s,
2684                                uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2685                                uint8_t **ref_picture)
2686 {
2687     uint8_t *ptr;
2688     int linesize, uvlinesize;
2689     const int a= s->sprite_warping_accuracy;
2690     int ox, oy;
2691
2692     linesize = s->linesize;
2693     uvlinesize = s->uvlinesize;
2694
2695     ptr = ref_picture[0];
2696
2697     ox= s->sprite_offset[0][0] + s->sprite_delta[0][0]*s->mb_x*16 + s->sprite_delta[0][1]*s->mb_y*16;
2698     oy= s->sprite_offset[0][1] + s->sprite_delta[1][0]*s->mb_x*16 + s->sprite_delta[1][1]*s->mb_y*16;
2699
2700     s->dsp.gmc(dest_y, ptr, linesize, 16,
2701            ox,
2702            oy,
2703            s->sprite_delta[0][0], s->sprite_delta[0][1],
2704            s->sprite_delta[1][0], s->sprite_delta[1][1],
2705            a+1, (1<<(2*a+1)) - s->no_rounding,
2706            s->h_edge_pos, s->v_edge_pos);
2707     s->dsp.gmc(dest_y+8, ptr, linesize, 16,
2708            ox + s->sprite_delta[0][0]*8,
2709            oy + s->sprite_delta[1][0]*8,
2710            s->sprite_delta[0][0], s->sprite_delta[0][1],
2711            s->sprite_delta[1][0], s->sprite_delta[1][1],
2712            a+1, (1<<(2*a+1)) - s->no_rounding,
2713            s->h_edge_pos, s->v_edge_pos);
2714
2715     if(s->flags&CODEC_FLAG_GRAY) return;
2716
2717     ox= s->sprite_offset[1][0] + s->sprite_delta[0][0]*s->mb_x*8 + s->sprite_delta[0][1]*s->mb_y*8;
2718     oy= s->sprite_offset[1][1] + s->sprite_delta[1][0]*s->mb_x*8 + s->sprite_delta[1][1]*s->mb_y*8;
2719
2720     ptr = ref_picture[1];
2721     s->dsp.gmc(dest_cb, ptr, uvlinesize, 8,
2722            ox,
2723            oy,
2724            s->sprite_delta[0][0], s->sprite_delta[0][1],
2725            s->sprite_delta[1][0], s->sprite_delta[1][1],
2726            a+1, (1<<(2*a+1)) - s->no_rounding,
2727            s->h_edge_pos>>1, s->v_edge_pos>>1);
2728
2729     ptr = ref_picture[2];
2730     s->dsp.gmc(dest_cr, ptr, uvlinesize, 8,
2731            ox,
2732            oy,
2733            s->sprite_delta[0][0], s->sprite_delta[0][1],
2734            s->sprite_delta[1][0], s->sprite_delta[1][1],
2735            a+1, (1<<(2*a+1)) - s->no_rounding,
2736            s->h_edge_pos>>1, s->v_edge_pos>>1);
2737 }
2738
2739 /**
2740  * Copies a rectangular area of samples to a temporary buffer and replicates the border samples.
2741  * @param buf destination buffer
2742  * @param src source buffer
2743  * @param linesize number of bytes between 2 vertically adjacent samples in both the source and destination buffers
2744  * @param block_w width of block
2745  * @param block_h height of block
2746  * @param src_x x coordinate of the top left sample of the block in the source buffer
2747  * @param src_y y coordinate of the top left sample of the block in the source buffer
2748  * @param w width of the source buffer
2749  * @param h height of the source buffer
2750  */
2751 void ff_emulated_edge_mc(uint8_t *buf, uint8_t *src, int linesize, int block_w, int block_h,
2752                                     int src_x, int src_y, int w, int h){
2753     int x, y;
2754     int start_y, start_x, end_y, end_x;
2755
2756     if(src_y>= h){
2757         src+= (h-1-src_y)*linesize;
2758         src_y=h-1;
2759     }else if(src_y<=-block_h){
2760         src+= (1-block_h-src_y)*linesize;
2761         src_y=1-block_h;
2762     }
2763     if(src_x>= w){
2764         src+= (w-1-src_x);
2765         src_x=w-1;
2766     }else if(src_x<=-block_w){
2767         src+= (1-block_w-src_x);
2768         src_x=1-block_w;
2769     }
2770
2771     start_y= FFMAX(0, -src_y);
2772     start_x= FFMAX(0, -src_x);
2773     end_y= FFMIN(block_h, h-src_y);
2774     end_x= FFMIN(block_w, w-src_x);
2775
2776     // copy existing part
2777     for(y=start_y; y<end_y; y++){
2778         for(x=start_x; x<end_x; x++){
2779             buf[x + y*linesize]= src[x + y*linesize];
2780         }
2781     }
2782
2783     //top
2784     for(y=0; y<start_y; y++){
2785         for(x=start_x; x<end_x; x++){
2786             buf[x + y*linesize]= buf[x + start_y*linesize];
2787         }
2788     }
2789
2790     //bottom
2791     for(y=end_y; y<block_h; y++){
2792         for(x=start_x; x<end_x; x++){
2793             buf[x + y*linesize]= buf[x + (end_y-1)*linesize];
2794         }
2795     }
2796
2797     for(y=0; y<block_h; y++){
2798        //left
2799         for(x=0; x<start_x; x++){
2800             buf[x + y*linesize]= buf[start_x + y*linesize];
2801         }
2802
2803        //right
2804         for(x=end_x; x<block_w; x++){
2805             buf[x + y*linesize]= buf[end_x - 1 + y*linesize];
2806         }
2807     }
2808 }
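/*
 * Illustrative sketch, not part of the original source: a typical caller pads a
 * 17x17 luma region when a half-pel vector of a 16x16 block reaches past the
 * decoded edge, and then reads from the scratch buffer instead of the reference
 * plane. This mirrors the frame-coded luma case in mpeg_motion() below; ptr_y,
 * src_x, src_y, motion_x, motion_y and h are assumed to be set up as there.
 */
#if 0
    if(   (unsigned)src_x > s->h_edge_pos - (motion_x&1) - 16
       || (unsigned)src_y > s->v_edge_pos - (motion_y&1) - h){
        ff_emulated_edge_mc(s->edge_emu_buffer, ptr_y, s->linesize,
                            17, 17,           /* 16x16 block + 1 extra row/column for half-pel interpolation */
                            src_x, src_y, s->h_edge_pos, s->v_edge_pos);
        ptr_y = s->edge_emu_buffer;           /* all further reads use the padded copy */
    }
#endif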
2809
2810 static inline int hpel_motion(MpegEncContext *s,
2811                                   uint8_t *dest, uint8_t *src,
2812                                   int field_based, int field_select,
2813                                   int src_x, int src_y,
2814                                   int width, int height, int stride,
2815                                   int h_edge_pos, int v_edge_pos,
2816                                   int w, int h, op_pixels_func *pix_op,
2817                                   int motion_x, int motion_y)
2818 {
2819     int dxy;
2820     int emu=0;
2821
2822     dxy = ((motion_y & 1) << 1) | (motion_x & 1);
2823     src_x += motion_x >> 1;
2824     src_y += motion_y >> 1;
2825
2826     /* WARNING: do not forget half pels */
2827     src_x = clip(src_x, -16, width); //FIXME unneeded for emu?
2828     if (src_x == width)
2829         dxy &= ~1;
2830     src_y = clip(src_y, -16, height);
2831     if (src_y == height)
2832         dxy &= ~2;
2833     src += src_y * stride + src_x;
2834
2835     if(s->unrestricted_mv && (s->flags&CODEC_FLAG_EMU_EDGE)){
2836         if(   (unsigned)src_x > h_edge_pos - (motion_x&1) - w
2837            || (unsigned)src_y > v_edge_pos - (motion_y&1) - h){
2838             ff_emulated_edge_mc(s->edge_emu_buffer, src, s->linesize, w+1, (h+1)<<field_based,
2839                              src_x, src_y<<field_based, h_edge_pos, s->v_edge_pos);
2840             src= s->edge_emu_buffer;
2841             emu=1;
2842         }
2843     }
2844     if(field_select)
2845         src += s->linesize;
2846     pix_op[dxy](dest, src, stride, h);
2847     return emu;
2848 }
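/*
 * Note (added, not in the original source): the dxy index computed in
 * hpel_motion() packs the half-pel fractions of the vector,
 *   dxy = 0 -> integer-pel in x and y (plain copy)
 *   dxy = 1 -> horizontal half-pel    (motion_x odd)
 *   dxy = 2 -> vertical half-pel      (motion_y odd)
 *   dxy = 3 -> half-pel in both directions
 * and selects the matching interpolation variant from the op_pixels_func table.
 */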
2849
2850 static inline int hpel_motion_lowres(MpegEncContext *s,
2851                                   uint8_t *dest, uint8_t *src,
2852                                   int field_based, int field_select,
2853                                   int src_x, int src_y,
2854                                   int width, int height, int stride,
2855                                   int h_edge_pos, int v_edge_pos,
2856                                   int w, int h, h264_chroma_mc_func *pix_op,
2857                                   int motion_x, int motion_y)
2858 {
2859     const int lowres= s->avctx->lowres;
2860     const int s_mask= (2<<lowres)-1;
2861     int emu=0;
2862     int sx, sy;
2863
2864     if(s->quarter_sample){
2865         motion_x/=2;
2866         motion_y/=2;
2867     }
2868
2869     sx= motion_x & s_mask;
2870     sy= motion_y & s_mask;
2871     src_x += motion_x >> (lowres+1);
2872     src_y += motion_y >> (lowres+1);
2873
2874     src += src_y * stride + src_x;
2875
2876     if(   (unsigned)src_x > h_edge_pos                 - (!!sx) - w
2877        || (unsigned)src_y >(v_edge_pos >> field_based) - (!!sy) - h){
2878         ff_emulated_edge_mc(s->edge_emu_buffer, src, s->linesize, w+1, (h+1)<<field_based,
2879                             src_x, src_y<<field_based, h_edge_pos, v_edge_pos);
2880         src= s->edge_emu_buffer;
2881         emu=1;
2882     }
2883
2884     sx <<= 2 - lowres;
2885     sy <<= 2 - lowres;
2886     if(field_select)
2887         src += s->linesize;
2888     pix_op[lowres](dest, src, stride, h, sx, sy);
2889     return emu;
2890 }
2891
2892 /* apply one mpeg motion vector to the three components */
2893 static always_inline void mpeg_motion(MpegEncContext *s,
2894                                uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2895                                int field_based, int bottom_field, int field_select,
2896                                uint8_t **ref_picture, op_pixels_func (*pix_op)[4],
2897                                int motion_x, int motion_y, int h)
2898 {
2899     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
2900     int dxy, uvdxy, mx, my, src_x, src_y, uvsrc_x, uvsrc_y, v_edge_pos, uvlinesize, linesize;
2901
2902 #if 0
2903 if(s->quarter_sample)
2904 {
2905     motion_x>>=1;
2906     motion_y>>=1;
2907 }
2908 #endif
2909
2910     v_edge_pos = s->v_edge_pos >> field_based;
2911     linesize   = s->current_picture.linesize[0] << field_based;
2912     uvlinesize = s->current_picture.linesize[1] << field_based;
2913
2914     dxy = ((motion_y & 1) << 1) | (motion_x & 1);
2915     src_x = s->mb_x* 16               + (motion_x >> 1);
2916     src_y =(s->mb_y<<(4-field_based)) + (motion_y >> 1);
2917
2918     if (s->out_format == FMT_H263) {
2919         if((s->workaround_bugs & FF_BUG_HPEL_CHROMA) && field_based){
2920             mx = (motion_x>>1)|(motion_x&1);
2921             my = motion_y >>1;
2922             uvdxy = ((my & 1) << 1) | (mx & 1);
2923             uvsrc_x = s->mb_x* 8               + (mx >> 1);
2924             uvsrc_y = (s->mb_y<<(3-field_based)) + (my >> 1);
2925         }else{
2926             uvdxy = dxy | (motion_y & 2) | ((motion_x & 2) >> 1);
2927             uvsrc_x = src_x>>1;
2928             uvsrc_y = src_y>>1;
2929         }
2930     }else if(s->out_format == FMT_H261){//even chroma MVs are full-pel in H.261
2931         mx = motion_x / 4;
2932         my = motion_y / 4;
2933         uvdxy = 0;
2934         uvsrc_x = s->mb_x*8 + mx;
2935         uvsrc_y = s->mb_y*8 + my;
2936     } else {
2937         if(s->chroma_y_shift){
2938             mx = motion_x / 2;
2939             my = motion_y / 2;
2940             uvdxy = ((my & 1) << 1) | (mx & 1);
2941             uvsrc_x = s->mb_x* 8               + (mx >> 1);
2942             uvsrc_y = (s->mb_y<<(3-field_based)) + (my >> 1);
2943         } else {
2944             if(s->chroma_x_shift){
2945             //Chroma422
2946                 mx = motion_x / 2;
2947                 uvdxy = ((motion_y & 1) << 1) | (mx & 1);
2948                 uvsrc_x = s->mb_x* 8           + (mx >> 1);
2949                 uvsrc_y = src_y;
2950             } else {
2951             //Chroma444
2952                 uvdxy = dxy;
2953                 uvsrc_x = src_x;
2954                 uvsrc_y = src_y;
2955             }
2956         }
2957     }
2958
2959     ptr_y  = ref_picture[0] + src_y * linesize + src_x;
2960     ptr_cb = ref_picture[1] + uvsrc_y * uvlinesize + uvsrc_x;
2961     ptr_cr = ref_picture[2] + uvsrc_y * uvlinesize + uvsrc_x;
2962
2963     if(   (unsigned)src_x > s->h_edge_pos - (motion_x&1) - 16
2964        || (unsigned)src_y >    v_edge_pos - (motion_y&1) - h){
2965             if(s->codec_id == CODEC_ID_MPEG2VIDEO ||
2966                s->codec_id == CODEC_ID_MPEG1VIDEO){
2967                 av_log(s->avctx,AV_LOG_DEBUG,"MPEG motion vector out of boundary\n");
2968                 return ;
2969             }
2970             ff_emulated_edge_mc(s->edge_emu_buffer, ptr_y, s->linesize, 17, 17+field_based,
2971                              src_x, src_y<<field_based, s->h_edge_pos, s->v_edge_pos);
2972             ptr_y = s->edge_emu_buffer;
2973             if(!(s->flags&CODEC_FLAG_GRAY)){
2974                 uint8_t *uvbuf= s->edge_emu_buffer+18*s->linesize;
2975                 ff_emulated_edge_mc(uvbuf  , ptr_cb, s->uvlinesize, 9, 9+field_based,
2976                                  uvsrc_x, uvsrc_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
2977                 ff_emulated_edge_mc(uvbuf+16, ptr_cr, s->uvlinesize, 9, 9+field_based,
2978                                  uvsrc_x, uvsrc_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
2979                 ptr_cb= uvbuf;
2980                 ptr_cr= uvbuf+16;
2981             }
2982     }
2983
2984     if(bottom_field){ //FIXME use this for field pix too instead of the obnoxious hack which changes picture.data
2985         dest_y += s->linesize;
2986         dest_cb+= s->uvlinesize;
2987         dest_cr+= s->uvlinesize;
2988     }
2989
2990     if(field_select){
2991         ptr_y += s->linesize;
2992         ptr_cb+= s->uvlinesize;
2993         ptr_cr+= s->uvlinesize;
2994     }
2995
2996     pix_op[0][dxy](dest_y, ptr_y, linesize, h);
2997
2998     if(!(s->flags&CODEC_FLAG_GRAY)){
2999         pix_op[s->chroma_x_shift][uvdxy](dest_cb, ptr_cb, uvlinesize, h >> s->chroma_y_shift);
3000         pix_op[s->chroma_x_shift][uvdxy](dest_cr, ptr_cr, uvlinesize, h >> s->chroma_y_shift);
3001     }
3002 #if defined(CONFIG_H261_ENCODER) || defined(CONFIG_H261_DECODER)
3003     if(s->out_format == FMT_H261){
3004         ff_h261_loop_filter(s);
3005     }
3006 #endif
3007 }
3008
3009 /* apply one mpeg motion vector to the three components */
3010 static always_inline void mpeg_motion_lowres(MpegEncContext *s,
3011                                uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
3012                                int field_based, int bottom_field, int field_select,
3013                                uint8_t **ref_picture, h264_chroma_mc_func *pix_op,
3014                                int motion_x, int motion_y, int h)
3015 {
3016     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
3017     int mx, my, src_x, src_y, uvsrc_x, uvsrc_y, uvlinesize, linesize, sx, sy, uvsx, uvsy;
3018     const int lowres= s->avctx->lowres;
3019     const int block_s= 8>>lowres;
3020     const int s_mask= (2<<lowres)-1;
3021     const int h_edge_pos = s->h_edge_pos >> lowres;
3022     const int v_edge_pos = s->v_edge_pos >> lowres;
3023     linesize   = s->current_picture.linesize[0] << field_based;
3024     uvlinesize = s->current_picture.linesize[1] << field_based;
3025
3026     if(s->quarter_sample){ //FIXME obviously not perfect but qpel won't work in lowres anyway
3027         motion_x/=2;
3028         motion_y/=2;
3029     }
3030
3031     if(field_based){
3032         motion_y += (bottom_field - field_select)*((1<<lowres)-1);
3033     }
3034
3035     sx= motion_x & s_mask;
3036     sy= motion_y & s_mask;
3037     src_x = s->mb_x*2*block_s               + (motion_x >> (lowres+1));
3038     src_y =(s->mb_y*2*block_s>>field_based) + (motion_y >> (lowres+1));
3039
3040     if (s->out_format == FMT_H263) {
3041         uvsx = ((motion_x>>1) & s_mask) | (sx&1);
3042         uvsy = ((motion_y>>1) & s_mask) | (sy&1);
3043         uvsrc_x = src_x>>1;
3044         uvsrc_y = src_y>>1;
3045     }else if(s->out_format == FMT_H261){//even chroma MVs are full-pel in H.261
3046         mx = motion_x / 4;
3047         my = motion_y / 4;
3048         uvsx = (2*mx) & s_mask;
3049         uvsy = (2*my) & s_mask;
3050         uvsrc_x = s->mb_x*block_s               + (mx >> lowres);
3051         uvsrc_y = s->mb_y*block_s               + (my >> lowres);
3052     } else {
3053         mx = motion_x / 2;
3054         my = motion_y / 2;
3055         uvsx = mx & s_mask;
3056         uvsy = my & s_mask;
3057         uvsrc_x = s->mb_x*block_s               + (mx >> (lowres+1));
3058         uvsrc_y =(s->mb_y*block_s>>field_based) + (my >> (lowres+1));
3059     }
3060
3061     ptr_y  = ref_picture[0] + src_y * linesize + src_x;
3062     ptr_cb = ref_picture[1] + uvsrc_y * uvlinesize + uvsrc_x;
3063     ptr_cr = ref_picture[2] + uvsrc_y * uvlinesize + uvsrc_x;
3064
3065     if(   (unsigned)src_x > h_edge_pos                 - (!!sx) - 2*block_s
3066        || (unsigned)src_y >(v_edge_pos >> field_based) - (!!sy) - h){
3067             ff_emulated_edge_mc(s->edge_emu_buffer, ptr_y, s->linesize, 17, 17+field_based,
3068                              src_x, src_y<<field_based, h_edge_pos, v_edge_pos);
3069             ptr_y = s->edge_emu_buffer;
3070             if(!(s->flags&CODEC_FLAG_GRAY)){
3071                 uint8_t *uvbuf= s->edge_emu_buffer+18*s->linesize;
3072                 ff_emulated_edge_mc(uvbuf  , ptr_cb, s->uvlinesize, 9, 9+field_based,
3073                                  uvsrc_x, uvsrc_y<<field_based, h_edge_pos>>1, v_edge_pos>>1);
3074                 ff_emulated_edge_mc(uvbuf+16, ptr_cr, s->uvlinesize, 9, 9+field_based,
3075                                  uvsrc_x, uvsrc_y<<field_based, h_edge_pos>>1, v_edge_pos>>1);
3076                 ptr_cb= uvbuf;
3077                 ptr_cr= uvbuf+16;
3078             }
3079     }
3080
3081     if(bottom_field){ //FIXME use this for field pix too instead of the obnoxious hack which changes picture.data
3082         dest_y += s->linesize;
3083         dest_cb+= s->uvlinesize;
3084         dest_cr+= s->uvlinesize;
3085     }
3086
3087     if(field_select){
3088         ptr_y += s->linesize;
3089         ptr_cb+= s->uvlinesize;
3090         ptr_cr+= s->uvlinesize;
3091     }
3092
3093     sx <<= 2 - lowres;
3094     sy <<= 2 - lowres;
3095     pix_op[lowres-1](dest_y, ptr_y, linesize, h, sx, sy);
3096
3097     if(!(s->flags&CODEC_FLAG_GRAY)){
3098         uvsx <<= 2 - lowres;
3099         uvsy <<= 2 - lowres;
3100         pix_op[lowres](dest_cb, ptr_cb, uvlinesize, h >> s->chroma_y_shift, uvsx, uvsy);
3101         pix_op[lowres](dest_cr, ptr_cr, uvlinesize, h >> s->chroma_y_shift, uvsx, uvsy);
3102     }
3103     //FIXME h261 lowres loop filter
3104 }
3105
3106 //FIXME move to dsputil, avg variant, 16x16 version
3107 static inline void put_obmc(uint8_t *dst, uint8_t *src[5], int stride){
3108     int x;
3109     uint8_t * const top   = src[1];
3110     uint8_t * const left  = src[2];
3111     uint8_t * const mid   = src[0];
3112     uint8_t * const right = src[3];
3113     uint8_t * const bottom= src[4];
3114 #define OBMC_FILTER(x, t, l, m, r, b)\
3115     dst[x]= (t*top[x] + l*left[x] + m*mid[x] + r*right[x] + b*bottom[x] + 4)>>3
3116 #define OBMC_FILTER4(x, t, l, m, r, b)\
3117     OBMC_FILTER(x         , t, l, m, r, b);\
3118     OBMC_FILTER(x+1       , t, l, m, r, b);\
3119     OBMC_FILTER(x  +stride, t, l, m, r, b);\
3120     OBMC_FILTER(x+1+stride, t, l, m, r, b);
3121
3122     x=0;
3123     OBMC_FILTER (x  , 2, 2, 4, 0, 0);
3124     OBMC_FILTER (x+1, 2, 1, 5, 0, 0);
3125     OBMC_FILTER4(x+2, 2, 1, 5, 0, 0);
3126     OBMC_FILTER4(x+4, 2, 0, 5, 1, 0);
3127     OBMC_FILTER (x+6, 2, 0, 5, 1, 0);
3128     OBMC_FILTER (x+7, 2, 0, 4, 2, 0);
3129     x+= stride;
3130     OBMC_FILTER (x  , 1, 2, 5, 0, 0);
3131     OBMC_FILTER (x+1, 1, 2, 5, 0, 0);
3132     OBMC_FILTER (x+6, 1, 0, 5, 2, 0);
3133     OBMC_FILTER (x+7, 1, 0, 5, 2, 0);
3134     x+= stride;
3135     OBMC_FILTER4(x  , 1, 2, 5, 0, 0);
3136     OBMC_FILTER4(x+2, 1, 1, 6, 0, 0);
3137     OBMC_FILTER4(x+4, 1, 0, 6, 1, 0);
3138     OBMC_FILTER4(x+6, 1, 0, 5, 2, 0);
3139     x+= 2*stride;
3140     OBMC_FILTER4(x  , 0, 2, 5, 0, 1);
3141     OBMC_FILTER4(x+2, 0, 1, 6, 0, 1);
3142     OBMC_FILTER4(x+4, 0, 0, 6, 1, 1);
3143     OBMC_FILTER4(x+6, 0, 0, 5, 2, 1);
3144     x+= 2*stride;
3145     OBMC_FILTER (x  , 0, 2, 5, 0, 1);
3146     OBMC_FILTER (x+1, 0, 2, 5, 0, 1);
3147     OBMC_FILTER4(x+2, 0, 1, 5, 0, 2);
3148     OBMC_FILTER4(x+4, 0, 0, 5, 1, 2);
3149     OBMC_FILTER (x+6, 0, 0, 5, 2, 1);
3150     OBMC_FILTER (x+7, 0, 0, 5, 2, 1);
3151     x+= stride;
3152     OBMC_FILTER (x  , 0, 2, 4, 0, 2);
3153     OBMC_FILTER (x+1, 0, 1, 5, 0, 2);
3154     OBMC_FILTER (x+6, 0, 0, 5, 1, 2);
3155     OBMC_FILTER (x+7, 0, 0, 4, 2, 2);
3156 }
3157
3158 /* obmc for 1 8x8 luma block */
3159 static inline void obmc_motion(MpegEncContext *s,
3160                                uint8_t *dest, uint8_t *src,
3161                                int src_x, int src_y,
3162                                op_pixels_func *pix_op,
3163                                int16_t mv[5][2]/* mid top left right bottom*/)
3164 #define MID    0
3165 {
3166     int i;
3167     uint8_t *ptr[5];
3168
3169     assert(s->quarter_sample==0);
3170
3171     for(i=0; i<5; i++){
3172         if(i && mv[i][0]==mv[MID][0] && mv[i][1]==mv[MID][1]){
3173             ptr[i]= ptr[MID];
3174         }else{
3175             ptr[i]= s->obmc_scratchpad + 8*(i&1) + s->linesize*8*(i>>1);
3176             hpel_motion(s, ptr[i], src, 0, 0,
3177                         src_x, src_y,
3178                         s->width, s->height, s->linesize,
3179                         s->h_edge_pos, s->v_edge_pos,
3180                         8, 8, pix_op,
3181                         mv[i][0], mv[i][1]);
3182         }
3183     }
3184
3185     put_obmc(dest, ptr, s->linesize);
3186 }
3187
3188 static inline void qpel_motion(MpegEncContext *s,
3189                                uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
3190                                int field_based, int bottom_field, int field_select,
3191                                uint8_t **ref_picture, op_pixels_func (*pix_op)[4],
3192                                qpel_mc_func (*qpix_op)[16],
3193                                int motion_x, int motion_y, int h)
3194 {
3195     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
3196     int dxy, uvdxy, mx, my, src_x, src_y, uvsrc_x, uvsrc_y, v_edge_pos, linesize, uvlinesize;
3197
3198     dxy = ((motion_y & 3) << 2) | (motion_x & 3);
3199     src_x = s->mb_x *  16                 + (motion_x >> 2);
3200     src_y = s->mb_y * (16 >> field_based) + (motion_y >> 2);
3201
3202     v_edge_pos = s->v_edge_pos >> field_based;
3203     linesize = s->linesize << field_based;
3204     uvlinesize = s->uvlinesize << field_based;
3205
3206     if(field_based){
3207         mx= motion_x/2;
3208         my= motion_y>>1;
3209     }else if(s->workaround_bugs&FF_BUG_QPEL_CHROMA2){
3210         static const int rtab[8]= {0,0,1,1,0,0,0,1};
3211         mx= (motion_x>>1) + rtab[motion_x&7];
3212         my= (motion_y>>1) + rtab[motion_y&7];
3213     }else if(s->workaround_bugs&FF_BUG_QPEL_CHROMA){
3214         mx= (motion_x>>1)|(motion_x&1);
3215         my= (motion_y>>1)|(motion_y&1);
3216     }else{
3217         mx= motion_x/2;
3218         my= motion_y/2;
3219     }
3220     mx= (mx>>1)|(mx&1);
3221     my= (my>>1)|(my&1);
3222
3223     uvdxy= (mx&1) | ((my&1)<<1);
3224     mx>>=1;
3225     my>>=1;
3226
3227     uvsrc_x = s->mb_x *  8                 + mx;
3228     uvsrc_y = s->mb_y * (8 >> field_based) + my;
3229
3230     ptr_y  = ref_picture[0] +   src_y *   linesize +   src_x;
3231     ptr_cb = ref_picture[1] + uvsrc_y * uvlinesize + uvsrc_x;
3232     ptr_cr = ref_picture[2] + uvsrc_y * uvlinesize + uvsrc_x;
3233
3234     if(   (unsigned)src_x > s->h_edge_pos - (motion_x&3) - 16
3235        || (unsigned)src_y >    v_edge_pos - (motion_y&3) - h  ){
3236         ff_emulated_edge_mc(s->edge_emu_buffer, ptr_y, s->linesize, 17, 17+field_based,
3237                          src_x, src_y<<field_based, s->h_edge_pos, s->v_edge_pos);
3238         ptr_y= s->edge_emu_buffer;
3239         if(!(s->flags&CODEC_FLAG_GRAY)){
3240             uint8_t *uvbuf= s->edge_emu_buffer + 18*s->linesize;
3241             ff_emulated_edge_mc(uvbuf, ptr_cb, s->uvlinesize, 9, 9 + field_based,
3242                              uvsrc_x, uvsrc_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
3243             ff_emulated_edge_mc(uvbuf + 16, ptr_cr, s->uvlinesize, 9, 9 + field_based,
3244                              uvsrc_x, uvsrc_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
3245             ptr_cb= uvbuf;
3246             ptr_cr= uvbuf + 16;
3247         }
3248     }
3249
3250     if(!field_based)
3251         qpix_op[0][dxy](dest_y, ptr_y, linesize);
3252     else{
3253         if(bottom_field){
3254             dest_y += s->linesize;
3255             dest_cb+= s->uvlinesize;
3256             dest_cr+= s->uvlinesize;
3257         }
3258
3259         if(field_select){
3260             ptr_y  += s->linesize;
3261             ptr_cb += s->uvlinesize;
3262             ptr_cr += s->uvlinesize;
3263         }
3264         //damn interlaced mode
3265         //FIXME boundary mirroring is not exactly correct here
3266         qpix_op[1][dxy](dest_y  , ptr_y  , linesize);
3267         qpix_op[1][dxy](dest_y+8, ptr_y+8, linesize);
3268     }
3269     if(!(s->flags&CODEC_FLAG_GRAY)){
3270         pix_op[1][uvdxy](dest_cr, ptr_cr, uvlinesize, h >> 1);
3271         pix_op[1][uvdxy](dest_cb, ptr_cb, uvlinesize, h >> 1);
3272     }
3273 }
3274
3275 inline int ff_h263_round_chroma(int x){
3276     if (x >= 0)
3277         return  (h263_chroma_roundtab[x & 0xf] + ((x >> 3) & ~1));
3278     else {
3279         x = -x;
3280         return -(h263_chroma_roundtab[x & 0xf] + ((x >> 3) & ~1));
3281     }
3282 }
3283
3284 /**
3285  * h263 chroma 4MV motion compensation.
3286  */
3287 static inline void chroma_4mv_motion(MpegEncContext *s,
3288                                      uint8_t *dest_cb, uint8_t *dest_cr,
3289                                      uint8_t **ref_picture,
3290                                      op_pixels_func *pix_op,
3291                                      int mx, int my){
3292     int dxy, emu=0, src_x, src_y, offset;
3293     uint8_t *ptr;
3294
3295     /* In case of 8X8, we construct a single chroma motion vector
3296        with a special rounding */
3297     mx= ff_h263_round_chroma(mx);
3298     my= ff_h263_round_chroma(my);
3299
3300     dxy = ((my & 1) << 1) | (mx & 1);
3301     mx >>= 1;
3302     my >>= 1;
3303
3304     src_x = s->mb_x * 8 + mx;
3305     src_y = s->mb_y * 8 + my;
3306     src_x = clip(src_x, -8, s->width/2);
3307     if (src_x == s->width/2)
3308         dxy &= ~1;
3309     src_y = clip(src_y, -8, s->height/2);
3310     if (src_y == s->height/2)
3311         dxy &= ~2;
3312
3313     offset = (src_y * (s->uvlinesize)) + src_x;
3314     ptr = ref_picture[1] + offset;
3315     if(s->flags&CODEC_FLAG_EMU_EDGE){
3316         if(   (unsigned)src_x > (s->h_edge_pos>>1) - (dxy &1) - 8
3317            || (unsigned)src_y > (s->v_edge_pos>>1) - (dxy>>1) - 8){
3318             ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
3319             ptr= s->edge_emu_buffer;
3320             emu=1;
3321         }
3322     }
3323     pix_op[dxy](dest_cb, ptr, s->uvlinesize, 8);
3324
3325     ptr = ref_picture[2] + offset;
3326     if(emu){
3327         ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
3328         ptr= s->edge_emu_buffer;
3329     }
3330     pix_op[dxy](dest_cr, ptr, s->uvlinesize, 8);
3331 }
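/*
 * Illustrative sketch, not part of the original source: how the single chroma
 * vector for a 4MV macroblock is formed. The caller (see the MV_TYPE_8X8 case
 * of MPV_motion() below) sums the four half-pel luma vectors; inside
 * chroma_4mv_motion() that sum, which is 8x too large for a half-pel chroma
 * vector, is mapped back down by ff_h263_round_chroma() with the table-driven
 * rounding that H.263/MPEG-4 require (roughly sum/8). Variable names are
 * assumed from MPV_motion().
 */
#if 0
    mx = my = 0;
    for(i=0; i<4; i++){
        mx += s->mv[dir][i][0];               /* half-pel luma units */
        my += s->mv[dir][i][1];
    }
    if(!(s->flags&CODEC_FLAG_GRAY))
        chroma_4mv_motion(s, dest_cb, dest_cr, ref_picture, pix_op[1], mx, my);
#endif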
3332
3333 static inline void chroma_4mv_motion_lowres(MpegEncContext *s,
3334                                      uint8_t *dest_cb, uint8_t *dest_cr,
3335                                      uint8_t **ref_picture,
3336                                      h264_chroma_mc_func *pix_op,
3337                                      int mx, int my){
3338     const int lowres= s->avctx->lowres;
3339     const int block_s= 8>>lowres;
3340     const int s_mask= (2<<lowres)-1;
3341     const int h_edge_pos = s->h_edge_pos >> (lowres+1);
3342     const int v_edge_pos = s->v_edge_pos >> (lowres+1);
3343     int emu=0, src_x, src_y, offset, sx, sy;
3344     uint8_t *ptr;
3345
3346     if(s->quarter_sample){
3347         mx/=2;
3348         my/=2;
3349     }
3350
3351     /* In case of 8X8, we construct a single chroma motion vector
3352        with a special rounding */
3353     mx= ff_h263_round_chroma(mx);
3354     my= ff_h263_round_chroma(my);
3355
3356     sx= mx & s_mask;
3357     sy= my & s_mask;
3358     src_x = s->mb_x*block_s + (mx >> (lowres+1));
3359     src_y = s->mb_y*block_s + (my >> (lowres+1));
3360
3361     offset = src_y * s->uvlinesize + src_x;
3362     ptr = ref_picture[1] + offset;
3363     if(s->flags&CODEC_FLAG_EMU_EDGE){
3364         if(   (unsigned)src_x > h_edge_pos - (!!sx) - block_s
3365            || (unsigned)src_y > v_edge_pos - (!!sy) - block_s){
3366             ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, src_x, src_y, h_edge_pos, v_edge_pos);
3367             ptr= s->edge_emu_buffer;
3368             emu=1;
3369         }
3370     }
3371     sx <<= 2 - lowres;
3372     sy <<= 2 - lowres;
3373     pix_op[lowres](dest_cb, ptr, s->uvlinesize, block_s, sx, sy);
3374
3375     ptr = ref_picture[2] + offset;
3376     if(emu){
3377         ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, src_x, src_y, h_edge_pos, v_edge_pos);
3378         ptr= s->edge_emu_buffer;
3379     }
3380     pix_op[lowres](dest_cr, ptr, s->uvlinesize, block_s, sx, sy);
3381 }
3382
3383 static inline void prefetch_motion(MpegEncContext *s, uint8_t **pix, int dir){
3384     /* fetch pixels for the estimated mv 4 macroblocks ahead,
3385      * optimized for 64-byte cache lines */
3386     const int shift = s->quarter_sample ? 2 : 1;
3387     const int mx= (s->mv[dir][0][0]>>shift) + 16*s->mb_x + 8;
3388     const int my= (s->mv[dir][0][1]>>shift) + 16*s->mb_y;
3389     int off= mx + (my + (s->mb_x&3)*4)*s->linesize + 64;
3390     s->dsp.prefetch(pix[0]+off, s->linesize, 4);
3391     off= (mx>>1) + ((my>>1) + (s->mb_x&7))*s->uvlinesize + 64;
3392     s->dsp.prefetch(pix[1]+off, pix[2]-pix[1], 2);
3393 }
3394
3395 /**
3396  * motion compensation of a single macroblock
3397  * @param s context
3398  * @param dest_y luma destination pointer
3399  * @param dest_cb chroma cb/u destination pointer
3400  * @param dest_cr chroma cr/v destination pointer
3401  * @param dir direction (0->forward, 1->backward)
3402  * @param ref_picture array[3] of pointers to the 3 planes of the reference picture
3403  * @param pix_op halfpel motion compensation function (average or put normally)
3404  * @param qpix_op qpel motion compensation function (average or put normally)
3405  * the motion vectors are taken from s->mv and the MV type from s->mv_type
3406  */
3407 static inline void MPV_motion(MpegEncContext *s,
3408                               uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
3409                               int dir, uint8_t **ref_picture,
3410                               op_pixels_func (*pix_op)[4], qpel_mc_func (*qpix_op)[16])
3411 {
3412     int dxy, mx, my, src_x, src_y, motion_x, motion_y;
3413     int mb_x, mb_y, i;
3414     uint8_t *ptr, *dest;
3415
3416     mb_x = s->mb_x;
3417     mb_y = s->mb_y;
3418
3419     prefetch_motion(s, ref_picture, dir);
3420
3421     if(s->obmc && s->pict_type != B_TYPE){
3422         int16_t mv_cache[4][4][2];
3423         const int xy= s->mb_x + s->mb_y*s->mb_stride;
3424         const int mot_stride= s->b8_stride;
3425         const int mot_xy= mb_x*2 + mb_y*2*mot_stride;
3426
3427         assert(!s->mb_skipped);
3428
3429         memcpy(mv_cache[1][1], s->current_picture.motion_val[0][mot_xy           ], sizeof(int16_t)*4);
3430         memcpy(mv_cache[2][1], s->current_picture.motion_val[0][mot_xy+mot_stride], sizeof(int16_t)*4);
3431         memcpy(mv_cache[3][1], s->current_picture.motion_val[0][mot_xy+mot_stride], sizeof(int16_t)*4);
3432
3433         if(mb_y==0 || IS_INTRA(s->current_picture.mb_type[xy-s->mb_stride])){
3434             memcpy(mv_cache[0][1], mv_cache[1][1], sizeof(int16_t)*4);
3435         }else{
3436             memcpy(mv_cache[0][1], s->current_picture.motion_val[0][mot_xy-mot_stride], sizeof(int16_t)*4);
3437         }
3438
3439         if(mb_x==0 || IS_INTRA(s->current_picture.mb_type[xy-1])){
3440             *(int32_t*)mv_cache[1][0]= *(int32_t*)mv_cache[1][1];
3441             *(int32_t*)mv_cache[2][0]= *(int32_t*)mv_cache[2][1];
3442         }else{
3443             *(int32_t*)mv_cache[1][0]= *(int32_t*)s->current_picture.motion_val[0][mot_xy-1];
3444             *(int32_t*)mv_cache[2][0]= *(int32_t*)s->current_picture.motion_val[0][mot_xy-1+mot_stride];
3445         }
3446
3447         if(mb_x+1>=s->mb_width || IS_INTRA(s->current_picture.mb_type[xy+1])){
3448             *(int32_t*)mv_cache[1][3]= *(int32_t*)mv_cache[1][2];
3449             *(int32_t*)mv_cache[2][3]= *(int32_t*)mv_cache[2][2];
3450         }else{
3451             *(int32_t*)mv_cache[1][3]= *(int32_t*)s->current_picture.motion_val[0][mot_xy+2];
3452             *(int32_t*)mv_cache[2][3]= *(int32_t*)s->current_picture.motion_val[0][mot_xy+2+mot_stride];
3453         }
3454
3455         mx = 0;
3456         my = 0;
3457         for(i=0;i<4;i++) {
3458             const int x= (i&1)+1;
3459             const int y= (i>>1)+1;
3460             int16_t mv[5][2]= {
3461                 {mv_cache[y][x  ][0], mv_cache[y][x  ][1]},
3462                 {mv_cache[y-1][x][0], mv_cache[y-1][x][1]},
3463                 {mv_cache[y][x-1][0], mv_cache[y][x-1][1]},
3464                 {mv_cache[y][x+1][0], mv_cache[y][x+1][1]},
3465                 {mv_cache[y+1][x][0], mv_cache[y+1][x][1]}};
3466             //FIXME cleanup
3467             obmc_motion(s, dest_y + ((i & 1) * 8) + (i >> 1) * 8 * s->linesize,
3468                         ref_picture[0],
3469                         mb_x * 16 + (i & 1) * 8, mb_y * 16 + (i >>1) * 8,
3470                         pix_op[1],
3471                         mv);
3472
3473             mx += mv[0][0];
3474             my += mv[0][1];
3475         }
3476         if(!(s->flags&CODEC_FLAG_GRAY))
3477             chroma_4mv_motion(s, dest_cb, dest_cr, ref_picture, pix_op[1], mx, my);
3478
3479         return;
3480     }
3481
3482     switch(s->mv_type) {
3483     case MV_TYPE_16X16:
3484         if(s->mcsel){
3485             if(s->real_sprite_warping_points==1){
3486                 gmc1_motion(s, dest_y, dest_cb, dest_cr,
3487                             ref_picture);
3488             }else{
3489                 gmc_motion(s, dest_y, dest_cb, dest_cr,
3490                             ref_picture);
3491             }
3492         }else if(s->quarter_sample){
3493             qpel_motion(s, dest_y, dest_cb, dest_cr,
3494                         0, 0, 0,
3495                         ref_picture, pix_op, qpix_op,
3496                         s->mv[dir][0][0], s->mv[dir][0][1], 16);
3497         }else if(s->mspel){
3498             ff_mspel_motion(s, dest_y, dest_cb, dest_cr,
3499                         ref_picture, pix_op,
3500                         s->mv[dir][0][0], s->mv[dir][0][1], 16);
3501         }else
3502         {
3503             mpeg_motion(s, dest_y, dest_cb, dest_cr,
3504                         0, 0, 0,
3505                         ref_picture, pix_op,
3506                         s->mv[dir][0][0], s->mv[dir][0][1], 16);
3507         }
3508         break;
3509     case MV_TYPE_8X8:
3510         mx = 0;
3511         my = 0;
3512         if(s->quarter_sample){
3513             for(i=0;i<4;i++) {
3514                 motion_x = s->mv[dir][i][0];
3515                 motion_y = s->mv[dir][i][1];
3516
3517                 dxy = ((motion_y & 3) << 2) | (motion_x & 3);
3518                 src_x = mb_x * 16 + (motion_x >> 2) + (i & 1) * 8;
3519                 src_y = mb_y * 16 + (motion_y >> 2) + (i >>1) * 8;
3520
3521                 /* WARNING: do not forget half pels */
3522                 src_x = clip(src_x, -16, s->width);
3523                 if (src_x == s->width)
3524                     dxy &= ~3;
3525                 src_y = clip(src_y, -16, s->height);
3526                 if (src_y == s->height)
3527                     dxy &= ~12;
3528
3529                 ptr = ref_picture[0] + (src_y * s->linesize) + (src_x);
3530                 if(s->flags&CODEC_FLAG_EMU_EDGE){
3531                     if(   (unsigned)src_x > s->h_edge_pos - (motion_x&3) - 8
3532                        || (unsigned)src_y > s->v_edge_pos - (motion_y&3) - 8 ){
3533                         ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->linesize, 9, 9, src_x, src_y, s->h_edge_pos, s->v_edge_pos);
3534                         ptr= s->edge_emu_buffer;
3535                     }
3536                 }
3537                 dest = dest_y + ((i & 1) * 8) + (i >> 1) * 8 * s->linesize;
3538                 qpix_op[1][dxy](dest, ptr, s->linesize);
3539
3540                 mx += s->mv[dir][i][0]/2;
3541                 my += s->mv[dir][i][1]/2;
3542             }
3543         }else{
3544             for(i=0;i<4;i++) {
3545                 hpel_motion(s, dest_y + ((i & 1) * 8) + (i >> 1) * 8 * s->linesize,
3546                             ref_picture[0], 0, 0,
3547                             mb_x * 16 + (i & 1) * 8, mb_y * 16 + (i >>1) * 8,
3548                             s->width, s->height, s->linesize,
3549                             s->h_edge_pos, s->v_edge_pos,
3550                             8, 8, pix_op[1],
3551                             s->mv[dir][i][0], s->mv[dir][i][1]);
3552
3553                 mx += s->mv[dir][i][0];
3554                 my += s->mv[dir][i][1];
3555             }
3556         }
3557
3558         if(!(s->flags&CODEC_FLAG_GRAY))
3559             chroma_4mv_motion(s, dest_cb, dest_cr, ref_picture, pix_op[1], mx, my);
3560         break;
3561     case MV_TYPE_FIELD:
3562         if (s->picture_structure == PICT_FRAME) {
3563             if(s->quarter_sample){
3564                 for(i=0; i<2; i++){
3565                     qpel_motion(s, dest_y, dest_cb, dest_cr,
3566                                 1, i, s->field_select[dir][i],
3567                                 ref_picture, pix_op, qpix_op,
3568                                 s->mv[dir][i][0], s->mv[dir][i][1], 8);
3569                 }
3570             }else{
3571                 /* top field */
3572                 mpeg_motion(s, dest_y, dest_cb, dest_cr,
3573                             1, 0, s->field_select[dir][0],
3574                             ref_picture, pix_op,
3575                             s->mv[dir][0][0], s->mv[dir][0][1], 8);
3576                 /* bottom field */
3577                 mpeg_motion(s, dest_y, dest_cb, dest_cr,
3578                             1, 1, s->field_select[dir][1],
3579                             ref_picture, pix_op,
3580                             s->mv[dir][1][0], s->mv[dir][1][1], 8);
3581             }
3582         } else {
3583             if(s->picture_structure != s->field_select[dir][0] + 1 && s->pict_type != B_TYPE && !s->first_field){
3584                 ref_picture= s->current_picture_ptr->data;
3585             }
3586
3587             mpeg_motion(s, dest_y, dest_cb, dest_cr,
3588                         0, 0, s->field_select[dir][0],
3589                         ref_picture, pix_op,
3590                         s->mv[dir][0][0], s->mv[dir][0][1], 16);
3591         }
3592         break;
3593     case MV_TYPE_16X8:
3594         for(i=0; i<2; i++){
3595             uint8_t ** ref2picture;
3596
3597             if(s->picture_structure == s->field_select[dir][i] + 1 || s->pict_type == B_TYPE || s->first_field){
3598                 ref2picture= ref_picture;
3599             }else{
3600                 ref2picture= s->current_picture_ptr->data;
3601             }
3602
3603             mpeg_motion(s, dest_y, dest_cb, dest_cr,
3604                         0, 0, s->field_select[dir][i],
3605                         ref2picture, pix_op,
3606                         s->mv[dir][i][0], s->mv[dir][i][1] + 16*i, 8);
3607
3608             dest_y += 16*s->linesize;
3609             dest_cb+= (16>>s->chroma_y_shift)*s->uvlinesize;
3610             dest_cr+= (16>>s->chroma_y_shift)*s->uvlinesize;
3611         }
3612         break;
3613     case MV_TYPE_DMV:
3614         if(s->picture_structure == PICT_FRAME){
3615             for(i=0; i<2; i++){
3616                 int j;
3617                 for(j=0; j<2; j++){
3618                     mpeg_motion(s, dest_y, dest_cb, dest_cr,
3619                                 1, j, j^i,
3620                                 ref_picture, pix_op,
3621                                 s->mv[dir][2*i + j][0], s->mv[dir][2*i + j][1], 8);
3622                 }
3623                 pix_op = s->dsp.avg_pixels_tab;
3624             }
3625         }else{
3626             for(i=0; i<2; i++){
3627                 mpeg_motion(s, dest_y, dest_cb, dest_cr,
3628                             0, 0, s->picture_structure != i+1,
3629                             ref_picture, pix_op,
3630                             s->mv[dir][2*i][0],s->mv[dir][2*i][1],16);
3631
3632                 // after the put, further predictions are averaged into the same block
3633                 pix_op=s->dsp.avg_pixels_tab;
3634
3635                 //the opposite parity is always in the same frame if this is the second field
3636                 if(!s->first_field){
3637                     ref_picture = s->current_picture_ptr->data;
3638                 }
3639             }
3640         }
3641     break;
3642     default: assert(0);
3643     }
3644 }
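/*
 * Illustrative sketch, not part of the original source: how a decoder drives
 * MPV_motion(). The forward prediction is written with the "put" tables; if a
 * backward vector is also present (B-frames) the second call uses the "avg"
 * tables so that the two predictions are averaged in place. This mirrors the
 * non-lowres path of MPV_decode_mb_internal() further down in this file.
 */
#if 0
    op_pixels_func (*op_pix)[4]   = s->dsp.put_pixels_tab;
    qpel_mc_func   (*op_qpix)[16] = s->dsp.put_qpel_pixels_tab;

    if (s->mv_dir & MV_DIR_FORWARD) {
        MPV_motion(s, dest_y, dest_cb, dest_cr, 0, s->last_picture.data, op_pix, op_qpix);
        op_pix = s->dsp.avg_pixels_tab;       /* further predictions are averaged in */
        op_qpix= s->dsp.avg_qpel_pixels_tab;
    }
    if (s->mv_dir & MV_DIR_BACKWARD) {
        MPV_motion(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.data, op_pix, op_qpix);
    }
#endif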
3645
3646 /**
3647  * motion compensation of a single macroblock
3648  * @param s context
3649  * @param dest_y luma destination pointer
3650  * @param dest_cb chroma cb/u destination pointer
3651  * @param dest_cr chroma cr/v destination pointer
3652  * @param dir direction (0->forward, 1->backward)
3653  * @param ref_picture array[3] of pointers to the 3 planes of the reference picture
3654  * @param pix_op halfpel motion compensation function (average or put normally)
3655  * the motion vectors are taken from s->mv and the MV type from s->mv_type
3656  */
3657 static inline void MPV_motion_lowres(MpegEncContext *s,
3658                               uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
3659                               int dir, uint8_t **ref_picture,
3660                               h264_chroma_mc_func *pix_op)
3661 {
3662     int mx, my;
3663     int mb_x, mb_y, i;
3664     const int lowres= s->avctx->lowres;
3665     const int block_s= 8>>lowres;
3666
3667     mb_x = s->mb_x;
3668     mb_y = s->mb_y;
3669
3670     switch(s->mv_type) {
3671     case MV_TYPE_16X16:
3672         mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
3673                     0, 0, 0,
3674                     ref_picture, pix_op,
3675                     s->mv[dir][0][0], s->mv[dir][0][1], 2*block_s);
3676         break;
3677     case MV_TYPE_8X8:
3678         mx = 0;
3679         my = 0;
3680             for(i=0;i<4;i++) {
3681                 hpel_motion_lowres(s, dest_y + ((i & 1) + (i >> 1) * s->linesize)*block_s,
3682                             ref_picture[0], 0, 0,
3683                             (2*mb_x + (i & 1))*block_s, (2*mb_y + (i >>1))*block_s,
3684                             s->width, s->height, s->linesize,
3685                             s->h_edge_pos >> lowres, s->v_edge_pos >> lowres,
3686                             block_s, block_s, pix_op,
3687                             s->mv[dir][i][0], s->mv[dir][i][1]);
3688
3689                 mx += s->mv[dir][i][0];
3690                 my += s->mv[dir][i][1];
3691             }
3692
3693         if(!(s->flags&CODEC_FLAG_GRAY))
3694             chroma_4mv_motion_lowres(s, dest_cb, dest_cr, ref_picture, pix_op, mx, my);
3695         break;
3696     case MV_TYPE_FIELD:
3697         if (s->picture_structure == PICT_FRAME) {
3698             /* top field */
3699             mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
3700                         1, 0, s->field_select[dir][0],
3701                         ref_picture, pix_op,
3702                         s->mv[dir][0][0], s->mv[dir][0][1], block_s);
3703             /* bottom field */
3704             mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
3705                         1, 1, s->field_select[dir][1],
3706                         ref_picture, pix_op,
3707                         s->mv[dir][1][0], s->mv[dir][1][1], block_s);
3708         } else {
3709             if(s->picture_structure != s->field_select[dir][0] + 1 && s->pict_type != B_TYPE && !s->first_field){
3710                 ref_picture= s->current_picture_ptr->data;
3711             }
3712
3713             mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
3714                         0, 0, s->field_select[dir][0],
3715                         ref_picture, pix_op,
3716                         s->mv[dir][0][0], s->mv[dir][0][1], 2*block_s);
3717         }
3718         break;
3719     case MV_TYPE_16X8:
3720         for(i=0; i<2; i++){
3721             uint8_t ** ref2picture;
3722
3723             if(s->picture_structure == s->field_select[dir][i] + 1 || s->pict_type == B_TYPE || s->first_field){
3724                 ref2picture= ref_picture;
3725             }else{
3726                 ref2picture= s->current_picture_ptr->data;
3727             }
3728
3729             mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
3730                         0, 0, s->field_select[dir][i],
3731                         ref2picture, pix_op,
3732                         s->mv[dir][i][0], s->mv[dir][i][1] + 2*block_s*i, block_s);
3733
3734             dest_y += 2*block_s*s->linesize;
3735             dest_cb+= (2*block_s>>s->chroma_y_shift)*s->uvlinesize;
3736             dest_cr+= (2*block_s>>s->chroma_y_shift)*s->uvlinesize;
3737         }
3738         break;
3739     case MV_TYPE_DMV:
3740         if(s->picture_structure == PICT_FRAME){
3741             for(i=0; i<2; i++){
3742                 int j;
3743                 for(j=0; j<2; j++){
3744                     mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
3745                                 1, j, j^i,
3746                                 ref_picture, pix_op,
3747                                 s->mv[dir][2*i + j][0], s->mv[dir][2*i + j][1], block_s);
3748                 }
3749                 pix_op = s->dsp.avg_h264_chroma_pixels_tab;
3750             }
3751         }else{
3752             for(i=0; i<2; i++){
3753                 mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
3754                             0, 0, s->picture_structure != i+1,
3755                             ref_picture, pix_op,
3756                             s->mv[dir][2*i][0],s->mv[dir][2*i][1],2*block_s);
3757
3758                 // after the put, further predictions are averaged into the same block
3759                 pix_op = s->dsp.avg_h264_chroma_pixels_tab;
3760
3761                 //the opposite parity is always in the same frame if this is the second field
3762                 if(!s->first_field){
3763                     ref_picture = s->current_picture_ptr->data;
3764                 }
3765             }
3766         }
3767     break;
3768     default: assert(0);
3769     }
3770 }
3771
3772 /* put block[] to dest[] */
3773 static inline void put_dct(MpegEncContext *s,
3774                            DCTELEM *block, int i, uint8_t *dest, int line_size, int qscale)
3775 {
3776     s->dct_unquantize_intra(s, block, i, qscale);
3777     s->dsp.idct_put (dest, line_size, block);
3778 }
3779
3780 /* add block[] to dest[] */
3781 static inline void add_dct(MpegEncContext *s,
3782                            DCTELEM *block, int i, uint8_t *dest, int line_size)
3783 {
3784     if (s->block_last_index[i] >= 0) {
3785         s->dsp.idct_add (dest, line_size, block);
3786     }
3787 }
3788
3789 static inline void add_dequant_dct(MpegEncContext *s,
3790                            DCTELEM *block, int i, uint8_t *dest, int line_size, int qscale)
3791 {
3792     if (s->block_last_index[i] >= 0) {
3793         s->dct_unquantize_inter(s, block, i, qscale);
3794
3795         s->dsp.idct_add (dest, line_size, block);
3796     }
3797 }
3798
3799 /**
3800  * cleans dc, ac and coded_block for the current non-intra MB
3801  */
3802 void ff_clean_intra_table_entries(MpegEncContext *s)
3803 {
3804     int wrap = s->b8_stride;
3805     int xy = s->block_index[0];
3806
3807     s->dc_val[0][xy           ] =
3808     s->dc_val[0][xy + 1       ] =
3809     s->dc_val[0][xy     + wrap] =
3810     s->dc_val[0][xy + 1 + wrap] = 1024;
3811     /* ac pred */
3812     memset(s->ac_val[0][xy       ], 0, 32 * sizeof(int16_t));
3813     memset(s->ac_val[0][xy + wrap], 0, 32 * sizeof(int16_t));
3814     if (s->msmpeg4_version>=3) {
3815         s->coded_block[xy           ] =
3816         s->coded_block[xy + 1       ] =
3817         s->coded_block[xy     + wrap] =
3818         s->coded_block[xy + 1 + wrap] = 0;
3819     }
3820     /* chroma */
3821     wrap = s->mb_stride;
3822     xy = s->mb_x + s->mb_y * wrap;
3823     s->dc_val[1][xy] =
3824     s->dc_val[2][xy] = 1024;
3825     /* ac pred */
3826     memset(s->ac_val[1][xy], 0, 16 * sizeof(int16_t));
3827     memset(s->ac_val[2][xy], 0, 16 * sizeof(int16_t));
3828
3829     s->mbintra_table[xy]= 0;
3830 }
3831
3832 /* generic function called after a macroblock has been parsed by the
3833    decoder or after it has been encoded by the encoder.
3834
3835    Important variables used:
3836    s->mb_intra : true if intra macroblock
3837    s->mv_dir   : motion vector direction
3838    s->mv_type  : motion vector type
3839    s->mv       : motion vector
3840    s->interlaced_dct : true if interlaced dct used (mpeg2)
3841  */
3842 static always_inline void MPV_decode_mb_internal(MpegEncContext *s, DCTELEM block[12][64], int lowres_flag)
3843 {
3844     int mb_x, mb_y;
3845     const int mb_xy = s->mb_y * s->mb_stride + s->mb_x;
3846 #ifdef HAVE_XVMC
3847     if(s->avctx->xvmc_acceleration){
3848         XVMC_decode_mb(s);//xvmc uses pblocks
3849         return;
3850     }
3851 #endif
3852
3853     mb_x = s->mb_x;
3854     mb_y = s->mb_y;
3855
3856     if(s->avctx->debug&FF_DEBUG_DCT_COEFF) {
3857        /* save DCT coefficients */
3858        int i,j;
3859        DCTELEM *dct = &s->current_picture.dct_coeff[mb_xy*64*6];
3860        for(i=0; i<6; i++)
3861            for(j=0; j<64; j++)
3862                *dct++ = block[i][s->dsp.idct_permutation[j]];
3863     }
3864
3865     s->current_picture.qscale_table[mb_xy]= s->qscale;
3866
3867     /* update DC predictors for P macroblocks */
3868     if (!s->mb_intra) {
3869         if (s->h263_pred || s->h263_aic) {
3870             if(s->mbintra_table[mb_xy])
3871                 ff_clean_intra_table_entries(s);
3872         } else {
3873             s->last_dc[0] =
3874             s->last_dc[1] =
3875             s->last_dc[2] = 128 << s->intra_dc_precision;
3876         }
3877     }
3878     else if (s->h263_pred || s->h263_aic)
3879         s->mbintra_table[mb_xy]=1;
3880
3881     if ((s->flags&CODEC_FLAG_PSNR) || !(s->encoding && (s->intra_only || s->pict_type==B_TYPE))) { //FIXME precalc
3882         uint8_t *dest_y, *dest_cb, *dest_cr;
3883         int dct_linesize, dct_offset;
3884         op_pixels_func (*op_pix)[4];
3885         qpel_mc_func (*op_qpix)[16];
3886         const int linesize= s->current_picture.linesize[0]; //not s->linesize as this would be wrong for field pics
3887         const int uvlinesize= s->current_picture.linesize[1];
3888         const int readable= s->pict_type != B_TYPE || s->encoding || s->avctx->draw_horiz_band || lowres_flag;
3889         const int block_size= lowres_flag ? 8>>s->avctx->lowres : 8;
3890
3891         /* avoid the copy if the macroblock was skipped in the last frame too */
3892         /* skip only during decoding, as during encoding we might trash the buffers a bit */
3893         if(!s->encoding){
3894             uint8_t *mbskip_ptr = &s->mbskip_table[mb_xy];
3895             const int age= s->current_picture.age;
3896
3897             assert(age);
3898
3899             if (s->mb_skipped) {
3900                 s->mb_skipped= 0;
3901                 assert(s->pict_type!=I_TYPE);
3902
3903                 (*mbskip_ptr) ++; /* indicate that this time we skipped it */
3904                 if(*mbskip_ptr >99) *mbskip_ptr= 99;
3905
3906                 /* if the previous one was skipped too, then there is nothing to do! */
3907                 if (*mbskip_ptr >= age && s->current_picture.reference){
3908                     return;
3909                 }
3910             } else if(!s->current_picture.reference){
3911                 (*mbskip_ptr) ++; /* increase counter so the age can be compared cleanly */
3912                 if(*mbskip_ptr >99) *mbskip_ptr= 99;
3913             } else{
3914                 *mbskip_ptr = 0; /* not skipped */
3915             }
3916         }
3917
3918         dct_linesize = linesize << s->interlaced_dct;
3919         dct_offset =(s->interlaced_dct)? linesize : linesize*block_size;
3920
3921         if(readable){
3922             dest_y=  s->dest[0];
3923             dest_cb= s->dest[1];
3924             dest_cr= s->dest[2];
3925         }else{
3926             dest_y = s->b_scratchpad;
3927             dest_cb= s->b_scratchpad+16*linesize;
3928             dest_cr= s->b_scratchpad+32*linesize;
3929         }
3930
3931         if (!s->mb_intra) {
3932             /* motion handling */
3933             /* decoding or more than one mb_type (MC was already done otherwise) */
3934             if(!s->encoding){
3935                 if(lowres_flag){
3936                     h264_chroma_mc_func *op_pix = s->dsp.put_h264_chroma_pixels_tab;
3937
3938                     if (s->mv_dir & MV_DIR_FORWARD) {
3939                         MPV_motion_lowres(s, dest_y, dest_cb, dest_cr, 0, s->last_picture.data, op_pix);
3940                         op_pix = s->dsp.avg_h264_chroma_pixels_tab;
3941                     }
3942                     if (s->mv_dir & MV_DIR_BACKWARD) {
3943                         MPV_motion_lowres(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.data, op_pix);
3944                     }
3945                 }else{
3946                     if ((!s->no_rounding) || s->pict_type==B_TYPE){
3947                         op_pix = s->dsp.put_pixels_tab;
3948                         op_qpix= s->dsp.put_qpel_pixels_tab;
3949                     }else{
3950                         op_pix = s->dsp.put_no_rnd_pixels_tab;
3951                         op_qpix= s->dsp.put_no_rnd_qpel_pixels_tab;
3952                     }
3953                     if (s->mv_dir & MV_DIR_FORWARD) {
3954                         MPV_motion(s, dest_y, dest_cb, dest_cr, 0, s->last_picture.data, op_pix, op_qpix);
3955                         op_pix = s->dsp.avg_pixels_tab;
3956                         op_qpix= s->dsp.avg_qpel_pixels_tab;
3957                     }
3958                     if (s->mv_dir & MV_DIR_BACKWARD) {
3959                         MPV_motion(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.data, op_pix, op_qpix);
3960                     }
3961                 }
3962             }
3963
3964             /* skip dequant / idct if we are really late ;) */
3965             if(s->hurry_up>1) goto skip_idct;
3966             if(s->avctx->skip_idct){
3967                 if(  (s->avctx->skip_idct >= AVDISCARD_NONREF && s->pict_type == B_TYPE)
3968                    ||(s->avctx->skip_idct >= AVDISCARD_NONKEY && s->pict_type != I_TYPE)
3969                    || s->avctx->skip_idct >= AVDISCARD_ALL)
3970                     goto skip_idct;
3971             }
3972
3973             /* add dct residue */
3974             if(s->encoding || !(   s->h263_msmpeg4 || s->codec_id==CODEC_ID_MPEG1VIDEO || s->codec_id==CODEC_ID_MPEG2VIDEO
3975                                 || (s->codec_id==CODEC_ID_MPEG4 && !s->mpeg_quant))){
3976                 add_dequant_dct(s, block[0], 0, dest_y                          , dct_linesize, s->qscale);
3977                 add_dequant_dct(s, block[1], 1, dest_y              + block_size, dct_linesize, s->qscale);
3978                 add_dequant_dct(s, block[2], 2, dest_y + dct_offset             , dct_linesize, s->qscale);
3979                 add_dequant_dct(s, block[3], 3, dest_y + dct_offset + block_size, dct_linesize, s->qscale);
3980
3981                 if(!(s->flags&CODEC_FLAG_GRAY)){
3982                     if (s->chroma_y_shift){
3983                         add_dequant_dct(s, block[4], 4, dest_cb, uvlinesize, s->chroma_qscale);
3984                         add_dequant_dct(s, block[5], 5, dest_cr, uvlinesize, s->chroma_qscale);
3985                     }else{
3986                         dct_linesize >>= 1;
3987                         dct_offset >>=1;
3988                         add_dequant_dct(s, block[4], 4, dest_cb,              dct_linesize, s->chroma_qscale);
3989                         add_dequant_dct(s, block[5], 5, dest_cr,              dct_linesize, s->chroma_qscale);
3990                         add_dequant_dct(s, block[6], 6, dest_cb + dct_offset, dct_linesize, s->chroma_qscale);
3991                         add_dequant_dct(s, block[7], 7, dest_cr + dct_offset, dct_linesize, s->chroma_qscale);
3992                     }
3993                 }
3994             } else if(s->codec_id != CODEC_ID_WMV2){
3995                 add_dct(s, block[0], 0, dest_y                          , dct_linesize);
3996                 add_dct(s, block[1], 1, dest_y              + block_size, dct_linesize);
3997                 add_dct(s, block[2], 2, dest_y + dct_offset             , dct_linesize);
3998                 add_dct(s, block[3], 3, dest_y + dct_offset + block_size, dct_linesize);
3999
4000                 if(!(s->flags&CODEC_FLAG_GRAY)){
4001                     if(s->chroma_y_shift){//Chroma420
4002                         add_dct(s, block[4], 4, dest_cb, uvlinesize);
4003                         add_dct(s, block[5], 5, dest_cr, uvlinesize);
4004                     }else{
4005                         //chroma422
4006                         dct_linesize = uvlinesize << s->interlaced_dct;
4007                         dct_offset =(s->interlaced_dct)? uvlinesize : uvlinesize*8;
4008
4009                         add_dct(s, block[4], 4, dest_cb, dct_linesize);
4010                         add_dct(s, block[5], 5, dest_cr, dct_linesize);
4011                         add_dct(s, block[6], 6, dest_cb+dct_offset, dct_linesize);
4012                         add_dct(s, block[7], 7, dest_cr+dct_offset, dct_linesize);
4013                         if(!s->chroma_x_shift){//Chroma444
4014                             add_dct(s, block[8], 8, dest_cb+8, dct_linesize);
4015                             add_dct(s, block[9], 9, dest_cr+8, dct_linesize);
4016                             add_dct(s, block[10], 10, dest_cb+8+dct_offset, dct_linesize);
4017                             add_dct(s, block[11], 11, dest_cr+8+dct_offset, dct_linesize);
4018                         }
4019                     }
4020                 }//fi gray
4021             }
4022             else{
4023                 ff_wmv2_add_mb(s, block, dest_y, dest_cb, dest_cr);
4024             }
4025         } else {
4026             /* dct only in intra block */
4027             if(s->encoding || !(s->codec_id==CODEC_ID_MPEG1VIDEO || s->codec_id==CODEC_ID_MPEG2VIDEO)){
4028                 put_dct(s, block[0], 0, dest_y                          , dct_linesize, s->qscale);
4029                 put_dct(s, block[1], 1, dest_y              + block_size, dct_linesize, s->qscale);
4030                 put_dct(s, block[2], 2, dest_y + dct_offset             , dct_linesize, s->qscale);
4031                 put_dct(s, block[3], 3, dest_y + dct_offset + block_size, dct_linesize, s->qscale);
4032
4033                 if(!(s->flags&CODEC_FLAG_GRAY)){
4034                     if(s->chroma_y_shift){
4035                         put_dct(s, block[4], 4, dest_cb, uvlinesize, s->chroma_qscale);
4036                         put_dct(s, block[5], 5, dest_cr, uvlinesize, s->chroma_qscale);
4037                     }else{
4038                         dct_offset >>=1;
4039                         dct_linesize >>=1;
4040                         put_dct(s, block[4], 4, dest_cb,              dct_linesize, s->chroma_qscale);
4041                         put_dct(s, block[5], 5, dest_cr,              dct_linesize, s->chroma_qscale);
4042                         put_dct(s, block[6], 6, dest_cb + dct_offset, dct_linesize, s->chroma_qscale);
4043                         put_dct(s, block[7], 7, dest_cr + dct_offset, dct_linesize, s->chroma_qscale);
4044                     }
4045                 }
4046             }else{
4047                 s->dsp.idct_put(dest_y                          , dct_linesize, block[0]);
4048                 s->dsp.idct_put(dest_y              + block_size, dct_linesize, block[1]);
4049                 s->dsp.idct_put(dest_y + dct_offset             , dct_linesize, block[2]);
4050                 s->dsp.idct_put(dest_y + dct_offset + block_size, dct_linesize, block[3]);
4051
4052                 if(!(s->flags&CODEC_FLAG_GRAY)){
4053                     if(s->chroma_y_shift){
4054                         s->dsp.idct_put(dest_cb, uvlinesize, block[4]);
4055                         s->dsp.idct_put(dest_cr, uvlinesize, block[5]);
4056                     }else{
4057
4058                         dct_linesize = uvlinesize << s->interlaced_dct;
4059                         dct_offset =(s->interlaced_dct)? uvlinesize : uvlinesize*8;
4060
4061                         s->dsp.idct_put(dest_cb,              dct_linesize, block[4]);
4062                         s->dsp.idct_put(dest_cr,              dct_linesize, block[5]);
4063                         s->dsp.idct_put(dest_cb + dct_offset, dct_linesize, block[6]);
4064                         s->dsp.idct_put(dest_cr + dct_offset, dct_linesize, block[7]);
4065                         if(!s->chroma_x_shift){//Chroma444
4066                             s->dsp.idct_put(dest_cb + 8,              dct_linesize, block[8]);
4067                             s->dsp.idct_put(dest_cr + 8,              dct_linesize, block[9]);
4068                             s->dsp.idct_put(dest_cb + 8 + dct_offset, dct_linesize, block[10]);
4069                             s->dsp.idct_put(dest_cr + 8 + dct_offset, dct_linesize, block[11]);
4070                         }
4071                     }
4072                 }//gray
4073             }
4074         }
4075 skip_idct:
4076         if(!readable){
4077             s->dsp.put_pixels_tab[0][0](s->dest[0], dest_y ,   linesize,16);
4078             s->dsp.put_pixels_tab[s->chroma_x_shift][0](s->dest[1], dest_cb, uvlinesize,16 >> s->chroma_y_shift);
4079             s->dsp.put_pixels_tab[s->chroma_x_shift][0](s->dest[2], dest_cr, uvlinesize,16 >> s->chroma_y_shift);
4080         }
4081     }
4082 }
4083
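/* thin wrapper selecting the lowres or full-resolution variant of
   MPV_decode_mb_internal() depending on avctx->lowres */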
4084 void MPV_decode_mb(MpegEncContext *s, DCTELEM block[12][64]){
4085     if(s->avctx->lowres) MPV_decode_mb_internal(s, block, 1);
4086     else                  MPV_decode_mb_internal(s, block, 0);
4087 }
4088
4089 #ifdef CONFIG_ENCODERS
4090
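/* Zero out a block whose only nonzero quantized coefficients are a few
   isolated +-1 values: if any |coeff| > 1 is found the block is kept,
   otherwise a score weighted by the zero run preceding each +-1 coefficient
   is compared against the threshold. A negative threshold means the DC
   coefficient takes part in the test as well. Dropping such blocks presumably
   costs little quality compared to the bits needed to code them. */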
4091 static inline void dct_single_coeff_elimination(MpegEncContext *s, int n, int threshold)
4092 {
4093     static const char tab[64]=
4094         {3,2,2,1,1,1,1,1,
4095          1,1,1,1,1,1,1,1,
4096          1,1,1,1,1,1,1,1,
4097          0,0,0,0,0,0,0,0,
4098          0,0,0,0,0,0,0,0,
4099          0,0,0,0,0,0,0,0,
4100          0,0,0,0,0,0,0,0,
4101          0,0,0,0,0,0,0,0};
4102     int score=0;
4103     int run=0;
4104     int i;
4105     DCTELEM *block= s->block[n];
4106     const int last_index= s->block_last_index[n];
4107     int skip_dc;
4108
4109     if(threshold<0){
4110         skip_dc=0;
4111         threshold= -threshold;
4112     }else
4113         skip_dc=1;
4114
4115     /* are all the coefficients we could set to zero already zero? */
4116     if(last_index<=skip_dc - 1) return;
4117
4118     for(i=0; i<=last_index; i++){
4119         const int j = s->intra_scantable.permutated[i];
4120         const int level = ABS(block[j]);
4121         if(level==1){
4122             if(skip_dc && i==0) continue;
4123             score+= tab[run];
4124             run=0;
4125         }else if(level>1){
4126             return;
4127         }else{
4128             run++;
4129         }
4130     }
4131     if(score >= threshold) return;
4132     for(i=skip_dc; i<=last_index; i++){
4133         const int j = s->intra_scantable.permutated[i];
4134         block[j]=0;
4135     }
4136     if(block[0]) s->block_last_index[n]= 0;
4137     else         s->block_last_index[n]= -1;
4138 }
4139
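/* clamp the quantized coefficients to the [min_qcoeff, max_qcoeff] range the
   bitstream can represent (the intra DC coefficient is left untouched) and
   warn if clipping happened while simple macroblock decision is in use */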
4140 static inline void clip_coeffs(MpegEncContext *s, DCTELEM *block, int last_index)
4141 {
4142     int i;
4143     const int maxlevel= s->max_qcoeff;
4144     const int minlevel= s->min_qcoeff;
4145     int overflow=0;
4146
4147     if(s->mb_intra){
4148         i=1; //skip clipping of intra dc
4149     }else
4150         i=0;
4151
4152     for(;i<=last_index; i++){
4153         const int j= s->intra_scantable.permutated[i];
4154         int level = block[j];
4155
4156         if     (level>maxlevel){
4157             level=maxlevel;
4158             overflow++;
4159         }else if(level<minlevel){
4160             level=minlevel;
4161             overflow++;
4162         }
4163
4164         block[j]= level;
4165     }
4166
4167     if(overflow && s->avctx->mb_decision == FF_MB_DECISION_SIMPLE)
4168         av_log(s->avctx, AV_LOG_INFO, "warning, clipping %d dct coefficients to %d..%d\n", overflow, minlevel, maxlevel);
4169 }
4170
4171 #endif //CONFIG_ENCODERS
4172
4173 /**
4174  * Call the draw_horiz_band() callback, if one is set, for a completed horizontal band.
4175  * @param h the normal height; it is reduced automatically if needed for the last row
4176  */
4177 void ff_draw_horiz_band(MpegEncContext *s, int y, int h){
4178     if (s->avctx->draw_horiz_band) {
4179         AVFrame *src;
4180         int offset[4];
4181
4182         if(s->picture_structure != PICT_FRAME){
4183             h <<= 1;
4184             y <<= 1;
4185             if(s->first_field  && !(s->avctx->slice_flags&SLICE_FLAG_ALLOW_FIELD)) return;
4186         }
4187
4188         h= FFMIN(h, s->avctx->height - y);
4189
4190         if(s->pict_type==B_TYPE || s->low_delay || (s->avctx->slice_flags&SLICE_FLAG_CODED_ORDER))
4191             src= (AVFrame*)s->current_picture_ptr;
4192         else if(s->last_picture_ptr)
4193             src= (AVFrame*)s->last_picture_ptr;
4194         else
4195             return;
4196
4197         if(s->pict_type==B_TYPE && s->picture_structure == PICT_FRAME && s->out_format != FMT_H264){
4198             offset[0]=
4199             offset[1]=
4200             offset[2]=
4201             offset[3]= 0;
4202         }else{
4203             offset[0]= y * s->linesize;
4204             offset[1]=
4205             offset[2]= (y >> s->chroma_y_shift) * s->uvlinesize;
4206             offset[3]= 0;
4207         }
4208
4209         emms_c();
4210
4211         s->avctx->draw_horiz_band(s->avctx, src, offset,
4212                                   y, s->picture_structure, h);
4213     }
4214 }
4215
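/* set up block_index[] (offsets into the per-8x8-block prediction tables) and
   the dest[] pointers for the current macroblock position; they are offset so
   that ff_update_block_index() steps them forward one macroblock at a time */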
4216 void ff_init_block_index(MpegEncContext *s){ //FIXME maybe rename
4217     const int linesize= s->current_picture.linesize[0]; //not s->linesize as this would be wrong for field pics
4218     const int uvlinesize= s->current_picture.linesize[1];
4219     const int mb_size= 4 - s->avctx->lowres;
4220
4221     s->block_index[0]= s->b8_stride*(s->mb_y*2    ) - 2 + s->mb_x*2;
4222     s->block_index[1]= s->b8_stride*(s->mb_y*2    ) - 1 + s->mb_x*2;
4223     s->block_index[2]= s->b8_stride*(s->mb_y*2 + 1) - 2 + s->mb_x*2;
4224     s->block_index[3]= s->b8_stride*(s->mb_y*2 + 1) - 1 + s->mb_x*2;
4225     s->block_index[4]= s->mb_stride*(s->mb_y + 1)                + s->b8_stride*s->mb_height*2 + s->mb_x - 1;
4226     s->block_index[5]= s->mb_stride*(s->mb_y + s->mb_height + 2) + s->b8_stride*s->mb_height*2 + s->mb_x - 1;
4227     //block_index is not used by mpeg2, so it is not affected by chroma_format
4228
4229     s->dest[0] = s->current_picture.data[0] + ((s->mb_x - 1) << mb_size);
4230     s->dest[1] = s->current_picture.data[1] + ((s->mb_x - 1) << (mb_size - s->chroma_x_shift));
4231     s->dest[2] = s->current_picture.data[2] + ((s->mb_x - 1) << (mb_size - s->chroma_x_shift));
4232
4233     if(!(s->pict_type==B_TYPE && s->avctx->draw_horiz_band && s->picture_structure==PICT_FRAME))
4234     {
4235         s->dest[0] += s->mb_y *   linesize << mb_size;
4236         s->dest[1] += s->mb_y * uvlinesize << (mb_size - s->chroma_y_shift);
4237         s->dest[2] += s->mb_y * uvlinesize << (mb_size - s->chroma_y_shift);
4238     }
4239 }
4240
4241 #ifdef CONFIG_ENCODERS
4242
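/* per-pixel activity weight for quantizer noise shaping: for each pixel of
   the 8x8 block, 36 times the standard deviation of its 3x3 neighbourhood
   (clipped to the block); flat areas get small weights, textured ones large */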
4243 static void get_vissual_weight(int16_t *weight, uint8_t *ptr, int stride){
4244     int x, y;
4245 //FIXME optimize
4246     for(y=0; y<8; y++){
4247         for(x=0; x<8; x++){
4248             int x2, y2;
4249             int sum=0;
4250             int sqr=0;
4251             int count=0;
4252
4253             for(y2= FFMAX(y-1, 0); y2 < FFMIN(8, y+2); y2++){
4254                 for(x2= FFMAX(x-1, 0); x2 < FFMIN(8, x+2); x2++){
4255                     int v= ptr[x2 + y2*stride];
4256                     sum += v;
4257                     sqr += v*v;
4258                     count++;
4259                 }
4260             }
4261             weight[x + 8*y]= (36*ff_sqrt(count*sqr - sum*sum)) / count;
4262         }
4263     }
4264 }
4265
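/* Encode one macroblock: apply adaptive quantization, fetch the source pixels
   (intra) or the motion-compensated residual (inter), optionally choose field
   DCT, transform and quantize each block, run coefficient elimination and
   clipping, and finally emit the macroblock with the codec-specific encoder.
   mb_block_height/mb_block_count are 8/6 for 4:2:0 and 16/8 for 4:2:2. */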
4266 static always_inline void encode_mb_internal(MpegEncContext *s, int motion_x, int motion_y, int mb_block_height, int mb_block_count)
4267 {
4268     int16_t weight[8][64];
4269     DCTELEM orig[8][64];
4270     const int mb_x= s->mb_x;
4271     const int mb_y= s->mb_y;
4272     int i;
4273     int skip_dct[8];
4274     int dct_offset   = s->linesize*8; //default for progressive frames
4275     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
4276     int wrap_y, wrap_c;
4277
4278     for(i=0; i<mb_block_count; i++) skip_dct[i]=0;
4279
4280     if(s->adaptive_quant){
4281         const int last_qp= s->qscale;
4282         const int mb_xy= mb_x + mb_y*s->mb_stride;
4283
4284         s->lambda= s->lambda_table[mb_xy];
4285         update_qscale(s);
4286
4287         if(!(s->flags&CODEC_FLAG_QP_RD)){
4288             s->dquant= s->qscale - last_qp;
4289
4290             if(s->out_format==FMT_H263){
4291                 s->dquant= clip(s->dquant, -2, 2); //FIXME RD
4292
4293                 if(s->codec_id==CODEC_ID_MPEG4){
4294                     if(!s->mb_intra){
4295                         if(s->pict_type == B_TYPE){
4296                             if(s->dquant&1)
4297                                 s->dquant= (s->dquant/2)*2;
4298                             if(s->mv_dir&MV_DIRECT)
4299                                 s->dquant= 0;
4300                         }
4301                         if(s->mv_type==MV_TYPE_8X8)
4302                             s->dquant=0;
4303                     }
4304                 }
4305             }
4306         }
4307         ff_set_qscale(s, last_qp + s->dquant);
4308     }else if(s->flags&CODEC_FLAG_QP_RD)
4309         ff_set_qscale(s, s->qscale + s->dquant);
4310
4311     wrap_y = s->linesize;
4312     wrap_c = s->uvlinesize;
4313     ptr_y = s->new_picture.data[0] + (mb_y * 16 * wrap_y) + mb_x * 16;
4314     ptr_cb = s->new_picture.data[1] + (mb_y * mb_block_height * wrap_c) + mb_x * 8;
4315     ptr_cr = s->new_picture.data[2] + (mb_y * mb_block_height * wrap_c) + mb_x * 8;
4316
4317     if(mb_x*16+16 > s->width || mb_y*16+16 > s->height){
4318         uint8_t *ebuf= s->edge_emu_buffer + 32;
4319         ff_emulated_edge_mc(ebuf            , ptr_y , wrap_y,16,16,mb_x*16,mb_y*16, s->width   , s->height);
4320         ptr_y= ebuf;
4321         ff_emulated_edge_mc(ebuf+18*wrap_y  , ptr_cb, wrap_c, 8, mb_block_height, mb_x*8, mb_y*8, s->width>>1, s->height>>1);
4322         ptr_cb= ebuf+18*wrap_y;
4323         ff_emulated_edge_mc(ebuf+18*wrap_y+8, ptr_cr, wrap_c, 8, mb_block_height, mb_x*8, mb_y*8, s->width>>1, s->height>>1);
4324         ptr_cr= ebuf+18*wrap_y+8;
4325     }
4326
4327     if (s->mb_intra) {
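        /* frame vs. field DCT decision: compare the ildct_cmp score of the
           frame-ordered lines against the two separated fields and switch to
           field (interlaced) DCT only if it scores clearly better (the -400
           bias favours frame DCT) */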
4328         if(s->flags&CODEC_FLAG_INTERLACED_DCT){
4329             int progressive_score, interlaced_score;
4330
4331             s->interlaced_dct=0;
4332             progressive_score= s->dsp.ildct_cmp[4](s, ptr_y           , NULL, wrap_y, 8)
4333                               +s->dsp.ildct_cmp[4](s, ptr_y + wrap_y*8, NULL, wrap_y, 8) - 400;
4334
4335             if(progressive_score > 0){
4336                 interlaced_score = s->dsp.ildct_cmp[4](s, ptr_y           , NULL, wrap_y*2, 8)
4337                                   +s->dsp.ildct_cmp[4](s, ptr_y + wrap_y  , NULL, wrap_y*2, 8);
4338                 if(progressive_score > interlaced_score){
4339                     s->interlaced_dct=1;
4340
4341                     dct_offset= wrap_y;
4342                     wrap_y<<=1;
4343                     if (s->chroma_format == CHROMA_422)
4344                         wrap_c<<=1;
4345                 }
4346             }
4347         }
4348
4349         s->dsp.get_pixels(s->block[0], ptr_y                 , wrap_y);
4350         s->dsp.get_pixels(s->block[1], ptr_y              + 8, wrap_y);
4351         s->dsp.get_pixels(s->block[2], ptr_y + dct_offset    , wrap_y);
4352         s->dsp.get_pixels(s->block[3], ptr_y + dct_offset + 8, wrap_y);
4353
4354         if(s->flags&CODEC_FLAG_GRAY){
4355             skip_dct[4]= 1;
4356             skip_dct[5]= 1;
4357         }else{
4358             s->dsp.get_pixels(s->block[4], ptr_cb, wrap_c);
4359             s->dsp.get_pixels(s->block[5], ptr_cr, wrap_c);
4360             if(!s->chroma_y_shift){ /* 422 */
4361                 s->dsp.get_pixels(s->block[6], ptr_cb + (dct_offset>>1), wrap_c);
4362                 s->dsp.get_pixels(s->block[7], ptr_cr + (dct_offset>>1), wrap_c);
4363             }
4364         }
4365     }else{
4366         op_pixels_func (*op_pix)[4];
4367         qpel_mc_func (*op_qpix)[16];
4368         uint8_t *dest_y, *dest_cb, *dest_cr;
4369
4370         dest_y  = s->dest[0];
4371         dest_cb = s->dest[1];
4372         dest_cr = s->dest[2];
4373
4374         if ((!s->no_rounding) || s->pict_type==B_TYPE){
4375             op_pix = s->dsp.put_pixels_tab;
4376             op_qpix= s->dsp.put_qpel_pixels_tab;
4377         }else{
4378             op_pix = s->dsp.put_no_rnd_pixels_tab;
4379             op_qpix= s->dsp.put_no_rnd_qpel_pixels_tab;
4380         }
4381
4382         if (s->mv_dir & MV_DIR_FORWARD) {
4383             MPV_motion(s, dest_y, dest_cb, dest_cr, 0, s->last_picture.data, op_pix, op_qpix);
4384             op_pix = s->dsp.avg_pixels_tab;
4385             op_qpix= s->dsp.avg_qpel_pixels_tab;
4386         }
4387         if (s->mv_dir & MV_DIR_BACKWARD) {
4388             MPV_motion(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.data, op_pix, op_qpix);
4389         }
4390
4391         if(s->flags&CODEC_FLAG_INTERLACED_DCT){
4392             int progressive_score, interlaced_score;
4393
4394             s->interlaced_dct=0;
4395             progressive_score= s->dsp.ildct_cmp[0](s, dest_y           , ptr_y           , wrap_y, 8)
4396                               +s->dsp.ildct_cmp[0](s, dest_y + wrap_y*8, ptr_y + wrap_y*8, wrap_y, 8) - 400;
4397
4398             if(s->avctx->ildct_cmp == FF_CMP_VSSE) progressive_score -= 400;
4399
4400             if(progressive_score>0){
4401                 interlaced_score = s->dsp.ildct_cmp[0](s, dest_y           , ptr_y           , wrap_y*2, 8)
4402                                   +s->dsp.ildct_cmp[0](s, dest_y + wrap_y  , ptr_y + wrap_y  , wrap_y*2, 8);
4403
4404                 if(progressive_score > interlaced_score){
4405                     s->interlaced_dct=1;
4406
4407                     dct_offset= wrap_y;
4408                     wrap_y<<=1;
4409                     if (s->chroma_format == CHROMA_422)
4410                         wrap_c<<=1;
4411                 }
4412             }
4413         }
4414
4415         s->dsp.diff_pixels(s->block[0], ptr_y                 , dest_y                 , wrap_y);
4416         s->dsp.diff_pixels(s->block[1], ptr_y              + 8, dest_y              + 8, wrap_y);
4417         s->dsp.diff_pixels(s->block[2], ptr_y + dct_offset    , dest_y + dct_offset    , wrap_y);
4418         s->dsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8, dest_y + dct_offset + 8, wrap_y);
4419
4420         if(s->flags&CODEC_FLAG_GRAY){
4421             skip_dct[4]= 1;
4422             skip_dct[5]= 1;
4423         }else{
4424             s->dsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
4425             s->dsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
4426             if(!s->chroma_y_shift){ /* 422 */
4427                 s->dsp.diff_pixels(s->block[6], ptr_cb + (dct_offset>>1), dest_cb + (dct_offset>>1), wrap_c);
4428                 s->dsp.diff_pixels(s->block[7], ptr_cr + (dct_offset>>1), dest_cr + (dct_offset>>1), wrap_c);
4429             }
4430         }
4431         /* pre-quantization skip: if a block's motion-compensated residual is already very small, skip its DCT entirely */
4432         if(s->current_picture.mc_mb_var[s->mb_stride*mb_y+ mb_x]<2*s->qscale*s->qscale){
4433             //FIXME optimize
4434             if(s->dsp.sad[1](NULL, ptr_y               , dest_y               , wrap_y, 8) < 20*s->qscale) skip_dct[0]= 1;
4435             if(s->dsp.sad[1](NULL, ptr_y            + 8, dest_y            + 8, wrap_y, 8) < 20*s->qscale) skip_dct[1]= 1;
4436             if(s->dsp.sad[1](NULL, ptr_y +dct_offset   , dest_y +dct_offset   , wrap_y, 8) < 20*s->qscale) skip_dct[2]= 1;
4437             if(s->dsp.sad[1](NULL, ptr_y +dct_offset+ 8, dest_y +dct_offset+ 8, wrap_y, 8) < 20*s->qscale) skip_dct[3]= 1;
4438             if(s->dsp.sad[1](NULL, ptr_cb              , dest_cb              , wrap_c, 8) < 20*s->qscale) skip_dct[4]= 1;
4439             if(s->dsp.sad[1](NULL, ptr_cr              , dest_cr              , wrap_c, 8) < 20*s->qscale) skip_dct[5]= 1;
4440             if(!s->chroma_y_shift){ /* 422 */
4441                 if(s->dsp.sad[1](NULL, ptr_cb +(dct_offset>>1), dest_cb +(dct_offset>>1), wrap_c, 8) < 20*s->qscale) skip_dct[6]= 1;
4442                 if(s->dsp.sad[1](NULL, ptr_cr +(dct_offset>>1), dest_cr +(dct_offset>>1), wrap_c, 8) < 20*s->qscale) skip_dct[7]= 1;
4443             }
4444         }
4445     }
4446
4447     if(s->avctx->quantizer_noise_shaping){
4448         if(!skip_dct[0]) get_vissual_weight(weight[0], ptr_y                 , wrap_y);
4449         if(!skip_dct[1]) get_vissual_weight(weight[1], ptr_y              + 8, wrap_y);
4450         if(!skip_dct[2]) get_vissual_weight(weight[2], ptr_y + dct_offset    , wrap_y);
4451         if(!skip_dct[3]) get_vissual_weight(weight[3], ptr_y + dct_offset + 8, wrap_y);
4452         if(!skip_dct[4]) get_vissual_weight(weight[4], ptr_cb                , wrap_c);
4453         if(!skip_dct[5]) get_vissual_weight(weight[5], ptr_cr                , wrap_c);
4454         if(!s->chroma_y_shift){ /* 422 */
4455             if(!skip_dct[6]) get_vissual_weight(weight[6], ptr_cb + (dct_offset>>1), wrap_c);
4456             if(!skip_dct[7]) get_vissual_weight(weight[7], ptr_cr + (dct_offset>>1), wrap_c);
4457         }
4458         memcpy(orig[0], s->block[0], sizeof(DCTELEM)*64*mb_block_count);
4459     }
4460
4461     /* DCT & quantize */
4462     assert(s->out_format!=FMT_MJPEG || s->qscale==8);
4463     {
4464         for(i=0;i<mb_block_count;i++) {
4465             if(!skip_dct[i]){
4466                 int overflow;
4467                 s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow);
4468             // FIXME we could decide to change the quantizer instead of clipping
4469             // JS: I don't think that would be a good idea, it could lower quality instead
4470             //     of improving it. Only INTRADC clipping deserves changes in the quantizer
4471                 if (overflow) clip_coeffs(s, s->block[i], s->block_last_index[i]);
4472             }else
4473                 s->block_last_index[i]= -1;
4474         }
4475         if(s->avctx->quantizer_noise_shaping){
4476             for(i=0;i<mb_block_count;i++) {
4477                 if(!skip_dct[i]){
4478                     s->block_last_index[i] = dct_quantize_refine(s, s->block[i], weight[i], orig[i], i, s->qscale);
4479                 }
4480             }
4481         }
4482
4483         if(s->luma_elim_threshold && !s->mb_intra)
4484             for(i=0; i<4; i++)
4485                 dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
4486         if(s->chroma_elim_threshold && !s->mb_intra)
4487             for(i=4; i<mb_block_count; i++)
4488                 dct_single_coeff_elimination(s, i, s->chroma_elim_threshold);
4489
4490         if(s->flags & CODEC_FLAG_CBP_RD){
4491             for(i=0;i<mb_block_count;i++) {
4492                 if(s->block_last_index[i] == -1)
4493                     s->coded_score[i]= INT_MAX/256;
4494             }
4495         }
4496     }
4497
4498     if((s->flags&CODEC_FLAG_GRAY) && s->mb_intra){
4499         s->block_last_index[4]=
4500         s->block_last_index[5]= 0;
4501         s->block[4][0]=
4502         s->block[5][0]= (1024 + s->c_dc_scale/2)/ s->c_dc_scale;
4503     }
4504
4505     //FIXME the non-C quantize code returns an incorrect block_last_index
4506     if(s->alternate_scan && s->dct_quantize != dct_quantize_c){
4507         for(i=0; i<mb_block_count; i++){
4508             int j;
4509             if(s->block_last_index[i]>0){
4510                 for(j=63; j>0; j--){
4511                     if(s->block[i][ s->intra_scantable.permutated[j] ]) break;
4512                 }
4513                 s->block_last_index[i]= j;
4514             }
4515         }
4516     }
4517
4518     /* huffman encode */
4519     switch(s->codec_id){ //FIXME a function pointer could be slightly faster
4520     case CODEC_ID_MPEG1VIDEO:
4521     case CODEC_ID_MPEG2VIDEO:
4522         mpeg1_encode_mb(s, s->block, motion_x, motion_y); break;
4523     case CODEC_ID_MPEG4:
4524         mpeg4_encode_mb(s, s->block, motion_x, motion_y); break;
4525     case CODEC_ID_MSMPEG4V2:
4526     case CODEC_ID_MSMPEG4V3:
4527     case CODEC_ID_WMV1:
4528         msmpeg4_encode_mb(s, s->block, motion_x, motion_y); break;
4529     case CODEC_ID_WMV2:
4530          ff_wmv2_encode_mb(s, s->block, motion_x, motion_y); break;
4531 #ifdef CONFIG_H261_ENCODER
4532     case CODEC_ID_H261:
4533         ff_h261_encode_mb(s, s->block, motion_x, motion_y); break;
4534 #endif
4535     case CODEC_ID_H263:
4536     case CODEC_ID_H263P:
4537     case CODEC_ID_FLV1:
4538     case CODEC_ID_RV10:
4539     case CODEC_ID_RV20:
4540         h263_encode_mb(s, s->block, motion_x, motion_y); break;
4541     case CODEC_ID_MJPEG:
4542         mjpeg_encode_mb(s, s->block); break;
4543     default:
4544         assert(0);
4545     }
4546 }
4547
4548 static always_inline void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
4549 {
4550     if (s->chroma_format == CHROMA_420) encode_mb_internal(s, motion_x, motion_y,  8, 6);
4551     else                                encode_mb_internal(s, motion_x, motion_y, 16, 8);
4552 }
4553
4554 #endif //CONFIG_ENCODERS
4555
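/* release all internally or user allocated picture buffers and reset the
   parser/bitstream state, so decoding can restart cleanly (e.g. after a seek) */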
4556 void ff_mpeg_flush(AVCodecContext *avctx){
4557     int i;
4558     MpegEncContext *s = avctx->priv_data;
4559
4560     if(s==NULL || s->picture==NULL)
4561         return;
4562
4563     for(i=0; i<MAX_PICTURE_COUNT; i++){
4564        if(s->picture[i].data[0] && (   s->picture[i].type == FF_BUFFER_TYPE_INTERNAL
4565                                     || s->picture[i].type == FF_BUFFER_TYPE_USER))
4566         avctx->release_buffer(avctx, (AVFrame*)&s->picture[i]);
4567     }
4568     s->current_picture_ptr = s->last_picture_ptr = s->next_picture_ptr = NULL;
4569
4570     s->mb_x= s->mb_y= 0;
4571
4572     s->parse_context.state= -1;
4573     s->parse_context.frame_start_found= 0;
4574     s->parse_context.overread= 0;
4575     s->parse_context.overread_index= 0;
4576     s->parse_context.index= 0;
4577     s->parse_context.last_index= 0;
4578     s->bitstream_buffer_size=0;
4579 }
4580
4581 #ifdef CONFIG_ENCODERS
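/* append 'length' bits from src to the PutBitContext: short or unaligned
   data is written 16 bits at a time, otherwise the writer is byte aligned
   and the bulk of the data is memcpy'd, with the remaining <16 bits appended
   at the end */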
4582 void ff_copy_bits(PutBitContext *pb, uint8_t *src, int length)
4583 {
4584     const uint16_t *srcw= (uint16_t*)src;
4585     int words= length>>4;
4586     int bits= length&15;
4587     int i;
4588
4589     if(length==0) return;
4590
4591     if(words < 16){
4592         for(i=0; i<words; i++) put_bits(pb, 16, be2me_16(srcw[i]));
4593     }else if(put_bits_count(pb)&7){
4594         for(i=0; i<words; i++) put_bits(pb, 16, be2me_16(srcw[i]));
4595     }else{
4596         for(i=0; put_bits_count(pb)&31; i++)
4597             put_bits(pb, 8, src[i]);
4598         flush_put_bits(pb);
4599         memcpy(pbBufPtr(pb), src+i, 2*words-i);
4600         skip_put_bytes(pb, 2*words-i);
4601     }
4602
4603     put_bits(pb, bits, be2me_16(srcw[words])>>(16-bits));
4604 }
4605
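/* save/restore helpers for the rate-distortion macroblock type decision:
   copy_context_before_encode() resets the prediction state before a candidate
   is (re)encoded, copy_context_after_encode() keeps the state of the best
   candidate found so far */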
4606 static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int type){
4607     int i;
4608
4609     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
4610
4611     /* mpeg1 */
4612     d->mb_skip_run= s->mb_skip_run;
4613     for(i=0; i<3; i++)
4614         d->last_dc[i]= s->last_dc[i];
4615
4616     /* statistics */
4617     d->mv_bits= s->mv_bits;
4618     d->i_tex_bits= s->i_tex_bits;
4619     d->p_tex_bits= s->p_tex_bits;
4620     d->i_count= s->i_count;
4621     d->f_count= s->f_count;
4622     d->b_count= s->b_count;
4623     d->skip_count= s->skip_count;
4624     d->misc_bits= s->misc_bits;
4625     d->last_bits= 0;
4626
4627     d->mb_skipped= 0;
4628     d->qscale= s->qscale;
4629     d->dquant= s->dquant;
4630 }
4631
4632 static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){
4633     int i;
4634
4635     memcpy(d->mv, s->mv, 2*4*2*sizeof(int));
4636     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
4637
4638     /* mpeg1 */
4639     d->mb_skip_run= s->mb_skip_run;
4640     for(i=0; i<3; i++)
4641         d->last_dc[i]= s->last_dc[i];
4642
4643     /* statistics */
4644     d->mv_bits= s->mv_bits;
4645     d->i_tex_bits= s->i_tex_bits;
4646     d->p_tex_bits= s->p_tex_bits;
4647     d->i_count= s->i_count;
4648     d->f_count= s->f_count;
4649     d->b_count= s->b_count;
4650     d->skip_count= s->skip_count;
4651     d->misc_bits= s->misc_bits;
4652
4653     d->mb_intra= s->mb_intra;
4654     d->mb_skipped= s->mb_skipped;
4655     d->mv_type= s->mv_type;
4656     d->mv_dir= s->mv_dir;
4657     d->pb= s->pb;
4658     if(s->data_partitioning){
4659         d->pb2= s->pb2;
4660         d->tex_pb= s->tex_pb;
4661     }
4662     d->block= s->block;
4663     for(i=0; i<8; i++)
4664         d->block_last_index[i]= s->block_last_index[i];
4665     d->interlaced_dct= s->interlaced_dct;
4666     d->qscale= s->qscale;
4667 }
4668
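/* encode one macroblock candidate (mv_dir/mv_type/mb_intra and the motion
   vectors must already be set up) into one of two scratch bit buffers and
   compute its cost: the bit count, or bits*lambda2 + (SSE << FF_LAMBDA_SHIFT)
   for full RD mode; if the cost beats *dmin the candidate is recorded as the
   new best and the other scratch buffer is used for the next try */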
4669 static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type,
4670                            PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2],
4671                            int *dmin, int *next_block, int motion_x, int motion_y)
4672 {
4673     int score;
4674     uint8_t *dest_backup[3];
4675
4676     copy_context_before_encode(s, backup, type);
4677
4678     s->block= s->blocks[*next_block];
4679     s->pb= pb[*next_block];
4680     if(s->data_partitioning){
4681         s->pb2   = pb2   [*next_block];
4682         s->tex_pb= tex_pb[*next_block];
4683     }
4684
4685     if(*next_block){
4686         memcpy(dest_backup, s->dest, sizeof(s->dest));
4687         s->dest[0] = s->rd_scratchpad;
4688         s->dest[1] = s->rd_scratchpad + 16*s->linesize;
4689         s->dest[2] = s->rd_scratchpad + 16*s->linesize + 8;
4690         assert(s->linesize >= 32); //FIXME
4691     }
4692
4693     encode_mb(s, motion_x, motion_y);
4694
4695     score= put_bits_count(&s->pb);
4696     if(s->data_partitioning){
4697         score+= put_bits_count(&s->pb2);
4698         score+= put_bits_count(&s->tex_pb);
4699     }
4700
4701     if(s->avctx->mb_decision == FF_MB_DECISION_RD){
4702         MPV_decode_mb(s, s->block);
4703
4704         score *= s->lambda2;
4705         score += sse_mb(s) << FF_LAMBDA_SHIFT;
4706     }
4707
4708     if(*next_block){
4709         memcpy(s->dest, dest_backup, sizeof(s->dest));
4710     }
4711
4712     if(score<*dmin){
4713         *dmin= score;
4714         *next_block^=1;
4715
4716         copy_context_after_encode(best, s, type);
4717     }
4718 }
4719
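/* sum of squared differences between two w x h blocks; 16x16 and 8x8 use the
   DSP sse functions, everything else falls back to a scalar loop */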
4720 static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride){
4721     uint32_t *sq = squareTbl + 256;
4722     int acc=0;
4723     int x,y;
4724
4725     if(w==16 && h==16)
4726         return s->dsp.sse[0](NULL, src1, src2, stride, 16);
4727     else if(w==8 && h==8)
4728         return s->dsp.sse[1](NULL, src1, src2, stride, 8);
4729
4730     for(y=0; y<h; y++){
4731         for(x=0; x<w; x++){
4732             acc+= sq[src1[x + y*stride] - src2[x + y*stride]];
4733         }
4734     }
4735
4736     assert(acc>=0);
4737
4738     return acc;
4739 }
4740
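/* distortion of the current macroblock: SSE (or NSSE when selected as mb_cmp)
   between the source picture and the reconstruction in s->dest[], with the
   block size clipped at the right/bottom picture border */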
4741 static int sse_mb(MpegEncContext *s){
4742     int w= 16;
4743     int h= 16;
4744
4745     if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
4746     if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
4747
4748     if(w==16 && h==16)
4749       if(s->avctx->mb_cmp == FF_CMP_NSSE){
4750         return  s->dsp.nsse[0](s, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
4751                +s->dsp.nsse[1](s, s->new_picture.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
4752                +s->dsp.nsse[1](s, s->new_picture.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
4753       }else{
4754         return  s->dsp.sse[0](NULL, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
4755                +s->dsp.sse[1](NULL, s->new_picture.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
4756                +s->dsp.sse[1](NULL, s->new_picture.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
4757       }
4758     else
4759         return  sse(s, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], w, h, s->linesize)
4760                +sse(s, s->new_picture.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize)
4761                +sse(s, s->new_picture.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize);
4762 }
4763
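/* slice-threaded motion estimation pre-pass: scan the assigned macroblock
   rows bottom-up, right-to-left, using the pre_dia_size search diamond */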
4764 static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){
4765     MpegEncContext *s= arg;
4766
4767
4768     s->me.pre_pass=1;
4769     s->me.dia_size= s->avctx->pre_dia_size;
4770     s->first_slice_line=1;
4771     for(s->mb_y= s->end_mb_y-1; s->mb_y >= s->start_mb_y; s->mb_y--) {
4772         for(s->mb_x=s->mb_width-1; s->mb_x >=0 ;s->mb_x--) {
4773             ff_pre_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
4774         }
4775         s->first_slice_line=0;
4776     }
4777
4778     s->me.pre_pass=0;
4779
4780     return 0;
4781 }
4782
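/* slice-threaded motion estimation: for every macroblock of the assigned rows
   run the P- or B-frame estimator and store the motion vectors and mb_type in
   the context */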
4783 static int estimate_motion_thread(AVCodecContext *c, void *arg){
4784     MpegEncContext *s= arg;
4785
4786     s->me.dia_size= s->avctx->dia_size;
4787     s->first_slice_line=1;
4788     for(s->mb_y= s->start_mb_y; s->mb_y < s->end_mb_y; s->mb_y++) {
4789         s->mb_x=0; //for block init below
4790         ff_init_block_index(s);
4791         for(s->mb_x=0; s->mb_x < s->mb_width; s->mb_x++) {
4792             s->block_index[0]+=2;
4793             s->block_index[1]+=2;
4794             s->block_index[2]+=2;
4795             s->block_index[3]+=2;
4796
4797             /* compute motion vector & mb_type and store in context */
4798             if(s->pict_type==B_TYPE)
4799                 ff_estimate_b_frame_motion(s, s->mb_x, s->mb_y);
4800             else
4801                 ff_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
4802         }
4803         s->first_slice_line=0;
4804     }
4805     return 0;
4806 }
4807
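/* compute the spatial variance and mean of every source luma macroblock and
   accumulate the per-slice variance sum for the adaptive quantization / rate
   control code */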
4808 static int mb_var_thread(AVCodecContext *c, void *arg){
4809     MpegEncContext *s= arg;
4810     int mb_x, mb_y;
4811
4812     for(mb_y=s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
4813         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
4814             int xx = mb_x * 16;
4815             int yy = mb_y * 16;
4816             uint8_t *pix = s->new_picture.data[0] + (yy * s->linesize) + xx;
4817             int varc;
4818             int sum = s->dsp.pix_sum(pix, s->linesize);
4819
4820             varc = (s->dsp.pix_norm1(pix, s->linesize) - (((unsigned)(sum*sum))>>8) + 500 + 128)>>8;
4821
4822             s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc;
4823             s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
4824             s->me.mb_var_sum_temp    += varc;
4825         }
4826     }
4827     return 0;
4828 }
4829
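/* terminate the current slice: merge the MPEG-4 data partitions and add
   stuffing (or MJPEG stuffing), then byte-align and flush the bitstream
   writer and update the pass-1 statistics */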
4830 static void write_slice_end(MpegEncContext *s){
4831     if(s->codec_id==CODEC_ID_MPEG4){
4832         if(s->partitioned_frame){
4833             ff_mpeg4_merge_partitions(s);
4834         }
4835
4836         ff_mpeg4_stuffing(&s->pb);
4837     }else if(s->out_format == FMT_MJPEG){
4838         ff_mjpeg_stuffing(&s->pb);
4839     }
4840
4841     align_put_bits(&s->pb);
4842     flush_put_bits(&s->pb);
4843
4844     if((s->flags&CODEC_FLAG_PASS1) && !s->partitioned_frame)
4845         s->misc_bits+= get_bits_diff(s);
4846 }
4847
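/* per-slice encoding loop: for each macroblock, start a new GOB/video packet
   when needed, then either try all candidate macroblock types with
   encode_mb_hq() and keep the cheapest one, or directly encode the single
   possible type */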
4848 static int encode_thread(AVCodecContext *c, void *arg){
4849     MpegEncContext *s= arg;
4850     int mb_x, mb_y, pdif = 0;
4851     int i, j;
4852     MpegEncContext best_s, backup_s;
4853     uint8_t bit_buf[2][MAX_MB_BYTES];
4854     uint8_t bit_buf2[2][MAX_MB_BYTES];
4855     uint8_t bit_buf_tex[2][MAX_MB_BYTES];
4856     PutBitContext pb[2], pb2[2], tex_pb[2];
4857 //printf("%d->%d\n", s->resync_mb_y, s->end_mb_y);
4858
4859     for(i=0; i<2; i++){
4860         init_put_bits(&pb    [i], bit_buf    [i], MAX_MB_BYTES);
4861         init_put_bits(&pb2   [i], bit_buf2   [i], MAX_MB_BYTES);
4862         init_put_bits(&tex_pb[i], bit_buf_tex[i], MAX_MB_BYTES);
4863     }
4864
4865     s->last_bits= put_bits_count(&s->pb);
4866     s->mv_bits=0;
4867     s->misc_bits=0;
4868     s->i_tex_bits=0;
4869     s->p_tex_bits=0;
4870     s->i_count=0;
4871     s->f_count=0;
4872     s->b_count=0;
4873     s->skip_count=0;
4874
4875     for(i=0; i<3; i++){
4876         /* init last dc values */
4877         /* note: quant matrix value (8) is implied here */
4878         s->last_dc[i] = 128 << s->intra_dc_precision;
4879
4880         s->current_picture.error[i] = 0;
4881     }
4882     s->mb_skip_run = 0;
4883     memset(s->last_mv, 0, sizeof(s->last_mv));
4884
4885     s->last_mv_dir = 0;
4886
4887     switch(s->codec_id){
4888     case CODEC_ID_H263:
4889     case CODEC_ID_H263P:
4890     case CODEC_ID_FLV1:
4891         s->gob_index = ff_h263_get_gob_height(s);
4892         break;
4893     case CODEC_ID_MPEG4:
4894         if(s->partitioned_frame)
4895             ff_mpeg4_init_partitions(s);
4896         break;
4897     }
4898
4899     s->resync_mb_x=0;
4900     s->resync_mb_y=0;
4901     s->first_slice_line = 1;
4902     s->ptr_lastgob = s->pb.buf;
4903     for(mb_y= s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
4904 //    printf("row %d at %X\n", s->mb_y, (int)s);
4905         s->mb_x=0;
4906         s->mb_y= mb_y;
4907
4908         ff_set_qscale(s, s->qscale);
4909         ff_init_block_index(s);
4910
4911         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
4912             int xy= mb_y*s->mb_stride + mb_x; // removed const, H261 needs to adjust this
4913             int mb_type= s->mb_type[xy];
4914 //            int d;
4915             int dmin= INT_MAX;
4916             int dir;
4917
4918             if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < MAX_MB_BYTES){
4919                 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
4920                 return -1;
4921             }
4922             if(s->data_partitioning){
4923                 if(   s->pb2   .buf_end - s->pb2   .buf - (put_bits_count(&s->    pb2)>>3) < MAX_MB_BYTES
4924                    || s->tex_pb.buf_end - s->tex_pb.buf - (put_bits_count(&s->tex_pb )>>3) < MAX_MB_BYTES){
4925                     av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
4926                     return -1;
4927                 }
4928             }
4929
4930             s->mb_x = mb_x;
4931             s->mb_y = mb_y;  // moved into loop, can get changed by H.261
4932             ff_update_block_index(s);
4933
4934 #ifdef CONFIG_H261_ENCODER
4935             if(s->codec_id == CODEC_ID_H261){
4936                 ff_h261_reorder_mb_index(s);
4937                 xy= s->mb_y*s->mb_stride + s->mb_x;
4938                 mb_type= s->mb_type[xy];
4939             }
4940 #endif
4941
4942             /* write gob / video packet header  */
4943             if(s->rtp_mode){
4944                 int current_packet_size, is_gob_start;
4945
4946                 current_packet_size= ((put_bits_count(&s->pb)+7)>>3) - (s->ptr_lastgob - s->pb.buf);
4947
4948                 is_gob_start= s->avctx->rtp_payload_size && current_packet_size >= s->avctx->rtp_payload_size && mb_y + mb_x>0;
4949
4950                 if(s->start_mb_y == mb_y && mb_y > 0 && mb_x==0) is_gob_start=1;
4951
4952                 switch(s->codec_id){
4953                 case CODEC_ID_H263:
4954                 case CODEC_ID_H263P:
4955                     if(!s->h263_slice_structured)
4956                         if(s->mb_x || s->mb_y%s->gob_index) is_gob_start=0;
4957                     break;
4958                 case CODEC_ID_MPEG2VIDEO:
4959                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
4960                 case CODEC_ID_MPEG1VIDEO:
4961                     if(s->mb_skip_run) is_gob_start=0;
4962                     break;
4963                 }
4964
4965                 if(is_gob_start){
4966                     if(s->start_mb_y != mb_y || mb_x!=0){
4967                         write_slice_end(s);
4968
4969                         if(s->codec_id==CODEC_ID_MPEG4 && s->partitioned_frame){
4970                             ff_mpeg4_init_partitions(s);
4971                         }
4972                     }
4973
4974                     assert((put_bits_count(&s->pb)&7) == 0);
4975                     current_packet_size= pbBufPtr(&s->pb) - s->ptr_lastgob;
4976
4977                     if(s->avctx->error_rate && s->resync_mb_x + s->resync_mb_y > 0){
4978                         int r= put_bits_count(&s->pb)/8 + s->picture_number + 16 + s->mb_x + s->mb_y;
4979                         int d= 100 / s->avctx->error_rate;
4980                         if(r % d == 0){
4981                             current_packet_size=0;
4982 #ifndef ALT_BITSTREAM_WRITER
4983                             s->pb.buf_ptr= s->ptr_lastgob;
4984 #endif
4985                             assert(pbBufPtr(&s->pb) == s->ptr_lastgob);
4986                         }
4987                     }
4988
4989                     if (s->avctx->rtp_callback){
4990                         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width + mb_x - s->resync_mb_x;
4991                         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, current_packet_size, number_mb);
4992                     }
4993
4994                     switch(s->codec_id){
4995                     case CODEC_ID_MPEG4:
4996                         ff_mpeg4_encode_video_packet_header(s);
4997                         ff_mpeg4_clean_buffers(s);
4998                     break;
4999                     case CODEC_ID_MPEG1VIDEO:
5000                     case CODEC_ID_MPEG2VIDEO:
5001                         ff_mpeg1_encode_slice_header(s);
5002                         ff_mpeg1_clean_buffers(s);
5003                     break;
5004                     case CODEC_ID_H263:
5005                     case CODEC_ID_H263P:
5006                         h263_encode_gob_header(s, mb_y);
5007                     break;
5008                     }
5009
5010                     if(s->flags&CODEC_FLAG_PASS1){
5011                         int bits= put_bits_count(&s->pb);
5012                         s->misc_bits+= bits - s->last_bits;
5013                         s->last_bits= bits;
5014                     }
5015
5016                     s->ptr_lastgob += current_packet_size;
5017                     s->first_slice_line=1;
5018                     s->resync_mb_x=mb_x;
5019                     s->resync_mb_y=mb_y;
5020                 }
5021             }
5022
5023             if(  (s->resync_mb_x   == s->mb_x)
5024                && s->resync_mb_y+1 == s->mb_y){
5025                 s->first_slice_line=0;
5026             }
5027
5028             s->mb_skipped=0;
5029             s->dquant=0; //only for QP_RD
5030
5031             if(mb_type & (mb_type-1) || (s->flags & CODEC_FLAG_QP_RD)){ // more than 1 MB type possible or CODEC_FLAG_QP_RD
5032                 int next_block=0;
5033                 int pb_bits_count, pb2_bits_count, tex_pb_bits_count;
5034
5035                 copy_context_before_encode(&backup_s, s, -1);
5036                 backup_s.pb= s->pb;
5037                 best_s.data_partitioning= s->data_partitioning;
5038                 best_s.partitioned_frame= s->partitioned_frame;
5039                 if(s->data_partitioning){
5040                     backup_s.pb2= s->pb2;
5041                     backup_s.tex_pb= s->tex_pb;
5042                 }
5043
5044                 if(mb_type&CANDIDATE_MB_TYPE_INTER){
5045                     s->mv_dir = MV_DIR_FORWARD;
5046                     s->mv_type = MV_TYPE_16X16;
5047                     s->mb_intra= 0;
5048                     s->mv[0][0][0] = s->p_mv_table[xy][0];
5049                     s->mv[0][0][1] = s->p_mv_table[xy][1];
5050                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER, pb, pb2, tex_pb,
5051                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
5052                 }
5053                 if(mb_type&CANDIDATE_MB_TYPE_INTER_I){
5054                     s->mv_dir = MV_DIR_FORWARD;
5055                     s->mv_type = MV_TYPE_FIELD;
5056                     s->mb_intra= 0;
5057                     for(i=0; i<2; i++){
5058                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
5059                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
5060                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
5061                     }
5062                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER_I, pb, pb2, tex_pb,
5063                                  &dmin, &next_block, 0, 0);
5064                 }
5065                 if(mb_type&CANDIDATE_MB_TYPE_SKIPPED){
5066                     s->mv_dir = MV_DIR_FORWARD;
5067                     s->mv_type = MV_TYPE_16X16;
5068                     s->mb_intra= 0;
5069                     s->mv[0][0][0] = 0;
5070                     s->mv[0][0][1] = 0;
5071                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_SKIPPED, pb, pb2, tex_pb,
5072                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
5073                 }
5074                 if(mb_type&CANDIDATE_MB_TYPE_INTER4V){
5075                     s->mv_dir = MV_DIR_FORWARD;
5076                     s->mv_type = MV_TYPE_8X8;
5077                     s->mb_intra= 0;
5078                     for(i=0; i<4; i++){
5079                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
5080                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
5081                     }
5082                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER4V, pb, pb2, tex_pb,
5083                                  &dmin, &next_block, 0, 0);
5084                 }
5085                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD){
5086                     s->mv_dir = MV_DIR_FORWARD;
5087                     s->mv_type = MV_TYPE_16X16;
5088                     s->mb_intra= 0;
5089                     s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
5090                     s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
5091                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD, pb, pb2, tex_pb,
5092                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
5093                 }
5094                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD){
5095                     s->mv_dir = MV_DIR_BACKWARD;
5096                     s->mv_type = MV_TYPE_16X16;
5097                     s->mb_intra= 0;
5098                     s->mv[1][0][0] = s->b_back_mv_table[xy][0];
5099                     s->mv[1][0][1] = s->b_back_mv_table[xy][1];
5100                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD, pb, pb2, tex_pb,
5101                                  &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]);
5102                 }
5103                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR){
5104                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
5105                     s->mv_type = MV_TYPE_16X16;
5106                     s->mb_intra= 0;
5107                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
5108                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
5109                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
5110                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
5111                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR, pb, pb2, tex_pb,
5112                                  &dmin, &next_block, 0, 0);
5113                 }
5114                 if(mb_type&CANDIDATE_MB_TYPE_DIRECT){
5115                     int mx= s->b_direct_mv_table[xy][0];
5116                     int my= s->b_direct_mv_table[xy][1];
5117
5118                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
5119                     s->mb_intra= 0;
5120                     ff_mpeg4_set_direct_mv(s, mx, my);
5121                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
5122                                  &dmin, &next_block, mx, my);
5123                 }
5124                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD_I){
5125                     s->mv_dir = MV_DIR_FORWARD;
5126                     s->mv_type = MV_TYPE_FIELD;
5127                     s->mb_intra= 0;
5128                     for(i=0; i<2; i++){
5129                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
5130                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
5131                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
5132                     }
5133                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD_I, pb, pb2, tex_pb,
5134                                  &dmin, &next_block, 0, 0);
5135                 }
5136                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD_I){
5137                     s->mv_dir = MV_DIR_BACKWARD;
5138                     s->mv_type = MV_TYPE_FIELD;
5139                     s->mb_intra= 0;
5140                     for(i=0; i<2; i++){
5141                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
5142                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
5143                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
5144                     }
5145                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD_I, pb, pb2, tex_pb,
5146                                  &dmin, &next_block, 0, 0);
5147                 }
5148                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR_I){
5149                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
5150                     s->mv_type = MV_TYPE_FIELD;
5151                     s->mb_intra= 0;
5152                     for(dir=0; dir<2; dir++){
5153                         for(i=0; i<2; i++){
5154                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
5155                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
5156                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
5157                         }
5158                     }
5159                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR_I, pb, pb2, tex_pb,
5160                                  &dmin, &next_block, 0, 0);
5161                 }
5162                 if(mb_type&CANDIDATE_MB_TYPE_INTRA){
5163                     s->mv_dir = 0;
5164                     s->mv_type = MV_TYPE_16X16;
5165                     s->mb_intra= 1;
5166                     s->mv[0][0][0] = 0;
5167                     s->mv[0][0][1] = 0;
5168                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTRA, pb, pb2, tex_pb,
5169                                  &dmin, &next_block, 0, 0);
5170                     if(s->h263_pred || s->h263_aic){
5171                         if(best_s.mb_intra)
5172                             s->mbintra_table[mb_x + mb_y*s->mb_stride]=1;
5173                         else
5174                             ff_clean_intra_table_entries(s); //old mode?
5175                     }
5176                 }
5177
5178                 if(s->flags & CODEC_FLAG_QP_RD){
5179                     if(best_s.mv_type==MV_TYPE_16X16 && !(best_s.mv_dir&MV_DIRECT)){
5180                         const int last_qp= backup_s.qscale;
5181                         int dquant, dir, qp, dc[6];
5182                         DCTELEM ac[6][16];
5183                         const int mvdir= (best_s.mv_dir&MV_DIR_BACKWARD) ? 1 : 0;
5184
5185                         assert(backup_s.dquant == 0);
5186
5187                         //FIXME intra
5188                         s->mv_dir= best_s.mv_dir;
5189                         s->mv_type = MV_TYPE_16X16;
5190                         s->mb_intra= best_s.mb_intra;
5191                         s->mv[0][0][0] = best_s.mv[0][0][0];
5192                         s->mv[0][0][1] = best_s.mv[0][0][1];
5193                         s->mv[1][0][0] = best_s.mv[1][0][0];
5194                         s->mv[1][0][1] = best_s.mv[1][0][1];
5195
5196                         dir= s->pict_type == B_TYPE ? 2 : 1;
5197                         if(last_qp + dir > s->avctx->qmax) dir= -dir;
5198                         for(dquant= dir; dquant<=2 && dquant>=-2; dquant += dir){
5199                             qp= last_qp + dquant;
5200                             if(qp < s->avctx->qmin || qp > s->avctx->qmax)
5201                                 break;
5202                             backup_s.dquant= dquant;
5203                             if(s->mb_intra && s->dc_val[0]){
5204                                 for(i=0; i<6; i++){
5205                                     dc[i]= s->dc_val[0][ s->block_index[i] ];
5206                                     memcpy(ac[i], s->ac_val[0][s->block_index[i]], sizeof(DCTELEM)*16);
5207                                 }
5208                             }
5209
5210                             encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
5211                                          &dmin, &next_block, s->mv[mvdir][0][0], s->mv[mvdir][0][1]);
5212                             if(best_s.qscale != qp){
5213                                 if(s->mb_intra && s->dc_val[0]){
5214                                     for(i=0; i<6; i++){
5215                                         s->dc_val[0][ s->block_index[i] ]= dc[i];
5216                                         memcpy(s->ac_val[0][s->block_index[i]], ac[i], sizeof(DCTELEM)*16);
5217                                     }
5218                                 }
5219                                 if(dir > 0 && dquant==dir){
5220                                     dquant= 0;
5221                                     dir= -dir;
5222                                 }else
5223                                     break;
5224                             }
5225                         }
5226                         qp= best_s.qscale;
5227                         s->current_picture.qscale_table[xy]= qp;
5228                     }
5229                 }
5230
5231                 copy_context_after_encode(s, &best_s, -1);
5232
5233                 pb_bits_count= put_bits_count(&s->pb);
5234                 flush_put_bits(&s->pb);
5235                 ff_copy_bits(&backup_s.pb, bit_buf[next_block^1], pb_bits_count);
5236                 s->pb= backup_s.pb;
5237
5238                 if(s->data_partitioning){
5239                     pb2_bits_count= put_bits_count(&s->pb2);
5240                     flush_put_bits(&s->pb2);
5241                     ff_copy_bits(&backup_s.pb2, bit_buf2[next_block^1], pb2_bits_count);
5242                     s->pb2= backup_s.pb2;
5243
5244                     tex_pb_bits_count= put_bits_count(&s->tex_pb);
5245                     flush_put_bits(&s->tex_pb);
5246                     ff_copy_bits(&backup_s.tex_pb, bit_buf_tex[next_block^1], tex_pb_bits_count);
5247                     s->tex_pb= backup_s.tex_pb;
5248                 }
5249                 s->last_bits= put_bits_count(&s->pb);
5250
5251                 if (s->out_format == FMT_H263 && s->pict_type!=B_TYPE)
5252                     ff_h263_update_motion_val(s);
5253
5254                 if(next_block==0){ //FIXME 16 vs linesize16
5255                     s->dsp.put_pixels_tab[0][0](s->dest[0], s->rd_scratchpad                     , s->linesize  ,16);
5256                     s->dsp.put_pixels_tab[1][0](s->dest[1], s->rd_scratchpad + 16*s->linesize    , s->uvlinesize, 8);
5257                     s->dsp.put_pixels_tab[1][0](s->dest[2], s->rd_scratchpad + 16*s->linesize + 8, s->uvlinesize, 8);
5258                 }
5259
5260                 if(s->avctx->mb_decision == FF_MB_DECISION_BITS)
5261                     MPV_decode_mb(s, s->block);
5262             } else {
5263                 int motion_x, motion_y;
5264                 s->mv_type=MV_TYPE_16X16;
5265                 // only one MB-Type possible
5266
5267                 switch(mb_type){
5268                 case CANDIDATE_MB_TYPE_INTRA:
5269                     s->mv_dir = 0;
5270                     s->mb_intra= 1;
5271                     motion_x= s->mv[0][0][0] = 0;
5272                     motion_y= s->mv[0][0][1] = 0;
5273                     break;
5274                 case CANDIDATE_MB_TYPE_INTER:
5275                     s->mv_dir = MV_DIR_FORWARD;
5276                     s->mb_intra= 0;
5277                     motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
5278                     motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
5279                     break;
5280                 case CANDIDATE_MB_TYPE_INTER_I:
5281                     s->mv_dir = MV_DIR_FORWARD;
5282                     s->mv_type = MV_TYPE_FIELD;
5283                     s->mb_intra= 0;
5284                     for(i=0; i<2; i++){
5285                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
5286                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
5287                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
5288                     }
5289                     motion_x = motion_y = 0;
5290                     break;
5291                 case CANDIDATE_MB_TYPE_INTER4V:
5292                     s->mv_dir = MV_DIR_FORWARD;
5293                     s->mv_type = MV_TYPE_8X8;
5294                     s->mb_intra= 0;
5295                     for(i=0; i<4; i++){
5296                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
5297                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
5298                     }
5299                     motion_x= motion_y= 0;
5300                     break;
5301                 case CANDIDATE_MB_TYPE_DIRECT:
5302                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
5303                     s->mb_intra= 0;
5304                     motion_x=s->b_direct_mv_table[xy][0];
5305                     motion_y=s->b_direct_mv_table[xy][1];
5306                     ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
5307                     break;
5308                 case CANDIDATE_MB_TYPE_BIDIR:
5309                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
5310                     s->mb_intra= 0;
5311                     motion_x=0;
5312                     motion_y=0;
5313                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
5314                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
5315                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
5316                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
5317                     break;
5318                 case CANDIDATE_MB_TYPE_BACKWARD:
5319                     s->mv_dir = MV_DIR_BACKWARD;
5320                     s->mb_intra= 0;
5321                     motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
5322                     motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
5323                     break;
5324                 case CANDIDATE_MB_TYPE_FORWARD:
5325                     s->mv_dir = MV_DIR_FORWARD;
5326                     s->mb_intra= 0;
5327                     motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
5328                     motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
5329 //                    printf(" %d %d ", motion_x, motion_y);
5330                     break;
5331                 case CANDIDATE_MB_TYPE_FORWARD_I:
5332                     s->mv_dir = MV_DIR_FORWARD;
5333                     s->mv_type = MV_TYPE_FIELD;
5334                     s->mb_intra= 0;
5335                     for(i=0; i<2; i++){
5336                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
5337                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
5338                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
5339                     }
5340                     motion_x=motion_y=0;
5341                     break;
5342                 case CANDIDATE_MB_TYPE_BACKWARD_I:
5343                     s->mv_dir = MV_DIR_BACKWARD;
5344                     s->mv_type = MV_TYPE_FIELD;
5345                     s->mb_intra= 0;
5346                     for(i=0; i<2; i++){
5347                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
5348                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
5349                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
5350                     }
5351                     motion_x=motion_y=0;
5352                     break;
5353                 case CANDIDATE_MB_TYPE_BIDIR_I:
5354                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
5355                     s->mv_type = MV_TYPE_FIELD;
5356                     s->mb_intra= 0;
5357                     for(dir=0; dir<2; dir++){
5358                         for(i=0; i<2; i++){
5359                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
5360                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
5361                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
5362                         }
5363                     }
5364                     motion_x=motion_y=0;
5365                     break;
5366                 default:
5367                     motion_x=motion_y=0; //gcc warning fix
5368                     av_log(s->avctx, AV_LOG_ERROR, "illegal MB type\n");
5369                 }
5370
5371                 encode_mb(s, motion_x, motion_y);
5372
5373                 // RAL: Update last macroblock type
5374                 s->last_mv_dir = s->mv_dir;
5375
5376                 if (s->out_format == FMT_H263 && s->pict_type!=B_TYPE)
5377                     ff_h263_update_motion_val(s);
5378
5379                 MPV_decode_mb(s, s->block);
5380             }
5381
5382             /* clean the MV table in I/P/S frames so that direct mode in B-frames works */
5383             if(s->mb_intra /* && I,P,S_TYPE */){
5384                 s->p_mv_table[xy][0]=0;
5385                 s->p_mv_table[xy][1]=0;
5386             }
5387
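            /* accumulate per-plane sums of squared error against the source;
               these totals are turned into PSNR figures when encoding ends */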
5388             if(s->flags&CODEC_FLAG_PSNR){
5389                 int w= 16;
5390                 int h= 16;
5391
5392                 if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
5393                 if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
5394
5395                 s->current_picture.error[0] += sse(
5396                     s, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
5397                     s->dest[0], w, h, s->linesize);
5398                 s->current_picture.error[1] += sse(
5399                     s, s->new_picture.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,
5400                     s->dest[1], w>>1, h>>1, s->uvlinesize);
5401                 s->current_picture.error[2] += sse(
5402                     s, s->new_picture    .data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,
5403                     s->dest[2], w>>1, h>>1, s->uvlinesize);
5404             }
5405             if(s->loop_filter){
5406                 if(s->out_format == FMT_H263)
5407                     ff_h263_loop_filter(s);
5408             }
5409 //printf("MB %d %d bits\n", s->mb_x+s->mb_y*s->mb_stride, put_bits_count(&s->pb));
5410         }
5411     }
5412
5413     //not pretty, but this must be written before flushing, so it has to be here
5414     if (s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type == I_TYPE)
5415         msmpeg4_encode_ext_header(s);
5416
5417     write_slice_end(s);
5418
5419     /* Send the last GOB if RTP */
5420     if (s->avctx->rtp_callback) {
5421         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width - s->resync_mb_x;
5422         pdif = pbBufPtr(&s->pb) - s->ptr_lastgob;
5423         /* Call the RTP callback to send the last GOB */
5424         emms_c();
5425         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, pdif, number_mb);
5426     }
5427
5428     return 0;
5429 }
5430
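/**
 * MERGE() adds a per-frame counter from a worker-thread context into the
 * main context and clears it in the source, so statistics gathered by the
 * slice threads survive into the main MpegEncContext.
 */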
5431 #define MERGE(field) dst->field += src->field; src->field=0
5432 static void merge_context_after_me(MpegEncContext *dst, MpegEncContext *src){
5433     MERGE(me.scene_change_score);
5434     MERGE(me.mc_mb_var_sum_temp);
5435     MERGE(me.mb_var_sum_temp);
5436 }
5437
5438 static void merge_context_after_encode(MpegEncContext *dst, MpegEncContext *src){
5439     int i;
5440
5441     MERGE(dct_count[0]); //note, the other dct vars are not part of the context
5442     MERGE(dct_count[1]);
5443     MERGE(mv_bits);
5444     MERGE(i_tex_bits);
5445     MERGE(p_tex_bits);
5446     MERGE(i_count);
5447     MERGE(f_count);
5448     MERGE(b_count);
5449     MERGE(skip_count);
5450     MERGE(misc_bits);
5451     MERGE(error_count);
5452     MERGE(padding_bug_score);
5453     MERGE(current_picture.error[0]);
5454     MERGE(current_picture.error[1]);
5455     MERGE(current_picture.error[2]);
5456
5457     if(dst->avctx->noise_reduction){
5458         for(i=0; i<64; i++){
5459             MERGE(dct_error_sum[0][i]);
5460             MERGE(dct_error_sum[1][i]);
5461         }
5462     }
5463
5464     assert(put_bits_count(&src->pb) % 8 ==0);
5465     assert(put_bits_count(&dst->pb) % 8 ==0);
5466     ff_copy_bits(&dst->pb, src->pb.buf, put_bits_count(&src->pb));
5467     flush_put_bits(&dst->pb);
5468 }
5469
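/**
 * Pick the frame-level quantizer: query the rate controller unless a fixed
 * qscale was requested, clean up the per-MB qscale table when adaptive
 * quantization is used, and derive lambda from the chosen quality.
 */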
5470 static void estimate_qp(MpegEncContext *s, int dry_run){
5471     if (!s->fixed_qscale)
5472         s->current_picture_ptr->quality=
5473         s->current_picture.quality = ff_rate_estimate_qscale(s, dry_run);
5474
5475     if(s->adaptive_quant){
5476         switch(s->codec_id){
5477         case CODEC_ID_MPEG4:
5478             ff_clean_mpeg4_qscales(s);
5479             break;
5480         case CODEC_ID_H263:
5481         case CODEC_ID_H263P:
5482         case CODEC_ID_FLV1:
5483             ff_clean_h263_qscales(s);
5484             break;
5485         }
5486
5487         s->lambda= s->lambda_table[0];
5488         //FIXME broken
5489     }else
5490         s->lambda= s->current_picture.quality;
5491 //printf("%d %d\n", s->avctx->global_quality, s->current_picture.quality);
5492     update_qscale(s);
5493 }
5494
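/**
 * Encode one picture: run motion estimation across all threads, handle scene
 * change detection and f_code/b_code selection, pick the quantizer, write the
 * format-specific picture header, then run encode_thread() on every slice
 * thread and merge the per-thread statistics back into the main context.
 */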
5495 static void encode_picture(MpegEncContext *s, int picture_number)
5496 {
5497     int i;
5498     int bits;
5499
5500     s->picture_number = picture_number;
5501
5502     /* Reset the average MB variance */
5503     s->me.mb_var_sum_temp    =
5504     s->me.mc_mb_var_sum_temp = 0;
5505
5506     /* we need to initialize some time vars before we can encode b-frames */
5507     // RAL: Condition added for MPEG1VIDEO
5508     if (s->codec_id == CODEC_ID_MPEG1VIDEO || s->codec_id == CODEC_ID_MPEG2VIDEO || (s->h263_pred && !s->h263_msmpeg4))
5509         ff_set_mpeg4_time(s, s->picture_number);  //FIXME rename and use has_b_frames or similar
5510
5511     s->me.scene_change_score=0;
5512
5513 //    s->lambda= s->current_picture_ptr->quality; //FIXME qscale / ... stuff for ME rate distortion
5514
5515     if(s->pict_type==I_TYPE){
5516         if(s->msmpeg4_version >= 3) s->no_rounding=1;
5517         else                        s->no_rounding=0;
5518     }else if(s->pict_type!=B_TYPE){
5519         if(s->flipflop_rounding || s->codec_id == CODEC_ID_H263P || s->codec_id == CODEC_ID_MPEG4)
5520             s->no_rounding ^= 1;
5521     }
5522
5523     if(s->flags & CODEC_FLAG_PASS2){
5524         estimate_qp(s, 1);
5525         ff_get_2pass_fcode(s);
5526     }else if(!(s->flags & CODEC_FLAG_QSCALE)){
5527         if(s->pict_type==B_TYPE)
5528             s->lambda= s->last_lambda_for[s->pict_type];
5529         else
5530             s->lambda= s->last_lambda_for[s->last_non_b_pict_type];
5531         update_qscale(s);
5532     }
5533
5534     s->mb_intra=0; //for the rate distortion & bit compare functions
5535     for(i=1; i<s->avctx->thread_count; i++){
5536         ff_update_duplicate_context(s->thread_context[i], s);
5537     }
5538
5539     ff_init_me(s);
5540
5541     /* Estimate motion for every MB */
5542     if(s->pict_type != I_TYPE){
5543         s->lambda = (s->lambda * s->avctx->me_penalty_compensation + 128)>>8;
5544         s->lambda2= (s->lambda2* s->avctx->me_penalty_compensation + 128)>>8;
5545         if(s->pict_type != B_TYPE && s->avctx->me_threshold==0){
5546             if((s->avctx->pre_me && s->last_non_b_pict_type==I_TYPE) || s->avctx->pre_me==2){
5547                 s->avctx->execute(s->avctx, pre_estimate_motion_thread, (void**)&(s->thread_context[0]), NULL, s->avctx->thread_count);
5548             }
5549         }
5550
5551         s->avctx->execute(s->avctx, estimate_motion_thread, (void**)&(s->thread_context[0]), NULL, s->avctx->thread_count);
5552     }else /* if(s->pict_type == I_TYPE) */{
5553         /* I-Frame */
5554         for(i=0; i<s->mb_stride*s->mb_height; i++)
5555             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
5556
5557         if(!s->fixed_qscale){
5558             /* finding spatial complexity for I-frame rate control */
5559             s->avctx->execute(s->avctx, mb_var_thread, (void**)&(s->thread_context[0]), NULL, s->avctx->thread_count);
5560         }
5561     }
5562     for(i=1; i<s->avctx->thread_count; i++){
5563         merge_context_after_me(s, s->thread_context[i]);
5564     }
5565     s->current_picture.mc_mb_var_sum= s->current_picture_ptr->mc_mb_var_sum= s->me.mc_mb_var_sum_temp;
5566     s->current_picture.   mb_var_sum= s->current_picture_ptr->   mb_var_sum= s->me.   mb_var_sum_temp;
5567     emms_c();
5568
5569     if(s->me.scene_change_score > s->avctx->scenechange_threshold && s->pict_type == P_TYPE){
5570         s->pict_type= I_TYPE;
5571         for(i=0; i<s->mb_stride*s->mb_height; i++)
5572             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
5573 //printf("Scene change detected, encoding as I Frame %d %d\n", s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
5574     }
5575
5576     if(!s->umvplus){
5577         if(s->pict_type==P_TYPE || s->pict_type==S_TYPE) {
5578             s->f_code= ff_get_best_fcode(s, s->p_mv_table, CANDIDATE_MB_TYPE_INTER);
5579
5580             if(s->flags & CODEC_FLAG_INTERLACED_ME){
5581                 int a,b;
5582                 a= ff_get_best_fcode(s, s->p_field_mv_table[0][0], CANDIDATE_MB_TYPE_INTER_I); //FIXME field_select
5583                 b= ff_get_best_fcode(s, s->p_field_mv_table[1][1], CANDIDATE_MB_TYPE_INTER_I);
5584                 s->f_code= FFMAX(s->f_code, FFMAX(a,b));
5585             }
5586
5587             ff_fix_long_p_mvs(s);
5588             ff_fix_long_mvs(s, NULL, 0, s->p_mv_table, s->f_code, CANDIDATE_MB_TYPE_INTER, 0);
5589             if(s->flags & CODEC_FLAG_INTERLACED_ME){
5590                 int j;
5591                 for(i=0; i<2; i++){
5592                     for(j=0; j<2; j++)
5593                         ff_fix_long_mvs(s, s->p_field_select_table[i], j,
5594                                         s->p_field_mv_table[i][j], s->f_code, CANDIDATE_MB_TYPE_INTER_I, 0);
5595                 }
5596             }
5597         }
5598
5599         if(s->pict_type==B_TYPE){
5600             int a, b;
5601
5602             a = ff_get_best_fcode(s, s->b_forw_mv_table, CANDIDATE_MB_TYPE_FORWARD);
5603             b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, CANDIDATE_MB_TYPE_BIDIR);
5604             s->f_code = FFMAX(a, b);
5605
5606             a = ff_get_best_fcode(s, s->b_back_mv_table, CANDIDATE_MB_TYPE_BACKWARD);
5607             b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, CANDIDATE_MB_TYPE_BIDIR);
5608             s->b_code = FFMAX(a, b);
5609
5610             ff_fix_long_mvs(s, NULL, 0, s->b_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_FORWARD, 1);
5611             ff_fix_long_mvs(s, NULL, 0, s->b_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BACKWARD, 1);
5612             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_BIDIR, 1);
5613             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BIDIR, 1);
5614             if(s->flags & CODEC_FLAG_INTERLACED_ME){
5615                 int dir, j;
5616                 for(dir=0; dir<2; dir++){
5617                     for(i=0; i<2; i++){
5618                         for(j=0; j<2; j++){
5619                             int type= dir ? (CANDIDATE_MB_TYPE_BACKWARD_I|CANDIDATE_MB_TYPE_BIDIR_I)
5620                                           : (CANDIDATE_MB_TYPE_FORWARD_I |CANDIDATE_MB_TYPE_BIDIR_I);
5621                             ff_fix_long_mvs(s, s->b_field_select_table[dir][i], j,
5622                                             s->b_field_mv_table[dir][i][j], dir ? s->b_code : s->f_code, type, 1);
5623                         }
5624                     }
5625                 }
5626             }
5627         }
5628     }
5629
5630     estimate_qp(s, 0);
5631
5632     if(s->qscale < 3 && s->max_qcoeff<=128 && s->pict_type==I_TYPE && !(s->flags & CODEC_FLAG_QSCALE))
5633         s->qscale= 3; //reduce clipping problems
5634
5635     if (s->out_format == FMT_MJPEG) {
5636         /* for mjpeg, we do include qscale in the matrix */
5637         s->intra_matrix[0] = ff_mpeg1_default_intra_matrix[0];
5638         for(i=1;i<64;i++){
5639             int j= s->dsp.idct_permutation[i];
5640
5641             s->intra_matrix[j] = clip_uint8((ff_mpeg1_default_intra_matrix[i] * s->qscale) >> 3);
5642         }
5643         convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
5644                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
5645         s->qscale= 8;
5646     }
5647
5648     //FIXME var duplication
5649     s->current_picture_ptr->key_frame=
5650     s->current_picture.key_frame= s->pict_type == I_TYPE; //FIXME pic_ptr
5651     s->current_picture_ptr->pict_type=
5652     s->current_picture.pict_type= s->pict_type;
5653
5654     if(s->current_picture.key_frame)
5655         s->picture_in_gop_number=0;
5656
5657     s->last_bits= put_bits_count(&s->pb);
5658     switch(s->out_format) {
5659     case FMT_MJPEG:
5660         mjpeg_picture_header(s);
5661         break;
5662 #ifdef CONFIG_H261_ENCODER
5663     case FMT_H261:
5664         ff_h261_encode_picture_header(s, picture_number);
5665         break;
5666 #endif
5667     case FMT_H263:
5668         if (s->codec_id == CODEC_ID_WMV2)
5669             ff_wmv2_encode_picture_header(s, picture_number);
5670         else if (s->h263_msmpeg4)
5671             msmpeg4_encode_picture_header(s, picture_number);
5672         else if (s->h263_pred)
5673             mpeg4_encode_picture_header(s, picture_number);
5674 #ifdef CONFIG_RV10_ENCODER
5675         else if (s->codec_id == CODEC_ID_RV10)
5676             rv10_encode_picture_header(s, picture_number);
5677 #endif
5678 #ifdef CONFIG_RV20_ENCODER
5679         else if (s->codec_id == CODEC_ID_RV20)
5680             rv20_encode_picture_header(s, picture_number);
5681 #endif
5682         else if (s->codec_id == CODEC_ID_FLV1)
5683             ff_flv_encode_picture_header(s, picture_number);
5684         else
5685             h263_encode_picture_header(s, picture_number);
5686         break;
5687     case FMT_MPEG1:
5688         mpeg1_encode_picture_header(s, picture_number);
5689         break;
5690     case FMT_H264:
5691         break;
5692     default:
5693         assert(0);
5694     }
5695     bits= put_bits_count(&s->pb);
5696     s->header_bits= bits - s->last_bits;
5697
5698     for(i=1; i<s->avctx->thread_count; i++){
5699         update_duplicate_context_after_me(s->thread_context[i], s);
5700     }
5701     s->avctx->execute(s->avctx, encode_thread, (void**)&(s->thread_context[0]), NULL, s->avctx->thread_count);
5702     for(i=1; i<s->avctx->thread_count; i++){
5703         merge_context_after_encode(s, s->thread_context[i]);
5704     }
5705     emms_c();
5706 }
5707
5708 #endif //CONFIG_ENCODERS
5709
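/**
 * DCT-domain noise reduction: update the per-coefficient error statistics and
 * shrink every nonzero coefficient towards zero by the running dct_offset,
 * clamping at zero so the sign never flips.
 */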
5710 static void  denoise_dct_c(MpegEncContext *s, DCTELEM *block){
5711     const int intra= s->mb_intra;
5712     int i;
5713
5714     s->dct_count[intra]++;
5715
5716     for(i=0; i<64; i++){
5717         int level= block[i];
5718
5719         if(level){
5720             if(level>0){
5721                 s->dct_error_sum[intra][i] += level;
5722                 level -= s->dct_offset[intra][i];
5723                 if(level<0) level=0;
5724             }else{
5725                 s->dct_error_sum[intra][i] -= level;
5726                 level += s->dct_offset[intra][i];
5727                 if(level>0) level=0;
5728             }
5729             block[i]= level;
5730         }
5731     }
5732 }
5733
5734 #ifdef CONFIG_ENCODERS
5735
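/**
 * Rate-distortion optimal ("trellis") quantization of one 8x8 block.
 * After the forward DCT, up to two candidate levels are kept per coefficient
 * and a dynamic-programming search over (run, level) survivors picks the
 * combination minimizing distortion + lambda * VLC bits.
 * Returns the index of the last nonzero coefficient, like dct_quantize_c().
 */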
5736 static int dct_quantize_trellis_c(MpegEncContext *s,
5737                         DCTELEM *block, int n,
5738                         int qscale, int *overflow){
5739     const int *qmat;
5740     const uint8_t *scantable= s->intra_scantable.scantable;
5741     const uint8_t *perm_scantable= s->intra_scantable.permutated;
5742     int max=0;
5743     unsigned int threshold1, threshold2;
5744     int bias=0;
5745     int run_tab[65];
5746     int level_tab[65];
5747     int score_tab[65];
5748     int survivor[65];
5749     int survivor_count;
5750     int last_run=0;
5751     int last_level=0;
5752     int last_score= 0;
5753     int last_i;
5754     int coeff[2][64];
5755     int coeff_count[64];
5756     int qmul, qadd, start_i, last_non_zero, i, dc;
5757     const int esc_length= s->ac_esc_length;
5758     uint8_t * length;
5759     uint8_t * last_length;
5760     const int lambda= s->lambda2 >> (FF_LAMBDA_SHIFT - 6);
5761
5762     s->dsp.fdct (block);
5763
5764     if(s->dct_error_sum)
5765         s->denoise_dct(s, block);
5766     qmul= qscale*16;
5767     qadd= ((qscale-1)|1)*8;
5768
5769     if (s->mb_intra) {
5770         int q;
5771         if (!s->h263_aic) {
5772             if (n < 4)
5773                 q = s->y_dc_scale;
5774             else
5775                 q = s->c_dc_scale;
5776             q = q << 3;
5777         } else{
5778             /* For AIC we skip quant/dequant of INTRADC */
5779             q = 1 << 3;
5780             qadd=0;
5781         }
5782
5783         /* note: block[0] is assumed to be positive */
5784         block[0] = (block[0] + (q >> 1)) / q;
5785         start_i = 1;
5786         last_non_zero = 0;
5787         qmat = s->q_intra_matrix[qscale];
5788         if(s->mpeg_quant || s->out_format == FMT_MPEG1)
5789             bias= 1<<(QMAT_SHIFT-1);
5790         length     = s->intra_ac_vlc_length;
5791         last_length= s->intra_ac_vlc_last_length;
5792     } else {
5793         start_i = 0;
5794         last_non_zero = -1;
5795         qmat = s->q_inter_matrix[qscale];
5796         length     = s->inter_ac_vlc_length;
5797         last_length= s->inter_ac_vlc_last_length;
5798     }
5799     last_i= start_i;
5800
5801     threshold1= (1<<QMAT_SHIFT) - bias - 1;
5802     threshold2= (threshold1<<1);
5803
5804     for(i=63; i>=start_i; i--) {
5805         const int j = scantable[i];
5806         int level = block[j] * qmat[j];
5807
5808         if(((unsigned)(level+threshold1))>threshold2){
5809             last_non_zero = i;
5810             break;
5811         }
5812     }
5813
5814     for(i=start_i; i<=last_non_zero; i++) {
5815         const int j = scantable[i];
5816         int level = block[j] * qmat[j];
5817
5818 //        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
5819 //           || bias-level >= (1<<(QMAT_SHIFT - 3))){
5820         if(((unsigned)(level+threshold1))>threshold2){
5821             if(level>0){
5822                 level= (bias + level)>>QMAT_SHIFT;
5823                 coeff[0][i]= level;
5824                 coeff[1][i]= level-1;
5825 //                coeff[2][k]= level-2;
5826             }else{
5827                 level= (bias - level)>>QMAT_SHIFT;
5828                 coeff[0][i]= -level;
5829                 coeff[1][i]= -level+1;
5830 //                coeff[2][k]= -level+2;
5831             }
5832             coeff_count[i]= FFMIN(level, 2);
5833             assert(coeff_count[i]);
5834             max |=level;
5835         }else{
5836             coeff[0][i]= (level>>31)|1;
5837             coeff_count[i]= 1;
5838         }
5839     }
5840
5841     *overflow= s->max_qcoeff < max; //overflow might have happened
5842
5843     if(last_non_zero < start_i){
5844         memset(block + start_i, 0, (64-start_i)*sizeof(DCTELEM));
5845         return last_non_zero;
5846     }
5847
5848     score_tab[start_i]= 0;
5849     survivor[0]= start_i;
5850     survivor_count= 1;
5851
5852     for(i=start_i; i<=last_non_zero; i++){
5853         int level_index, j;
5854         const int dct_coeff= ABS(block[ scantable[i] ]);
5855         const int zero_distoration= dct_coeff*dct_coeff;
5856         int best_score=256*256*256*120;
5857         for(level_index=0; level_index < coeff_count[i]; level_index++){
5858             int distoration;
5859             int level= coeff[level_index][i];
5860             const int alevel= ABS(level);
5861             int unquant_coeff;
5862
5863             assert(level);
5864
5865             if(s->out_format == FMT_H263){
5866                 unquant_coeff= alevel*qmul + qadd;
5867             }else{ //MPEG1
5868                 j= s->dsp.idct_permutation[ scantable[i] ]; //FIXME optimize
5869                 if(s->mb_intra){
5870                         unquant_coeff = (int)(  alevel  * qscale * s->intra_matrix[j]) >> 3;
5871                         unquant_coeff =   (unquant_coeff - 1) | 1;
5872                 }else{
5873                         unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[j])) >> 4;
5874                         unquant_coeff =   (unquant_coeff - 1) | 1;
5875                 }
5876                 unquant_coeff<<= 3;
5877             }
5878
5879             distoration= (unquant_coeff - dct_coeff) * (unquant_coeff - dct_coeff) - zero_distoration;
5880             level+=64;
5881             if((level&(~127)) == 0){
5882                 for(j=survivor_count-1; j>=0; j--){
5883                     int run= i - survivor[j];
5884                     int score= distoration + length[UNI_AC_ENC_INDEX(run, level)]*lambda;
5885                     score += score_tab[i-run];
5886
5887                     if(score < best_score){
5888                         best_score= score;
5889                         run_tab[i+1]= run;
5890                         level_tab[i+1]= level-64;
5891                     }
5892                 }
5893
5894                 if(s->out_format == FMT_H263){
5895                     for(j=survivor_count-1; j>=0; j--){
5896                         int run= i - survivor[j];
5897                         int score= distoration + last_length[UNI_AC_ENC_INDEX(run, level)]*lambda;
5898                         score += score_tab[i-run];
5899                         if(score < last_score){
5900                             last_score= score;
5901                             last_run= run;
5902                             last_level= level-64;
5903                             last_i= i+1;
5904                         }
5905                     }
5906                 }
5907             }else{
5908                 distoration += esc_length*lambda;
5909                 for(j=survivor_count-1; j>=0; j--){
5910                     int run= i - survivor[j];
5911                     int score= distoration + score_tab[i-run];
5912
5913                     if(score < best_score){
5914                         best_score= score;
5915                         run_tab[i+1]= run;
5916                         level_tab[i+1]= level-64;
5917                     }
5918                 }
5919
5920                 if(s->out_format == FMT_H263){
5921                   for(j=survivor_count-1; j>=0; j--){
5922                         int run= i - survivor[j];
5923                         int score= distoration + score_tab[i-run];
5924                         if(score < last_score){
5925                             last_score= score;
5926                             last_run= run;
5927                             last_level= level-64;
5928                             last_i= i+1;
5929                         }
5930                     }
5931                 }
5932             }
5933         }
5934
5935         score_tab[i+1]= best_score;
5936
5937                 //Note: there is a vlc code in mpeg4 which is 1 bit shorter than another one with a shorter run and the same level
5938         if(last_non_zero <= 27){
5939             for(; survivor_count; survivor_count--){
5940                 if(score_tab[ survivor[survivor_count-1] ] <= best_score)
5941                     break;
5942             }
5943         }else{
5944             for(; survivor_count; survivor_count--){
5945                 if(score_tab[ survivor[survivor_count-1] ] <= best_score + lambda)
5946                     break;
5947             }
5948         }
5949
5950         survivor[ survivor_count++ ]= i+1;
5951     }
5952
5953     if(s->out_format != FMT_H263){
5954         last_score= 256*256*256*120;
5955         for(i= survivor[0]; i<=last_non_zero + 1; i++){
5956             int score= score_tab[i];
5957             if(i) score += lambda*2; //FIXME more exact?
5958
5959             if(score < last_score){
5960                 last_score= score;
5961                 last_i= i;
5962                 last_level= level_tab[i];
5963                 last_run= run_tab[i];
5964             }
5965         }
5966     }
5967
5968     s->coded_score[n] = last_score;
5969
5970     dc= ABS(block[0]);
5971     last_non_zero= last_i - 1;
5972     memset(block + start_i, 0, (64-start_i)*sizeof(DCTELEM));
5973
5974     if(last_non_zero < start_i)
5975         return last_non_zero;
5976
5977     if(last_non_zero == 0 && start_i == 0){
5978         int best_level= 0;
5979         int best_score= dc * dc;
5980
5981         for(i=0; i<coeff_count[0]; i++){
5982             int level= coeff[i][0];
5983             int alevel= ABS(level);
5984             int unquant_coeff, score, distortion;
5985
5986             if(s->out_format == FMT_H263){
5987                     unquant_coeff= (alevel*qmul + qadd)>>3;
5988             }else{ //MPEG1
5989                     unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[0])) >> 4;
5990                     unquant_coeff =   (unquant_coeff - 1) | 1;
5991             }
5992             unquant_coeff = (unquant_coeff + 4) >> 3;
5993             unquant_coeff<<= 3 + 3;
5994
5995             distortion= (unquant_coeff - dc) * (unquant_coeff - dc);
5996             level+=64;
5997             if((level&(~127)) == 0) score= distortion + last_length[UNI_AC_ENC_INDEX(0, level)]*lambda;
5998             else                    score= distortion + esc_length*lambda;
5999
6000             if(score < best_score){
6001                 best_score= score;
6002                 best_level= level - 64;
6003             }
6004         }
6005         block[0]= best_level;
6006         s->coded_score[n] = best_score - dc*dc;
6007         if(best_level == 0) return -1;
6008         else                return last_non_zero;
6009     }
6010
6011     i= last_i;
6012     assert(last_level);
6013
6014     block[ perm_scantable[last_non_zero] ]= last_level;
6015     i -= last_run + 1;
6016
6017     for(; i>start_i; i -= run_tab[i] + 1){
6018         block[ perm_scantable[i-1] ]= level_tab[i];
6019     }
6020
6021     return last_non_zero;
6022 }
6023
6024 //#define REFINE_STATS 1
6025 static int16_t basis[64][64];
6026
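/**
 * Fill basis[][] with the 8x8 DCT basis images, scaled by 1<<BASIS_SHIFT and
 * stored in the DSP's coefficient permutation order; dct_quantize_refine()
 * uses them through try_8x8basis()/add_8x8basis() to evaluate coefficient
 * changes directly in the pixel domain.
 */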
6027 static void build_basis(uint8_t *perm){
6028     int i, j, x, y;
6029     emms_c();
6030     for(i=0; i<8; i++){
6031         for(j=0; j<8; j++){
6032             for(y=0; y<8; y++){
6033                 for(x=0; x<8; x++){
6034                     double s= 0.25*(1<<BASIS_SHIFT);
6035                     int index= 8*i + j;
6036                     int perm_index= perm[index];
6037                     if(i==0) s*= sqrt(0.5);
6038                     if(j==0) s*= sqrt(0.5);
6039                     basis[perm_index][8*x + y]= lrintf(s * cos((M_PI/8.0)*i*(x+0.5)) * cos((M_PI/8.0)*j*(y+0.5)));
6040                 }
6041             }
6042         }
6043     }
6044 }
6045
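/**
 * Greedy refinement of an already quantized block: keep the pixel-domain
 * residual in rem[], then repeatedly try changing each coefficient (and the
 * intra DC) by +/-1, scoring basis-weighted distortion plus the VLC bit-cost
 * difference times lambda, and apply the best improvement until none is left.
 */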
6046 static int dct_quantize_refine(MpegEncContext *s, //FIXME breaks denoise?
6047                         DCTELEM *block, int16_t *weight, DCTELEM *orig,
6048                         int n, int qscale){
6049     int16_t rem[64];
6050     DECLARE_ALIGNED_16(DCTELEM, d1[64]);
6051     const int *qmat;
6052     const uint8_t *scantable= s->intra_scantable.scantable;
6053     const uint8_t *perm_scantable= s->intra_scantable.permutated;
6054 //    unsigned int threshold1, threshold2;
6055 //    int bias=0;
6056     int run_tab[65];
6057     int prev_run=0;
6058     int prev_level=0;
6059     int qmul, qadd, start_i, last_non_zero, i, dc;
6060     uint8_t * length;
6061     uint8_t * last_length;
6062     int lambda;
6063     int rle_index, run, q, sum;
6064 #ifdef REFINE_STATS
6065 static int count=0;
6066 static int after_last=0;
6067 static int to_zero=0;
6068 static int from_zero=0;
6069 static int raise=0;
6070 static int lower=0;
6071 static int messed_sign=0;
6072 #endif
6073
6074     if(basis[0][0] == 0)
6075         build_basis(s->dsp.idct_permutation);
6076
6077     qmul= qscale*2;
6078     qadd= (qscale-1)|1;
6079     if (s->mb_intra) {
6080         if (!s->h263_aic) {
6081             if (n < 4)
6082                 q = s->y_dc_scale;
6083             else
6084                 q = s->c_dc_scale;
6085         } else{
6086             /* For AIC we skip quant/dequant of INTRADC */
6087             q = 1;
6088             qadd=0;
6089         }
6090         q <<= RECON_SHIFT-3;
6091         /* note: block[0] is assumed to be positive */
6092         dc= block[0]*q;
6093 //        block[0] = (block[0] + (q >> 1)) / q;
6094         start_i = 1;
6095         qmat = s->q_intra_matrix[qscale];
6096 //        if(s->mpeg_quant || s->out_format == FMT_MPEG1)
6097 //            bias= 1<<(QMAT_SHIFT-1);
6098         length     = s->intra_ac_vlc_length;
6099         last_length= s->intra_ac_vlc_last_length;
6100     } else {
6101         dc= 0;
6102         start_i = 0;
6103         qmat = s->q_inter_matrix[qscale];
6104         length     = s->inter_ac_vlc_length;
6105         last_length= s->inter_ac_vlc_last_length;
6106     }
6107     last_non_zero = s->block_last_index[n];
6108
6109 #ifdef REFINE_STATS
6110 {START_TIMER
6111 #endif
6112     dc += (1<<(RECON_SHIFT-1));
6113     for(i=0; i<64; i++){
6114         rem[i]= dc - (orig[i]<<RECON_SHIFT); //FIXME use orig directly instead of copying to rem[]
6115     }
6116 #ifdef REFINE_STATS
6117 STOP_TIMER("memset rem[]")}
6118 #endif
6119     sum=0;
6120     for(i=0; i<64; i++){
6121         int one= 36;
6122         int qns=4;
6123         int w;
6124
6125         w= ABS(weight[i]) + qns*one;
6126         w= 15 + (48*qns*one + w/2)/w; // 16 .. 63
6127
6128         weight[i] = w;
6129 //        w=weight[i] = (63*qns + (w/2)) / w;
6130
6131         assert(w>0);
6132         assert(w<(1<<6));
6133         sum += w*w;
6134     }
6135     lambda= sum*(uint64_t)s->lambda2 >> (FF_LAMBDA_SHIFT - 6 + 6 + 6 + 6);
6136 #ifdef REFINE_STATS
6137 {START_TIMER
6138 #endif
6139     run=0;
6140     rle_index=0;
6141     for(i=start_i; i<=last_non_zero; i++){
6142         int j= perm_scantable[i];
6143         const int level= block[j];
6144         int coeff;
6145
6146         if(level){
6147             if(level<0) coeff= qmul*level - qadd;
6148             else        coeff= qmul*level + qadd;
6149             run_tab[rle_index++]=run;
6150             run=0;
6151
6152             s->dsp.add_8x8basis(rem, basis[j], coeff);
6153         }else{
6154             run++;
6155         }
6156     }
6157 #ifdef REFINE_STATS
6158 if(last_non_zero>0){
6159 STOP_TIMER("init rem[]")
6160 }
6161 }
6162
6163 {START_TIMER
6164 #endif
6165     for(;;){
6166         int best_score=s->dsp.try_8x8basis(rem, weight, basis[0], 0);
6167         int best_coeff=0;
6168         int best_change=0;
6169         int run2, best_unquant_change=0, analyze_gradient;
6170 #ifdef REFINE_STATS
6171 {START_TIMER
6172 #endif
6173         analyze_gradient = last_non_zero > 2 || s->avctx->quantizer_noise_shaping >= 3;
6174
6175         if(analyze_gradient){
6176 #ifdef REFINE_STATS
6177 {START_TIMER
6178 #endif
6179             for(i=0; i<64; i++){
6180                 int w= weight[i];
6181
6182                 d1[i] = (rem[i]*w*w + (1<<(RECON_SHIFT+12-1)))>>(RECON_SHIFT+12);
6183             }
6184 #ifdef REFINE_STATS
6185 STOP_TIMER("rem*w*w")}
6186 {START_TIMER
6187 #endif
6188             s->dsp.fdct(d1);
6189 #ifdef REFINE_STATS
6190 STOP_TIMER("dct")}
6191 #endif
6192         }
6193
6194         if(start_i){
6195             const int level= block[0];
6196             int change, old_coeff;
6197
6198             assert(s->mb_intra);
6199
6200             old_coeff= q*level;
6201
6202             for(change=-1; change<=1; change+=2){
6203                 int new_level= level + change;
6204                 int score, new_coeff;
6205
6206                 new_coeff= q*new_level;
6207                 if(new_coeff >= 2048 || new_coeff < 0)
6208                     continue;
6209
6210                 score= s->dsp.try_8x8basis(rem, weight, basis[0], new_coeff - old_coeff);
6211                 if(score<best_score){
6212                     best_score= score;
6213                     best_coeff= 0;
6214                     best_change= change;
6215                     best_unquant_change= new_coeff - old_coeff;
6216                 }
6217             }
6218         }
6219
6220         run=0;
6221         rle_index=0;
6222         run2= run_tab[rle_index++];
6223         prev_level=0;
6224         prev_run=0;
6225
6226         for(i=start_i; i<64; i++){
6227             int j= perm_scantable[i];
6228             const int level= block[j];
6229             int change, old_coeff;
6230
6231             if(s->avctx->quantizer_noise_shaping < 3 && i > last_non_zero + 1)
6232                 break;
6233
6234             if(level){
6235                 if(level<0) old_coeff= qmul*level - qadd;
6236                 else        old_coeff= qmul*level + qadd;
6237                 run2= run_tab[rle_index++]; //FIXME ! maybe after last
6238             }else{
6239                 old_coeff=0;
6240                 run2--;
6241                 assert(run2>=0 || i >= last_non_zero );
6242             }
6243
6244             for(change=-1; change<=1; change+=2){
6245                 int new_level= level + change;
6246                 int score, new_coeff, unquant_change;
6247
6248                 score=0;
6249                 if(s->avctx->quantizer_noise_shaping < 2 && ABS(new_level) > ABS(level))
6250                    continue;
6251
6252                 if(new_level){
6253                     if(new_level<0) new_coeff= qmul*new_level - qadd;
6254                     else            new_coeff= qmul*new_level + qadd;
6255                     if(new_coeff >= 2048 || new_coeff <= -2048)
6256                         continue;
6257                     //FIXME check for overflow
6258
6259                     if(level){
6260                         if(level < 63 && level > -63){
6261                             if(i < last_non_zero)
6262                                 score +=   length[UNI_AC_ENC_INDEX(run, new_level+64)]
6263                                          - length[UNI_AC_ENC_INDEX(run, level+64)];
6264                             else
6265                                 score +=   last_length[UNI_AC_ENC_INDEX(run, new_level+64)]
6266                                          - last_length[UNI_AC_ENC_INDEX(run, level+64)];
6267                         }
6268                     }else{
6269                         assert(ABS(new_level)==1);
6270
6271                         if(analyze_gradient){
6272                             int g= d1[ scantable[i] ];
6273                             if(g && (g^new_level) >= 0)
6274                                 continue;
6275                         }
6276
6277                         if(i < last_non_zero){
6278                             int next_i= i + run2 + 1;
6279                             int next_level= block[ perm_scantable[next_i] ] + 64;
6280
6281                             if(next_level&(~127))
6282                                 next_level= 0;
6283
6284                             if(next_i < last_non_zero)
6285                                 score +=   length[UNI_AC_ENC_INDEX(run, 65)]
6286                                          + length[UNI_AC_ENC_INDEX(run2, next_level)]
6287                                          - length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
6288                             else
6289                                 score +=  length[UNI_AC_ENC_INDEX(run, 65)]
6290                                         + last_length[UNI_AC_ENC_INDEX(run2, next_level)]
6291                                         - last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
6292                         }else{
6293                             score += last_length[UNI_AC_ENC_INDEX(run, 65)];
6294                             if(prev_level){
6295                                 score +=  length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
6296                                         - last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
6297                             }
6298                         }
6299                     }
6300                 }else{
6301                     new_coeff=0;
6302                     assert(ABS(level)==1);
6303
6304                     if(i < last_non_zero){
6305                         int next_i= i + run2 + 1;
6306                         int next_level= block[ perm_scantable[next_i] ] + 64;
6307
6308                         if(next_level&(~127))
6309                             next_level= 0;
6310
6311                         if(next_i < last_non_zero)
6312                             score +=   length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
6313                                      - length[UNI_AC_ENC_INDEX(run2, next_level)]
6314                                      - length[UNI_AC_ENC_INDEX(run, 65)];
6315                         else
6316                             score +=   last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
6317                                      - last_length[UNI_AC_ENC_INDEX(run2, next_level)]
6318                                      - length[UNI_AC_ENC_INDEX(run, 65)];
6319                     }else{
6320                         score += -last_length[UNI_AC_ENC_INDEX(run, 65)];
6321                         if(prev_level){
6322                             score +=  last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
6323                                     - length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
6324                         }
6325                     }
6326                 }
6327
6328                 score *= lambda;
6329
6330                 unquant_change= new_coeff - old_coeff;
6331                 assert((score < 100*lambda && score > -100*lambda) || lambda==0);
6332
6333                 score+= s->dsp.try_8x8basis(rem, weight, basis[j], unquant_change);
6334                 if(score<best_score){
6335                     best_score= score;
6336                     best_coeff= i;
6337                     best_change= change;
6338                     best_unquant_change= unquant_change;
6339                 }
6340             }
6341             if(level){
6342                 prev_level= level + 64;
6343                 if(prev_level&(~127))
6344                     prev_level= 0;
6345                 prev_run= run;
6346                 run=0;
6347             }else{
6348                 run++;
6349             }
6350         }
6351 #ifdef REFINE_STATS
6352 STOP_TIMER("iterative step")}
6353 #endif
6354
6355         if(best_change){
6356             int j= perm_scantable[ best_coeff ];
6357
6358             block[j] += best_change;
6359
6360             if(best_coeff > last_non_zero){
6361                 last_non_zero= best_coeff;
6362                 assert(block[j]);
6363 #ifdef REFINE_STATS
6364 after_last++;
6365 #endif
6366             }else{
6367 #ifdef REFINE_STATS
6368 if(block[j]){
6369     if(block[j] - best_change){
6370         if(ABS(block[j]) > ABS(block[j] - best_change)){
6371             raise++;
6372         }else{
6373             lower++;
6374         }
6375     }else{
6376         from_zero++;
6377     }
6378 }else{
6379     to_zero++;
6380 }
6381 #endif
6382                 for(; last_non_zero>=start_i; last_non_zero--){
6383                     if(block[perm_scantable[last_non_zero]])
6384                         break;
6385                 }
6386             }
6387 #ifdef REFINE_STATS
6388 count++;
6389 if(256*256*256*64 % count == 0){
6390     printf("after_last:%d to_zero:%d from_zero:%d raise:%d lower:%d sign:%d xyp:%d/%d/%d\n", after_last, to_zero, from_zero, raise, lower, messed_sign, s->mb_x, s->mb_y, s->picture_number);
6391 }
6392 #endif
6393             run=0;
6394             rle_index=0;
6395             for(i=start_i; i<=last_non_zero; i++){
6396                 int j= perm_scantable[i];
6397                 const int level= block[j];
6398
6399                  if(level){
6400                      run_tab[rle_index++]=run;
6401                      run=0;
6402                  }else{
6403                      run++;
6404                  }
6405             }
6406
6407             s->dsp.add_8x8basis(rem, basis[j], best_unquant_change);
6408         }else{
6409             break;
6410         }
6411     }
6412 #ifdef REFINE_STATS
6413 if(last_non_zero>0){
6414 STOP_TIMER("iterative search")
6415 }
6416 }
6417 #endif
6418
6419     return last_non_zero;
6420 }
6421
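/**
 * Plain forward quantization of one 8x8 block: fdct, optional DCT-domain
 * denoising, separate handling of the intra DC, a threshold scan from the
 * high end to find the last significant coefficient, biased rounding of the
 * rest, and a final permutation of the nonzero coefficients for the IDCT.
 */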
6422 static int dct_quantize_c(MpegEncContext *s,
6423                         DCTELEM *block, int n,
6424                         int qscale, int *overflow)
6425 {
6426     int i, j, level, last_non_zero, q, start_i;
6427     const int *qmat;
6428     const uint8_t *scantable= s->intra_scantable.scantable;
6429     int bias;
6430     int max=0;
6431     unsigned int threshold1, threshold2;
6432
6433     s->dsp.fdct (block);
6434
6435     if(s->dct_error_sum)
6436         s->denoise_dct(s, block);
6437
6438     if (s->mb_intra) {
6439         if (!s->h263_aic) {
6440             if (n < 4)
6441                 q = s->y_dc_scale;
6442             else
6443                 q = s->c_dc_scale;
6444             q = q << 3;
6445         } else
6446             /* For AIC we skip quant/dequant of INTRADC */
6447             q = 1 << 3;
6448
6449         /* note: block[0] is assumed to be positive */
6450         block[0] = (block[0] + (q >> 1)) / q;
6451         start_i = 1;
6452         last_non_zero = 0;
6453         qmat = s->q_intra_matrix[qscale];
6454         bias= s->intra_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
6455     } else {
6456         start_i = 0;
6457         last_non_zero = -1;
6458         qmat = s->q_inter_matrix[qscale];
6459         bias= s->inter_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
6460     }
6461     threshold1= (1<<QMAT_SHIFT) - bias - 1;
6462     threshold2= (threshold1<<1);
6463     for(i=63;i>=start_i;i--) {
6464         j = scantable[i];
6465         level = block[j] * qmat[j];
6466
6467         if(((unsigned)(level+threshold1))>threshold2){
6468             last_non_zero = i;
6469             break;
6470         }else{
6471             block[j]=0;
6472         }
6473     }
6474     for(i=start_i; i<=last_non_zero; i++) {
6475         j = scantable[i];
6476         level = block[j] * qmat[j];
6477
6478 //        if(   bias+level >= (1<<QMAT_SHIFT)
6479 //           || bias-level >= (1<<QMAT_SHIFT)){
6480         if(((unsigned)(level+threshold1))>threshold2){
6481             if(level>0){
6482                 level= (bias + level)>>QMAT_SHIFT;
6483                 block[j]= level;
6484             }else{
6485                 level= (bias - level)>>QMAT_SHIFT;
6486                 block[j]= -level;
6487             }
6488             max |=level;
6489         }else{
6490             block[j]=0;
6491         }
6492     }
6493     *overflow= s->max_qcoeff < max; //overflow might have happened
6494
6495     /* we need this permutation so that the IDCT is correct; only the nonzero elements are permuted */
6496     if (s->dsp.idct_permutation_type != FF_NO_IDCT_PERM)
6497         ff_block_permute(block, s->dsp.idct_permutation, scantable, last_non_zero);
6498
6499     return last_non_zero;
6500 }
6501
6502 #endif //CONFIG_ENCODERS
6503
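/**
 * MPEG-1 intra dequantization: the DC term is scaled by y/c_dc_scale, the AC
 * terms by qscale and the intra matrix, and every level is forced odd
 * ((level - 1) | 1) as the MPEG-1 mismatch control.
 */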
6504 static void dct_unquantize_mpeg1_intra_c(MpegEncContext *s,
6505                                    DCTELEM *block, int n, int qscale)
6506 {
6507     int i, level, nCoeffs;
6508     const uint16_t *quant_matrix;
6509
6510     nCoeffs= s->block_last_index[n];
6511
6512     if (n < 4)
6513         block[0] = block[0] * s->y_dc_scale;
6514     else
6515         block[0] = block[0] * s->c_dc_scale;
6516     /* XXX: only mpeg1 */
6517     quant_matrix = s->intra_matrix;
6518     for(i=1;i<=nCoeffs;i++) {
6519         int j= s->intra_scantable.permutated[i];
6520         level = block[j];
6521         if (level) {
6522             if (level < 0) {
6523                 level = -level;
6524                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
6525                 level = (level - 1) | 1;
6526                 level = -level;
6527             } else {
6528                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
6529                 level = (level - 1) | 1;
6530             }
6531             block[j] = level;
6532         }
6533     }
6534 }
6535
6536 static void dct_unquantize_mpeg1_inter_c(MpegEncContext *s,
6537                                    DCTELEM *block, int n, int qscale)
6538 {
6539     int i, level, nCoeffs;
6540     const uint16_t *quant_matrix;
6541
6542     nCoeffs= s->block_last_index[n];
6543
6544     quant_matrix = s->inter_matrix;
6545     for(i=0; i<=nCoeffs; i++) {
6546         int j= s->intra_scantable.permutated[i];
6547         level = block[j];
6548         if (level) {
6549             if (level < 0) {
6550                 level = -level;
6551                 level = (((level << 1) + 1) * qscale *
6552                          ((int) (quant_matrix[j]))) >> 4;
6553                 level = (level - 1) | 1;
6554                 level = -level;
6555             } else {
6556                 level = (((level << 1) + 1) * qscale *
6557                          ((int) (quant_matrix[j]))) >> 4;
6558                 level = (level - 1) | 1;
6559             }
6560             block[j] = level;
6561         }
6562     }
6563 }
6564
6565 static void dct_unquantize_mpeg2_intra_c(MpegEncContext *s,
6566                                    DCTELEM *block, int n, int qscale)
6567 {
6568     int i, level, nCoeffs;
6569     const uint16_t *quant_matrix;
6570
6571     if(s->alternate_scan) nCoeffs= 63;
6572     else nCoeffs= s->block_last_index[n];
6573
6574     if (n < 4)
6575         block[0] = block[0] * s->y_dc_scale;
6576     else
6577         block[0] = block[0] * s->c_dc_scale;
6578     quant_matrix = s->intra_matrix;
6579     for(i=1;i<=nCoeffs;i++) {
6580         int j= s->intra_scantable.permutated[i];
6581         level = block[j];
6582         if (level) {
6583             if (level < 0) {
6584                 level = -level;
6585                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
6586                 level = -level;
6587             } else {
6588                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
6589             }
6590             block[j] = level;
6591         }
6592     }
6593 }
6594
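/**
 * Bit-exact variant of the MPEG-2 intra dequantizer: same arithmetic as
 * dct_unquantize_mpeg2_intra_c(), plus the standard mismatch control that
 * toggles the least significant bit of block[63] according to the parity of
 * the sum of all dequantized levels.
 */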
6595 static void dct_unquantize_mpeg2_intra_bitexact(MpegEncContext *s,
6596                                    DCTELEM *block, int n, int qscale)
6597 {
6598     int i, level, nCoeffs;
6599     const uint16_t *quant_matrix;
6600     int sum=-1;
6601
6602     if(s->alternate_scan) nCoeffs= 63;
6603     else nCoeffs= s->block_last_index[n];
6604
6605     if (n < 4)
6606         block[0] = block[0] * s->y_dc_scale;
6607     else
6608         block[0] = block[0] * s->c_dc_scale;
6609     quant_matrix = s->intra_matrix;
6610     for(i=1;i<=nCoeffs;i++) {
6611         int j= s->intra_scantable.permutated[i];
6612         level = block[j];
6613         if (level) {
6614             if (level < 0) {
6615                 level = -level;
6616                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
6617                 level = -level;
6618             } else {
6619                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
6620             }
6621             block[j] = level;
6622             sum+=level;
6623         }
6624     }
6625     block[63]^=sum&1;
6626 }
6627
6628 static void dct_unquantize_mpeg2_inter_c(MpegEncContext *s,
6629                                    DCTELEM *block, int n, int qscale)
6630 {
6631     int i, level, nCoeffs;
6632     const uint16_t *quant_matrix;
6633     int sum=-1;
6634
6635     if(s->alternate_scan) nCoeffs= 63;
6636     else nCoeffs= s->block_last_index[n];
6637
6638     quant_matrix = s->inter_matrix;
6639     for(i=0; i<=nCoeffs; i++) {
6640         int j= s->intra_scantable.permutated[i];
6641         level = block[j];
6642         if (level) {
6643             if (level < 0) {
6644                 level = -level;
6645                 level = (((level << 1) + 1) * qscale *
6646                          ((int) (quant_matrix[j]))) >> 4;
6647                 level = -level;
6648             } else {
6649                 level = (((level << 1) + 1) * qscale *
6650                          ((int) (quant_matrix[j]))) >> 4;
6651             }
6652             block[j] = level;
6653             sum+=level;
6654         }
6655     }
6656     block[63]^=sum&1;
6657 }
6658
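/**
 * H.263-style intra dequantization: level * 2*qscale +/- qadd for the AC
 * terms; with AIC the DC is left unscaled and qadd is zero. With AC
 * prediction all 63 AC coefficients are processed, otherwise only up to the
 * raster end of the last coded coefficient.
 */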
6659 static void dct_unquantize_h263_intra_c(MpegEncContext *s,
6660                                   DCTELEM *block, int n, int qscale)
6661 {
6662     int i, level, qmul, qadd;
6663     int nCoeffs;
6664
6665     assert(s->block_last_index[n]>=0);
6666
6667     qmul = qscale << 1;
6668
6669     if (!s->h263_aic) {
6670         if (n < 4)
6671             block[0] = block[0] * s->y_dc_scale;
6672         else
6673             block[0] = block[0] * s->c_dc_scale;
6674         qadd = (qscale - 1) | 1;
6675     }else{
6676         qadd = 0;
6677     }
6678     if(s->ac_pred)
6679         nCoeffs=63;
6680     else
6681         nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
6682
6683     for(i=1; i<=nCoeffs; i++) {
6684         level = block[i];
6685         if (level) {
6686             if (level < 0) {
6687                 level = level * qmul - qadd;
6688             } else {
6689                 level = level * qmul + qadd;
6690             }
6691             block[i] = level;
6692         }
6693     }
6694 }
6695
6696 static void dct_unquantize_h263_inter_c(MpegEncContext *s,
6697                                   DCTELEM *block, int n, int qscale)
6698 {
6699     int i, level, qmul, qadd;
6700     int nCoeffs;
6701
6702     assert(s->block_last_index[n]>=0);
6703
6704     qadd = (qscale - 1) | 1;
6705     qmul = qscale << 1;
6706
6707     nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
6708
6709     for(i=0; i<=nCoeffs; i++) {
6710         level = block[i];
6711         if (level) {
6712             if (level < 0) {
6713                 level = level * qmul - qadd;
6714             } else {
6715                 level = level * qmul + qadd;
6716             }
6717             block[i] = level;
6718         }
6719     }
6720 }
6721
6722 #ifdef CONFIG_ENCODERS
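/* All encoders below share the generic MPV_encode_init/MPV_encode_picture/
 * MPV_encode_end entry points; they differ only in codec id, supported pixel
 * formats and capabilities (e.g. CODEC_CAP_DELAY for MPEG-4, which may emit
 * B-frames). */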
6723 AVCodec h263_encoder = {
6724     "h263",
6725     CODEC_TYPE_VIDEO,
6726     CODEC_ID_H263,
6727     sizeof(MpegEncContext),
6728     MPV_encode_init,
6729     MPV_encode_picture,
6730     MPV_encode_end,
6731     .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
6732 };
6733
6734 AVCodec h263p_encoder = {
6735     "h263p",
6736     CODEC_TYPE_VIDEO,
6737     CODEC_ID_H263P,
6738     sizeof(MpegEncContext),
6739     MPV_encode_init,
6740     MPV_encode_picture,
6741     MPV_encode_end,
6742     .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
6743 };
6744
6745 AVCodec flv_encoder = {
6746     "flv",
6747     CODEC_TYPE_VIDEO,
6748     CODEC_ID_FLV1,
6749     sizeof(MpegEncContext),
6750     MPV_encode_init,
6751     MPV_encode_picture,
6752     MPV_encode_end,
6753     .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
6754 };
6755
6756 AVCodec rv10_encoder = {
6757     "rv10",
6758     CODEC_TYPE_VIDEO,
6759     CODEC_ID_RV10,
6760     sizeof(MpegEncContext),
6761     MPV_encode_init,
6762     MPV_encode_picture,
6763     MPV_encode_end,
6764     .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
6765 };
6766
6767 AVCodec rv20_encoder = {
6768     "rv20",
6769     CODEC_TYPE_VIDEO,
6770     CODEC_ID_RV20,
6771     sizeof(MpegEncContext),
6772     MPV_encode_init,
6773     MPV_encode_picture,
6774     MPV_encode_end,
6775     .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
6776 };
6777
6778 AVCodec mpeg4_encoder = {
6779     "mpeg4",
6780     CODEC_TYPE_VIDEO,
6781     CODEC_ID_MPEG4,
6782     sizeof(MpegEncContext),
6783     MPV_encode_init,
6784     MPV_encode_picture,
6785     MPV_encode_end,
6786     .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
6787     .capabilities= CODEC_CAP_DELAY,
6788 };
6789
6790 AVCodec msmpeg4v1_encoder = {
6791     "msmpeg4v1",
6792     CODEC_TYPE_VIDEO,
6793     CODEC_ID_MSMPEG4V1,
6794     sizeof(MpegEncContext),
6795     MPV_encode_init,
6796     MPV_encode_picture,
6797     MPV_encode_end,
6798     .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
6799 };
6800
6801 AVCodec msmpeg4v2_encoder = {
6802     "msmpeg4v2",
6803     CODEC_TYPE_VIDEO,
6804     CODEC_ID_MSMPEG4V2,
6805     sizeof(MpegEncContext),
6806     MPV_encode_init,
6807     MPV_encode_picture,
6808     MPV_encode_end,
6809     .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
6810 };
6811
6812 AVCodec msmpeg4v3_encoder = {
6813     "msmpeg4",
6814     CODEC_TYPE_VIDEO,
6815     CODEC_ID_MSMPEG4V3,
6816     sizeof(MpegEncContext),
6817     MPV_encode_init,
6818     MPV_encode_picture,
6819     MPV_encode_end,
6820     .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
6821 };
6822
6823 AVCodec wmv1_encoder = {
6824     "wmv1",
6825     CODEC_TYPE_VIDEO,
6826     CODEC_ID_WMV1,
6827     sizeof(MpegEncContext),
6828     MPV_encode_init,
6829     MPV_encode_picture,
6830     MPV_encode_end,
6831     .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
6832 };
6833
6834 AVCodec mjpeg_encoder = {
6835     "mjpeg",
6836     CODEC_TYPE_VIDEO,
6837     CODEC_ID_MJPEG,
6838     sizeof(MpegEncContext),
6839     MPV_encode_init,
6840     MPV_encode_picture,
6841     MPV_encode_end,
6842     .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUVJ420P, -1},
6843 };
6844
6845 #endif //CONFIG_ENCODERS