1 /*
2  * The simplest mpeg encoder (well, it was the simplest!)
3  * Copyright (c) 2000,2001 Fabrice Bellard.
4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5  *
6  * This library is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2 of the License, or (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with this library; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  *
20  * 4MV & hq & b-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
21  */
22
23 /**
24  * @file mpegvideo.c
25  * The simplest mpeg encoder (well, it was the simplest!).
26  */
27
28 #include "avcodec.h"
29 #include "dsputil.h"
30 #include "mpegvideo.h"
31 #include "faandct.h"
32 #include <limits.h>
33
34 #ifdef USE_FASTMEMCPY
35 #include "fastmemcpy.h"
36 #endif
37
38 //#undef NDEBUG
39 //#include <assert.h>
40
41 #ifdef CONFIG_ENCODERS
42 static void encode_picture(MpegEncContext *s, int picture_number);
43 #endif //CONFIG_ENCODERS
44 static void dct_unquantize_mpeg1_intra_c(MpegEncContext *s,
45                                    DCTELEM *block, int n, int qscale);
46 static void dct_unquantize_mpeg1_inter_c(MpegEncContext *s,
47                                    DCTELEM *block, int n, int qscale);
48 static void dct_unquantize_mpeg2_intra_c(MpegEncContext *s,
49                                    DCTELEM *block, int n, int qscale);
50 static void dct_unquantize_mpeg2_intra_bitexact(MpegEncContext *s,
51                                    DCTELEM *block, int n, int qscale);
52 static void dct_unquantize_mpeg2_inter_c(MpegEncContext *s,
53                                    DCTELEM *block, int n, int qscale);
54 static void dct_unquantize_h263_intra_c(MpegEncContext *s,
55                                   DCTELEM *block, int n, int qscale);
56 static void dct_unquantize_h263_inter_c(MpegEncContext *s,
57                                   DCTELEM *block, int n, int qscale);
58 static void draw_edges_c(uint8_t *buf, int wrap, int width, int height, int w);
59 #ifdef CONFIG_ENCODERS
60 static int dct_quantize_c(MpegEncContext *s, DCTELEM *block, int n, int qscale, int *overflow);
61 static int dct_quantize_trellis_c(MpegEncContext *s, DCTELEM *block, int n, int qscale, int *overflow);
62 static int dct_quantize_refine(MpegEncContext *s, DCTELEM *block, int16_t *weight, DCTELEM *orig, int n, int qscale);
63 static int sse_mb(MpegEncContext *s);
64 static void  denoise_dct_c(MpegEncContext *s, DCTELEM *block);
65 #endif //CONFIG_ENCODERS
66
67 #ifdef HAVE_XVMC
68 extern int  XVMC_field_start(MpegEncContext*s, AVCodecContext *avctx);
69 extern void XVMC_field_end(MpegEncContext *s);
70 extern void XVMC_decode_mb(MpegEncContext *s);
71 #endif
72
73 void (*draw_edges)(uint8_t *buf, int wrap, int width, int height, int w)= draw_edges_c;
74
75
76 /* enable all paranoid tests for rounding, overflows, etc... */
77 //#define PARANOID
78
79 //#define DEBUG
80
81
82 /* for jpeg fast DCT */
83 #define CONST_BITS 14
84
85 static const uint16_t aanscales[64] = {
86     /* precomputed values scaled up by 14 bits */
87     16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
88     22725, 31521, 29692, 26722, 22725, 17855, 12299,  6270,
89     21407, 29692, 27969, 25172, 21407, 16819, 11585,  5906,
90     19266, 26722, 25172, 22654, 19266, 15137, 10426,  5315,
91     16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
92     12873, 17855, 16819, 15137, 12873, 10114,  6967,  3552,
93     8867 , 12299, 11585, 10426,  8867,  6967,  4799,  2446,
94     4520 ,  6270,  5906,  5315,  4520,  3552,  2446,  1247
95 };
96
97 static const uint8_t h263_chroma_roundtab[16] = {
98 //  0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15
99     0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2,
100 };
101
102 static const uint8_t ff_default_chroma_qscale_table[32]={
103 //  0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
104     0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
105 };
106
107 #ifdef CONFIG_ENCODERS
108 static uint8_t (*default_mv_penalty)[MAX_MV*2+1]=NULL;
109 static uint8_t default_fcode_tab[MAX_MV*2+1];
110
111 enum PixelFormat ff_yuv420p_list[2]= {PIX_FMT_YUV420P, -1};
112
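/**
 * precomputes the quantization multiplier tables for every qscale in [qmin, qmax],
 * so that quantization can later be done with a multiply and shift instead of a
 * division. Which table is built depends on the selected fdct: the AAN-style
 * transforms (fdct_ifast, or ff_faandct without postscaling) need the aanscales[]
 * factors folded in, while the remaining path also fills qmat16 with 16-bit
 * multipliers and rounding biases for the MMX quantizer. 'bias' is the quantizer
 * rounding bias and 'intra' makes the overflow check skip the DC coefficient.
 */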
113 static void convert_matrix(DSPContext *dsp, int (*qmat)[64], uint16_t (*qmat16)[2][64],
114                            const uint16_t *quant_matrix, int bias, int qmin, int qmax, int intra)
115 {
116     int qscale;
117     int shift=0;
118
119     for(qscale=qmin; qscale<=qmax; qscale++){
120         int i;
121         if (dsp->fdct == ff_jpeg_fdct_islow
122 #ifdef FAAN_POSTSCALE
123             || dsp->fdct == ff_faandct
124 #endif
125             ) {
126             for(i=0;i<64;i++) {
127                 const int j= dsp->idct_permutation[i];
128                 /* 16 <= qscale * quant_matrix[i] <= 7905 */
129                 /* 19952         <= aanscales[i] * qscale * quant_matrix[i]           <= 249205026 */
130                 /* (1<<36)/19952 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= (1<<36)/249205026 */
131                 /* 3444240       >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= 275 */
132
133                 qmat[qscale][i] = (int)((uint64_t_C(1) << QMAT_SHIFT) /
134                                 (qscale * quant_matrix[j]));
135             }
136         } else if (dsp->fdct == fdct_ifast
137 #ifndef FAAN_POSTSCALE
138                    || dsp->fdct == ff_faandct
139 #endif
140                    ) {
141             for(i=0;i<64;i++) {
142                 const int j= dsp->idct_permutation[i];
143                 /* 16 <= qscale * quant_matrix[i] <= 7905 */
144                 /* 19952         <= aanscales[i] * qscale * quant_matrix[i]           <= 249205026 */
145                 /* (1<<36)/19952 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= (1<<36)/249205026 */
146                 /* 3444240       >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= 275 */
147
148                 qmat[qscale][i] = (int)((uint64_t_C(1) << (QMAT_SHIFT + 14)) /
149                                 (aanscales[i] * qscale * quant_matrix[j]));
150             }
151         } else {
152             for(i=0;i<64;i++) {
153                 const int j= dsp->idct_permutation[i];
154                 /* We can safely suppose that 16 <= quant_matrix[i] <= 255
155                    So 16           <= qscale * quant_matrix[i]             <= 7905
156                    so (1<<19) / 16 >= (1<<19) / (qscale * quant_matrix[i]) >= (1<<19) / 7905
157                    so 32768        >= (1<<19) / (qscale * quant_matrix[i]) >= 67
158                 */
159                 qmat[qscale][i] = (int)((uint64_t_C(1) << QMAT_SHIFT) / (qscale * quant_matrix[j]));
160 //                qmat  [qscale][i] = (1 << QMAT_SHIFT_MMX) / (qscale * quant_matrix[i]);
161                 qmat16[qscale][0][i] = (1 << QMAT_SHIFT_MMX) / (qscale * quant_matrix[j]);
162
163                 if(qmat16[qscale][0][i]==0 || qmat16[qscale][0][i]==128*256) qmat16[qscale][0][i]=128*256-1;
164                 qmat16[qscale][1][i]= ROUNDED_DIV(bias<<(16-QUANT_BIAS_SHIFT), qmat16[qscale][0][i]);
165             }
166         }
167
168         for(i=intra; i<64; i++){
169             int64_t max= 8191;
170             if (dsp->fdct == fdct_ifast
171 #ifndef FAAN_POSTSCALE
172                    || dsp->fdct == ff_faandct
173 #endif
174                    ) {
175                 max= (8191LL*aanscales[i]) >> 14;
176             }
177             while(((max * qmat[qscale][i]) >> shift) > INT_MAX){
178                 shift++;
179             }
180         }
181     }
182     if(shift){
183         av_log(NULL, AV_LOG_INFO, "Warning, QMAT_SHIFT is larger than %d, overflows possible\n", QMAT_SHIFT - shift);
184     }
185 }
186
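/**
 * derives the quantizer from the current lagrange multiplier (lambda), clips it
 * to the [qmin, qmax] range of the codec context and recomputes lambda2.
 */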
187 static inline void update_qscale(MpegEncContext *s){
188     s->qscale= (s->lambda*139 + FF_LAMBDA_SCALE*64) >> (FF_LAMBDA_SHIFT + 7);
189     s->qscale= clip(s->qscale, s->avctx->qmin, s->avctx->qmax);
190
191     s->lambda2= (s->lambda*s->lambda + FF_LAMBDA_SCALE/2) >> FF_LAMBDA_SHIFT;
192 }
193 #endif //CONFIG_ENCODERS
194
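/**
 * initializes a ScanTable from a scan pattern (zigzag, alternate scan, ...):
 * permutated[] is the scan remapped through the IDCT permutation, raster_end[i]
 * is the largest permuted position among the first i+1 scan entries (used to find
 * the block end in raster order) and inverse[] (PowerPC only) maps a coefficient
 * position back to its scan index.
 */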
195 void ff_init_scantable(uint8_t *permutation, ScanTable *st, const uint8_t *src_scantable){
196     int i;
197     int end;
198
199     st->scantable= src_scantable;
200
201     for(i=0; i<64; i++){
202         int j;
203         j = src_scantable[i];
204         st->permutated[i] = permutation[j];
205 #ifdef ARCH_POWERPC
206         st->inverse[j] = i;
207 #endif
208     }
209
210     end=-1;
211     for(i=0; i<64; i++){
212         int j;
213         j = st->permutated[i];
214         if(j>end) end=j;
215         st->raster_end[i]= end;
216     }
217 }
218
219 #ifdef CONFIG_ENCODERS
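/**
 * writes a custom quantization matrix to the bitstream: a '1' marker bit followed
 * by the 64 entries in zigzag order, or a single '0' bit if matrix is NULL.
 */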
220 void ff_write_quant_matrix(PutBitContext *pb, int16_t *matrix){
221     int i;
222
223     if(matrix){
224         put_bits(pb, 1, 1);
225         for(i=0;i<64;i++) {
226             put_bits(pb, 8, matrix[ ff_zigzag_direct[i] ]);
227         }
228     }else
229         put_bits(pb, 1, 0);
230 }
231 #endif //CONFIG_ENCODERS
232
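/**
 * searches [p, end) for an MPEG start code (00 00 01 xx); *state acts as a sliding
 * window over the last four bytes so codes split across buffer boundaries are
 * still found. Returns a pointer just past the start code (with *state holding
 * the complete 32-bit code) or end if no start code was completed.
 */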
233 const uint8_t *ff_find_start_code(const uint8_t * restrict p, const uint8_t *end, uint32_t * restrict state){
234     int i;
235
236     assert(p<=end);
237     if(p>=end)
238         return end;
239
240     for(i=0; i<3; i++){
241         uint32_t tmp= *state << 8;
242         *state= tmp + *(p++);
243         if(tmp == 0x100 || p==end)
244             return p;
245     }
246
247     while(p<end){
248         if     (p[-1] > 1      ) p+= 3;
249         else if(p[-2]          ) p+= 2;
250         else if(p[-3]|(p[-1]-1)) p++;
251         else{
252             p++;
253             break;
254         }
255     }
256
257     p= FFMIN(p, end)-4;
258     *state=  be2me_32(unaligned32(p));
259
260     return p+4;
261 }
262
263 /* init common dct for both encoder and decoder */
264 int DCT_common_init(MpegEncContext *s)
265 {
266     s->dct_unquantize_h263_intra = dct_unquantize_h263_intra_c;
267     s->dct_unquantize_h263_inter = dct_unquantize_h263_inter_c;
268     s->dct_unquantize_mpeg1_intra = dct_unquantize_mpeg1_intra_c;
269     s->dct_unquantize_mpeg1_inter = dct_unquantize_mpeg1_inter_c;
270     s->dct_unquantize_mpeg2_intra = dct_unquantize_mpeg2_intra_c;
271     if(s->flags & CODEC_FLAG_BITEXACT)
272         s->dct_unquantize_mpeg2_intra = dct_unquantize_mpeg2_intra_bitexact;
273     s->dct_unquantize_mpeg2_inter = dct_unquantize_mpeg2_inter_c;
274
275 #ifdef CONFIG_ENCODERS
276     s->dct_quantize= dct_quantize_c;
277     s->denoise_dct= denoise_dct_c;
278 #endif //CONFIG_ENCODERS
279
280 #ifdef HAVE_MMX
281     MPV_common_init_mmx(s);
282 #endif
283 #ifdef ARCH_ALPHA
284     MPV_common_init_axp(s);
285 #endif
286 #ifdef HAVE_MLIB
287     MPV_common_init_mlib(s);
288 #endif
289 #ifdef HAVE_MMI
290     MPV_common_init_mmi(s);
291 #endif
292 #ifdef ARCH_ARMV4L
293     MPV_common_init_armv4l(s);
294 #endif
295 #ifdef ARCH_POWERPC
296     MPV_common_init_ppc(s);
297 #endif
298
299 #ifdef CONFIG_ENCODERS
300     s->fast_dct_quantize= s->dct_quantize;
301
302     if(s->flags&CODEC_FLAG_TRELLIS_QUANT){
303         s->dct_quantize= dct_quantize_trellis_c; //move before MPV_common_init_*
304     }
305
306 #endif //CONFIG_ENCODERS
307
308     /* load & permute scantables
309        note: only wmv uses different ones
310     */
311     if(s->alternate_scan){
312         ff_init_scantable(s->dsp.idct_permutation, &s->inter_scantable  , ff_alternate_vertical_scan);
313         ff_init_scantable(s->dsp.idct_permutation, &s->intra_scantable  , ff_alternate_vertical_scan);
314     }else{
315         ff_init_scantable(s->dsp.idct_permutation, &s->inter_scantable  , ff_zigzag_direct);
316         ff_init_scantable(s->dsp.idct_permutation, &s->intra_scantable  , ff_zigzag_direct);
317     }
318     ff_init_scantable(s->dsp.idct_permutation, &s->intra_h_scantable, ff_alternate_horizontal_scan);
319     ff_init_scantable(s->dsp.idct_permutation, &s->intra_v_scantable, ff_alternate_vertical_scan);
320
321     return 0;
322 }
323
324 static void copy_picture(Picture *dst, Picture *src){
325     *dst = *src;
326     dst->type= FF_BUFFER_TYPE_COPY;
327 }
328
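/**
 * copies the per-frame metadata (picture type, quality, pts, picture numbers,
 * interlacing flags) from src to dst; when a motion estimation pre-pass is in
 * use (me_threshold) the macroblock types, motion vectors and reference indices
 * are copied as well.
 */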
329 static void copy_picture_attributes(MpegEncContext *s, AVFrame *dst, AVFrame *src){
330     int i;
331
332     dst->pict_type              = src->pict_type;
333     dst->quality                = src->quality;
334     dst->coded_picture_number   = src->coded_picture_number;
335     dst->display_picture_number = src->display_picture_number;
336 //    dst->reference              = src->reference;
337     dst->pts                    = src->pts;
338     dst->interlaced_frame       = src->interlaced_frame;
339     dst->top_field_first        = src->top_field_first;
340
341     if(s->avctx->me_threshold){
342         if(!src->motion_val[0])
343             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.motion_val not set!\n");
344         if(!src->mb_type)
345             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.mb_type not set!\n");
346         if(!src->ref_index[0])
347             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.ref_index not set!\n");
348         if(src->motion_subsample_log2 != dst->motion_subsample_log2)
349             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.motion_subsample_log2 doesn't match! (%d!=%d)\n",
350             src->motion_subsample_log2, dst->motion_subsample_log2);
351
352         memcpy(dst->mb_type, src->mb_type, s->mb_stride * s->mb_height * sizeof(dst->mb_type[0]));
353
354         for(i=0; i<2; i++){
355             int stride= ((16*s->mb_width )>>src->motion_subsample_log2) + 1;
356             int height= ((16*s->mb_height)>>src->motion_subsample_log2);
357
358             if(src->motion_val[i] && src->motion_val[i] != dst->motion_val[i]){
359                 memcpy(dst->motion_val[i], src->motion_val[i], 2*stride*height*sizeof(int16_t));
360             }
361             if(src->ref_index[i] && src->ref_index[i] != dst->ref_index[i]){
362                 memcpy(dst->ref_index[i], src->ref_index[i], s->b8_stride*2*s->mb_height*sizeof(int8_t));
363             }
364         }
365     }
366 }
367
368 /**
369  * allocates a Picture
370  * The pixels are allocated/set by calling get_buffer() if shared=0
371  */
372 static int alloc_picture(MpegEncContext *s, Picture *pic, int shared){
373     const int big_mb_num= s->mb_stride*(s->mb_height+1) + 1; //the +1 is needed so memset(,,stride*height) doesn't sig11
374     const int mb_array_size= s->mb_stride*s->mb_height;
375     const int b8_array_size= s->b8_stride*s->mb_height*2;
376     const int b4_array_size= s->b4_stride*s->mb_height*4;
377     int i;
378
379     if(shared){
380         assert(pic->data[0]);
381         assert(pic->type == 0 || pic->type == FF_BUFFER_TYPE_SHARED);
382         pic->type= FF_BUFFER_TYPE_SHARED;
383     }else{
384         int r;
385
386         assert(!pic->data[0]);
387
388         r= s->avctx->get_buffer(s->avctx, (AVFrame*)pic);
389
390         if(r<0 || !pic->age || !pic->type || !pic->data[0]){
391             av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed (%d %d %d %p)\n", r, pic->age, pic->type, pic->data[0]);
392             return -1;
393         }
394
395         if(s->linesize && (s->linesize != pic->linesize[0] || s->uvlinesize != pic->linesize[1])){
396             av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed (stride changed)\n");
397             return -1;
398         }
399
400         if(pic->linesize[1] != pic->linesize[2]){
401             av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed (uv stride mismatch)\n");
402             return -1;
403         }
404
405         s->linesize  = pic->linesize[0];
406         s->uvlinesize= pic->linesize[1];
407     }
408
409     if(pic->qscale_table==NULL){
410         if (s->encoding) {
411             CHECKED_ALLOCZ(pic->mb_var   , mb_array_size * sizeof(int16_t))
412             CHECKED_ALLOCZ(pic->mc_mb_var, mb_array_size * sizeof(int16_t))
413             CHECKED_ALLOCZ(pic->mb_mean  , mb_array_size * sizeof(int8_t))
414         }
415
416         CHECKED_ALLOCZ(pic->mbskip_table , mb_array_size * sizeof(uint8_t)+2) //the +2 is for the slice end check
417         CHECKED_ALLOCZ(pic->qscale_table , mb_array_size * sizeof(uint8_t))
418         CHECKED_ALLOCZ(pic->mb_type_base , big_mb_num    * sizeof(uint32_t))
419         pic->mb_type= pic->mb_type_base + s->mb_stride+1;
420         if(s->out_format == FMT_H264){
421             for(i=0; i<2; i++){
422                 CHECKED_ALLOCZ(pic->motion_val_base[i], 2 * (b4_array_size+4)  * sizeof(int16_t))
423                 pic->motion_val[i]= pic->motion_val_base[i]+4;
424                 CHECKED_ALLOCZ(pic->ref_index[i], b8_array_size * sizeof(uint8_t))
425             }
426             pic->motion_subsample_log2= 2;
427         }else if(s->out_format == FMT_H263 || s->encoding || (s->avctx->debug&FF_DEBUG_MV) || (s->avctx->debug_mv)){
428             for(i=0; i<2; i++){
429                 CHECKED_ALLOCZ(pic->motion_val_base[i], 2 * (b8_array_size+4) * sizeof(int16_t))
430                 pic->motion_val[i]= pic->motion_val_base[i]+4;
431                 CHECKED_ALLOCZ(pic->ref_index[i], b8_array_size * sizeof(uint8_t))
432             }
433             pic->motion_subsample_log2= 3;
434         }
435         if(s->avctx->debug&FF_DEBUG_DCT_COEFF) {
436             CHECKED_ALLOCZ(pic->dct_coeff, 64 * mb_array_size * sizeof(DCTELEM)*6)
437         }
438         pic->qstride= s->mb_stride;
439         CHECKED_ALLOCZ(pic->pan_scan , 1 * sizeof(AVPanScan))
440     }
441
442     //it might be nicer if the application would keep track of these but it would require an API change
443     memmove(s->prev_pict_types+1, s->prev_pict_types, PREV_PICT_TYPES_BUFFER_SIZE-1);
444     s->prev_pict_types[0]= s->pict_type;
445     if(pic->age < PREV_PICT_TYPES_BUFFER_SIZE && s->prev_pict_types[pic->age] == B_TYPE)
446         pic->age= INT_MAX; // skipped MBs in b frames are quite rare in mpeg1/2 and it's a bit tricky to skip them anyway
447
448     return 0;
449 fail: //for the CHECKED_ALLOCZ macro
450     return -1;
451 }
452
453 /**
454  * deallocates a picture
455  */
456 static void free_picture(MpegEncContext *s, Picture *pic){
457     int i;
458
459     if(pic->data[0] && pic->type!=FF_BUFFER_TYPE_SHARED){
460         s->avctx->release_buffer(s->avctx, (AVFrame*)pic);
461     }
462
463     av_freep(&pic->mb_var);
464     av_freep(&pic->mc_mb_var);
465     av_freep(&pic->mb_mean);
466     av_freep(&pic->mbskip_table);
467     av_freep(&pic->qscale_table);
468     av_freep(&pic->mb_type_base);
469     av_freep(&pic->dct_coeff);
470     av_freep(&pic->pan_scan);
471     pic->mb_type= NULL;
472     for(i=0; i<2; i++){
473         av_freep(&pic->motion_val_base[i]);
474         av_freep(&pic->ref_index[i]);
475     }
476
477     if(pic->type == FF_BUFFER_TYPE_SHARED){
478         for(i=0; i<4; i++){
479             pic->base[i]=
480             pic->data[i]= NULL;
481         }
482         pic->type= 0;
483     }
484 }
485
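/**
 * allocates the per-thread scratch buffers of a context: the edge emulation
 * buffer, the ME/RD/OBMC scratchpads, the motion estimation maps (encoder only)
 * and the DCT block array.
 */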
486 static int init_duplicate_context(MpegEncContext *s, MpegEncContext *base){
487     int i;
488
489     // edge emu needs blocksize + filter length - 1 (=17x17 for halfpel / 21x21 for h264)
490     CHECKED_ALLOCZ(s->allocated_edge_emu_buffer, (s->width+64)*2*21*2); //(width + edge + align)*interlaced*MBsize*tolerance
491     s->edge_emu_buffer= s->allocated_edge_emu_buffer + (s->width+64)*2*21;
492
493      //FIXME should be linesize instead of s->width*2 but that isn't known before get_buffer()
494     CHECKED_ALLOCZ(s->me.scratchpad,  (s->width+64)*4*16*2*sizeof(uint8_t))
495     s->rd_scratchpad=   s->me.scratchpad;
496     s->b_scratchpad=    s->me.scratchpad;
497     s->obmc_scratchpad= s->me.scratchpad + 16;
498     if (s->encoding) {
499         CHECKED_ALLOCZ(s->me.map      , ME_MAP_SIZE*sizeof(uint32_t))
500         CHECKED_ALLOCZ(s->me.score_map, ME_MAP_SIZE*sizeof(uint32_t))
501         if(s->avctx->noise_reduction){
502             CHECKED_ALLOCZ(s->dct_error_sum, 2 * 64 * sizeof(int))
503         }
504     }
505     CHECKED_ALLOCZ(s->blocks, 64*12*2 * sizeof(DCTELEM))
506     s->block= s->blocks[0];
507
508     for(i=0;i<12;i++){
509         s->pblocks[i] = (short *)(&s->block[i]);
510     }
511     return 0;
512 fail:
513     return -1; //free() through MPV_common_end()
514 }
515
516 static void free_duplicate_context(MpegEncContext *s){
517     if(s==NULL) return;
518
519     av_freep(&s->allocated_edge_emu_buffer); s->edge_emu_buffer= NULL;
520     av_freep(&s->me.scratchpad);
521     s->rd_scratchpad=
522     s->b_scratchpad=
523     s->obmc_scratchpad= NULL;
524
525     av_freep(&s->dct_error_sum);
526     av_freep(&s->me.map);
527     av_freep(&s->me.score_map);
528     av_freep(&s->blocks);
529     s->block= NULL;
530 }
531
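/**
 * saves the thread-private pointers and state of src into bak so that
 * ff_update_duplicate_context() can copy the shared context over a thread
 * context without clobbering that thread's own buffers.
 */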
532 static void backup_duplicate_context(MpegEncContext *bak, MpegEncContext *src){
533 #define COPY(a) bak->a= src->a
534     COPY(allocated_edge_emu_buffer);
535     COPY(edge_emu_buffer);
536     COPY(me.scratchpad);
537     COPY(rd_scratchpad);
538     COPY(b_scratchpad);
539     COPY(obmc_scratchpad);
540     COPY(me.map);
541     COPY(me.score_map);
542     COPY(blocks);
543     COPY(block);
544     COPY(start_mb_y);
545     COPY(end_mb_y);
546     COPY(me.map_generation);
547     COPY(pb);
548     COPY(dct_error_sum);
549     COPY(dct_count[0]);
550     COPY(dct_count[1]);
551 #undef COPY
552 }
553
554 void ff_update_duplicate_context(MpegEncContext *dst, MpegEncContext *src){
555     MpegEncContext bak;
556     int i;
557     //FIXME copy only needed parts
558 //START_TIMER
559     backup_duplicate_context(&bak, dst);
560     memcpy(dst, src, sizeof(MpegEncContext));
561     backup_duplicate_context(dst, &bak);
562     for(i=0;i<12;i++){
563         dst->pblocks[i] = (short *)(&dst->block[i]);
564     }
565 //STOP_TIMER("update_duplicate_context") //about 10k cycles / 0.01 sec for 1000frames on 1ghz with 2 threads
566 }
567
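/**
 * propagates the per-picture decisions made after motion estimation and header
 * encoding (picture type, f/b codes, qscale, lambda, GOP position, frame flags)
 * from the main context to a slice thread context.
 */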
568 static void update_duplicate_context_after_me(MpegEncContext *dst, MpegEncContext *src){
569 #define COPY(a) dst->a= src->a
570     COPY(pict_type);
571     COPY(current_picture);
572     COPY(f_code);
573     COPY(b_code);
574     COPY(qscale);
575     COPY(lambda);
576     COPY(lambda2);
577     COPY(picture_in_gop_number);
578     COPY(gop_picture_number);
579     COPY(frame_pred_frame_dct); //FIXME don't set in encode_header
580     COPY(progressive_frame); //FIXME don't set in encode_header
581     COPY(partitioned_frame); //FIXME don't set in encode_header
582 #undef COPY
583 }
584
585 /**
586  * sets the given MpegEncContext to common defaults (same for encoding and decoding).
587  * the changed fields will not depend upon the prior state of the MpegEncContext.
588  */
589 static void MPV_common_defaults(MpegEncContext *s){
590     s->y_dc_scale_table=
591     s->c_dc_scale_table= ff_mpeg1_dc_scale_table;
592     s->chroma_qscale_table= ff_default_chroma_qscale_table;
593     s->progressive_frame= 1;
594     s->progressive_sequence= 1;
595     s->picture_structure= PICT_FRAME;
596
597     s->coded_picture_number = 0;
598     s->picture_number = 0;
599     s->input_picture_number = 0;
600
601     s->picture_in_gop_number = 0;
602
603     s->f_code = 1;
604     s->b_code = 1;
605 }
606
607 /**
608  * sets the given MpegEncContext to defaults for decoding.
609  * the changed fields will not depend upon the prior state of the MpegEncContext.
610  */
611 void MPV_decode_defaults(MpegEncContext *s){
612     MPV_common_defaults(s);
613 }
614
615 /**
616  * sets the given MpegEncContext to defaults for encoding.
617  * the changed fields will not depend upon the prior state of the MpegEncContext.
618  */
619
620 #ifdef CONFIG_ENCODERS
621 static void MPV_encode_defaults(MpegEncContext *s){
622     static int done=0;
623
624     MPV_common_defaults(s);
625
626     if(!done){
627         int i;
628         done=1;
629
630         default_mv_penalty= av_mallocz( sizeof(uint8_t)*(MAX_FCODE+1)*(2*MAX_MV+1) );
631         memset(default_fcode_tab , 0, sizeof(uint8_t)*(2*MAX_MV+1));
632
633         for(i=-16; i<16; i++){
634             default_fcode_tab[i + MAX_MV]= 1;
635         }
636     }
637     s->me.mv_penalty= default_mv_penalty;
638     s->fcode_tab= default_fcode_tab;
639 }
640 #endif //CONFIG_ENCODERS
641
642 /**
643  * init common structure for both encoder and decoder.
644  * this assumes that some variables like width/height are already set
645  */
646 int MPV_common_init(MpegEncContext *s)
647 {
648     int y_size, c_size, yc_size, i, mb_array_size, mv_table_size, x, y;
649
650     s->mb_height = (s->height + 15) / 16;
651
652     if(s->avctx->thread_count > MAX_THREADS || (s->avctx->thread_count > s->mb_height && s->mb_height)){
653         av_log(s->avctx, AV_LOG_ERROR, "too many threads\n");
654         return -1;
655     }
656
657     if((s->width || s->height) && avcodec_check_dimensions(s->avctx, s->width, s->height))
658         return -1;
659
660     dsputil_init(&s->dsp, s->avctx);
661     DCT_common_init(s);
662
663     s->flags= s->avctx->flags;
664     s->flags2= s->avctx->flags2;
665
666     s->mb_width  = (s->width  + 15) / 16;
667     s->mb_stride = s->mb_width + 1;
668     s->b8_stride = s->mb_width*2 + 1;
669     s->b4_stride = s->mb_width*4 + 1;
670     mb_array_size= s->mb_height * s->mb_stride;
671     mv_table_size= (s->mb_height+2) * s->mb_stride + 1;
672
673     /* set chroma shifts */
674     avcodec_get_chroma_sub_sample(s->avctx->pix_fmt,&(s->chroma_x_shift),
675                                                     &(s->chroma_y_shift) );
676
677     /* set default edge pos, will be overridden in decode_header if needed */
678     s->h_edge_pos= s->mb_width*16;
679     s->v_edge_pos= s->mb_height*16;
680
681     s->mb_num = s->mb_width * s->mb_height;
682
683     s->block_wrap[0]=
684     s->block_wrap[1]=
685     s->block_wrap[2]=
686     s->block_wrap[3]= s->b8_stride;
687     s->block_wrap[4]=
688     s->block_wrap[5]= s->mb_stride;
689
690     y_size = s->b8_stride * (2 * s->mb_height + 1);
691     c_size = s->mb_stride * (s->mb_height + 1);
692     yc_size = y_size + 2 * c_size;
693
694     /* convert fourcc to upper case */
695     s->avctx->codec_tag=   toupper( s->avctx->codec_tag     &0xFF)
696                         + (toupper((s->avctx->codec_tag>>8 )&0xFF)<<8 )
697                         + (toupper((s->avctx->codec_tag>>16)&0xFF)<<16)
698                         + (toupper((s->avctx->codec_tag>>24)&0xFF)<<24);
699
700     s->avctx->stream_codec_tag=   toupper( s->avctx->stream_codec_tag     &0xFF)
701                                + (toupper((s->avctx->stream_codec_tag>>8 )&0xFF)<<8 )
702                                + (toupper((s->avctx->stream_codec_tag>>16)&0xFF)<<16)
703                                + (toupper((s->avctx->stream_codec_tag>>24)&0xFF)<<24);
704
705     s->avctx->coded_frame= (AVFrame*)&s->current_picture;
706
707     CHECKED_ALLOCZ(s->mb_index2xy, (s->mb_num+1)*sizeof(int)) //error resilience code looks cleaner with this
708     for(y=0; y<s->mb_height; y++){
709         for(x=0; x<s->mb_width; x++){
710             s->mb_index2xy[ x + y*s->mb_width ] = x + y*s->mb_stride;
711         }
712     }
713     s->mb_index2xy[ s->mb_height*s->mb_width ] = (s->mb_height-1)*s->mb_stride + s->mb_width; //FIXME really needed?
714
715     if (s->encoding) {
716         /* Allocate MV tables */
717         CHECKED_ALLOCZ(s->p_mv_table_base            , mv_table_size * 2 * sizeof(int16_t))
718         CHECKED_ALLOCZ(s->b_forw_mv_table_base       , mv_table_size * 2 * sizeof(int16_t))
719         CHECKED_ALLOCZ(s->b_back_mv_table_base       , mv_table_size * 2 * sizeof(int16_t))
720         CHECKED_ALLOCZ(s->b_bidir_forw_mv_table_base , mv_table_size * 2 * sizeof(int16_t))
721         CHECKED_ALLOCZ(s->b_bidir_back_mv_table_base , mv_table_size * 2 * sizeof(int16_t))
722         CHECKED_ALLOCZ(s->b_direct_mv_table_base     , mv_table_size * 2 * sizeof(int16_t))
723         s->p_mv_table           = s->p_mv_table_base            + s->mb_stride + 1;
724         s->b_forw_mv_table      = s->b_forw_mv_table_base       + s->mb_stride + 1;
725         s->b_back_mv_table      = s->b_back_mv_table_base       + s->mb_stride + 1;
726         s->b_bidir_forw_mv_table= s->b_bidir_forw_mv_table_base + s->mb_stride + 1;
727         s->b_bidir_back_mv_table= s->b_bidir_back_mv_table_base + s->mb_stride + 1;
728         s->b_direct_mv_table    = s->b_direct_mv_table_base     + s->mb_stride + 1;
729
730         if(s->msmpeg4_version){
731             CHECKED_ALLOCZ(s->ac_stats, 2*2*(MAX_LEVEL+1)*(MAX_RUN+1)*2*sizeof(int));
732         }
733         CHECKED_ALLOCZ(s->avctx->stats_out, 256);
734
735         /* Allocate MB type table */
736         CHECKED_ALLOCZ(s->mb_type  , mb_array_size * sizeof(uint16_t)) //needed for encoding
737
738         CHECKED_ALLOCZ(s->lambda_table, mb_array_size * sizeof(int))
739
740         CHECKED_ALLOCZ(s->q_intra_matrix, 64*32 * sizeof(int))
741         CHECKED_ALLOCZ(s->q_inter_matrix, 64*32 * sizeof(int))
742         CHECKED_ALLOCZ(s->q_intra_matrix16, 64*32*2 * sizeof(uint16_t))
743         CHECKED_ALLOCZ(s->q_inter_matrix16, 64*32*2 * sizeof(uint16_t))
744         CHECKED_ALLOCZ(s->input_picture, MAX_PICTURE_COUNT * sizeof(Picture*))
745         CHECKED_ALLOCZ(s->reordered_input_picture, MAX_PICTURE_COUNT * sizeof(Picture*))
746
747         if(s->avctx->noise_reduction){
748             CHECKED_ALLOCZ(s->dct_offset, 2 * 64 * sizeof(uint16_t))
749         }
750     }
751     CHECKED_ALLOCZ(s->picture, MAX_PICTURE_COUNT * sizeof(Picture))
752
753     CHECKED_ALLOCZ(s->error_status_table, mb_array_size*sizeof(uint8_t))
754
755     if(s->codec_id==CODEC_ID_MPEG4 || (s->flags & CODEC_FLAG_INTERLACED_ME)){
756         /* interlaced direct mode decoding tables */
757             for(i=0; i<2; i++){
758                 int j, k;
759                 for(j=0; j<2; j++){
760                     for(k=0; k<2; k++){
761                         CHECKED_ALLOCZ(s->b_field_mv_table_base[i][j][k]     , mv_table_size * 2 * sizeof(int16_t))
762                         s->b_field_mv_table[i][j][k]    = s->b_field_mv_table_base[i][j][k]     + s->mb_stride + 1;
763                     }
764                     CHECKED_ALLOCZ(s->b_field_select_table[i][j]     , mb_array_size * 2 * sizeof(uint8_t))
765                     CHECKED_ALLOCZ(s->p_field_mv_table_base[i][j]     , mv_table_size * 2 * sizeof(int16_t))
766                     s->p_field_mv_table[i][j]    = s->p_field_mv_table_base[i][j]     + s->mb_stride + 1;
767                 }
768                 CHECKED_ALLOCZ(s->p_field_select_table[i]      , mb_array_size * 2 * sizeof(uint8_t))
769             }
770     }
771     if (s->out_format == FMT_H263) {
772         /* ac values */
773         CHECKED_ALLOCZ(s->ac_val_base, yc_size * sizeof(int16_t) * 16);
774         s->ac_val[0] = s->ac_val_base + s->b8_stride + 1;
775         s->ac_val[1] = s->ac_val_base + y_size + s->mb_stride + 1;
776         s->ac_val[2] = s->ac_val[1] + c_size;
777
778         /* cbp values */
779         CHECKED_ALLOCZ(s->coded_block_base, y_size);
780         s->coded_block= s->coded_block_base + s->b8_stride + 1;
781
782         /* cbp, ac_pred, pred_dir */
783         CHECKED_ALLOCZ(s->cbp_table  , mb_array_size * sizeof(uint8_t))
784         CHECKED_ALLOCZ(s->pred_dir_table, mb_array_size * sizeof(uint8_t))
785     }
786
787     if (s->h263_pred || s->h263_plus || !s->encoding) {
788         /* dc values */
789         //MN: we need these for error resilience of intra-frames
790         CHECKED_ALLOCZ(s->dc_val_base, yc_size * sizeof(int16_t));
791         s->dc_val[0] = s->dc_val_base + s->b8_stride + 1;
792         s->dc_val[1] = s->dc_val_base + y_size + s->mb_stride + 1;
793         s->dc_val[2] = s->dc_val[1] + c_size;
794         for(i=0;i<yc_size;i++)
795             s->dc_val_base[i] = 1024;
796     }
797
798     /* which mb is an intra block */
799     CHECKED_ALLOCZ(s->mbintra_table, mb_array_size);
800     memset(s->mbintra_table, 1, mb_array_size);
801
802     /* init macroblock skip table */
803     CHECKED_ALLOCZ(s->mbskip_table, mb_array_size+2);
804     //Note: the +2 is for a quicker mpeg4 slice_end detection
805     CHECKED_ALLOCZ(s->prev_pict_types, PREV_PICT_TYPES_BUFFER_SIZE);
806
807     s->parse_context.state= -1;
808     if((s->avctx->debug&(FF_DEBUG_VIS_QP|FF_DEBUG_VIS_MB_TYPE)) || (s->avctx->debug_mv)){
809        s->visualization_buffer[0] = av_malloc((s->mb_width*16 + 2*EDGE_WIDTH) * s->mb_height*16 + 2*EDGE_WIDTH);
810        s->visualization_buffer[1] = av_malloc((s->mb_width*8 + EDGE_WIDTH) * s->mb_height*8 + EDGE_WIDTH);
811        s->visualization_buffer[2] = av_malloc((s->mb_width*8 + EDGE_WIDTH) * s->mb_height*8 + EDGE_WIDTH);
812     }
813
814     s->context_initialized = 1;
815
816     s->thread_context[0]= s;
817     for(i=1; i<s->avctx->thread_count; i++){
818         s->thread_context[i]= av_malloc(sizeof(MpegEncContext));
819         memcpy(s->thread_context[i], s, sizeof(MpegEncContext));
820     }
821
822     for(i=0; i<s->avctx->thread_count; i++){
823         if(init_duplicate_context(s->thread_context[i], s) < 0)
824            goto fail;
825         s->thread_context[i]->start_mb_y= (s->mb_height*(i  ) + s->avctx->thread_count/2) / s->avctx->thread_count;
826         s->thread_context[i]->end_mb_y  = (s->mb_height*(i+1) + s->avctx->thread_count/2) / s->avctx->thread_count;
827     }
828
829     return 0;
830  fail:
831     MPV_common_end(s);
832     return -1;
833 }
834
835 /* init common structure for both encoder and decoder */
836 void MPV_common_end(MpegEncContext *s)
837 {
838     int i, j, k;
839
840     for(i=0; i<s->avctx->thread_count; i++){
841         free_duplicate_context(s->thread_context[i]);
842     }
843     for(i=1; i<s->avctx->thread_count; i++){
844         av_freep(&s->thread_context[i]);
845     }
846
847     av_freep(&s->parse_context.buffer);
848     s->parse_context.buffer_size=0;
849
850     av_freep(&s->mb_type);
851     av_freep(&s->p_mv_table_base);
852     av_freep(&s->b_forw_mv_table_base);
853     av_freep(&s->b_back_mv_table_base);
854     av_freep(&s->b_bidir_forw_mv_table_base);
855     av_freep(&s->b_bidir_back_mv_table_base);
856     av_freep(&s->b_direct_mv_table_base);
857     s->p_mv_table= NULL;
858     s->b_forw_mv_table= NULL;
859     s->b_back_mv_table= NULL;
860     s->b_bidir_forw_mv_table= NULL;
861     s->b_bidir_back_mv_table= NULL;
862     s->b_direct_mv_table= NULL;
863     for(i=0; i<2; i++){
864         for(j=0; j<2; j++){
865             for(k=0; k<2; k++){
866                 av_freep(&s->b_field_mv_table_base[i][j][k]);
867                 s->b_field_mv_table[i][j][k]=NULL;
868             }
869             av_freep(&s->b_field_select_table[i][j]);
870             av_freep(&s->p_field_mv_table_base[i][j]);
871             s->p_field_mv_table[i][j]=NULL;
872         }
873         av_freep(&s->p_field_select_table[i]);
874     }
875
876     av_freep(&s->dc_val_base);
877     av_freep(&s->ac_val_base);
878     av_freep(&s->coded_block_base);
879     av_freep(&s->mbintra_table);
880     av_freep(&s->cbp_table);
881     av_freep(&s->pred_dir_table);
882
883     av_freep(&s->mbskip_table);
884     av_freep(&s->prev_pict_types);
885     av_freep(&s->bitstream_buffer);
886     s->allocated_bitstream_buffer_size=0;
887
888     av_freep(&s->avctx->stats_out);
889     av_freep(&s->ac_stats);
890     av_freep(&s->error_status_table);
891     av_freep(&s->mb_index2xy);
892     av_freep(&s->lambda_table);
893     av_freep(&s->q_intra_matrix);
894     av_freep(&s->q_inter_matrix);
895     av_freep(&s->q_intra_matrix16);
896     av_freep(&s->q_inter_matrix16);
897     av_freep(&s->input_picture);
898     av_freep(&s->reordered_input_picture);
899     av_freep(&s->dct_offset);
900
901     if(s->picture){
902         for(i=0; i<MAX_PICTURE_COUNT; i++){
903             free_picture(s, &s->picture[i]);
904         }
905     }
906     av_freep(&s->picture);
907     s->context_initialized = 0;
908     s->last_picture_ptr=
909     s->next_picture_ptr=
910     s->current_picture_ptr= NULL;
911     s->linesize= s->uvlinesize= 0;
912
913     for(i=0; i<3; i++)
914         av_freep(&s->visualization_buffer[i]);
915
916     avcodec_default_free_buffers(s->avctx);
917 }
918
919 #ifdef CONFIG_ENCODERS
920
921 /* init video encoder */
922 int MPV_encode_init(AVCodecContext *avctx)
923 {
924     MpegEncContext *s = avctx->priv_data;
925     int i;
926     int chroma_h_shift, chroma_v_shift;
927
928     MPV_encode_defaults(s);
929
930     switch (avctx->codec_id) {
931     case CODEC_ID_MPEG2VIDEO:
932         if(avctx->pix_fmt != PIX_FMT_YUV420P && avctx->pix_fmt != PIX_FMT_YUV422P){
933             av_log(avctx, AV_LOG_ERROR, "only YUV420 and YUV422 are supported\n");
934             return -1;
935         }
936         break;
937     case CODEC_ID_LJPEG:
938     case CODEC_ID_MJPEG:
939         if(avctx->pix_fmt != PIX_FMT_YUVJ420P && (avctx->pix_fmt != PIX_FMT_YUV420P || avctx->strict_std_compliance>FF_COMPLIANCE_INOFFICIAL)){
940             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in jpeg\n");
941             return -1;
942         }
943         break;
944     default:
945         if(avctx->pix_fmt != PIX_FMT_YUV420P){
946             av_log(avctx, AV_LOG_ERROR, "only YUV420 is supported\n");
947             return -1;
948         }
949     }
950
951     switch (avctx->pix_fmt) {
952     case PIX_FMT_YUVJ422P:
953     case PIX_FMT_YUV422P:
954         s->chroma_format = CHROMA_422;
955         break;
956     case PIX_FMT_YUVJ420P:
957     case PIX_FMT_YUV420P:
958     default:
959         s->chroma_format = CHROMA_420;
960         break;
961     }
962
963     s->bit_rate = avctx->bit_rate;
964     s->width = avctx->width;
965     s->height = avctx->height;
966     if(avctx->gop_size > 600 && avctx->strict_std_compliance>FF_COMPLIANCE_EXPERIMENTAL){
967         av_log(avctx, AV_LOG_ERROR, "Warning, keyframe interval too large! Reducing it ...\n");
968         avctx->gop_size=600;
969     }
970     s->gop_size = avctx->gop_size;
971     s->avctx = avctx;
972     s->flags= avctx->flags;
973     s->flags2= avctx->flags2;
974     s->max_b_frames= avctx->max_b_frames;
975     s->codec_id= avctx->codec->id;
976     s->luma_elim_threshold  = avctx->luma_elim_threshold;
977     s->chroma_elim_threshold= avctx->chroma_elim_threshold;
978     s->strict_std_compliance= avctx->strict_std_compliance;
979     s->data_partitioning= avctx->flags & CODEC_FLAG_PART;
980     s->quarter_sample= (avctx->flags & CODEC_FLAG_QPEL)!=0;
981     s->mpeg_quant= avctx->mpeg_quant;
982     s->rtp_mode= !!avctx->rtp_payload_size;
983     s->intra_dc_precision= avctx->intra_dc_precision;
984     s->user_specified_pts = AV_NOPTS_VALUE;
985
986     if (s->gop_size <= 1) {
987         s->intra_only = 1;
988         s->gop_size = 12;
989     } else {
990         s->intra_only = 0;
991     }
992
993     s->me_method = avctx->me_method;
994
995     /* Fixed QSCALE */
996     s->fixed_qscale = !!(avctx->flags & CODEC_FLAG_QSCALE);
997
998     s->adaptive_quant= (   s->avctx->lumi_masking
999                         || s->avctx->dark_masking
1000                         || s->avctx->temporal_cplx_masking
1001                         || s->avctx->spatial_cplx_masking
1002                         || s->avctx->p_masking
1003                         || s->avctx->border_masking
1004                         || (s->flags&CODEC_FLAG_QP_RD))
1005                        && !s->fixed_qscale;
1006
1007     s->obmc= !!(s->flags & CODEC_FLAG_OBMC);
1008     s->loop_filter= !!(s->flags & CODEC_FLAG_LOOP_FILTER);
1009     s->alternate_scan= !!(s->flags & CODEC_FLAG_ALT_SCAN);
1010     s->intra_vlc_format= !!(s->flags2 & CODEC_FLAG2_INTRA_VLC);
1011
1012     if(avctx->rc_max_rate && !avctx->rc_buffer_size){
1013         av_log(avctx, AV_LOG_ERROR, "a vbv buffer size is needed for encoding with a maximum bitrate\n");
1014         return -1;
1015     }
1016
1017     if(avctx->rc_min_rate && avctx->rc_max_rate != avctx->rc_min_rate){
1018         av_log(avctx, AV_LOG_INFO, "Warning min_rate > 0 but min_rate != max_rate isn't recommended!\n");
1019     }
1020
1021     if(avctx->rc_min_rate && avctx->rc_min_rate > avctx->bit_rate){
1022         av_log(avctx, AV_LOG_INFO, "bitrate below min bitrate\n");
1023         return -1;
1024     }
1025
1026     if(avctx->rc_max_rate && avctx->rc_max_rate < avctx->bit_rate){
1027         av_log(avctx, AV_LOG_INFO, "bitrate above max bitrate\n");
1028         return -1;
1029     }
1030
1031     if(   s->avctx->rc_max_rate && s->avctx->rc_min_rate == s->avctx->rc_max_rate
1032        && (s->codec_id == CODEC_ID_MPEG1VIDEO || s->codec_id == CODEC_ID_MPEG2VIDEO)
1033        && 90000LL * (avctx->rc_buffer_size-1) > s->avctx->rc_max_rate*0xFFFFLL){
1034
1035         av_log(avctx, AV_LOG_INFO, "Warning vbv_delay will be set to 0xFFFF (=VBR) as the specified vbv buffer is too large for the given bitrate!\n");
1036     }
1037
1038     if((s->flags & CODEC_FLAG_4MV) && s->codec_id != CODEC_ID_MPEG4
1039        && s->codec_id != CODEC_ID_H263 && s->codec_id != CODEC_ID_H263P && s->codec_id != CODEC_ID_FLV1){
1040         av_log(avctx, AV_LOG_ERROR, "4MV not supported by codec\n");
1041         return -1;
1042     }
1043
1044     if(s->obmc && s->avctx->mb_decision != FF_MB_DECISION_SIMPLE){
1045         av_log(avctx, AV_LOG_ERROR, "OBMC is only supported with simple mb decision\n");
1046         return -1;
1047     }
1048
1049     if(s->obmc && s->codec_id != CODEC_ID_H263 && s->codec_id != CODEC_ID_H263P){
1050         av_log(avctx, AV_LOG_ERROR, "OBMC is only supported with H263(+)\n");
1051         return -1;
1052     }
1053
1054     if(s->quarter_sample && s->codec_id != CODEC_ID_MPEG4){
1055         av_log(avctx, AV_LOG_ERROR, "qpel not supported by codec\n");
1056         return -1;
1057     }
1058
1059     if(s->data_partitioning && s->codec_id != CODEC_ID_MPEG4){
1060         av_log(avctx, AV_LOG_ERROR, "data partitioning not supported by codec\n");
1061         return -1;
1062     }
1063
1064     if(s->max_b_frames && s->codec_id != CODEC_ID_MPEG4 && s->codec_id != CODEC_ID_MPEG1VIDEO && s->codec_id != CODEC_ID_MPEG2VIDEO){
1065         av_log(avctx, AV_LOG_ERROR, "b frames not supported by codec\n");
1066         return -1;
1067     }
1068
1069     if((s->flags & (CODEC_FLAG_INTERLACED_DCT|CODEC_FLAG_INTERLACED_ME|CODEC_FLAG_ALT_SCAN))
1070        && s->codec_id != CODEC_ID_MPEG4 && s->codec_id != CODEC_ID_MPEG2VIDEO){
1071         av_log(avctx, AV_LOG_ERROR, "interlacing not supported by codec\n");
1072         return -1;
1073     }
1074
1075     if(s->mpeg_quant && s->codec_id != CODEC_ID_MPEG4){ //FIXME mpeg2 uses that too
1076         av_log(avctx, AV_LOG_ERROR, "mpeg2 style quantization not supported by codec\n");
1077         return -1;
1078     }
1079
1080     if((s->flags & CODEC_FLAG_CBP_RD) && !(s->flags & CODEC_FLAG_TRELLIS_QUANT)){
1081         av_log(avctx, AV_LOG_ERROR, "CBP RD needs trellis quant\n");
1082         return -1;
1083     }
1084
1085     if((s->flags & CODEC_FLAG_QP_RD) && s->avctx->mb_decision != FF_MB_DECISION_RD){
1086         av_log(avctx, AV_LOG_ERROR, "QP RD needs mbd=2\n");
1087         return -1;
1088     }
1089
1090     if(s->avctx->scenechange_threshold < 1000000000 && (s->flags & CODEC_FLAG_CLOSED_GOP)){
1091         av_log(avctx, AV_LOG_ERROR, "closed gop with scene change detection isn't supported yet\n");
1092         return -1;
1093     }
1094
1095     if((s->flags2 & CODEC_FLAG2_INTRA_VLC) && s->codec_id != CODEC_ID_MPEG2VIDEO){
1096         av_log(avctx, AV_LOG_ERROR, "intra vlc table not supported by codec\n");
1097         return -1;
1098     }
1099
1100     if(s->avctx->thread_count > 1 && s->codec_id != CODEC_ID_MPEG4
1101        && s->codec_id != CODEC_ID_MPEG1VIDEO && s->codec_id != CODEC_ID_MPEG2VIDEO
1102        && (s->codec_id != CODEC_ID_H263P || !(s->flags & CODEC_FLAG_H263P_SLICE_STRUCT))){
1103         av_log(avctx, AV_LOG_ERROR, "multi threaded encoding not supported by codec\n");
1104         return -1;
1105     }
1106
1107     if(s->avctx->thread_count > 1)
1108         s->rtp_mode= 1;
1109
1110     if(!avctx->time_base.den || !avctx->time_base.num){
1111         av_log(avctx, AV_LOG_ERROR, "framerate not set\n");
1112         return -1;
1113     }
1114
1115     i= (INT_MAX/2+128)>>8;
1116     if(avctx->me_threshold >= i){
1117         av_log(avctx, AV_LOG_ERROR, "me_threshold too large, max is %d\n", i - 1);
1118         return -1;
1119     }
1120     if(avctx->mb_threshold >= i){
1121         av_log(avctx, AV_LOG_ERROR, "mb_threshold too large, max is %d\n", i - 1);
1122         return -1;
1123     }
1124
1125     if(avctx->b_frame_strategy && (avctx->flags&CODEC_FLAG_PASS2)){
1126         av_log(avctx, AV_LOG_INFO, "notice: b_frame_strategy only affects the first pass\n");
1127         avctx->b_frame_strategy = 0;
1128     }
1129
1130     i= ff_gcd(avctx->time_base.den, avctx->time_base.num);
1131     if(i > 1){
1132         av_log(avctx, AV_LOG_INFO, "removing common factors from framerate\n");
1133         avctx->time_base.den /= i;
1134         avctx->time_base.num /= i;
1135 //        return -1;
1136     }
1137
1138     if(s->codec_id==CODEC_ID_MJPEG){
1139         s->intra_quant_bias= 1<<(QUANT_BIAS_SHIFT-1); //(a + x/2)/x
1140         s->inter_quant_bias= 0;
1141     }else if(s->mpeg_quant || s->codec_id==CODEC_ID_MPEG1VIDEO || s->codec_id==CODEC_ID_MPEG2VIDEO){
1142         s->intra_quant_bias= 3<<(QUANT_BIAS_SHIFT-3); //(a + x*3/8)/x
1143         s->inter_quant_bias= 0;
1144     }else{
1145         s->intra_quant_bias=0;
1146         s->inter_quant_bias=-(1<<(QUANT_BIAS_SHIFT-2)); //(a - x/4)/x
1147     }
1148
1149     if(avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
1150         s->intra_quant_bias= avctx->intra_quant_bias;
1151     if(avctx->inter_quant_bias != FF_DEFAULT_QUANT_BIAS)
1152         s->inter_quant_bias= avctx->inter_quant_bias;
1153
1154     avcodec_get_chroma_sub_sample(avctx->pix_fmt, &chroma_h_shift, &chroma_v_shift);
1155
1156     if(avctx->codec_id == CODEC_ID_MPEG4 && s->avctx->time_base.den > (1<<16)-1){
1157         av_log(avctx, AV_LOG_ERROR, "timebase not supported by mpeg 4 standard\n");
1158         return -1;
1159     }
1160     s->time_increment_bits = av_log2(s->avctx->time_base.den - 1) + 1;
1161
1162     switch(avctx->codec->id) {
1163     case CODEC_ID_MPEG1VIDEO:
1164         s->out_format = FMT_MPEG1;
1165         s->low_delay= 0; //s->max_b_frames ? 0 : 1;
1166         avctx->delay= s->low_delay ? 0 : (s->max_b_frames + 1);
1167         break;
1168     case CODEC_ID_MPEG2VIDEO:
1169         s->out_format = FMT_MPEG1;
1170         s->low_delay= 0; //s->max_b_frames ? 0 : 1;
1171         avctx->delay= s->low_delay ? 0 : (s->max_b_frames + 1);
1172         s->rtp_mode= 1;
1173         break;
1174     case CODEC_ID_LJPEG:
1175     case CODEC_ID_JPEGLS:
1176     case CODEC_ID_MJPEG:
1177         s->out_format = FMT_MJPEG;
1178         s->intra_only = 1; /* force intra only for jpeg */
1179         s->mjpeg_write_tables = avctx->codec->id != CODEC_ID_JPEGLS;
1180         s->mjpeg_data_only_frames = 0; /* write all the needed headers */
1181         s->mjpeg_vsample[0] = 1<<chroma_v_shift;
1182         s->mjpeg_vsample[1] = 1;
1183         s->mjpeg_vsample[2] = 1;
1184         s->mjpeg_hsample[0] = 1<<chroma_h_shift;
1185         s->mjpeg_hsample[1] = 1;
1186         s->mjpeg_hsample[2] = 1;
1187         if (mjpeg_init(s) < 0)
1188             return -1;
1189         avctx->delay=0;
1190         s->low_delay=1;
1191         break;
1192     case CODEC_ID_H261:
1193         s->out_format = FMT_H261;
1194         avctx->delay=0;
1195         s->low_delay=1;
1196         break;
1197     case CODEC_ID_H263:
1198         if (h263_get_picture_format(s->width, s->height) == 7) {
1199             av_log(avctx, AV_LOG_INFO, "The specified picture size of %dx%d is not valid for the H.263 codec.\nValid sizes are 128x96, 176x144, 352x288, 704x576, and 1408x1152. Try H.263+.\n", s->width, s->height);
1200             return -1;
1201         }
1202         s->out_format = FMT_H263;
1203         s->obmc= (avctx->flags & CODEC_FLAG_OBMC) ? 1:0;
1204         avctx->delay=0;
1205         s->low_delay=1;
1206         break;
1207     case CODEC_ID_H263P:
1208         s->out_format = FMT_H263;
1209         s->h263_plus = 1;
1210         /* Fx */
1211         s->umvplus = (avctx->flags & CODEC_FLAG_H263P_UMV) ? 1:0;
1212         s->h263_aic= (avctx->flags & CODEC_FLAG_H263P_AIC) ? 1:0;
1213         s->modified_quant= s->h263_aic;
1214         s->alt_inter_vlc= (avctx->flags & CODEC_FLAG_H263P_AIV) ? 1:0;
1215         s->obmc= (avctx->flags & CODEC_FLAG_OBMC) ? 1:0;
1216         s->loop_filter= (avctx->flags & CODEC_FLAG_LOOP_FILTER) ? 1:0;
1217         s->unrestricted_mv= s->obmc || s->loop_filter || s->umvplus;
1218         s->h263_slice_structured= (s->flags & CODEC_FLAG_H263P_SLICE_STRUCT) ? 1:0;
1219
1220         /* /Fx */
1221         /* These are just to be sure */
1222         avctx->delay=0;
1223         s->low_delay=1;
1224         break;
1225     case CODEC_ID_FLV1:
1226         s->out_format = FMT_H263;
1227         s->h263_flv = 2; /* format = 1; 11-bit codes */
1228         s->unrestricted_mv = 1;
1229         s->rtp_mode=0; /* don't allow GOB */
1230         avctx->delay=0;
1231         s->low_delay=1;
1232         break;
1233     case CODEC_ID_RV10:
1234         s->out_format = FMT_H263;
1235         avctx->delay=0;
1236         s->low_delay=1;
1237         break;
1238     case CODEC_ID_RV20:
1239         s->out_format = FMT_H263;
1240         avctx->delay=0;
1241         s->low_delay=1;
1242         s->modified_quant=1;
1243         s->h263_aic=1;
1244         s->h263_plus=1;
1245         s->loop_filter=1;
1246         s->unrestricted_mv= s->obmc || s->loop_filter || s->umvplus;
1247         break;
1248     case CODEC_ID_MPEG4:
1249         s->out_format = FMT_H263;
1250         s->h263_pred = 1;
1251         s->unrestricted_mv = 1;
1252         s->low_delay= s->max_b_frames ? 0 : 1;
1253         avctx->delay= s->low_delay ? 0 : (s->max_b_frames + 1);
1254         break;
1255     case CODEC_ID_MSMPEG4V1:
1256         s->out_format = FMT_H263;
1257         s->h263_msmpeg4 = 1;
1258         s->h263_pred = 1;
1259         s->unrestricted_mv = 1;
1260         s->msmpeg4_version= 1;
1261         avctx->delay=0;
1262         s->low_delay=1;
1263         break;
1264     case CODEC_ID_MSMPEG4V2:
1265         s->out_format = FMT_H263;
1266         s->h263_msmpeg4 = 1;
1267         s->h263_pred = 1;
1268         s->unrestricted_mv = 1;
1269         s->msmpeg4_version= 2;
1270         avctx->delay=0;
1271         s->low_delay=1;
1272         break;
1273     case CODEC_ID_MSMPEG4V3:
1274         s->out_format = FMT_H263;
1275         s->h263_msmpeg4 = 1;
1276         s->h263_pred = 1;
1277         s->unrestricted_mv = 1;
1278         s->msmpeg4_version= 3;
1279         s->flipflop_rounding=1;
1280         avctx->delay=0;
1281         s->low_delay=1;
1282         break;
1283     case CODEC_ID_WMV1:
1284         s->out_format = FMT_H263;
1285         s->h263_msmpeg4 = 1;
1286         s->h263_pred = 1;
1287         s->unrestricted_mv = 1;
1288         s->msmpeg4_version= 4;
1289         s->flipflop_rounding=1;
1290         avctx->delay=0;
1291         s->low_delay=1;
1292         break;
1293     case CODEC_ID_WMV2:
1294         s->out_format = FMT_H263;
1295         s->h263_msmpeg4 = 1;
1296         s->h263_pred = 1;
1297         s->unrestricted_mv = 1;
1298         s->msmpeg4_version= 5;
1299         s->flipflop_rounding=1;
1300         avctx->delay=0;
1301         s->low_delay=1;
1302         break;
1303     default:
1304         return -1;
1305     }
1306
1307     avctx->has_b_frames= !s->low_delay;
1308
1309     s->encoding = 1;
1310
1311     /* init */
1312     if (MPV_common_init(s) < 0)
1313         return -1;
1314
1315     if(s->modified_quant)
1316         s->chroma_qscale_table= ff_h263_chroma_qscale_table;
1317     s->progressive_frame=
1318     s->progressive_sequence= !(avctx->flags & (CODEC_FLAG_INTERLACED_DCT|CODEC_FLAG_INTERLACED_ME));
1319     s->quant_precision=5;
1320
1321     ff_set_cmp(&s->dsp, s->dsp.ildct_cmp, s->avctx->ildct_cmp);
1322     ff_set_cmp(&s->dsp, s->dsp.frame_skip_cmp, s->avctx->frame_skip_cmp);
1323
1324 #ifdef CONFIG_H261_ENCODER
1325     if (s->out_format == FMT_H261)
1326         ff_h261_encode_init(s);
1327 #endif
1328     if (s->out_format == FMT_H263)
1329         h263_encode_init(s);
1330     if(s->msmpeg4_version)
1331         ff_msmpeg4_encode_init(s);
1332     if (s->out_format == FMT_MPEG1)
1333         ff_mpeg1_encode_init(s);
1334
1335     /* init q matrix */
1336     for(i=0;i<64;i++) {
1337         int j= s->dsp.idct_permutation[i];
1338         if(s->codec_id==CODEC_ID_MPEG4 && s->mpeg_quant){
1339             s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
1340             s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
1341         }else if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
1342             s->intra_matrix[j] =
1343             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
1344         }else
1345         { /* mpeg1/2 */
1346             s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
1347             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
1348         }
1349         if(s->avctx->intra_matrix)
1350             s->intra_matrix[j] = s->avctx->intra_matrix[i];
1351         if(s->avctx->inter_matrix)
1352             s->inter_matrix[j] = s->avctx->inter_matrix[i];
1353     }
1354
1355     /* precompute matrix */
1356     /* for mjpeg, we do include qscale in the matrix */
1357     if (s->out_format != FMT_MJPEG) {
1358         convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
1359                        s->intra_matrix, s->intra_quant_bias, avctx->qmin, 31, 1);
1360         convert_matrix(&s->dsp, s->q_inter_matrix, s->q_inter_matrix16,
1361                        s->inter_matrix, s->inter_quant_bias, avctx->qmin, 31, 0);
1362     }
1363
1364     if(ff_rate_control_init(s) < 0)
1365         return -1;
1366
1367     return 0;
1368 }
1369
1370 int MPV_encode_end(AVCodecContext *avctx)
1371 {
1372     MpegEncContext *s = avctx->priv_data;
1373
1374     ff_rate_control_uninit(s);
1375
1376     MPV_common_end(s);
1377     if (s->out_format == FMT_MJPEG)
1378         mjpeg_close(s);
1379
1380     av_freep(&avctx->extradata);
1381
1382     return 0;
1383 }
1384
1385 #endif //CONFIG_ENCODERS
1386
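/**
 * builds the max_level[], max_run[] and index_run[] lookup tables of an RLTable
 * from its run/level tables, separately for the "not last" and "last" coefficient
 * groups. With use_static set the tables are allocated from static storage and
 * the call returns immediately if they were already built.
 */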
1387 void init_rl(RLTable *rl, int use_static)
1388 {
1389     int8_t max_level[MAX_RUN+1], max_run[MAX_LEVEL+1];
1390     uint8_t index_run[MAX_RUN+1];
1391     int last, run, level, start, end, i;
1392
1393     /* If table is static, we can quit if rl->max_level[0] is not NULL */
1394     if(use_static && rl->max_level[0])
1395         return;
1396
1397     /* compute max_level[], max_run[] and index_run[] */
1398     for(last=0;last<2;last++) {
1399         if (last == 0) {
1400             start = 0;
1401             end = rl->last;
1402         } else {
1403             start = rl->last;
1404             end = rl->n;
1405         }
1406
1407         memset(max_level, 0, MAX_RUN + 1);
1408         memset(max_run, 0, MAX_LEVEL + 1);
1409         memset(index_run, rl->n, MAX_RUN + 1);
1410         for(i=start;i<end;i++) {
1411             run = rl->table_run[i];
1412             level = rl->table_level[i];
1413             if (index_run[run] == rl->n)
1414                 index_run[run] = i;
1415             if (level > max_level[run])
1416                 max_level[run] = level;
1417             if (run > max_run[level])
1418                 max_run[level] = run;
1419         }
1420         if(use_static)
1421             rl->max_level[last] = av_mallocz_static(MAX_RUN + 1);
1422         else
1423             rl->max_level[last] = av_malloc(MAX_RUN + 1);
1424         memcpy(rl->max_level[last], max_level, MAX_RUN + 1);
1425         if(use_static)
1426             rl->max_run[last] = av_mallocz_static(MAX_LEVEL + 1);
1427         else
1428             rl->max_run[last] = av_malloc(MAX_LEVEL + 1);
1429         memcpy(rl->max_run[last], max_run, MAX_LEVEL + 1);
1430         if(use_static)
1431             rl->index_run[last] = av_mallocz_static(MAX_RUN + 1);
1432         else
1433             rl->index_run[last] = av_malloc(MAX_RUN + 1);
1434         memcpy(rl->index_run[last], index_run, MAX_RUN + 1);
1435     }
1436 }
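
/* A minimal sketch of how the tables built above are used when coding a
 * (last, run, level) event: index_run[] maps a run to the first VLC index
 * with that run, and max_level[] bounds what can be coded without an escape
 * (cf. the get_rl_index() helper used by the H.263/MPEG-4 coders). */
#if 0
static int rl_vlc_index(RLTable *rl, int last, int run, int level){
    /* illustrative helper, not used by the codec */
    int index= rl->index_run[last][run];
    if(index >= rl->n || level > rl->max_level[last][run])
        return rl->n;                /* no direct code -> escape needed */
    return index + level - 1;
}
#endif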
1437
1438 /* draw the edges of width 'w' of an image of size width, height */
1439 //FIXME check that this is ok for mpeg4 interlaced
1440 static void draw_edges_c(uint8_t *buf, int wrap, int width, int height, int w)
1441 {
1442     uint8_t *ptr, *last_line;
1443     int i;
1444
1445     last_line = buf + (height - 1) * wrap;
1446     for(i=0;i<w;i++) {
1447         /* top and bottom */
1448         memcpy(buf - (i + 1) * wrap, buf, width);
1449         memcpy(last_line + (i + 1) * wrap, last_line, width);
1450     }
1451     /* left and right */
1452     ptr = buf;
1453     for(i=0;i<height;i++) {
1454         memset(ptr - w, ptr[0], w);
1455         memset(ptr + width, ptr[width-1], w);
1456         ptr += wrap;
1457     }
1458     /* corners */
1459     for(i=0;i<w;i++) {
1460         memset(buf - (i + 1) * wrap - w, buf[0], w); /* top left */
1461         memset(buf - (i + 1) * wrap + width, buf[width-1], w); /* top right */
1462         memset(last_line + (i + 1) * wrap - w, last_line[0], w); /* bottom left */
1463         memset(last_line + (i + 1) * wrap + width, last_line[width-1], w); /* bottom right */
1464     }
1465 }
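
/* A tiny worked example of the padding above (illustrative only): a 2x2
 * plane inside a 4x4 buffer, padded with w=1.  The real callers in
 * MPV_frame_end() use EDGE_WIDTH so that motion vectors may point slightly
 * outside the picture. */
#if 0
static void draw_edges_example(void){
    uint8_t plane[4*4]= {0};
    uint8_t *p= plane + 1*4 + 1;     /* payload starts at (1,1), wrap = 4 */
    p[0]=1; p[1]=2; p[4]=3; p[5]=4;
    draw_edges(p, 4, 2, 2, 1);
    /* plane now holds:   1 1 2 2
     *                    1 1 2 2
     *                    3 3 4 4
     *                    3 3 4 4  */
}
#endif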
1466
1467 int ff_find_unused_picture(MpegEncContext *s, int shared){
1468     int i;
1469
1470     if(shared){
1471         for(i=0; i<MAX_PICTURE_COUNT; i++){
1472             if(s->picture[i].data[0]==NULL && s->picture[i].type==0) return i;
1473         }
1474     }else{
1475         for(i=0; i<MAX_PICTURE_COUNT; i++){
1476             if(s->picture[i].data[0]==NULL && s->picture[i].type!=0) return i; //FIXME
1477         }
1478         for(i=0; i<MAX_PICTURE_COUNT; i++){
1479             if(s->picture[i].data[0]==NULL) return i;
1480         }
1481     }
1482
1483     assert(0);
1484     return -1;
1485 }
1486
1487 static void update_noise_reduction(MpegEncContext *s){
1488     int intra, i;
1489
1490     for(intra=0; intra<2; intra++){
1491         if(s->dct_count[intra] > (1<<16)){
1492             for(i=0; i<64; i++){
1493                 s->dct_error_sum[intra][i] >>=1;
1494             }
1495             s->dct_count[intra] >>= 1;
1496         }
1497
1498         for(i=0; i<64; i++){
1499             s->dct_offset[intra][i]= (s->avctx->noise_reduction * s->dct_count[intra] + s->dct_error_sum[intra][i]/2) / (s->dct_error_sum[intra][i]+1);
1500         }
1501     }
1502 }
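
/* Worked example of the offset formula above (illustrative numbers): with
 * avctx->noise_reduction = 256, dct_count[intra] = 1000 and an accumulated
 * dct_error_sum of 64000 for some coefficient,
 *     dct_offset = (256*1000 + 64000/2) / (64000 + 1) = 288000/64001 ~= 4,
 * while an error sum of 4000 gives (256000 + 2000)/4001 ~= 64; coefficients
 * with a small accumulated magnitude thus get a larger denoising offset. */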
1503
1504 /**
1505  * generic function for encode/decode, called after the header has been coded/decoded and before a frame is coded/decoded
1506  */
1507 int MPV_frame_start(MpegEncContext *s, AVCodecContext *avctx)
1508 {
1509     int i;
1510     AVFrame *pic;
1511     s->mb_skipped = 0;
1512
1513     assert(s->last_picture_ptr==NULL || s->out_format != FMT_H264 || s->codec_id == CODEC_ID_SVQ3);
1514
1515     /* mark&release old frames */
1516     if (s->pict_type != B_TYPE && s->last_picture_ptr && s->last_picture_ptr != s->next_picture_ptr && s->last_picture_ptr->data[0]) {
1517         avctx->release_buffer(avctx, (AVFrame*)s->last_picture_ptr);
1518
1519         /* release forgotten pictures */
1520         /* if(mpeg124/h263) */
1521         if(!s->encoding){
1522             for(i=0; i<MAX_PICTURE_COUNT; i++){
1523                 if(s->picture[i].data[0] && &s->picture[i] != s->next_picture_ptr && s->picture[i].reference){
1524                     av_log(avctx, AV_LOG_ERROR, "releasing zombie picture\n");
1525                     avctx->release_buffer(avctx, (AVFrame*)&s->picture[i]);
1526                 }
1527             }
1528         }
1529     }
1530 alloc:
1531     if(!s->encoding){
1532         /* release non reference frames */
1533         for(i=0; i<MAX_PICTURE_COUNT; i++){
1534             if(s->picture[i].data[0] && !s->picture[i].reference /*&& s->picture[i].type!=FF_BUFFER_TYPE_SHARED*/){
1535                 s->avctx->release_buffer(s->avctx, (AVFrame*)&s->picture[i]);
1536             }
1537         }
1538
1539         if(s->current_picture_ptr && s->current_picture_ptr->data[0]==NULL)
1540             pic= (AVFrame*)s->current_picture_ptr; //we already have an unused image (maybe it was set before reading the header)
1541         else{
1542             i= ff_find_unused_picture(s, 0);
1543             pic= (AVFrame*)&s->picture[i];
1544         }
1545
1546         pic->reference= (s->pict_type != B_TYPE || s->codec_id == CODEC_ID_H264)
1547                         && !s->dropable ? 3 : 0;
1548
1549         pic->coded_picture_number= s->coded_picture_number++;
1550
1551         if( alloc_picture(s, (Picture*)pic, 0) < 0)
1552             return -1;
1553
1554         s->current_picture_ptr= (Picture*)pic;
1555         s->current_picture_ptr->top_field_first= s->top_field_first; //FIXME use only the vars from current_pic
1556         s->current_picture_ptr->interlaced_frame= !s->progressive_frame && !s->progressive_sequence;
1557     }
1558
1559     s->current_picture_ptr->pict_type= s->pict_type;
1560 //    if(s->flags && CODEC_FLAG_QSCALE)
1561   //      s->current_picture_ptr->quality= s->new_picture_ptr->quality;
1562     s->current_picture_ptr->key_frame= s->pict_type == I_TYPE;
1563
1564     copy_picture(&s->current_picture, s->current_picture_ptr);
1565
1566   if(s->out_format != FMT_H264 || s->codec_id == CODEC_ID_SVQ3){
1567     if (s->pict_type != B_TYPE) {
1568         s->last_picture_ptr= s->next_picture_ptr;
1569         if(!s->dropable)
1570             s->next_picture_ptr= s->current_picture_ptr;
1571     }
1572 /*    av_log(s->avctx, AV_LOG_DEBUG, "L%p N%p C%p L%p N%p C%p type:%d drop:%d\n", s->last_picture_ptr, s->next_picture_ptr,s->current_picture_ptr,
1573         s->last_picture_ptr    ? s->last_picture_ptr->data[0] : NULL,
1574         s->next_picture_ptr    ? s->next_picture_ptr->data[0] : NULL,
1575         s->current_picture_ptr ? s->current_picture_ptr->data[0] : NULL,
1576         s->pict_type, s->dropable);*/
1577
1578     if(s->last_picture_ptr) copy_picture(&s->last_picture, s->last_picture_ptr);
1579     if(s->next_picture_ptr) copy_picture(&s->next_picture, s->next_picture_ptr);
1580
1581     if(s->pict_type != I_TYPE && (s->last_picture_ptr==NULL || s->last_picture_ptr->data[0]==NULL)){
1582         av_log(avctx, AV_LOG_ERROR, "warning: first frame is no keyframe\n");
1583         assert(s->pict_type != B_TYPE); //these should have been dropped if we don't have a reference
1584         goto alloc;
1585     }
1586
1587     assert(s->pict_type == I_TYPE || (s->last_picture_ptr && s->last_picture_ptr->data[0]));
1588
1589     if(s->picture_structure!=PICT_FRAME){
1590         int i;
1591         for(i=0; i<4; i++){
1592             if(s->picture_structure == PICT_BOTTOM_FIELD){
1593                  s->current_picture.data[i] += s->current_picture.linesize[i];
1594             }
1595             s->current_picture.linesize[i] *= 2;
1596             s->last_picture.linesize[i] *=2;
1597             s->next_picture.linesize[i] *=2;
1598         }
1599     }
1600   }
1601
1602     s->hurry_up= s->avctx->hurry_up;
1603     s->error_resilience= avctx->error_resilience;
1604
1605     /* set dequantizer, we can't do it during init as it might change for mpeg4
1606        and we can't do it in the header decode as init isn't called for mpeg4 there yet */
1607     if(s->mpeg_quant || s->codec_id == CODEC_ID_MPEG2VIDEO){
1608         s->dct_unquantize_intra = s->dct_unquantize_mpeg2_intra;
1609         s->dct_unquantize_inter = s->dct_unquantize_mpeg2_inter;
1610     }else if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
1611         s->dct_unquantize_intra = s->dct_unquantize_h263_intra;
1612         s->dct_unquantize_inter = s->dct_unquantize_h263_inter;
1613     }else{
1614         s->dct_unquantize_intra = s->dct_unquantize_mpeg1_intra;
1615         s->dct_unquantize_inter = s->dct_unquantize_mpeg1_inter;
1616     }
1617
1618     if(s->dct_error_sum){
1619         assert(s->avctx->noise_reduction && s->encoding);
1620
1621         update_noise_reduction(s);
1622     }
1623
1624 #ifdef HAVE_XVMC
1625     if(s->avctx->xvmc_acceleration)
1626         return XVMC_field_start(s, avctx);
1627 #endif
1628     return 0;
1629 }
1630
1631 /* generic function for encode/decode, called after a frame has been coded/decoded */
1632 void MPV_frame_end(MpegEncContext *s)
1633 {
1634     int i;
1635     /* draw edge for correct motion prediction if outside */
1636 #ifdef HAVE_XVMC
1637 //just to make sure that all data is rendered.
1638     if(s->avctx->xvmc_acceleration){
1639         XVMC_field_end(s);
1640     }else
1641 #endif
1642     if(s->unrestricted_mv && s->current_picture.reference && !s->intra_only && !(s->flags&CODEC_FLAG_EMU_EDGE)) {
1643             draw_edges(s->current_picture.data[0], s->linesize  , s->h_edge_pos   , s->v_edge_pos   , EDGE_WIDTH  );
1644             draw_edges(s->current_picture.data[1], s->uvlinesize, s->h_edge_pos>>1, s->v_edge_pos>>1, EDGE_WIDTH/2);
1645             draw_edges(s->current_picture.data[2], s->uvlinesize, s->h_edge_pos>>1, s->v_edge_pos>>1, EDGE_WIDTH/2);
1646     }
1647     emms_c();
1648
1649     s->last_pict_type    = s->pict_type;
1650     s->last_lambda_for[s->pict_type]= s->current_picture_ptr->quality;
1651     if(s->pict_type!=B_TYPE){
1652         s->last_non_b_pict_type= s->pict_type;
1653     }
1654 #if 0
1655         /* copy back current_picture variables */
1656     for(i=0; i<MAX_PICTURE_COUNT; i++){
1657         if(s->picture[i].data[0] == s->current_picture.data[0]){
1658             s->picture[i]= s->current_picture;
1659             break;
1660         }
1661     }
1662     assert(i<MAX_PICTURE_COUNT);
1663 #endif
1664
1665     if(s->encoding){
1666         /* release non-reference frames */
1667         for(i=0; i<MAX_PICTURE_COUNT; i++){
1668             if(s->picture[i].data[0] && !s->picture[i].reference /*&& s->picture[i].type!=FF_BUFFER_TYPE_SHARED*/){
1669                 s->avctx->release_buffer(s->avctx, (AVFrame*)&s->picture[i]);
1670             }
1671         }
1672     }
1673     // clear copies, to avoid confusion
1674 #if 0
1675     memset(&s->last_picture, 0, sizeof(Picture));
1676     memset(&s->next_picture, 0, sizeof(Picture));
1677     memset(&s->current_picture, 0, sizeof(Picture));
1678 #endif
1679     s->avctx->coded_frame= (AVFrame*)s->current_picture_ptr;
1680 }
1681
1682 /**
1683  * draws a line from (ex, ey) -> (sx, sy).
1684  * @param w width of the image
1685  * @param h height of the image
1686  * @param stride stride/linesize of the image
1687  * @param color color of the line
1688  */
1689 static void draw_line(uint8_t *buf, int sx, int sy, int ex, int ey, int w, int h, int stride, int color){
1690     int t, x, y, fr, f;
1691
1692     sx= clip(sx, 0, w-1);
1693     sy= clip(sy, 0, h-1);
1694     ex= clip(ex, 0, w-1);
1695     ey= clip(ey, 0, h-1);
1696
1697     buf[sy*stride + sx]+= color;
1698
1699     if(ABS(ex - sx) > ABS(ey - sy)){
1700         if(sx > ex){
1701             t=sx; sx=ex; ex=t;
1702             t=sy; sy=ey; ey=t;
1703         }
1704         buf+= sx + sy*stride;
1705         ex-= sx;
1706         f= ((ey-sy)<<16)/ex;
1707         for(x= 0; x <= ex; x++){
1708             y = (x*f)>>16;
1709             fr= (x*f)&0xFFFF;
1710             buf[ y   *stride + x]+= (color*(0x10000-fr))>>16;
1711             buf[(y+1)*stride + x]+= (color*         fr )>>16;
1712         }
1713     }else{
1714         if(sy > ey){
1715             t=sx; sx=ex; ex=t;
1716             t=sy; sy=ey; ey=t;
1717         }
1718         buf+= sx + sy*stride;
1719         ey-= sy;
1720         if(ey) f= ((ex-sx)<<16)/ey;
1721         else   f= 0;
1722         for(y= 0; y <= ey; y++){
1723             x = (y*f)>>16;
1724             fr= (y*f)&0xFFFF;
1725             buf[y*stride + x  ]+= (color*(0x10000-fr))>>16;
1726             buf[y*stride + x+1]+= (color*         fr )>>16;
1727         }
1728     }
1729 }
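
/* The loop above steps one pixel along the major axis and advances the minor
 * axis in 16.16 fixed point: f is the slope scaled by 2^16, y = (x*f)>>16 is
 * the integer part and fr the fraction used to split the color between the
 * two neighbouring lines (a crude anti-aliasing).  Illustrative numbers: for
 * a line from (0,0) to (4,1), f = (1<<16)/4 = 16384, so at x=2 we get y=0,
 * fr=0x8000 and the color is split 50/50 between rows 0 and 1. */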
1730
1731 /**
1732  * draws an arrow from (ex, ey) -> (sx, sy).
1733  * @param w width of the image
1734  * @param h height of the image
1735  * @param stride stride/linesize of the image
1736  * @param color color of the arrow
1737  */
1738 static void draw_arrow(uint8_t *buf, int sx, int sy, int ex, int ey, int w, int h, int stride, int color){
1739     int dx,dy;
1740
1741     sx= clip(sx, -100, w+100);
1742     sy= clip(sy, -100, h+100);
1743     ex= clip(ex, -100, w+100);
1744     ey= clip(ey, -100, h+100);
1745
1746     dx= ex - sx;
1747     dy= ey - sy;
1748
1749     if(dx*dx + dy*dy > 3*3){
1750         int rx=  dx + dy;
1751         int ry= -dx + dy;
1752         int length= ff_sqrt((rx*rx + ry*ry)<<8);
1753
1754         //FIXME subpixel accuracy
1755         rx= ROUNDED_DIV(rx*3<<4, length);
1756         ry= ROUNDED_DIV(ry*3<<4, length);
1757
1758         draw_line(buf, sx, sy, sx + rx, sy + ry, w, h, stride, color);
1759         draw_line(buf, sx, sy, sx - ry, sy + rx, w, h, stride, color);
1760     }
1761     draw_line(buf, sx, sy, ex, ey, w, h, stride, color);
1762 }
1763
1764 /**
1765  * prints debugging info for the given picture.
1766  */
1767 void ff_print_debug_info(MpegEncContext *s, AVFrame *pict){
1768
1769     if(!pict || !pict->mb_type) return;
1770
1771     if(s->avctx->debug&(FF_DEBUG_SKIP | FF_DEBUG_QP | FF_DEBUG_MB_TYPE)){
1772         int x,y;
1773
1774         av_log(s->avctx,AV_LOG_DEBUG,"New frame, type: ");
1775         switch (pict->pict_type) {
1776             case FF_I_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"I\n"); break;
1777             case FF_P_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"P\n"); break;
1778             case FF_B_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"B\n"); break;
1779             case FF_S_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"S\n"); break;
1780             case FF_SI_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"SI\n"); break;
1781             case FF_SP_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"SP\n"); break;
1782         }
1783         for(y=0; y<s->mb_height; y++){
1784             for(x=0; x<s->mb_width; x++){
1785                 if(s->avctx->debug&FF_DEBUG_SKIP){
1786                     int count= s->mbskip_table[x + y*s->mb_stride];
1787                     if(count>9) count=9;
1788                     av_log(s->avctx, AV_LOG_DEBUG, "%1d", count);
1789                 }
1790                 if(s->avctx->debug&FF_DEBUG_QP){
1791                     av_log(s->avctx, AV_LOG_DEBUG, "%2d", pict->qscale_table[x + y*s->mb_stride]);
1792                 }
1793                 if(s->avctx->debug&FF_DEBUG_MB_TYPE){
1794                     int mb_type= pict->mb_type[x + y*s->mb_stride];
1795                     //Type & MV direction
1796                     if(IS_PCM(mb_type))
1797                         av_log(s->avctx, AV_LOG_DEBUG, "P");
1798                     else if(IS_INTRA(mb_type) && IS_ACPRED(mb_type))
1799                         av_log(s->avctx, AV_LOG_DEBUG, "A");
1800                     else if(IS_INTRA4x4(mb_type))
1801                         av_log(s->avctx, AV_LOG_DEBUG, "i");
1802                     else if(IS_INTRA16x16(mb_type))
1803                         av_log(s->avctx, AV_LOG_DEBUG, "I");
1804                     else if(IS_DIRECT(mb_type) && IS_SKIP(mb_type))
1805                         av_log(s->avctx, AV_LOG_DEBUG, "d");
1806                     else if(IS_DIRECT(mb_type))
1807                         av_log(s->avctx, AV_LOG_DEBUG, "D");
1808                     else if(IS_GMC(mb_type) && IS_SKIP(mb_type))
1809                         av_log(s->avctx, AV_LOG_DEBUG, "g");
1810                     else if(IS_GMC(mb_type))
1811                         av_log(s->avctx, AV_LOG_DEBUG, "G");
1812                     else if(IS_SKIP(mb_type))
1813                         av_log(s->avctx, AV_LOG_DEBUG, "S");
1814                     else if(!USES_LIST(mb_type, 1))
1815                         av_log(s->avctx, AV_LOG_DEBUG, ">");
1816                     else if(!USES_LIST(mb_type, 0))
1817                         av_log(s->avctx, AV_LOG_DEBUG, "<");
1818                     else{
1819                         assert(USES_LIST(mb_type, 0) && USES_LIST(mb_type, 1));
1820                         av_log(s->avctx, AV_LOG_DEBUG, "X");
1821                     }
1822
1823                     //segmentation
1824                     if(IS_8X8(mb_type))
1825                         av_log(s->avctx, AV_LOG_DEBUG, "+");
1826                     else if(IS_16X8(mb_type))
1827                         av_log(s->avctx, AV_LOG_DEBUG, "-");
1828                     else if(IS_8X16(mb_type))
1829                         av_log(s->avctx, AV_LOG_DEBUG, "|");
1830                     else if(IS_INTRA(mb_type) || IS_16X16(mb_type))
1831                         av_log(s->avctx, AV_LOG_DEBUG, " ");
1832                     else
1833                         av_log(s->avctx, AV_LOG_DEBUG, "?");
1834
1835
1836                     if(IS_INTERLACED(mb_type) && s->codec_id == CODEC_ID_H264)
1837                         av_log(s->avctx, AV_LOG_DEBUG, "=");
1838                     else
1839                         av_log(s->avctx, AV_LOG_DEBUG, " ");
1840                 }
1841 //                av_log(s->avctx, AV_LOG_DEBUG, " ");
1842             }
1843             av_log(s->avctx, AV_LOG_DEBUG, "\n");
1844         }
1845     }
1846
1847     if((s->avctx->debug&(FF_DEBUG_VIS_QP|FF_DEBUG_VIS_MB_TYPE)) || (s->avctx->debug_mv)){
1848         const int shift= 1 + s->quarter_sample;
1849         int mb_y;
1850         uint8_t *ptr;
1851         int i;
1852         int h_chroma_shift, v_chroma_shift;
1853         const int width = s->avctx->width;
1854         const int height= s->avctx->height;
1855         const int mv_sample_log2= 4 - pict->motion_subsample_log2;
1856         const int mv_stride= (s->mb_width << mv_sample_log2) + (s->codec_id == CODEC_ID_H264 ? 0 : 1);
1857         s->low_delay=0; //needed to see the vectors without trashing the buffers
1858
1859         avcodec_get_chroma_sub_sample(s->avctx->pix_fmt, &h_chroma_shift, &v_chroma_shift);
1860         for(i=0; i<3; i++){
1861             memcpy(s->visualization_buffer[i], pict->data[i], (i==0) ? pict->linesize[i]*height:pict->linesize[i]*height >> v_chroma_shift);
1862             pict->data[i]= s->visualization_buffer[i];
1863         }
1864         pict->type= FF_BUFFER_TYPE_COPY;
1865         ptr= pict->data[0];
1866
1867         for(mb_y=0; mb_y<s->mb_height; mb_y++){
1868             int mb_x;
1869             for(mb_x=0; mb_x<s->mb_width; mb_x++){
1870                 const int mb_index= mb_x + mb_y*s->mb_stride;
1871                 if((s->avctx->debug_mv) && pict->motion_val){
1872                   int type;
1873                   for(type=0; type<3; type++){
1874                     int direction = 0;
1875                     switch (type) {
1876                       case 0: if ((!(s->avctx->debug_mv&FF_DEBUG_VIS_MV_P_FOR)) || (pict->pict_type!=FF_P_TYPE))
1877                                 continue;
1878                               direction = 0;
1879                               break;
1880                       case 1: if ((!(s->avctx->debug_mv&FF_DEBUG_VIS_MV_B_FOR)) || (pict->pict_type!=FF_B_TYPE))
1881                                 continue;
1882                               direction = 0;
1883                               break;
1884                       case 2: if ((!(s->avctx->debug_mv&FF_DEBUG_VIS_MV_B_BACK)) || (pict->pict_type!=FF_B_TYPE))
1885                                 continue;
1886                               direction = 1;
1887                               break;
1888                     }
1889                     if(!USES_LIST(pict->mb_type[mb_index], direction))
1890                         continue;
1891
1892                     if(IS_8X8(pict->mb_type[mb_index])){
1893                       int i;
1894                       for(i=0; i<4; i++){
1895                         int sx= mb_x*16 + 4 + 8*(i&1);
1896                         int sy= mb_y*16 + 4 + 8*(i>>1);
1897                         int xy= (mb_x*2 + (i&1) + (mb_y*2 + (i>>1))*mv_stride) << (mv_sample_log2-1);
1898                         int mx= (pict->motion_val[direction][xy][0]>>shift) + sx;
1899                         int my= (pict->motion_val[direction][xy][1]>>shift) + sy;
1900                         draw_arrow(ptr, sx, sy, mx, my, width, height, s->linesize, 100);
1901                       }
1902                     }else if(IS_16X8(pict->mb_type[mb_index])){
1903                       int i;
1904                       for(i=0; i<2; i++){
1905                         int sx=mb_x*16 + 8;
1906                         int sy=mb_y*16 + 4 + 8*i;
1907                         int xy= (mb_x*2 + (mb_y*2 + i)*mv_stride) << (mv_sample_log2-1);
1908                         int mx=(pict->motion_val[direction][xy][0]>>shift);
1909                         int my=(pict->motion_val[direction][xy][1]>>shift);
1910
1911                         if(IS_INTERLACED(pict->mb_type[mb_index]))
1912                             my*=2;
1913
1914                         draw_arrow(ptr, sx, sy, mx+sx, my+sy, width, height, s->linesize, 100);
1915                       }
1916                     }else if(IS_8X16(pict->mb_type[mb_index])){
1917                       int i;
1918                       for(i=0; i<2; i++){
1919                         int sx=mb_x*16 + 4 + 8*i;
1920                         int sy=mb_y*16 + 8;
1921                         int xy= (mb_x*2 + i + mb_y*2*mv_stride) << (mv_sample_log2-1);
1922                         int mx=(pict->motion_val[direction][xy][0]>>shift);
1923                         int my=(pict->motion_val[direction][xy][1]>>shift);
1924
1925                         if(IS_INTERLACED(pict->mb_type[mb_index]))
1926                             my*=2;
1927
1928                         draw_arrow(ptr, sx, sy, mx+sx, my+sy, width, height, s->linesize, 100);
1929                       }
1930                     }else{
1931                       int sx= mb_x*16 + 8;
1932                       int sy= mb_y*16 + 8;
1933                       int xy= (mb_x + mb_y*mv_stride) << mv_sample_log2;
1934                       int mx= (pict->motion_val[direction][xy][0]>>shift) + sx;
1935                       int my= (pict->motion_val[direction][xy][1]>>shift) + sy;
1936                       draw_arrow(ptr, sx, sy, mx, my, width, height, s->linesize, 100);
1937                     }
1938                   }
1939                 }
1940                 if((s->avctx->debug&FF_DEBUG_VIS_QP) && pict->motion_val){
1941                     uint64_t c= (pict->qscale_table[mb_index]*128/31) * 0x0101010101010101ULL;
1942                     int y;
1943                     for(y=0; y<8; y++){
1944                         *(uint64_t*)(pict->data[1] + 8*mb_x + (8*mb_y + y)*pict->linesize[1])= c;
1945                         *(uint64_t*)(pict->data[2] + 8*mb_x + (8*mb_y + y)*pict->linesize[2])= c;
1946                     }
1947                 }
1948                 if((s->avctx->debug&FF_DEBUG_VIS_MB_TYPE) && pict->motion_val){
1949                     int mb_type= pict->mb_type[mb_index];
1950                     uint64_t u,v;
1951                     int y;
1952 #define COLOR(theta, r)\
1953 u= (int)(128 + r*cos(theta*3.141592/180));\
1954 v= (int)(128 + r*sin(theta*3.141592/180));
1955
1956
1957                     u=v=128;
1958                     if(IS_PCM(mb_type)){
1959                         COLOR(120,48)
1960                     }else if((IS_INTRA(mb_type) && IS_ACPRED(mb_type)) || IS_INTRA16x16(mb_type)){
1961                         COLOR(30,48)
1962                     }else if(IS_INTRA4x4(mb_type)){
1963                         COLOR(90,48)
1964                     }else if(IS_DIRECT(mb_type) && IS_SKIP(mb_type)){
1965 //                        COLOR(120,48)
1966                     }else if(IS_DIRECT(mb_type)){
1967                         COLOR(150,48)
1968                     }else if(IS_GMC(mb_type) && IS_SKIP(mb_type)){
1969                         COLOR(170,48)
1970                     }else if(IS_GMC(mb_type)){
1971                         COLOR(190,48)
1972                     }else if(IS_SKIP(mb_type)){
1973 //                        COLOR(180,48)
1974                     }else if(!USES_LIST(mb_type, 1)){
1975                         COLOR(240,48)
1976                     }else if(!USES_LIST(mb_type, 0)){
1977                         COLOR(0,48)
1978                     }else{
1979                         assert(USES_LIST(mb_type, 0) && USES_LIST(mb_type, 1));
1980                         COLOR(300,48)
1981                     }
1982
1983                     u*= 0x0101010101010101ULL;
1984                     v*= 0x0101010101010101ULL;
1985                     for(y=0; y<8; y++){
1986                         *(uint64_t*)(pict->data[1] + 8*mb_x + (8*mb_y + y)*pict->linesize[1])= u;
1987                         *(uint64_t*)(pict->data[2] + 8*mb_x + (8*mb_y + y)*pict->linesize[2])= v;
1988                     }
1989
1990                     //segmentation
1991                     if(IS_8X8(mb_type) || IS_16X8(mb_type)){
1992                         *(uint64_t*)(pict->data[0] + 16*mb_x + 0 + (16*mb_y + 8)*pict->linesize[0])^= 0x8080808080808080ULL;
1993                         *(uint64_t*)(pict->data[0] + 16*mb_x + 8 + (16*mb_y + 8)*pict->linesize[0])^= 0x8080808080808080ULL;
1994                     }
1995                     if(IS_8X8(mb_type) || IS_8X16(mb_type)){
1996                         for(y=0; y<16; y++)
1997                             pict->data[0][16*mb_x + 8 + (16*mb_y + y)*pict->linesize[0]]^= 0x80;
1998                     }
1999                     if(IS_8X8(mb_type) && mv_sample_log2 >= 2){
2000                         int dm= 1 << (mv_sample_log2-2);
2001                         for(i=0; i<4; i++){
2002                             int sx= mb_x*16 + 8*(i&1);
2003                             int sy= mb_y*16 + 8*(i>>1);
2004                             int xy= (mb_x*2 + (i&1) + (mb_y*2 + (i>>1))*mv_stride) << (mv_sample_log2-1);
2005                             //FIXME bidir
2006                             int32_t *mv = (int32_t*)&pict->motion_val[0][xy];
2007                             if(mv[0] != mv[dm] || mv[dm*mv_stride] != mv[dm*(mv_stride+1)])
2008                                 for(y=0; y<8; y++)
2009                                     pict->data[0][sx + 4 + (sy + y)*pict->linesize[0]]^= 0x80;
2010                             if(mv[0] != mv[dm*mv_stride] || mv[dm] != mv[dm*(mv_stride+1)])
2011                                 *(uint64_t*)(pict->data[0] + sx + (sy + 4)*pict->linesize[0])^= 0x8080808080808080ULL;
2012                         }
2013                     }
2014
2015                     if(IS_INTERLACED(mb_type) && s->codec_id == CODEC_ID_H264){
2016                         // hmm
2017                     }
2018                 }
2019                 s->mbskip_table[mb_index]=0;
2020             }
2021         }
2022     }
2023 }
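
/* A minimal caller-side sketch (illustrative): the dumps and visualisations
 * above are driven by the AVCodecContext debug fields, e.g.
 *     avctx->debug    |= FF_DEBUG_QP | FF_DEBUG_MB_TYPE;  // textual per-MB dump
 *     avctx->debug    |= FF_DEBUG_VIS_MB_TYPE;            // paint MB types into chroma
 *     avctx->debug_mv |= FF_DEBUG_VIS_MV_P_FOR;           // draw forward MVs of P frames
 * set before decoding; ff_print_debug_info() is then called with the decoded
 * picture. */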
2024
2025 #ifdef CONFIG_ENCODERS
2026
2027 static int get_sae(uint8_t *src, int ref, int stride){
2028     int x,y;
2029     int acc=0;
2030
2031     for(y=0; y<16; y++){
2032         for(x=0; x<16; x++){
2033             acc+= ABS(src[x+y*stride] - ref);
2034         }
2035     }
2036
2037     return acc;
2038 }
2039
2040 static int get_intra_count(MpegEncContext *s, uint8_t *src, uint8_t *ref, int stride){
2041     int x, y, w, h;
2042     int acc=0;
2043
2044     w= s->width &~15;
2045     h= s->height&~15;
2046
2047     for(y=0; y<h; y+=16){
2048         for(x=0; x<w; x+=16){
2049             int offset= x + y*stride;
2050             int sad = s->dsp.sad[0](NULL, src + offset, ref + offset, stride, 16);
2051             int mean= (s->dsp.pix_sum(src + offset, stride) + 128)>>8;
2052             int sae = get_sae(src + offset, mean, stride);
2053
2054             acc+= sae + 500 < sad;
2055         }
2056     }
2057     return acc;
2058 }
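
/* Reading of the heuristic above (used by b_frame_strategy==1 in
 * select_input_picture()): for each 16x16 block, sae measures how well the
 * block is described by its own mean and sad how well it matches the
 * co-located block of the previous input frame; the block counts as "intra"
 * when sae + 500 < sad, i.e. when temporal prediction is clearly worse. */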
2059
2060
2061 static int load_input_picture(MpegEncContext *s, AVFrame *pic_arg){
2062     AVFrame *pic=NULL;
2063     int64_t pts;
2064     int i;
2065     const int encoding_delay= s->max_b_frames;
2066     int direct=1;
2067
2068     if(pic_arg){
2069         pts= pic_arg->pts;
2070         pic_arg->display_picture_number= s->input_picture_number++;
2071
2072         if(pts != AV_NOPTS_VALUE){
2073             if(s->user_specified_pts != AV_NOPTS_VALUE){
2074                 int64_t time= pts;
2075                 int64_t last= s->user_specified_pts;
2076
2077                 if(time <= last){
2078                     av_log(s->avctx, AV_LOG_ERROR, "Error, Invalid timestamp=%"PRId64", last=%"PRId64"\n", pts, s->user_specified_pts);
2079                     return -1;
2080                 }
2081             }
2082             s->user_specified_pts= pts;
2083         }else{
2084             if(s->user_specified_pts != AV_NOPTS_VALUE){
2085                 s->user_specified_pts=
2086                 pts= s->user_specified_pts + 1;
2087                 av_log(s->avctx, AV_LOG_INFO, "Warning: AVFrame.pts=? trying to guess (%"PRId64")\n", pts);
2088             }else{
2089                 pts= pic_arg->display_picture_number;
2090             }
2091         }
2092     }
2093
2094   if(pic_arg){
2095     if(encoding_delay && !(s->flags&CODEC_FLAG_INPUT_PRESERVED)) direct=0;
2096     if(pic_arg->linesize[0] != s->linesize) direct=0;
2097     if(pic_arg->linesize[1] != s->uvlinesize) direct=0;
2098     if(pic_arg->linesize[2] != s->uvlinesize) direct=0;
2099
2100 //    av_log(AV_LOG_DEBUG, "%d %d %d %d\n",pic_arg->linesize[0], pic_arg->linesize[1], s->linesize, s->uvlinesize);
2101
2102     if(direct){
2103         i= ff_find_unused_picture(s, 1);
2104
2105         pic= (AVFrame*)&s->picture[i];
2106         pic->reference= 3;
2107
2108         for(i=0; i<4; i++){
2109             pic->data[i]= pic_arg->data[i];
2110             pic->linesize[i]= pic_arg->linesize[i];
2111         }
2112         alloc_picture(s, (Picture*)pic, 1);
2113     }else{
2114         i= ff_find_unused_picture(s, 0);
2115
2116         pic= (AVFrame*)&s->picture[i];
2117         pic->reference= 3;
2118
2119         alloc_picture(s, (Picture*)pic, 0);
2120
2121         if(   pic->data[0] + INPLACE_OFFSET == pic_arg->data[0]
2122            && pic->data[1] + INPLACE_OFFSET == pic_arg->data[1]
2123            && pic->data[2] + INPLACE_OFFSET == pic_arg->data[2]){
2124        // empty
2125         }else{
2126             int h_chroma_shift, v_chroma_shift;
2127             avcodec_get_chroma_sub_sample(s->avctx->pix_fmt, &h_chroma_shift, &v_chroma_shift);
2128
2129             for(i=0; i<3; i++){
2130                 int src_stride= pic_arg->linesize[i];
2131                 int dst_stride= i ? s->uvlinesize : s->linesize;
2132                 int h_shift= i ? h_chroma_shift : 0;
2133                 int v_shift= i ? v_chroma_shift : 0;
2134                 int w= s->width >>h_shift;
2135                 int h= s->height>>v_shift;
2136                 uint8_t *src= pic_arg->data[i];
2137                 uint8_t *dst= pic->data[i] + INPLACE_OFFSET;
2138
2139                 if(src_stride==dst_stride)
2140                     memcpy(dst, src, src_stride*h);
2141                 else{
2142                     while(h--){
2143                         memcpy(dst, src, w);
2144                         dst += dst_stride;
2145                         src += src_stride;
2146                     }
2147                 }
2148             }
2149         }
2150     }
2151     copy_picture_attributes(s, pic, pic_arg);
2152     pic->pts= pts; //we set this here to avoid modifying pic_arg
2153   }
2154
2155     /* shift buffer entries */
2156     for(i=1; i<MAX_PICTURE_COUNT /*s->encoding_delay+1*/; i++)
2157         s->input_picture[i-1]= s->input_picture[i];
2158
2159     s->input_picture[encoding_delay]= (Picture*)pic;
2160
2161     return 0;
2162 }
2163
2164 static int skip_check(MpegEncContext *s, Picture *p, Picture *ref){
2165     int x, y, plane;
2166     int score=0;
2167     int64_t score64=0;
2168
2169     for(plane=0; plane<3; plane++){
2170         const int stride= p->linesize[plane];
2171         const int bw= plane ? 1 : 2;
2172         for(y=0; y<s->mb_height*bw; y++){
2173             for(x=0; x<s->mb_width*bw; x++){
2174                 int off= p->type == FF_BUFFER_TYPE_SHARED ? 0: 16;
2175                 int v= s->dsp.frame_skip_cmp[1](s, p->data[plane] + 8*(x + y*stride)+off, ref->data[plane] + 8*(x + y*stride), stride, 8);
2176
2177                 switch(s->avctx->frame_skip_exp){
2178                     case 0: score= FFMAX(score, v); break;
2179                     case 1: score+= ABS(v);break;
2180                     case 2: score+= v*v;break;
2181                     case 3: score64+= ABS(v*v*(int64_t)v);break;
2182                     case 4: score64+= v*v*(int64_t)(v*v);break;
2183                 }
2184             }
2185         }
2186     }
2187
2188     if(score) score64= score;
2189
2190     if(score64 < s->avctx->frame_skip_threshold)
2191         return 1;
2192     if(score64 < ((s->avctx->frame_skip_factor * (int64_t)s->lambda)>>8))
2193         return 1;
2194     return 0;
2195 }
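
/* Sketch of the skip decision above: frame_skip_cmp yields a per-8x8-block
 * difference v against the reference, accumulated according to
 * avctx->frame_skip_exp (0: max, 1: sum |v|, 2: sum v^2, 3: sum |v|^3,
 * 4: sum v^4); the frame is skipped when the total stays below
 * frame_skip_threshold or below (frame_skip_factor * lambda) >> 8. */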
2196
2197 static int estimate_best_b_count(MpegEncContext *s){
2198     AVCodec *codec= avcodec_find_encoder(s->avctx->codec_id);
2199     AVCodecContext *c= avcodec_alloc_context();
2200     AVFrame input[FF_MAX_B_FRAMES+2];
2201     const int scale= s->avctx->brd_scale;
2202     int i, j, out_size, p_lambda, b_lambda, lambda2;
2203     int outbuf_size= s->width * s->height; //FIXME
2204     uint8_t *outbuf= av_malloc(outbuf_size);
2205     int64_t best_rd= INT64_MAX;
2206     int best_b_count= -1;
2207
2208     assert(scale>=0 && scale <=3);
2209
2210 //    emms_c();
2211     p_lambda= s->last_lambda_for[P_TYPE]; //s->next_picture_ptr->quality;
2212     b_lambda= s->last_lambda_for[B_TYPE]; //p_lambda *ABS(s->avctx->b_quant_factor) + s->avctx->b_quant_offset;
2213     if(!b_lambda) b_lambda= p_lambda; //FIXME we should do this somewhere else
2214     lambda2= (b_lambda*b_lambda + (1<<FF_LAMBDA_SHIFT)/2 ) >> FF_LAMBDA_SHIFT;
2215
2216     c->width = s->width >> scale;
2217     c->height= s->height>> scale;
2218     c->flags= CODEC_FLAG_QSCALE | CODEC_FLAG_PSNR | CODEC_FLAG_INPUT_PRESERVED /*| CODEC_FLAG_EMU_EDGE*/;
2219     c->flags|= s->avctx->flags & CODEC_FLAG_QPEL;
2220     c->mb_decision= s->avctx->mb_decision;
2221     c->me_cmp= s->avctx->me_cmp;
2222     c->mb_cmp= s->avctx->mb_cmp;
2223     c->me_sub_cmp= s->avctx->me_sub_cmp;
2224     c->pix_fmt = PIX_FMT_YUV420P;
2225     c->time_base= s->avctx->time_base;
2226     c->max_b_frames= s->max_b_frames;
2227
2228     if (avcodec_open(c, codec) < 0)
2229         return -1;
2230
2231     for(i=0; i<s->max_b_frames+2; i++){
2232         int ysize= c->width*c->height;
2233         int csize= (c->width/2)*(c->height/2);
2234         Picture pre_input, *pre_input_ptr= i ? s->input_picture[i-1] : s->next_picture_ptr;
2235
2236         if(pre_input_ptr)
2237             pre_input= *pre_input_ptr;
2238
2239         if(pre_input.type != FF_BUFFER_TYPE_SHARED && i){
2240             pre_input.data[0]+=INPLACE_OFFSET;
2241             pre_input.data[1]+=INPLACE_OFFSET;
2242             pre_input.data[2]+=INPLACE_OFFSET;
2243         }
2244
2245         avcodec_get_frame_defaults(&input[i]);
2246         input[i].data[0]= av_malloc(ysize + 2*csize);
2247         input[i].data[1]= input[i].data[0] + ysize;
2248         input[i].data[2]= input[i].data[1] + csize;
2249         input[i].linesize[0]= c->width;
2250         input[i].linesize[1]=
2251         input[i].linesize[2]= c->width/2;
2252
2253         if(!i || s->input_picture[i-1]){
2254             s->dsp.shrink[scale](input[i].data[0], input[i].linesize[0], pre_input.data[0], pre_input.linesize[0], c->width, c->height);
2255             s->dsp.shrink[scale](input[i].data[1], input[i].linesize[1], pre_input.data[1], pre_input.linesize[1], c->width>>1, c->height>>1);
2256             s->dsp.shrink[scale](input[i].data[2], input[i].linesize[2], pre_input.data[2], pre_input.linesize[2], c->width>>1, c->height>>1);
2257         }
2258     }
2259
2260     for(j=0; j<s->max_b_frames+1; j++){
2261         int64_t rd=0;
2262
2263         if(!s->input_picture[j])
2264             break;
2265
2266         c->error[0]= c->error[1]= c->error[2]= 0;
2267
2268         input[0].pict_type= I_TYPE;
2269         input[0].quality= 1 * FF_QP2LAMBDA;
2270         out_size = avcodec_encode_video(c, outbuf, outbuf_size, &input[0]);
2271 //        rd += (out_size * lambda2) >> FF_LAMBDA_SHIFT;
2272
2273         for(i=0; i<s->max_b_frames+1; i++){
2274             int is_p= i % (j+1) == j || i==s->max_b_frames;
2275
2276             input[i+1].pict_type= is_p ? P_TYPE : B_TYPE;
2277             input[i+1].quality= is_p ? p_lambda : b_lambda;
2278             out_size = avcodec_encode_video(c, outbuf, outbuf_size, &input[i+1]);
2279             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
2280         }
2281
2282         /* get the delayed frames */
2283         while(out_size){
2284             out_size = avcodec_encode_video(c, outbuf, outbuf_size, NULL);
2285             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
2286         }
2287
2288         rd += c->error[0] + c->error[1] + c->error[2];
2289
2290         if(rd < best_rd){
2291             best_rd= rd;
2292             best_b_count= j;
2293         }
2294     }
2295
2296     av_freep(&outbuf);
2297     avcodec_close(c);
2298     av_freep(&c);
2299
2300     for(i=0; i<s->max_b_frames+2; i++){
2301         av_freep(&input[i].data[0]);
2302     }
2303
2304     return best_b_count;
2305 }
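
/* The strategy above (b_frame_strategy==2): the next max_b_frames+1 input
 * pictures are downscaled by 2^brd_scale and re-encoded with a throw-away
 * encoder, once per candidate pattern (IP, IBP, IBBP, ...); the pattern with
 * the lowest rate-distortion cost (coded bits weighted by lambda2 plus the
 * SSE reported in c->error[]) decides how many B frames to use. */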
2306
2307 static void select_input_picture(MpegEncContext *s){
2308     int i;
2309
2310     for(i=1; i<MAX_PICTURE_COUNT; i++)
2311         s->reordered_input_picture[i-1]= s->reordered_input_picture[i];
2312     s->reordered_input_picture[MAX_PICTURE_COUNT-1]= NULL;
2313
2314     /* set next picture type & ordering */
2315     if(s->reordered_input_picture[0]==NULL && s->input_picture[0]){
2316         if(/*s->picture_in_gop_number >= s->gop_size ||*/ s->next_picture_ptr==NULL || s->intra_only){
2317             s->reordered_input_picture[0]= s->input_picture[0];
2318             s->reordered_input_picture[0]->pict_type= I_TYPE;
2319             s->reordered_input_picture[0]->coded_picture_number= s->coded_picture_number++;
2320         }else{
2321             int b_frames;
2322
2323             if(s->avctx->frame_skip_threshold || s->avctx->frame_skip_factor){
2324                 if(s->picture_in_gop_number < s->gop_size && skip_check(s, s->input_picture[0], s->next_picture_ptr)){
2325                 //FIXME check that the gop check above is +-1 correct
2326 //av_log(NULL, AV_LOG_DEBUG, "skip %p %Ld\n", s->input_picture[0]->data[0], s->input_picture[0]->pts);
2327
2328                     if(s->input_picture[0]->type == FF_BUFFER_TYPE_SHARED){
2329                         for(i=0; i<4; i++)
2330                             s->input_picture[0]->data[i]= NULL;
2331                         s->input_picture[0]->type= 0;
2332                     }else{
2333                         assert(   s->input_picture[0]->type==FF_BUFFER_TYPE_USER
2334                                || s->input_picture[0]->type==FF_BUFFER_TYPE_INTERNAL);
2335
2336                         s->avctx->release_buffer(s->avctx, (AVFrame*)s->input_picture[0]);
2337                     }
2338
2339                     emms_c();
2340                     ff_vbv_update(s, 0);
2341
2342                     goto no_output_pic;
2343                 }
2344             }
2345
2346             if(s->flags&CODEC_FLAG_PASS2){
2347                 for(i=0; i<s->max_b_frames+1; i++){
2348                     int pict_num= s->input_picture[0]->display_picture_number + i;
2349
2350                     if(pict_num >= s->rc_context.num_entries)
2351                         break;
2352                     if(!s->input_picture[i]){
2353                         s->rc_context.entry[pict_num-1].new_pict_type = P_TYPE;
2354                         break;
2355                     }
2356
2357                     s->input_picture[i]->pict_type=
2358                         s->rc_context.entry[pict_num].new_pict_type;
2359                 }
2360             }
2361
2362             if(s->avctx->b_frame_strategy==0){
2363                 b_frames= s->max_b_frames;
2364                 while(b_frames && !s->input_picture[b_frames]) b_frames--;
2365             }else if(s->avctx->b_frame_strategy==1){
2366                 for(i=1; i<s->max_b_frames+1; i++){
2367                     if(s->input_picture[i] && s->input_picture[i]->b_frame_score==0){
2368                         s->input_picture[i]->b_frame_score=
2369                             get_intra_count(s, s->input_picture[i  ]->data[0],
2370                                                s->input_picture[i-1]->data[0], s->linesize) + 1;
2371                     }
2372                 }
2373                 for(i=0; i<s->max_b_frames+1; i++){
2374                     if(s->input_picture[i]==NULL || s->input_picture[i]->b_frame_score - 1 > s->mb_num/s->avctx->b_sensitivity) break;
2375                 }
2376
2377                 b_frames= FFMAX(0, i-1);
2378
2379                 /* reset scores */
2380                 for(i=0; i<b_frames+1; i++){
2381                     s->input_picture[i]->b_frame_score=0;
2382                 }
2383             }else if(s->avctx->b_frame_strategy==2){
2384                 b_frames= estimate_best_b_count(s);
2385             }else{
2386                 av_log(s->avctx, AV_LOG_ERROR, "illegal b frame strategy\n");
2387                 b_frames=0;
2388             }
2389
2390             emms_c();
2391 //static int b_count=0;
2392 //b_count+= b_frames;
2393 //av_log(s->avctx, AV_LOG_DEBUG, "b_frames: %d\n", b_count);
2394
2395             for(i= b_frames - 1; i>=0; i--){
2396                 int type= s->input_picture[i]->pict_type;
2397                 if(type && type != B_TYPE)
2398                     b_frames= i;
2399             }
2400             if(s->input_picture[b_frames]->pict_type == B_TYPE && b_frames == s->max_b_frames){
2401                 av_log(s->avctx, AV_LOG_ERROR, "warning, too many b frames in a row\n");
2402             }
2403
2404             if(s->picture_in_gop_number + b_frames >= s->gop_size){
2405               if((s->flags2 & CODEC_FLAG2_STRICT_GOP) && s->gop_size > s->picture_in_gop_number){
2406                     b_frames= s->gop_size - s->picture_in_gop_number - 1;
2407               }else{
2408                 if(s->flags & CODEC_FLAG_CLOSED_GOP)
2409                     b_frames=0;
2410                 s->input_picture[b_frames]->pict_type= I_TYPE;
2411               }
2412             }
2413
2414             if(   (s->flags & CODEC_FLAG_CLOSED_GOP)
2415                && b_frames
2416                && s->input_picture[b_frames]->pict_type== I_TYPE)
2417                 b_frames--;
2418
2419             s->reordered_input_picture[0]= s->input_picture[b_frames];
2420             if(s->reordered_input_picture[0]->pict_type != I_TYPE)
2421                 s->reordered_input_picture[0]->pict_type= P_TYPE;
2422             s->reordered_input_picture[0]->coded_picture_number= s->coded_picture_number++;
2423             for(i=0; i<b_frames; i++){
2424                 s->reordered_input_picture[i+1]= s->input_picture[i];
2425                 s->reordered_input_picture[i+1]->pict_type= B_TYPE;
2426                 s->reordered_input_picture[i+1]->coded_picture_number= s->coded_picture_number++;
2427             }
2428         }
2429     }
2430 no_output_pic:
2431     if(s->reordered_input_picture[0]){
2432         s->reordered_input_picture[0]->reference= s->reordered_input_picture[0]->pict_type!=B_TYPE ? 3 : 0;
2433
2434         copy_picture(&s->new_picture, s->reordered_input_picture[0]);
2435
2436         if(s->reordered_input_picture[0]->type == FF_BUFFER_TYPE_SHARED){
2437             // input is a shared pix, so we can't modify it -> alloc a new one & ensure that the shared one is reusable
2438
2439             int i= ff_find_unused_picture(s, 0);
2440             Picture *pic= &s->picture[i];
2441
2442             /* mark us unused / free shared pic */
2443             for(i=0; i<4; i++)
2444                 s->reordered_input_picture[0]->data[i]= NULL;
2445             s->reordered_input_picture[0]->type= 0;
2446
2447             pic->reference              = s->reordered_input_picture[0]->reference;
2448
2449             alloc_picture(s, pic, 0);
2450
2451             copy_picture_attributes(s, (AVFrame*)pic, (AVFrame*)s->reordered_input_picture[0]);
2452
2453             s->current_picture_ptr= pic;
2454         }else{
2455             // input is not a shared pix -> reuse buffer for current_pix
2456
2457             assert(   s->reordered_input_picture[0]->type==FF_BUFFER_TYPE_USER
2458                    || s->reordered_input_picture[0]->type==FF_BUFFER_TYPE_INTERNAL);
2459
2460             s->current_picture_ptr= s->reordered_input_picture[0];
2461             for(i=0; i<4; i++){
2462                 s->new_picture.data[i]+= INPLACE_OFFSET;
2463             }
2464         }
2465         copy_picture(&s->current_picture, s->current_picture_ptr);
2466
2467         s->picture_number= s->new_picture.display_picture_number;
2468 //printf("dpn:%d\n", s->picture_number);
2469     }else{
2470        memset(&s->new_picture, 0, sizeof(Picture));
2471     }
2472 }
2473
2474 int MPV_encode_picture(AVCodecContext *avctx,
2475                        unsigned char *buf, int buf_size, void *data)
2476 {
2477     MpegEncContext *s = avctx->priv_data;
2478     AVFrame *pic_arg = data;
2479     int i, stuffing_count;
2480
2481     for(i=0; i<avctx->thread_count; i++){
2482         int start_y= s->thread_context[i]->start_mb_y;
2483         int   end_y= s->thread_context[i]->  end_mb_y;
2484         int h= s->mb_height;
2485         uint8_t *start= buf + (size_t)(((int64_t) buf_size)*start_y/h);
2486         uint8_t *end  = buf + (size_t)(((int64_t) buf_size)*  end_y/h);
2487
2488         init_put_bits(&s->thread_context[i]->pb, start, end - start);
2489     }
2490
2491     s->picture_in_gop_number++;
2492
2493     if(load_input_picture(s, pic_arg) < 0)
2494         return -1;
2495
2496     select_input_picture(s);
2497
2498     /* output? */
2499     if(s->new_picture.data[0]){
2500         s->pict_type= s->new_picture.pict_type;
2501 //emms_c();
2502 //printf("qs:%f %f %d\n", s->new_picture.quality, s->current_picture.quality, s->qscale);
2503         MPV_frame_start(s, avctx);
2504
2505         encode_picture(s, s->picture_number);
2506
2507         avctx->real_pict_num  = s->picture_number;
2508         avctx->header_bits = s->header_bits;
2509         avctx->mv_bits     = s->mv_bits;
2510         avctx->misc_bits   = s->misc_bits;
2511         avctx->i_tex_bits  = s->i_tex_bits;
2512         avctx->p_tex_bits  = s->p_tex_bits;
2513         avctx->i_count     = s->i_count;
2514         avctx->p_count     = s->mb_num - s->i_count - s->skip_count; //FIXME f/b_count in avctx
2515         avctx->skip_count  = s->skip_count;
2516
2517         MPV_frame_end(s);
2518
2519         if (s->out_format == FMT_MJPEG)
2520             mjpeg_picture_trailer(s);
2521
2522         if(s->flags&CODEC_FLAG_PASS1)
2523             ff_write_pass1_stats(s);
2524
2525         for(i=0; i<4; i++){
2526             s->current_picture_ptr->error[i]= s->current_picture.error[i];
2527             avctx->error[i] += s->current_picture_ptr->error[i];
2528         }
2529
2530         if(s->flags&CODEC_FLAG_PASS1)
2531             assert(avctx->header_bits + avctx->mv_bits + avctx->misc_bits + avctx->i_tex_bits + avctx->p_tex_bits == put_bits_count(&s->pb));
2532         flush_put_bits(&s->pb);
2533         s->frame_bits  = put_bits_count(&s->pb);
2534
2535         stuffing_count= ff_vbv_update(s, s->frame_bits);
2536         if(stuffing_count){
2537             if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < stuffing_count + 50){
2538                 av_log(s->avctx, AV_LOG_ERROR, "stuffing too large\n");
2539                 return -1;
2540             }
2541
2542             switch(s->codec_id){
2543             case CODEC_ID_MPEG1VIDEO:
2544             case CODEC_ID_MPEG2VIDEO:
2545                 while(stuffing_count--){
2546                     put_bits(&s->pb, 8, 0);
2547                 }
2548             break;
2549             case CODEC_ID_MPEG4:
2550                 put_bits(&s->pb, 16, 0);
2551                 put_bits(&s->pb, 16, 0x1C3);
2552                 stuffing_count -= 4;
2553                 while(stuffing_count--){
2554                     put_bits(&s->pb, 8, 0xFF);
2555                 }
2556             break;
2557             default:
2558                 av_log(s->avctx, AV_LOG_ERROR, "vbv buffer overflow\n");
2559             }
2560             flush_put_bits(&s->pb);
2561             s->frame_bits  = put_bits_count(&s->pb);
2562         }
2563
2564         /* update mpeg1/2 vbv_delay for CBR */
2565         if(s->avctx->rc_max_rate && s->avctx->rc_min_rate == s->avctx->rc_max_rate && s->out_format == FMT_MPEG1
2566            && 90000LL * (avctx->rc_buffer_size-1) <= s->avctx->rc_max_rate*0xFFFFLL){
2567             int vbv_delay;
2568
2569             assert(s->repeat_first_field==0);
2570
2571             vbv_delay= lrintf(90000 * s->rc_context.buffer_index / s->avctx->rc_max_rate);
2572             assert(vbv_delay < 0xFFFF);
2573
2574             s->vbv_delay_ptr[0] &= 0xF8;
2575             s->vbv_delay_ptr[0] |= vbv_delay>>13;
2576             s->vbv_delay_ptr[1]  = vbv_delay>>5;
2577             s->vbv_delay_ptr[2] &= 0x07;
2578             s->vbv_delay_ptr[2] |= vbv_delay<<3;
2579         }
2580         s->total_bits += s->frame_bits;
2581         avctx->frame_bits  = s->frame_bits;
2582     }else{
2583         assert((pbBufPtr(&s->pb) == s->pb.buf));
2584         s->frame_bits=0;
2585     }
2586     assert((s->frame_bits&7)==0);
2587
2588     return s->frame_bits/8;
2589 }
2590
2591 #endif //CONFIG_ENCODERS
2592
2593 static inline void gmc1_motion(MpegEncContext *s,
2594                                uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2595                                uint8_t **ref_picture)
2596 {
2597     uint8_t *ptr;
2598     int offset, src_x, src_y, linesize, uvlinesize;
2599     int motion_x, motion_y;
2600     int emu=0;
2601
2602     motion_x= s->sprite_offset[0][0];
2603     motion_y= s->sprite_offset[0][1];
2604     src_x = s->mb_x * 16 + (motion_x >> (s->sprite_warping_accuracy+1));
2605     src_y = s->mb_y * 16 + (motion_y >> (s->sprite_warping_accuracy+1));
2606     motion_x<<=(3-s->sprite_warping_accuracy);
2607     motion_y<<=(3-s->sprite_warping_accuracy);
2608     src_x = clip(src_x, -16, s->width);
2609     if (src_x == s->width)
2610         motion_x =0;
2611     src_y = clip(src_y, -16, s->height);
2612     if (src_y == s->height)
2613         motion_y =0;
2614
2615     linesize = s->linesize;
2616     uvlinesize = s->uvlinesize;
2617
2618     ptr = ref_picture[0] + (src_y * linesize) + src_x;
2619
2620     if(s->flags&CODEC_FLAG_EMU_EDGE){
2621         if(   (unsigned)src_x >= s->h_edge_pos - 17
2622            || (unsigned)src_y >= s->v_edge_pos - 17){
2623             ff_emulated_edge_mc(s->edge_emu_buffer, ptr, linesize, 17, 17, src_x, src_y, s->h_edge_pos, s->v_edge_pos);
2624             ptr= s->edge_emu_buffer;
2625         }
2626     }
2627
2628     if((motion_x|motion_y)&7){
2629         s->dsp.gmc1(dest_y  , ptr  , linesize, 16, motion_x&15, motion_y&15, 128 - s->no_rounding);
2630         s->dsp.gmc1(dest_y+8, ptr+8, linesize, 16, motion_x&15, motion_y&15, 128 - s->no_rounding);
2631     }else{
2632         int dxy;
2633
2634         dxy= ((motion_x>>3)&1) | ((motion_y>>2)&2);
2635         if (s->no_rounding){
2636             s->dsp.put_no_rnd_pixels_tab[0][dxy](dest_y, ptr, linesize, 16);
2637         }else{
2638             s->dsp.put_pixels_tab       [0][dxy](dest_y, ptr, linesize, 16);
2639         }
2640     }
2641
2642     if(s->flags&CODEC_FLAG_GRAY) return;
2643
2644     motion_x= s->sprite_offset[1][0];
2645     motion_y= s->sprite_offset[1][1];
2646     src_x = s->mb_x * 8 + (motion_x >> (s->sprite_warping_accuracy+1));
2647     src_y = s->mb_y * 8 + (motion_y >> (s->sprite_warping_accuracy+1));
2648     motion_x<<=(3-s->sprite_warping_accuracy);
2649     motion_y<<=(3-s->sprite_warping_accuracy);
2650     src_x = clip(src_x, -8, s->width>>1);
2651     if (src_x == s->width>>1)
2652         motion_x =0;
2653     src_y = clip(src_y, -8, s->height>>1);
2654     if (src_y == s->height>>1)
2655         motion_y =0;
2656
2657     offset = (src_y * uvlinesize) + src_x;
2658     ptr = ref_picture[1] + offset;
2659     if(s->flags&CODEC_FLAG_EMU_EDGE){
2660         if(   (unsigned)src_x >= (s->h_edge_pos>>1) - 9
2661            || (unsigned)src_y >= (s->v_edge_pos>>1) - 9){
2662             ff_emulated_edge_mc(s->edge_emu_buffer, ptr, uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
2663             ptr= s->edge_emu_buffer;
2664             emu=1;
2665         }
2666     }
2667     s->dsp.gmc1(dest_cb, ptr, uvlinesize, 8, motion_x&15, motion_y&15, 128 - s->no_rounding);
2668
2669     ptr = ref_picture[2] + offset;
2670     if(emu){
2671         ff_emulated_edge_mc(s->edge_emu_buffer, ptr, uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
2672         ptr= s->edge_emu_buffer;
2673     }
2674     s->dsp.gmc1(dest_cr, ptr, uvlinesize, 8, motion_x&15, motion_y&15, 128 - s->no_rounding);
2675
2676     return;
2677 }
2678
2679 static inline void gmc_motion(MpegEncContext *s,
2680                                uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2681                                uint8_t **ref_picture)
2682 {
2683     uint8_t *ptr;
2684     int linesize, uvlinesize;
2685     const int a= s->sprite_warping_accuracy;
2686     int ox, oy;
2687
2688     linesize = s->linesize;
2689     uvlinesize = s->uvlinesize;
2690
2691     ptr = ref_picture[0];
2692
2693     ox= s->sprite_offset[0][0] + s->sprite_delta[0][0]*s->mb_x*16 + s->sprite_delta[0][1]*s->mb_y*16;
2694     oy= s->sprite_offset[0][1] + s->sprite_delta[1][0]*s->mb_x*16 + s->sprite_delta[1][1]*s->mb_y*16;
2695
2696     s->dsp.gmc(dest_y, ptr, linesize, 16,
2697            ox,
2698            oy,
2699            s->sprite_delta[0][0], s->sprite_delta[0][1],
2700            s->sprite_delta[1][0], s->sprite_delta[1][1],
2701            a+1, (1<<(2*a+1)) - s->no_rounding,
2702            s->h_edge_pos, s->v_edge_pos);
2703     s->dsp.gmc(dest_y+8, ptr, linesize, 16,
2704            ox + s->sprite_delta[0][0]*8,
2705            oy + s->sprite_delta[1][0]*8,
2706            s->sprite_delta[0][0], s->sprite_delta[0][1],
2707            s->sprite_delta[1][0], s->sprite_delta[1][1],
2708            a+1, (1<<(2*a+1)) - s->no_rounding,
2709            s->h_edge_pos, s->v_edge_pos);
2710
2711     if(s->flags&CODEC_FLAG_GRAY) return;
2712
2713     ox= s->sprite_offset[1][0] + s->sprite_delta[0][0]*s->mb_x*8 + s->sprite_delta[0][1]*s->mb_y*8;
2714     oy= s->sprite_offset[1][1] + s->sprite_delta[1][0]*s->mb_x*8 + s->sprite_delta[1][1]*s->mb_y*8;
2715
2716     ptr = ref_picture[1];
2717     s->dsp.gmc(dest_cb, ptr, uvlinesize, 8,
2718            ox,
2719            oy,
2720            s->sprite_delta[0][0], s->sprite_delta[0][1],
2721            s->sprite_delta[1][0], s->sprite_delta[1][1],
2722            a+1, (1<<(2*a+1)) - s->no_rounding,
2723            s->h_edge_pos>>1, s->v_edge_pos>>1);
2724
2725     ptr = ref_picture[2];
2726     s->dsp.gmc(dest_cr, ptr, uvlinesize, 8,
2727            ox,
2728            oy,
2729            s->sprite_delta[0][0], s->sprite_delta[0][1],
2730            s->sprite_delta[1][0], s->sprite_delta[1][1],
2731            a+1, (1<<(2*a+1)) - s->no_rounding,
2732            s->h_edge_pos>>1, s->v_edge_pos>>1);
2733 }
2734
2735 /**
2736  * Copies a rectangular area of samples to a temporary buffer and replicates the border samples.
2737  * @param buf destination buffer
2738  * @param src source buffer
2739  * @param linesize number of bytes between 2 vertically adjacent samples in both the source and destination buffers
2740  * @param block_w width of block
2741  * @param block_h height of block
2742  * @param src_x x coordinate of the top left sample of the block in the source buffer
2743  * @param src_y y coordinate of the top left sample of the block in the source buffer
2744  * @param w width of the source buffer
2745  * @param h height of the source buffer
2746  */
2747 void ff_emulated_edge_mc(uint8_t *buf, uint8_t *src, int linesize, int block_w, int block_h,
2748                                     int src_x, int src_y, int w, int h){
2749     int x, y;
2750     int start_y, start_x, end_y, end_x;
2751
2752     if(src_y>= h){
2753         src+= (h-1-src_y)*linesize;
2754         src_y=h-1;
2755     }else if(src_y<=-block_h){
2756         src+= (1-block_h-src_y)*linesize;
2757         src_y=1-block_h;
2758     }
2759     if(src_x>= w){
2760         src+= (w-1-src_x);
2761         src_x=w-1;
2762     }else if(src_x<=-block_w){
2763         src+= (1-block_w-src_x);
2764         src_x=1-block_w;
2765     }
2766
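         /* after the clamping above the block overlaps the picture by at least
          * one sample, so start_x < end_x and start_y < end_y hold below */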
2767     start_y= FFMAX(0, -src_y);
2768     start_x= FFMAX(0, -src_x);
2769     end_y= FFMIN(block_h, h-src_y);
2770     end_x= FFMIN(block_w, w-src_x);
2771
2772     // copy existing part
2773     for(y=start_y; y<end_y; y++){
2774         for(x=start_x; x<end_x; x++){
2775             buf[x + y*linesize]= src[x + y*linesize];
2776         }
2777     }
2778
2779     //top
2780     for(y=0; y<start_y; y++){
2781         for(x=start_x; x<end_x; x++){
2782             buf[x + y*linesize]= buf[x + start_y*linesize];
2783         }
2784     }
2785
2786     //bottom
2787     for(y=end_y; y<block_h; y++){
2788         for(x=start_x; x<end_x; x++){
2789             buf[x + y*linesize]= buf[x + (end_y-1)*linesize];
2790         }
2791     }
2792
2793     for(y=0; y<block_h; y++){
2794        //left
2795         for(x=0; x<start_x; x++){
2796             buf[x + y*linesize]= buf[start_x + y*linesize];
2797         }
2798
2799        //right
2800         for(x=end_x; x<block_w; x++){
2801             buf[x + y*linesize]= buf[end_x - 1 + y*linesize];
2802         }
2803     }
2804 }
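     /* Typical use, as in the callers in this file: when a motion-compensated
      * fetch may reach outside the padded picture, the samples are first
      * copied into s->edge_emu_buffer with the borders replicated, e.g.
      *
      *     ff_emulated_edge_mc(s->edge_emu_buffer, ptr_y, s->linesize, 17, 17,
      *                         src_x, src_y, s->h_edge_pos, s->v_edge_pos);
      *     ptr_y = s->edge_emu_buffer;
      *
      * and the normal interpolation code then runs unchanged on that buffer. */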
2805
2806 static inline int hpel_motion(MpegEncContext *s,
2807                                   uint8_t *dest, uint8_t *src,
2808                                   int field_based, int field_select,
2809                                   int src_x, int src_y,
2810                                   int width, int height, int stride,
2811                                   int h_edge_pos, int v_edge_pos,
2812                                   int w, int h, op_pixels_func *pix_op,
2813                                   int motion_x, int motion_y)
2814 {
2815     int dxy;
2816     int emu=0;
2817
2818     dxy = ((motion_y & 1) << 1) | (motion_x & 1);
2819     src_x += motion_x >> 1;
2820     src_y += motion_y >> 1;
2821
2822     /* WARNING: do not forget half pels */
2823     src_x = clip(src_x, -16, width); //FIXME unneeded for emu?
2824     if (src_x == width)
2825         dxy &= ~1;
2826     src_y = clip(src_y, -16, height);
2827     if (src_y == height)
2828         dxy &= ~2;
2829     src += src_y * stride + src_x;
2830
2831     if(s->unrestricted_mv && (s->flags&CODEC_FLAG_EMU_EDGE)){
2832         if(   (unsigned)src_x > h_edge_pos - (motion_x&1) - w
2833            || (unsigned)src_y > v_edge_pos - (motion_y&1) - h){
2834             ff_emulated_edge_mc(s->edge_emu_buffer, src, s->linesize, w+1, (h+1)<<field_based,
2835                              src_x, src_y<<field_based, h_edge_pos, s->v_edge_pos);
2836             src= s->edge_emu_buffer;
2837             emu=1;
2838         }
2839     }
2840     if(field_select)
2841         src += s->linesize;
2842     pix_op[dxy](dest, src, stride, h);
2843     return emu;
2844 }
2845
2846 static inline int hpel_motion_lowres(MpegEncContext *s,
2847                                   uint8_t *dest, uint8_t *src,
2848                                   int field_based, int field_select,
2849                                   int src_x, int src_y,
2850                                   int width, int height, int stride,
2851                                   int h_edge_pos, int v_edge_pos,
2852                                   int w, int h, h264_chroma_mc_func *pix_op,
2853                                   int motion_x, int motion_y)
2854 {
2855     const int lowres= s->avctx->lowres;
2856     const int s_mask= (2<<lowres)-1;
2857     int emu=0;
2858     int sx, sy;
2859
2860     if(s->quarter_sample){
2861         motion_x/=2;
2862         motion_y/=2;
2863     }
2864
2865     sx= motion_x & s_mask;
2866     sy= motion_y & s_mask;
2867     src_x += motion_x >> (lowres+1);
2868     src_y += motion_y >> (lowres+1);
2869
2870     src += src_y * stride + src_x;
2871
2872     if(   (unsigned)src_x > h_edge_pos                 - (!!sx) - w
2873        || (unsigned)src_y >(v_edge_pos >> field_based) - (!!sy) - h){
2874         ff_emulated_edge_mc(s->edge_emu_buffer, src, s->linesize, w+1, (h+1)<<field_based,
2875                             src_x, src_y<<field_based, h_edge_pos, v_edge_pos);
2876         src= s->edge_emu_buffer;
2877         emu=1;
2878     }
2879
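         /* scale the remaining sub-pel fraction to the 1/8-pel units expected
          * by the h264 chroma MC functions, e.g. with lowres==1 a fraction of
          * 0..3 becomes 0,2,4,6 */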
2880     sx <<= 2 - lowres;
2881     sy <<= 2 - lowres;
2882     if(field_select)
2883         src += s->linesize;
2884     pix_op[lowres](dest, src, stride, h, sx, sy);
2885     return emu;
2886 }
2887
2888 /* apply one mpeg motion vector to the three components */
2889 static always_inline void mpeg_motion(MpegEncContext *s,
2890                                uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2891                                int field_based, int bottom_field, int field_select,
2892                                uint8_t **ref_picture, op_pixels_func (*pix_op)[4],
2893                                int motion_x, int motion_y, int h)
2894 {
2895     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
2896     int dxy, uvdxy, mx, my, src_x, src_y, uvsrc_x, uvsrc_y, v_edge_pos, uvlinesize, linesize;
2897
2898 #if 0
2899 if(s->quarter_sample)
2900 {
2901     motion_x>>=1;
2902     motion_y>>=1;
2903 }
2904 #endif
2905
2906     v_edge_pos = s->v_edge_pos >> field_based;
2907     linesize   = s->current_picture.linesize[0] << field_based;
2908     uvlinesize = s->current_picture.linesize[1] << field_based;
2909
2910     dxy = ((motion_y & 1) << 1) | (motion_x & 1);
2911     src_x = s->mb_x* 16               + (motion_x >> 1);
2912     src_y =(s->mb_y<<(4-field_based)) + (motion_y >> 1);
2913
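         /* derive the chroma source position and sub-pel bits per output
          * format: H.263 maps any luma remainder to a half-pel chroma offset,
          * H.261 chroma vectors are full-pel, and MPEG-1/2 divides the vector
          * according to the 4:2:0 / 4:2:2 / 4:4:4 subsampling below */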
2914     if (s->out_format == FMT_H263) {
2915         if((s->workaround_bugs & FF_BUG_HPEL_CHROMA) && field_based){
2916             mx = (motion_x>>1)|(motion_x&1);
2917             my = motion_y >>1;
2918             uvdxy = ((my & 1) << 1) | (mx & 1);
2919             uvsrc_x = s->mb_x* 8               + (mx >> 1);
2920             uvsrc_y = (s->mb_y<<(3-field_based)) + (my >> 1);
2921         }else{
2922             uvdxy = dxy | (motion_y & 2) | ((motion_x & 2) >> 1);
2923             uvsrc_x = src_x>>1;
2924             uvsrc_y = src_y>>1;
2925         }
2926     }else if(s->out_format == FMT_H261){// even chroma MVs are full-pel in H.261
2927         mx = motion_x / 4;
2928         my = motion_y / 4;
2929         uvdxy = 0;
2930         uvsrc_x = s->mb_x*8 + mx;
2931         uvsrc_y = s->mb_y*8 + my;
2932     } else {
2933         if(s->chroma_y_shift){
2934             mx = motion_x / 2;
2935             my = motion_y / 2;
2936             uvdxy = ((my & 1) << 1) | (mx & 1);
2937             uvsrc_x = s->mb_x* 8               + (mx >> 1);
2938             uvsrc_y = (s->mb_y<<(3-field_based)) + (my >> 1);
2939         } else {
2940             if(s->chroma_x_shift){
2941             //Chroma422
2942                 mx = motion_x / 2;
2943                 uvdxy = ((motion_y & 1) << 1) | (mx & 1);
2944                 uvsrc_x = s->mb_x* 8           + (mx >> 1);
2945                 uvsrc_y = src_y;
2946             } else {
2947             //Chroma444
2948                 uvdxy = dxy;
2949                 uvsrc_x = src_x;
2950                 uvsrc_y = src_y;
2951             }
2952         }
2953     }
2954
2955     ptr_y  = ref_picture[0] + src_y * linesize + src_x;
2956     ptr_cb = ref_picture[1] + uvsrc_y * uvlinesize + uvsrc_x;
2957     ptr_cr = ref_picture[2] + uvsrc_y * uvlinesize + uvsrc_x;
2958
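         /* the unsigned compares catch negative and too-large positions in one
          * test each; 17x17 samples (+1 line per field) cover the 16x16 block
          * plus the extra row/column needed for half-pel interpolation */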
2959     if(   (unsigned)src_x > s->h_edge_pos - (motion_x&1) - 16
2960        || (unsigned)src_y >    v_edge_pos - (motion_y&1) - h){
2961             if(s->codec_id == CODEC_ID_MPEG2VIDEO ||
2962                s->codec_id == CODEC_ID_MPEG1VIDEO){
2963                 av_log(s->avctx,AV_LOG_DEBUG,"MPEG motion vector out of boundary\n");
2964                 return ;
2965             }
2966             ff_emulated_edge_mc(s->edge_emu_buffer, ptr_y, s->linesize, 17, 17+field_based,
2967                              src_x, src_y<<field_based, s->h_edge_pos, s->v_edge_pos);
2968             ptr_y = s->edge_emu_buffer;
2969             if(!(s->flags&CODEC_FLAG_GRAY)){
2970                 uint8_t *uvbuf= s->edge_emu_buffer+18*s->linesize;
2971                 ff_emulated_edge_mc(uvbuf  , ptr_cb, s->uvlinesize, 9, 9+field_based,
2972                                  uvsrc_x, uvsrc_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
2973                 ff_emulated_edge_mc(uvbuf+16, ptr_cr, s->uvlinesize, 9, 9+field_based,
2974                                  uvsrc_x, uvsrc_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
2975                 ptr_cb= uvbuf;
2976                 ptr_cr= uvbuf+16;
2977             }
2978     }
2979
2980     if(bottom_field){ //FIXME use this for field pix too instead of the obnoxious hack which changes picture.data
2981         dest_y += s->linesize;
2982         dest_cb+= s->uvlinesize;
2983         dest_cr+= s->uvlinesize;
2984     }
2985
2986     if(field_select){
2987         ptr_y += s->linesize;
2988         ptr_cb+= s->uvlinesize;
2989         ptr_cr+= s->uvlinesize;
2990     }
2991
2992     pix_op[0][dxy](dest_y, ptr_y, linesize, h);
2993
2994     if(!(s->flags&CODEC_FLAG_GRAY)){
2995         pix_op[s->chroma_x_shift][uvdxy](dest_cb, ptr_cb, uvlinesize, h >> s->chroma_y_shift);
2996         pix_op[s->chroma_x_shift][uvdxy](dest_cr, ptr_cr, uvlinesize, h >> s->chroma_y_shift);
2997     }
2998 #if defined(CONFIG_H261_ENCODER) || defined(CONFIG_H261_DECODER)
2999     if(s->out_format == FMT_H261){
3000         ff_h261_loop_filter(s);
3001     }
3002 #endif
3003 }
3004
3005 /* apply one mpeg motion vector to the three components */
3006 static always_inline void mpeg_motion_lowres(MpegEncContext *s,
3007                                uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
3008                                int field_based, int bottom_field, int field_select,
3009                                uint8_t **ref_picture, h264_chroma_mc_func *pix_op,
3010                                int motion_x, int motion_y, int h)
3011 {
3012     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
3013     int mx, my, src_x, src_y, uvsrc_x, uvsrc_y, uvlinesize, linesize, sx, sy, uvsx, uvsy;
3014     const int lowres= s->avctx->lowres;
3015     const int block_s= 8>>lowres;
3016     const int s_mask= (2<<lowres)-1;
3017     const int h_edge_pos = s->h_edge_pos >> lowres;
3018     const int v_edge_pos = s->v_edge_pos >> lowres;
3019     linesize   = s->current_picture.linesize[0] << field_based;
3020     uvlinesize = s->current_picture.linesize[1] << field_based;
3021
3022     if(s->quarter_sample){ //FIXME obviously not perfect but qpel won't work in lowres anyway
3023         motion_x/=2;
3024         motion_y/=2;
3025     }
3026
3027     if(field_based){
3028         motion_y += (bottom_field - field_select)*((1<<lowres)-1);
3029     }
3030
3031     sx= motion_x & s_mask;
3032     sy= motion_y & s_mask;
3033     src_x = s->mb_x*2*block_s               + (motion_x >> (lowres+1));
3034     src_y =(s->mb_y*2*block_s>>field_based) + (motion_y >> (lowres+1));
3035
3036     if (s->out_format == FMT_H263) {
3037         uvsx = ((motion_x>>1) & s_mask) | (sx&1);
3038         uvsy = ((motion_y>>1) & s_mask) | (sy&1);
3039         uvsrc_x = src_x>>1;
3040         uvsrc_y = src_y>>1;
3041     }else if(s->out_format == FMT_H261){// even chroma MVs are full-pel in H.261
3042         mx = motion_x / 4;
3043         my = motion_y / 4;
3044         uvsx = (2*mx) & s_mask;
3045         uvsy = (2*my) & s_mask;
3046         uvsrc_x = s->mb_x*block_s               + (mx >> lowres);
3047         uvsrc_y = s->mb_y*block_s               + (my >> lowres);
3048     } else {
3049         mx = motion_x / 2;
3050         my = motion_y / 2;
3051         uvsx = mx & s_mask;
3052         uvsy = my & s_mask;
3053         uvsrc_x = s->mb_x*block_s               + (mx >> (lowres+1));
3054         uvsrc_y =(s->mb_y*block_s>>field_based) + (my >> (lowres+1));
3055     }
3056
3057     ptr_y  = ref_picture[0] + src_y * linesize + src_x;
3058     ptr_cb = ref_picture[1] + uvsrc_y * uvlinesize + uvsrc_x;
3059     ptr_cr = ref_picture[2] + uvsrc_y * uvlinesize + uvsrc_x;
3060
3061     if(   (unsigned)src_x > h_edge_pos                 - (!!sx) - 2*block_s
3062        || (unsigned)src_y >(v_edge_pos >> field_based) - (!!sy) - h){
3063             ff_emulated_edge_mc(s->edge_emu_buffer, ptr_y, s->linesize, 17, 17+field_based,
3064                              src_x, src_y<<field_based, h_edge_pos, v_edge_pos);
3065             ptr_y = s->edge_emu_buffer;
3066             if(!(s->flags&CODEC_FLAG_GRAY)){
3067                 uint8_t *uvbuf= s->edge_emu_buffer+18*s->linesize;
3068                 ff_emulated_edge_mc(uvbuf  , ptr_cb, s->uvlinesize, 9, 9+field_based,
3069                                  uvsrc_x, uvsrc_y<<field_based, h_edge_pos>>1, v_edge_pos>>1);
3070                 ff_emulated_edge_mc(uvbuf+16, ptr_cr, s->uvlinesize, 9, 9+field_based,
3071                                  uvsrc_x, uvsrc_y<<field_based, h_edge_pos>>1, v_edge_pos>>1);
3072                 ptr_cb= uvbuf;
3073                 ptr_cr= uvbuf+16;
3074             }
3075     }
3076
3077     if(bottom_field){ //FIXME use this for field pix too instead of the obnoxious hack which changes picture.data
3078         dest_y += s->linesize;
3079         dest_cb+= s->uvlinesize;
3080         dest_cr+= s->uvlinesize;
3081     }
3082
3083     if(field_select){
3084         ptr_y += s->linesize;
3085         ptr_cb+= s->uvlinesize;
3086         ptr_cr+= s->uvlinesize;
3087     }
3088
3089     sx <<= 2 - lowres;
3090     sy <<= 2 - lowres;
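         /* the luma block is twice the chroma block size, so it uses the next
          * larger operator in the chroma MC table */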
3091     pix_op[lowres-1](dest_y, ptr_y, linesize, h, sx, sy);
3092
3093     if(!(s->flags&CODEC_FLAG_GRAY)){
3094         uvsx <<= 2 - lowres;
3095         uvsy <<= 2 - lowres;
3096         pix_op[lowres](dest_cb, ptr_cb, uvlinesize, h >> s->chroma_y_shift, uvsx, uvsy);
3097         pix_op[lowres](dest_cr, ptr_cr, uvlinesize, h >> s->chroma_y_shift, uvsx, uvsy);
3098     }
3099     //FIXME h261 lowres loop filter
3100 }
3101
3102 //FIXME move to dsputil, avg variant, 16x16 version
3103 static inline void put_obmc(uint8_t *dst, uint8_t *src[5], int stride){
3104     int x;
3105     uint8_t * const top   = src[1];
3106     uint8_t * const left  = src[2];
3107     uint8_t * const mid   = src[0];
3108     uint8_t * const right = src[3];
3109     uint8_t * const bottom= src[4];
3110 #define OBMC_FILTER(x, t, l, m, r, b)\
3111     dst[x]= (t*top[x] + l*left[x] + m*mid[x] + r*right[x] + b*bottom[x] + 4)>>3
3112 #define OBMC_FILTER4(x, t, l, m, r, b)\
3113     OBMC_FILTER(x         , t, l, m, r, b);\
3114     OBMC_FILTER(x+1       , t, l, m, r, b);\
3115     OBMC_FILTER(x  +stride, t, l, m, r, b);\
3116     OBMC_FILTER(x+1+stride, t, l, m, r, b);
3117
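         /* each output pixel is a weighted average of the five candidate
          * predictions; the weights of every OBMC_FILTER call below sum to 8,
          * so the +4 / >>3 in the macro perform rounding and normalization */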
3118     x=0;
3119     OBMC_FILTER (x  , 2, 2, 4, 0, 0);
3120     OBMC_FILTER (x+1, 2, 1, 5, 0, 0);
3121     OBMC_FILTER4(x+2, 2, 1, 5, 0, 0);
3122     OBMC_FILTER4(x+4, 2, 0, 5, 1, 0);
3123     OBMC_FILTER (x+6, 2, 0, 5, 1, 0);
3124     OBMC_FILTER (x+7, 2, 0, 4, 2, 0);
3125     x+= stride;
3126     OBMC_FILTER (x  , 1, 2, 5, 0, 0);
3127     OBMC_FILTER (x+1, 1, 2, 5, 0, 0);
3128     OBMC_FILTER (x+6, 1, 0, 5, 2, 0);
3129     OBMC_FILTER (x+7, 1, 0, 5, 2, 0);
3130     x+= stride;
3131     OBMC_FILTER4(x  , 1, 2, 5, 0, 0);
3132     OBMC_FILTER4(x+2, 1, 1, 6, 0, 0);
3133     OBMC_FILTER4(x+4, 1, 0, 6, 1, 0);
3134     OBMC_FILTER4(x+6, 1, 0, 5, 2, 0);
3135     x+= 2*stride;
3136     OBMC_FILTER4(x  , 0, 2, 5, 0, 1);
3137     OBMC_FILTER4(x+2, 0, 1, 6, 0, 1);
3138     OBMC_FILTER4(x+4, 0, 0, 6, 1, 1);
3139     OBMC_FILTER4(x+6, 0, 0, 5, 2, 1);
3140     x+= 2*stride;
3141     OBMC_FILTER (x  , 0, 2, 5, 0, 1);
3142     OBMC_FILTER (x+1, 0, 2, 5, 0, 1);
3143     OBMC_FILTER4(x+2, 0, 1, 5, 0, 2);
3144     OBMC_FILTER4(x+4, 0, 0, 5, 1, 2);
3145     OBMC_FILTER (x+6, 0, 0, 5, 2, 1);
3146     OBMC_FILTER (x+7, 0, 0, 5, 2, 1);
3147     x+= stride;
3148     OBMC_FILTER (x  , 0, 2, 4, 0, 2);
3149     OBMC_FILTER (x+1, 0, 1, 5, 0, 2);
3150     OBMC_FILTER (x+6, 0, 0, 5, 1, 2);
3151     OBMC_FILTER (x+7, 0, 0, 4, 2, 2);
3152 }
3153
3154 /* obmc for 1 8x8 luma block */
3155 static inline void obmc_motion(MpegEncContext *s,
3156                                uint8_t *dest, uint8_t *src,
3157                                int src_x, int src_y,
3158                                op_pixels_func *pix_op,
3159                                int16_t mv[5][2]/* mid top left right bottom*/)
3160 #define MID    0
3161 {
3162     int i;
3163     uint8_t *ptr[5];
3164
3165     assert(s->quarter_sample==0);
3166
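         /* neighbours whose vector equals the centre vector reuse the centre
          * prediction instead of redoing the motion compensation */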
3167     for(i=0; i<5; i++){
3168         if(i && mv[i][0]==mv[MID][0] && mv[i][1]==mv[MID][1]){
3169             ptr[i]= ptr[MID];
3170         }else{
3171             ptr[i]= s->obmc_scratchpad + 8*(i&1) + s->linesize*8*(i>>1);
3172             hpel_motion(s, ptr[i], src, 0, 0,
3173                         src_x, src_y,
3174                         s->width, s->height, s->linesize,
3175                         s->h_edge_pos, s->v_edge_pos,
3176                         8, 8, pix_op,
3177                         mv[i][0], mv[i][1]);
3178         }
3179     }
3180
3181     put_obmc(dest, ptr, s->linesize);
3182 }
3183
3184 static inline void qpel_motion(MpegEncContext *s,
3185                                uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
3186                                int field_based, int bottom_field, int field_select,
3187                                uint8_t **ref_picture, op_pixels_func (*pix_op)[4],
3188                                qpel_mc_func (*qpix_op)[16],
3189                                int motion_x, int motion_y, int h)
3190 {
3191     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
3192     int dxy, uvdxy, mx, my, src_x, src_y, uvsrc_x, uvsrc_y, v_edge_pos, linesize, uvlinesize;
3193
3194     dxy = ((motion_y & 3) << 2) | (motion_x & 3);
3195     src_x = s->mb_x *  16                 + (motion_x >> 2);
3196     src_y = s->mb_y * (16 >> field_based) + (motion_y >> 2);
3197
3198     v_edge_pos = s->v_edge_pos >> field_based;
3199     linesize = s->linesize << field_based;
3200     uvlinesize = s->uvlinesize << field_based;
3201
3202     if(field_based){
3203         mx= motion_x/2;
3204         my= motion_y>>1;
3205     }else if(s->workaround_bugs&FF_BUG_QPEL_CHROMA2){
3206         static const int rtab[8]= {0,0,1,1,0,0,0,1};
3207         mx= (motion_x>>1) + rtab[motion_x&7];
3208         my= (motion_y>>1) + rtab[motion_y&7];
3209     }else if(s->workaround_bugs&FF_BUG_QPEL_CHROMA){
3210         mx= (motion_x>>1)|(motion_x&1);
3211         my= (motion_y>>1)|(motion_y&1);
3212     }else{
3213         mx= motion_x/2;
3214         my= motion_y/2;
3215     }
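         /* the division by 2 above adapts the quarter-pel luma vector to the
          * chroma resolution; the (>>1)|(&1) below rounds it to half-pel
          * precision, keeping any remainder as a half-pel offset */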
3216     mx= (mx>>1)|(mx&1);
3217     my= (my>>1)|(my&1);
3218
3219     uvdxy= (mx&1) | ((my&1)<<1);
3220     mx>>=1;
3221     my>>=1;
3222
3223     uvsrc_x = s->mb_x *  8                 + mx;
3224     uvsrc_y = s->mb_y * (8 >> field_based) + my;
3225
3226     ptr_y  = ref_picture[0] +   src_y *   linesize +   src_x;
3227     ptr_cb = ref_picture[1] + uvsrc_y * uvlinesize + uvsrc_x;
3228     ptr_cr = ref_picture[2] + uvsrc_y * uvlinesize + uvsrc_x;
3229
3230     if(   (unsigned)src_x > s->h_edge_pos - (motion_x&3) - 16
3231        || (unsigned)src_y >    v_edge_pos - (motion_y&3) - h  ){
3232         ff_emulated_edge_mc(s->edge_emu_buffer, ptr_y, s->linesize, 17, 17+field_based,
3233                          src_x, src_y<<field_based, s->h_edge_pos, s->v_edge_pos);
3234         ptr_y= s->edge_emu_buffer;
3235         if(!(s->flags&CODEC_FLAG_GRAY)){
3236             uint8_t *uvbuf= s->edge_emu_buffer + 18*s->linesize;
3237             ff_emulated_edge_mc(uvbuf, ptr_cb, s->uvlinesize, 9, 9 + field_based,
3238                              uvsrc_x, uvsrc_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
3239             ff_emulated_edge_mc(uvbuf + 16, ptr_cr, s->uvlinesize, 9, 9 + field_based,
3240                              uvsrc_x, uvsrc_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
3241             ptr_cb= uvbuf;
3242             ptr_cr= uvbuf + 16;
3243         }
3244     }
3245
3246     if(!field_based)
3247         qpix_op[0][dxy](dest_y, ptr_y, linesize);
3248     else{
3249         if(bottom_field){
3250             dest_y += s->linesize;
3251             dest_cb+= s->uvlinesize;
3252             dest_cr+= s->uvlinesize;
3253         }
3254
3255         if(field_select){
3256             ptr_y  += s->linesize;
3257             ptr_cb += s->uvlinesize;
3258             ptr_cr += s->uvlinesize;
3259         }
3260         //damn interlaced mode
3261         //FIXME boundary mirroring is not exactly correct here
3262         qpix_op[1][dxy](dest_y  , ptr_y  , linesize);
3263         qpix_op[1][dxy](dest_y+8, ptr_y+8, linesize);
3264     }
3265     if(!(s->flags&CODEC_FLAG_GRAY)){
3266         pix_op[1][uvdxy](dest_cr, ptr_cr, uvlinesize, h >> 1);
3267         pix_op[1][uvdxy](dest_cb, ptr_cb, uvlinesize, h >> 1);
3268     }
3269 }
3270
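     /* used by the 4MV code below to turn the sum of the four 8x8 luma vectors
      * into a single chroma vector; h263_chroma_roundtab provides the standard
      * H.263 rounding and negative values are rounded symmetrically */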
3271 inline int ff_h263_round_chroma(int x){
3272     if (x >= 0)
3273         return  (h263_chroma_roundtab[x & 0xf] + ((x >> 3) & ~1));
3274     else {
3275         x = -x;
3276         return -(h263_chroma_roundtab[x & 0xf] + ((x >> 3) & ~1));
3277     }
3278 }
3279
3280 /**
3281  * h263 chroma 4MV motion compensation.
3282  */
3283 static inline void chroma_4mv_motion(MpegEncContext *s,
3284                                      uint8_t *dest_cb, uint8_t *dest_cr,
3285                                      uint8_t **ref_picture,
3286                                      op_pixels_func *pix_op,
3287                                      int mx, int my){
3288     int dxy, emu=0, src_x, src_y, offset;
3289     uint8_t *ptr;
3290
3291     /* In case of 8X8, we construct a single chroma motion vector
3292        with a special rounding */
3293     mx= ff_h263_round_chroma(mx);
3294     my= ff_h263_round_chroma(my);
3295
3296     dxy = ((my & 1) << 1) | (mx & 1);
3297     mx >>= 1;
3298     my >>= 1;
3299
3300     src_x = s->mb_x * 8 + mx;
3301     src_y = s->mb_y * 8 + my;
3302     src_x = clip(src_x, -8, s->width/2);
3303     if (src_x == s->width/2)
3304         dxy &= ~1;
3305     src_y = clip(src_y, -8, s->height/2);
3306     if (src_y == s->height/2)
3307         dxy &= ~2;
3308
3309     offset = (src_y * (s->uvlinesize)) + src_x;
3310     ptr = ref_picture[1] + offset;
3311     if(s->flags&CODEC_FLAG_EMU_EDGE){
3312         if(   (unsigned)src_x > (s->h_edge_pos>>1) - (dxy &1) - 8
3313            || (unsigned)src_y > (s->v_edge_pos>>1) - (dxy>>1) - 8){
3314             ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
3315             ptr= s->edge_emu_buffer;
3316             emu=1;
3317         }
3318     }
3319     pix_op[dxy](dest_cb, ptr, s->uvlinesize, 8);
3320
3321     ptr = ref_picture[2] + offset;
3322     if(emu){
3323         ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
3324         ptr= s->edge_emu_buffer;
3325     }
3326     pix_op[dxy](dest_cr, ptr, s->uvlinesize, 8);
3327 }
3328
3329 static inline void chroma_4mv_motion_lowres(MpegEncContext *s,
3330                                      uint8_t *dest_cb, uint8_t *dest_cr,
3331                                      uint8_t **ref_picture,
3332                                      h264_chroma_mc_func *pix_op,
3333                                      int mx, int my){
3334     const int lowres= s->avctx->lowres;
3335     const int block_s= 8>>lowres;
3336     const int s_mask= (2<<lowres)-1;
3337     const int h_edge_pos = s->h_edge_pos >> (lowres+1);
3338     const int v_edge_pos = s->v_edge_pos >> (lowres+1);
3339     int emu=0, src_x, src_y, offset, sx, sy;
3340     uint8_t *ptr;
3341
3342     if(s->quarter_sample){
3343         mx/=2;
3344         my/=2;
3345     }
3346
3347     /* In case of 8X8, we construct a single chroma motion vector
3348        with a special rounding */
3349     mx= ff_h263_round_chroma(mx);
3350     my= ff_h263_round_chroma(my);
3351
3352     sx= mx & s_mask;
3353     sy= my & s_mask;
3354     src_x = s->mb_x*block_s + (mx >> (lowres+1));
3355     src_y = s->mb_y*block_s + (my >> (lowres+1));
3356
3357     offset = src_y * s->uvlinesize + src_x;
3358     ptr = ref_picture[1] + offset;
3359     if(s->flags&CODEC_FLAG_EMU_EDGE){
3360         if(   (unsigned)src_x > h_edge_pos - (!!sx) - block_s
3361            || (unsigned)src_y > v_edge_pos - (!!sy) - block_s){
3362             ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, src_x, src_y, h_edge_pos, v_edge_pos);
3363             ptr= s->edge_emu_buffer;
3364             emu=1;
3365         }
3366     }
3367     sx <<= 2 - lowres;
3368     sy <<= 2 - lowres;
3369     pix_op[lowres](dest_cb, ptr, s->uvlinesize, block_s, sx, sy);
3370
3371     ptr = ref_picture[2] + offset;
3372     if(emu){
3373         ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, src_x, src_y, h_edge_pos, v_edge_pos);
3374         ptr= s->edge_emu_buffer;
3375     }
3376     pix_op[lowres](dest_cr, ptr, s->uvlinesize, block_s, sx, sy);
3377 }
3378
3379 static inline void prefetch_motion(MpegEncContext *s, uint8_t **pix, int dir){
3380     /* fetch pixels for the estimated mv 4 macroblocks ahead
3381      * optimized for 64-byte cache lines */
3382     const int shift = s->quarter_sample ? 2 : 1;
3383     const int mx= (s->mv[dir][0][0]>>shift) + 16*s->mb_x + 8;
3384     const int my= (s->mv[dir][0][1]>>shift) + 16*s->mb_y;
3385     int off= mx + (my + (s->mb_x&3)*4)*s->linesize + 64;
3386     s->dsp.prefetch(pix[0]+off, s->linesize, 4);
3387     off= (mx>>1) + ((my>>1) + (s->mb_x&7))*s->uvlinesize + 64;
3388     s->dsp.prefetch(pix[1]+off, pix[2]-pix[1], 2);
3389 }
3390
3391 /**
3392  * motion compensation of a single macroblock
3393  * @param s context
3394  * @param dest_y luma destination pointer
3395  * @param dest_cb chroma cb/u destination pointer
3396  * @param dest_cr chroma cr/v destination pointer
3397  * @param dir direction (0->forward, 1->backward)
3398  * @param ref_picture array[3] of pointers to the 3 planes of the reference picture
3399  * @param pix_op halfpel motion compensation function (average or put normally)
3400  * @param qpix_op qpel motion compensation function (average or put normally)
3401  * the motion vectors are taken from s->mv and the MV type from s->mv_type
3402  */
3403 static inline void MPV_motion(MpegEncContext *s,
3404                               uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
3405                               int dir, uint8_t **ref_picture,
3406                               op_pixels_func (*pix_op)[4], qpel_mc_func (*qpix_op)[16])
3407 {
3408     int dxy, mx, my, src_x, src_y, motion_x, motion_y;
3409     int mb_x, mb_y, i;
3410     uint8_t *ptr, *dest;
3411
3412     mb_x = s->mb_x;
3413     mb_y = s->mb_y;
3414
3415     prefetch_motion(s, ref_picture, dir);
3416
3417     if(s->obmc && s->pict_type != B_TYPE){
3418         int16_t mv_cache[4][4][2];
3419         const int xy= s->mb_x + s->mb_y*s->mb_stride;
3420         const int mot_stride= s->b8_stride;
3421         const int mot_xy= mb_x*2 + mb_y*2*mot_stride;
3422
3423         assert(!s->mb_skipped);
3424
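             /* build a 4x4 cache of motion vectors around the current MB:
              * rows 1-2 hold the four 8x8 vectors of the current MB, row 0 the
              * MB above (or a copy when unavailable or intra), columns 0/3 the
              * left/right neighbours, and row 3 simply repeats row 2 */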
3425         memcpy(mv_cache[1][1], s->current_picture.motion_val[0][mot_xy           ], sizeof(int16_t)*4);
3426         memcpy(mv_cache[2][1], s->current_picture.motion_val[0][mot_xy+mot_stride], sizeof(int16_t)*4);
3427         memcpy(mv_cache[3][1], s->current_picture.motion_val[0][mot_xy+mot_stride], sizeof(int16_t)*4);
3428
3429         if(mb_y==0 || IS_INTRA(s->current_picture.mb_type[xy-s->mb_stride])){
3430             memcpy(mv_cache[0][1], mv_cache[1][1], sizeof(int16_t)*4);
3431         }else{
3432             memcpy(mv_cache[0][1], s->current_picture.motion_val[0][mot_xy-mot_stride], sizeof(int16_t)*4);
3433         }
3434
3435         if(mb_x==0 || IS_INTRA(s->current_picture.mb_type[xy-1])){
3436             *(int32_t*)mv_cache[1][0]= *(int32_t*)mv_cache[1][1];
3437             *(int32_t*)mv_cache[2][0]= *(int32_t*)mv_cache[2][1];
3438         }else{
3439             *(int32_t*)mv_cache[1][0]= *(int32_t*)s->current_picture.motion_val[0][mot_xy-1];
3440             *(int32_t*)mv_cache[2][0]= *(int32_t*)s->current_picture.motion_val[0][mot_xy-1+mot_stride];
3441         }
3442
3443         if(mb_x+1>=s->mb_width || IS_INTRA(s->current_picture.mb_type[xy+1])){
3444             *(int32_t*)mv_cache[1][3]= *(int32_t*)mv_cache[1][2];
3445             *(int32_t*)mv_cache[2][3]= *(int32_t*)mv_cache[2][2];
3446         }else{
3447             *(int32_t*)mv_cache[1][3]= *(int32_t*)s->current_picture.motion_val[0][mot_xy+2];
3448             *(int32_t*)mv_cache[2][3]= *(int32_t*)s->current_picture.motion_val[0][mot_xy+2+mot_stride];
3449         }
3450
3451         mx = 0;
3452         my = 0;
3453         for(i=0;i<4;i++) {
3454             const int x= (i&1)+1;
3455             const int y= (i>>1)+1;
3456             int16_t mv[5][2]= {
3457                 {mv_cache[y][x  ][0], mv_cache[y][x  ][1]},
3458                 {mv_cache[y-1][x][0], mv_cache[y-1][x][1]},
3459                 {mv_cache[y][x-1][0], mv_cache[y][x-1][1]},
3460                 {mv_cache[y][x+1][0], mv_cache[y][x+1][1]},
3461                 {mv_cache[y+1][x][0], mv_cache[y+1][x][1]}};
3462             //FIXME cleanup
3463             obmc_motion(s, dest_y + ((i & 1) * 8) + (i >> 1) * 8 * s->linesize,
3464                         ref_picture[0],
3465                         mb_x * 16 + (i & 1) * 8, mb_y * 16 + (i >>1) * 8,
3466                         pix_op[1],
3467                         mv);
3468
3469             mx += mv[0][0];
3470             my += mv[0][1];
3471         }
3472         if(!(s->flags&CODEC_FLAG_GRAY))
3473             chroma_4mv_motion(s, dest_cb, dest_cr, ref_picture, pix_op[1], mx, my);
3474
3475         return;
3476     }
3477
3478     switch(s->mv_type) {
3479     case MV_TYPE_16X16:
3480         if(s->mcsel){
3481             if(s->real_sprite_warping_points==1){
3482                 gmc1_motion(s, dest_y, dest_cb, dest_cr,
3483                             ref_picture);
3484             }else{
3485                 gmc_motion(s, dest_y, dest_cb, dest_cr,
3486                             ref_picture);
3487             }
3488         }else if(s->quarter_sample){
3489             qpel_motion(s, dest_y, dest_cb, dest_cr,
3490                         0, 0, 0,
3491                         ref_picture, pix_op, qpix_op,
3492                         s->mv[dir][0][0], s->mv[dir][0][1], 16);
3493         }else if(s->mspel){
3494             ff_mspel_motion(s, dest_y, dest_cb, dest_cr,
3495                         ref_picture, pix_op,
3496                         s->mv[dir][0][0], s->mv[dir][0][1], 16);
3497         }else
3498         {
3499             mpeg_motion(s, dest_y, dest_cb, dest_cr,
3500                         0, 0, 0,
3501                         ref_picture, pix_op,
3502                         s->mv[dir][0][0], s->mv[dir][0][1], 16);
3503         }
3504         break;
3505     case MV_TYPE_8X8:
3506         mx = 0;
3507         my = 0;
3508         if(s->quarter_sample){
3509             for(i=0;i<4;i++) {
3510                 motion_x = s->mv[dir][i][0];
3511                 motion_y = s->mv[dir][i][1];
3512
3513                 dxy = ((motion_y & 3) << 2) | (motion_x & 3);
3514                 src_x = mb_x * 16 + (motion_x >> 2) + (i & 1) * 8;
3515                 src_y = mb_y * 16 + (motion_y >> 2) + (i >>1) * 8;
3516
3517                 /* WARNING: do not forget half pels */
3518                 src_x = clip(src_x, -16, s->width);
3519                 if (src_x == s->width)
3520                     dxy &= ~3;
3521                 src_y = clip(src_y, -16, s->height);
3522                 if (src_y == s->height)
3523                     dxy &= ~12;
3524
3525                 ptr = ref_picture[0] + (src_y * s->linesize) + (src_x);
3526                 if(s->flags&CODEC_FLAG_EMU_EDGE){
3527                     if(   (unsigned)src_x > s->h_edge_pos - (motion_x&3) - 8
3528                        || (unsigned)src_y > s->v_edge_pos - (motion_y&3) - 8 ){
3529                         ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->linesize, 9, 9, src_x, src_y, s->h_edge_pos, s->v_edge_pos);
3530                         ptr= s->edge_emu_buffer;
3531                     }
3532                 }
3533                 dest = dest_y + ((i & 1) * 8) + (i >> 1) * 8 * s->linesize;
3534                 qpix_op[1][dxy](dest, ptr, s->linesize);
3535
3536                 mx += s->mv[dir][i][0]/2;
3537                 my += s->mv[dir][i][1]/2;
3538             }
3539         }else{
3540             for(i=0;i<4;i++) {
3541                 hpel_motion(s, dest_y + ((i & 1) * 8) + (i >> 1) * 8 * s->linesize,
3542                             ref_picture[0], 0, 0,
3543                             mb_x * 16 + (i & 1) * 8, mb_y * 16 + (i >>1) * 8,
3544                             s->width, s->height, s->linesize,
3545                             s->h_edge_pos, s->v_edge_pos,
3546                             8, 8, pix_op[1],
3547                             s->mv[dir][i][0], s->mv[dir][i][1]);
3548
3549                 mx += s->mv[dir][i][0];
3550                 my += s->mv[dir][i][1];
3551             }
3552         }
3553
3554         if(!(s->flags&CODEC_FLAG_GRAY))
3555             chroma_4mv_motion(s, dest_cb, dest_cr, ref_picture, pix_op[1], mx, my);
3556         break;
3557     case MV_TYPE_FIELD:
3558         if (s->picture_structure == PICT_FRAME) {
3559             if(s->quarter_sample){
3560                 for(i=0; i<2; i++){
3561                     qpel_motion(s, dest_y, dest_cb, dest_cr,
3562                                 1, i, s->field_select[dir][i],
3563                                 ref_picture, pix_op, qpix_op,
3564                                 s->mv[dir][i][0], s->mv[dir][i][1], 8);
3565                 }
3566             }else{
3567                 /* top field */
3568                 mpeg_motion(s, dest_y, dest_cb, dest_cr,
3569                             1, 0, s->field_select[dir][0],
3570                             ref_picture, pix_op,
3571                             s->mv[dir][0][0], s->mv[dir][0][1], 8);
3572                 /* bottom field */
3573                 mpeg_motion(s, dest_y, dest_cb, dest_cr,
3574                             1, 1, s->field_select[dir][1],
3575                             ref_picture, pix_op,
3576                             s->mv[dir][1][0], s->mv[dir][1][1], 8);
3577             }
3578         } else {
3579             if(s->picture_structure != s->field_select[dir][0] + 1 && s->pict_type != B_TYPE && !s->first_field){
3580                 ref_picture= s->current_picture_ptr->data;
3581             }
3582
3583             mpeg_motion(s, dest_y, dest_cb, dest_cr,
3584                         0, 0, s->field_select[dir][0],
3585                         ref_picture, pix_op,
3586                         s->mv[dir][0][0], s->mv[dir][0][1], 16);
3587         }
3588         break;
3589     case MV_TYPE_16X8:
3590         for(i=0; i<2; i++){
3591             uint8_t ** ref2picture;
3592
3593             if(s->picture_structure == s->field_select[dir][i] + 1 || s->pict_type == B_TYPE || s->first_field){
3594                 ref2picture= ref_picture;
3595             }else{
3596                 ref2picture= s->current_picture_ptr->data;
3597             }
3598
3599             mpeg_motion(s, dest_y, dest_cb, dest_cr,
3600                         0, 0, s->field_select[dir][i],
3601                         ref2picture, pix_op,
3602                         s->mv[dir][i][0], s->mv[dir][i][1] + 16*i, 8);
3603
3604             dest_y += 16*s->linesize;
3605             dest_cb+= (16>>s->chroma_y_shift)*s->uvlinesize;
3606             dest_cr+= (16>>s->chroma_y_shift)*s->uvlinesize;
3607         }
3608         break;
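         /* dual prime (MV_TYPE_DMV): predictions of both field parities are
          * formed and averaged; the first call puts, the later ones average
          * because pix_op is switched to avg_pixels_tab */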
3609     case MV_TYPE_DMV:
3610         if(s->picture_structure == PICT_FRAME){
3611             for(i=0; i<2; i++){
3612                 int j;
3613                 for(j=0; j<2; j++){
3614                     mpeg_motion(s, dest_y, dest_cb, dest_cr,
3615                                 1, j, j^i,
3616                                 ref_picture, pix_op,
3617                                 s->mv[dir][2*i + j][0], s->mv[dir][2*i + j][1], 8);
3618                 }
3619                 pix_op = s->dsp.avg_pixels_tab;
3620             }
3621         }else{
3622             for(i=0; i<2; i++){
3623                 mpeg_motion(s, dest_y, dest_cb, dest_cr,
3624                             0, 0, s->picture_structure != i+1,
3625                             ref_picture, pix_op,
3626                             s->mv[dir][2*i][0],s->mv[dir][2*i][1],16);
3627
3628                 // after put we make avg of the same block
3629                 pix_op=s->dsp.avg_pixels_tab;
3630
3631                 //opposite parity is always in the same frame if this is the second field
3632                 if(!s->first_field){
3633                     ref_picture = s->current_picture_ptr->data;
3634                 }
3635             }
3636         }
3637     break;
3638     default: assert(0);
3639     }
3640 }
3641
3642 /**
3643  * motion compensation of a single macroblock
3644  * @param s context
3645  * @param dest_y luma destination pointer
3646  * @param dest_cb chroma cb/u destination pointer
3647  * @param dest_cr chroma cr/v destination pointer
3648  * @param dir direction (0->forward, 1->backward)
3649  * @param ref_picture array[3] of pointers to the 3 planes of the reference picture
3650  * @param pix_op motion compensation function, lowres variant (average or put normally)
3651  * the motion vectors are taken from s->mv and the MV type from s->mv_type
3652  */
3653 static inline void MPV_motion_lowres(MpegEncContext *s,
3654                               uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
3655                               int dir, uint8_t **ref_picture,
3656                               h264_chroma_mc_func *pix_op)
3657 {
3658     int mx, my;
3659     int mb_x, mb_y, i;
3660     const int lowres= s->avctx->lowres;
3661     const int block_s= 8>>lowres;
3662
3663     mb_x = s->mb_x;
3664     mb_y = s->mb_y;
3665
3666     switch(s->mv_type) {
3667     case MV_TYPE_16X16:
3668         mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
3669                     0, 0, 0,
3670                     ref_picture, pix_op,
3671                     s->mv[dir][0][0], s->mv[dir][0][1], 2*block_s);
3672         break;
3673     case MV_TYPE_8X8:
3674         mx = 0;
3675         my = 0;
3676             for(i=0;i<4;i++) {
3677                 hpel_motion_lowres(s, dest_y + ((i & 1) + (i >> 1) * s->linesize)*block_s,
3678                             ref_picture[0], 0, 0,
3679                             (2*mb_x + (i & 1))*block_s, (2*mb_y + (i >>1))*block_s,
3680                             s->width, s->height, s->linesize,
3681                             s->h_edge_pos >> lowres, s->v_edge_pos >> lowres,
3682                             block_s, block_s, pix_op,
3683                             s->mv[dir][i][0], s->mv[dir][i][1]);
3684
3685                 mx += s->mv[dir][i][0];
3686                 my += s->mv[dir][i][1];
3687             }
3688
3689         if(!(s->flags&CODEC_FLAG_GRAY))
3690             chroma_4mv_motion_lowres(s, dest_cb, dest_cr, ref_picture, pix_op, mx, my);
3691         break;
3692     case MV_TYPE_FIELD:
3693         if (s->picture_structure == PICT_FRAME) {
3694             /* top field */
3695             mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
3696                         1, 0, s->field_select[dir][0],
3697                         ref_picture, pix_op,
3698                         s->mv[dir][0][0], s->mv[dir][0][1], block_s);
3699             /* bottom field */
3700             mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
3701                         1, 1, s->field_select[dir][1],
3702                         ref_picture, pix_op,
3703                         s->mv[dir][1][0], s->mv[dir][1][1], block_s);
3704         } else {
3705             if(s->picture_structure != s->field_select[dir][0] + 1 && s->pict_type != B_TYPE && !s->first_field){
3706                 ref_picture= s->current_picture_ptr->data;
3707             }
3708
3709             mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
3710                         0, 0, s->field_select[dir][0],
3711                         ref_picture, pix_op,
3712                         s->mv[dir][0][0], s->mv[dir][0][1], 2*block_s);
3713         }
3714         break;
3715     case MV_TYPE_16X8:
3716         for(i=0; i<2; i++){
3717             uint8_t ** ref2picture;
3718
3719             if(s->picture_structure == s->field_select[dir][i] + 1 || s->pict_type == B_TYPE || s->first_field){
3720                 ref2picture= ref_picture;
3721             }else{
3722                 ref2picture= s->current_picture_ptr->data;
3723             }
3724
3725             mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
3726                         0, 0, s->field_select[dir][i],
3727                         ref2picture, pix_op,
3728                         s->mv[dir][i][0], s->mv[dir][i][1] + 2*block_s*i, block_s);
3729
3730             dest_y += 2*block_s*s->linesize;
3731             dest_cb+= (2*block_s>>s->chroma_y_shift)*s->uvlinesize;
3732             dest_cr+= (2*block_s>>s->chroma_y_shift)*s->uvlinesize;
3733         }
3734         break;
3735     case MV_TYPE_DMV:
3736         if(s->picture_structure == PICT_FRAME){
3737             for(i=0; i<2; i++){
3738                 int j;
3739                 for(j=0; j<2; j++){
3740                     mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
3741                                 1, j, j^i,
3742                                 ref_picture, pix_op,
3743                                 s->mv[dir][2*i + j][0], s->mv[dir][2*i + j][1], block_s);
3744                 }
3745                 pix_op = s->dsp.avg_h264_chroma_pixels_tab;
3746             }
3747         }else{
3748             for(i=0; i<2; i++){
3749                 mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
3750                             0, 0, s->picture_structure != i+1,
3751                             ref_picture, pix_op,
3752                             s->mv[dir][2*i][0],s->mv[dir][2*i][1],2*block_s);
3753
3754                 // after put we make avg of the same block
3755                 pix_op = s->dsp.avg_h264_chroma_pixels_tab;
3756
3757                 //opposite parity is always in the same frame if this is the second field
3758                 if(!s->first_field){
3759                     ref_picture = s->current_picture_ptr->data;
3760                 }
3761             }
3762         }
3763     break;
3764     default: assert(0);
3765     }
3766 }
3767
3768 /* put block[] to dest[] */
3769 static inline void put_dct(MpegEncContext *s,
3770                            DCTELEM *block, int i, uint8_t *dest, int line_size, int qscale)
3771 {
3772     s->dct_unquantize_intra(s, block, i, qscale);
3773     s->dsp.idct_put (dest, line_size, block);
3774 }
3775
3776 /* add block[] to dest[] */
3777 static inline void add_dct(MpegEncContext *s,
3778                            DCTELEM *block, int i, uint8_t *dest, int line_size)
3779 {
3780     if (s->block_last_index[i] >= 0) {
3781         s->dsp.idct_add (dest, line_size, block);
3782     }
3783 }
3784
3785 static inline void add_dequant_dct(MpegEncContext *s,
3786                            DCTELEM *block, int i, uint8_t *dest, int line_size, int qscale)
3787 {
3788     if (s->block_last_index[i] >= 0) {
3789         s->dct_unquantize_inter(s, block, i, qscale);
3790
3791         s->dsp.idct_add (dest, line_size, block);
3792     }
3793 }
3794
3795 /**
3796  * cleans dc, ac, coded_block for the current non intra MB
3797  */
3798 void ff_clean_intra_table_entries(MpegEncContext *s)
3799 {
3800     int wrap = s->b8_stride;
3801     int xy = s->block_index[0];
3802
3803     s->dc_val[0][xy           ] =
3804     s->dc_val[0][xy + 1       ] =
3805     s->dc_val[0][xy     + wrap] =
3806     s->dc_val[0][xy + 1 + wrap] = 1024;
3807     /* ac pred */
3808     memset(s->ac_val[0][xy       ], 0, 32 * sizeof(int16_t));
3809     memset(s->ac_val[0][xy + wrap], 0, 32 * sizeof(int16_t));
3810     if (s->msmpeg4_version>=3) {
3811         s->coded_block[xy           ] =
3812         s->coded_block[xy + 1       ] =
3813         s->coded_block[xy     + wrap] =
3814         s->coded_block[xy + 1 + wrap] = 0;
3815     }
3816     /* chroma */
3817     wrap = s->mb_stride;
3818     xy = s->mb_x + s->mb_y * wrap;
3819     s->dc_val[1][xy] =
3820     s->dc_val[2][xy] = 1024;
3821     /* ac pred */
3822     memset(s->ac_val[1][xy], 0, 16 * sizeof(int16_t));
3823     memset(s->ac_val[2][xy], 0, 16 * sizeof(int16_t));
3824
3825     s->mbintra_table[xy]= 0;
3826 }
3827
3828 /* generic function called after a macroblock has been parsed by the
3829    decoder or after it has been encoded by the encoder.
3830
3831    Important variables used:
3832    s->mb_intra : true if intra macroblock
3833    s->mv_dir   : motion vector direction
3834    s->mv_type  : motion vector type
3835    s->mv       : motion vector
3836    s->interlaced_dct : true if interlaced dct used (mpeg2)
3837  */
3838 static always_inline void MPV_decode_mb_internal(MpegEncContext *s, DCTELEM block[12][64], int lowres_flag)
3839 {
3840     int mb_x, mb_y;
3841     const int mb_xy = s->mb_y * s->mb_stride + s->mb_x;
3842 #ifdef HAVE_XVMC
3843     if(s->avctx->xvmc_acceleration){
3844         XVMC_decode_mb(s);//xvmc uses pblocks
3845         return;
3846     }
3847 #endif
3848
3849     mb_x = s->mb_x;
3850     mb_y = s->mb_y;
3851
3852     if(s->avctx->debug&FF_DEBUG_DCT_COEFF) {
3853        /* save DCT coefficients */
3854        int i,j;
3855        DCTELEM *dct = &s->current_picture.dct_coeff[mb_xy*64*6];
3856        for(i=0; i<6; i++)
3857            for(j=0; j<64; j++)
3858                *dct++ = block[i][s->dsp.idct_permutation[j]];
3859     }
3860
3861     s->current_picture.qscale_table[mb_xy]= s->qscale;
3862
3863     /* update DC predictors for P macroblocks */
3864     if (!s->mb_intra) {
3865         if (s->h263_pred || s->h263_aic) {
3866             if(s->mbintra_table[mb_xy])
3867                 ff_clean_intra_table_entries(s);
3868         } else {
3869             s->last_dc[0] =
3870             s->last_dc[1] =
3871             s->last_dc[2] = 128 << s->intra_dc_precision;
3872         }
3873     }
3874     else if (s->h263_pred || s->h263_aic)
3875         s->mbintra_table[mb_xy]=1;
3876
3877     if ((s->flags&CODEC_FLAG_PSNR) || !(s->encoding && (s->intra_only || s->pict_type==B_TYPE))) { //FIXME precalc
3878         uint8_t *dest_y, *dest_cb, *dest_cr;
3879         int dct_linesize, dct_offset;
3880         op_pixels_func (*op_pix)[4];
3881         qpel_mc_func (*op_qpix)[16];
3882         const int linesize= s->current_picture.linesize[0]; //not s->linesize as this would be wrong for field pics
3883         const int uvlinesize= s->current_picture.linesize[1];
3884         const int readable= s->pict_type != B_TYPE || s->encoding || s->avctx->draw_horiz_band || lowres_flag;
3885         const int block_size= lowres_flag ? 8>>s->avctx->lowres : 8;
3886
3887         /* avoid copy if macroblock skipped in last frame too */
3888         /* skip only during decoding, as during encoding we might trash the buffers a bit */
3889         if(!s->encoding){
3890             uint8_t *mbskip_ptr = &s->mbskip_table[mb_xy];
3891             const int age= s->current_picture.age;
3892
3893             assert(age);
3894
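                 /* mbskip_table counts consecutive skips of this MB and
                  * current_picture.age how many frames ago the reference was
                  * written; once the skip count reaches the age, the reference
                  * already holds the right pixels and the copy can be omitted */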
3895             if (s->mb_skipped) {
3896                 s->mb_skipped= 0;
3897                 assert(s->pict_type!=I_TYPE);
3898
3899                 (*mbskip_ptr) ++; /* indicate that this time we skipped it */
3900                 if(*mbskip_ptr >99) *mbskip_ptr= 99;
3901
3902                 /* if the previous MB was skipped too, then there is nothing to do! */
3903                 if (*mbskip_ptr >= age && s->current_picture.reference){
3904                     return;
3905                 }
3906             } else if(!s->current_picture.reference){
3907                 (*mbskip_ptr) ++; /* increase counter so the age can be compared cleanly */
3908                 if(*mbskip_ptr >99) *mbskip_ptr= 99;
3909             } else{
3910                 *mbskip_ptr = 0; /* not skipped */
3911             }
3912         }
3913
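             /* with interlaced DCT the two vertically adjacent luma blocks come
              * from alternating lines, so the row stride doubles and the second
              * block starts one line (not block_size lines) below the first */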
3914         dct_linesize = linesize << s->interlaced_dct;
3915         dct_offset =(s->interlaced_dct)? linesize : linesize*block_size;
3916
3917         if(readable){
3918             dest_y=  s->dest[0];
3919             dest_cb= s->dest[1];
3920             dest_cr= s->dest[2];
3921         }else{
3922             dest_y = s->b_scratchpad;
3923             dest_cb= s->b_scratchpad+16*linesize;
3924             dest_cr= s->b_scratchpad+32*linesize;
3925         }
3926
3927         if (!s->mb_intra) {
3928             /* motion handling */
3929             /* decoding or more than one mb_type (MC was already done otherwise) */
3930             if(!s->encoding){
3931                 if(lowres_flag){
3932                     h264_chroma_mc_func *op_pix = s->dsp.put_h264_chroma_pixels_tab;
3933
3934                     if (s->mv_dir & MV_DIR_FORWARD) {
3935                         MPV_motion_lowres(s, dest_y, dest_cb, dest_cr, 0, s->last_picture.data, op_pix);
3936                         op_pix = s->dsp.avg_h264_chroma_pixels_tab;
3937                     }
3938                     if (s->mv_dir & MV_DIR_BACKWARD) {
3939                         MPV_motion_lowres(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.data, op_pix);
3940                     }
3941                 }else{
3942                     if ((!s->no_rounding) || s->pict_type==B_TYPE){
3943                         op_pix = s->dsp.put_pixels_tab;
3944                         op_qpix= s->dsp.put_qpel_pixels_tab;
3945                     }else{
3946                         op_pix = s->dsp.put_no_rnd_pixels_tab;
3947                         op_qpix= s->dsp.put_no_rnd_qpel_pixels_tab;
3948                     }
3949                     if (s->mv_dir & MV_DIR_FORWARD) {
3950                         MPV_motion(s, dest_y, dest_cb, dest_cr, 0, s->last_picture.data, op_pix, op_qpix);
3951                         op_pix = s->dsp.avg_pixels_tab;
3952                         op_qpix= s->dsp.avg_qpel_pixels_tab;
3953                     }
3954                     if (s->mv_dir & MV_DIR_BACKWARD) {
3955                         MPV_motion(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.data, op_pix, op_qpix);
3956                     }
3957                 }
3958             }
3959
3960             /* skip dequant / idct if we are really late ;) */
3961             if(s->hurry_up>1) goto skip_idct;
3962             if(s->avctx->skip_idct){
3963                 if(  (s->avctx->skip_idct >= AVDISCARD_NONREF && s->pict_type == B_TYPE)
3964                    ||(s->avctx->skip_idct >= AVDISCARD_NONKEY && s->pict_type != I_TYPE)
3965                    || s->avctx->skip_idct >= AVDISCARD_ALL)
3966                     goto skip_idct;
3967             }
3968
3969             /* add dct residue */
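                 /* for h263/msmpeg4, MPEG-1/2 and MPEG-4 without mpeg_quant the
                  * coefficients arrive already dequantized from the bitstream
                  * decoder and take the plain add_dct() path below; everything
                  * else (and encoding) dequantizes here */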
3970             if(s->encoding || !(   s->h263_msmpeg4 || s->codec_id==CODEC_ID_MPEG1VIDEO || s->codec_id==CODEC_ID_MPEG2VIDEO
3971                                 || (s->codec_id==CODEC_ID_MPEG4 && !s->mpeg_quant))){
3972                 add_dequant_dct(s, block[0], 0, dest_y                          , dct_linesize, s->qscale);
3973                 add_dequant_dct(s, block[1], 1, dest_y              + block_size, dct_linesize, s->qscale);
3974                 add_dequant_dct(s, block[2], 2, dest_y + dct_offset             , dct_linesize, s->qscale);
3975                 add_dequant_dct(s, block[3], 3, dest_y + dct_offset + block_size, dct_linesize, s->qscale);
3976
3977                 if(!(s->flags&CODEC_FLAG_GRAY)){
3978                     if (s->chroma_y_shift){
3979                         add_dequant_dct(s, block[4], 4, dest_cb, uvlinesize, s->chroma_qscale);
3980                         add_dequant_dct(s, block[5], 5, dest_cr, uvlinesize, s->chroma_qscale);
3981                     }else{
3982                         dct_linesize >>= 1;
3983                         dct_offset >>=1;
3984                         add_dequant_dct(s, block[4], 4, dest_cb,              dct_linesize, s->chroma_qscale);
3985                         add_dequant_dct(s, block[5], 5, dest_cr,              dct_linesize, s->chroma_qscale);
3986                         add_dequant_dct(s, block[6], 6, dest_cb + dct_offset, dct_linesize, s->chroma_qscale);
3987                         add_dequant_dct(s, block[7], 7, dest_cr + dct_offset, dct_linesize, s->chroma_qscale);
3988                     }
3989                 }
3990             } else if(s->codec_id != CODEC_ID_WMV2){
3991                 add_dct(s, block[0], 0, dest_y                          , dct_linesize);
3992                 add_dct(s, block[1], 1, dest_y              + block_size, dct_linesize);
3993                 add_dct(s, block[2], 2, dest_y + dct_offset             , dct_linesize);
3994                 add_dct(s, block[3], 3, dest_y + dct_offset + block_size, dct_linesize);
3995
3996                 if(!(s->flags&CODEC_FLAG_GRAY)){
3997                     if(s->chroma_y_shift){//Chroma420
3998                         add_dct(s, block[4], 4, dest_cb, uvlinesize);
3999                         add_dct(s, block[5], 5, dest_cr, uvlinesize);
4000                     }else{
4001                         //chroma422
4002                         dct_linesize = uvlinesize << s->interlaced_dct;
4003                         dct_offset =(s->interlaced_dct)? uvlinesize : uvlinesize*8;
4004
4005                         add_dct(s, block[4], 4, dest_cb, dct_linesize);
4006                         add_dct(s, block[5], 5, dest_cr, dct_linesize);
4007                         add_dct(s, block[6], 6, dest_cb+dct_offset, dct_linesize);
4008                         add_dct(s, block[7], 7, dest_cr+dct_offset, dct_linesize);
4009                         if(!s->chroma_x_shift){//Chroma444
4010                             add_dct(s, block[8], 8, dest_cb+8, dct_linesize);
4011                             add_dct(s, block[9], 9, dest_cr+8, dct_linesize);
4012                             add_dct(s, block[10], 10, dest_cb+8+dct_offset, dct_linesize);
4013                             add_dct(s, block[11], 11, dest_cr+8+dct_offset, dct_linesize);
4014                         }
4015                     }
4016                 }//fi gray
4017             }
4018             else{
4019                 ff_wmv2_add_mb(s, block, dest_y, dest_cb, dest_cr);
4020             }
4021         } else {
4022             /* dct only in intra block */
4023             if(s->encoding || !(s->codec_id==CODEC_ID_MPEG1VIDEO || s->codec_id==CODEC_ID_MPEG2VIDEO)){
4024                 put_dct(s, block[0], 0, dest_y                          , dct_linesize, s->qscale);
4025                 put_dct(s, block[1], 1, dest_y              + block_size, dct_linesize, s->qscale);
4026                 put_dct(s, block[2], 2, dest_y + dct_offset             , dct_linesize, s->qscale);
4027                 put_dct(s, block[3], 3, dest_y + dct_offset + block_size, dct_linesize, s->qscale);
4028
4029                 if(!(s->flags&CODEC_FLAG_GRAY)){
4030                     if(s->chroma_y_shift){
4031                         put_dct(s, block[4], 4, dest_cb, uvlinesize, s->chroma_qscale);
4032                         put_dct(s, block[5], 5, dest_cr, uvlinesize, s->chroma_qscale);
4033                     }else{
4034                         dct_offset >>=1;
4035                         dct_linesize >>=1;
4036                         put_dct(s, block[4], 4, dest_cb,              dct_linesize, s->chroma_qscale);
4037                         put_dct(s, block[5], 5, dest_cr,              dct_linesize, s->chroma_qscale);
4038                         put_dct(s, block[6], 6, dest_cb + dct_offset, dct_linesize, s->chroma_qscale);
4039                         put_dct(s, block[7], 7, dest_cr + dct_offset, dct_linesize, s->chroma_qscale);
4040                     }
4041                 }
4042             }else{
4043                 s->dsp.idct_put(dest_y                          , dct_linesize, block[0]);
4044                 s->dsp.idct_put(dest_y              + block_size, dct_linesize, block[1]);
4045                 s->dsp.idct_put(dest_y + dct_offset             , dct_linesize, block[2]);
4046                 s->dsp.idct_put(dest_y + dct_offset + block_size, dct_linesize, block[3]);
4047
4048                 if(!(s->flags&CODEC_FLAG_GRAY)){
4049                     if(s->chroma_y_shift){
4050                         s->dsp.idct_put(dest_cb, uvlinesize, block[4]);
4051                         s->dsp.idct_put(dest_cr, uvlinesize, block[5]);
4052                     }else{
4053
4054                         dct_linesize = uvlinesize << s->interlaced_dct;
4055                         dct_offset =(s->interlaced_dct)? uvlinesize : uvlinesize*8;
4056
4057                         s->dsp.idct_put(dest_cb,              dct_linesize, block[4]);
4058                         s->dsp.idct_put(dest_cr,              dct_linesize, block[5]);
4059                         s->dsp.idct_put(dest_cb + dct_offset, dct_linesize, block[6]);
4060                         s->dsp.idct_put(dest_cr + dct_offset, dct_linesize, block[7]);
4061                         if(!s->chroma_x_shift){//Chroma444
4062                             s->dsp.idct_put(dest_cb + 8,              dct_linesize, block[8]);
4063                             s->dsp.idct_put(dest_cr + 8,              dct_linesize, block[9]);
4064                             s->dsp.idct_put(dest_cb + 8 + dct_offset, dct_linesize, block[10]);
4065                             s->dsp.idct_put(dest_cr + 8 + dct_offset, dct_linesize, block[11]);
4066                         }
4067                     }
4068                 }//gray
4069             }
4070         }
4071 skip_idct:
4072         if(!readable){
4073             s->dsp.put_pixels_tab[0][0](s->dest[0], dest_y ,   linesize,16);
4074             s->dsp.put_pixels_tab[s->chroma_x_shift][0](s->dest[1], dest_cb, uvlinesize,16 >> s->chroma_y_shift);
4075             s->dsp.put_pixels_tab[s->chroma_x_shift][0](s->dest[2], dest_cr, uvlinesize,16 >> s->chroma_y_shift);
4076         }
4077     }
4078 }
4079
4080 void MPV_decode_mb(MpegEncContext *s, DCTELEM block[12][64]){
4081     if(s->avctx->lowres) MPV_decode_mb_internal(s, block, 1);
4082     else                  MPV_decode_mb_internal(s, block, 0);
4083 }
4084
4085 #ifdef CONFIG_ENCODERS
4086
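/* Small-coefficient elimination: if every non-zero coefficient in the block is
 * +-1 and the run/position score stays below the threshold, the whole block is
 * zeroed, on the assumption that coding it would cost more bits than it is
 * worth. The intra DC coefficient is kept by default; a negative threshold
 * includes it in the elimination as well. */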
4087 static inline void dct_single_coeff_elimination(MpegEncContext *s, int n, int threshold)
4088 {
4089     static const char tab[64]=
4090         {3,2,2,1,1,1,1,1,
4091          1,1,1,1,1,1,1,1,
4092          1,1,1,1,1,1,1,1,
4093          0,0,0,0,0,0,0,0,
4094          0,0,0,0,0,0,0,0,
4095          0,0,0,0,0,0,0,0,
4096          0,0,0,0,0,0,0,0,
4097          0,0,0,0,0,0,0,0};
4098     int score=0;
4099     int run=0;
4100     int i;
4101     DCTELEM *block= s->block[n];
4102     const int last_index= s->block_last_index[n];
4103     int skip_dc;
4104
4105     if(threshold<0){
4106         skip_dc=0;
4107         threshold= -threshold;
4108     }else
4109         skip_dc=1;
4110
4111     /* are all the coefficients which we could set to zero already zero? */
4112     if(last_index<=skip_dc - 1) return;
4113
4114     for(i=0; i<=last_index; i++){
4115         const int j = s->intra_scantable.permutated[i];
4116         const int level = ABS(block[j]);
4117         if(level==1){
4118             if(skip_dc && i==0) continue;
4119             score+= tab[run];
4120             run=0;
4121         }else if(level>1){
4122             return;
4123         }else{
4124             run++;
4125         }
4126     }
4127     if(score >= threshold) return;
4128     for(i=skip_dc; i<=last_index; i++){
4129         const int j = s->intra_scantable.permutated[i];
4130         block[j]=0;
4131     }
4132     if(block[0]) s->block_last_index[n]= 0;
4133     else         s->block_last_index[n]= -1;
4134 }
4135
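/* Clamp the quantized coefficients to [min_qcoeff, max_qcoeff]; the intra DC
 * coefficient is left alone. A warning is logged if clipping occurred and the
 * simple macroblock decision is in use. */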
4136 static inline void clip_coeffs(MpegEncContext *s, DCTELEM *block, int last_index)
4137 {
4138     int i;
4139     const int maxlevel= s->max_qcoeff;
4140     const int minlevel= s->min_qcoeff;
4141     int overflow=0;
4142
4143     if(s->mb_intra){
4144         i=1; //skip clipping of intra dc
4145     }else
4146         i=0;
4147
4148     for(;i<=last_index; i++){
4149         const int j= s->intra_scantable.permutated[i];
4150         int level = block[j];
4151
4152         if     (level>maxlevel){
4153             level=maxlevel;
4154             overflow++;
4155         }else if(level<minlevel){
4156             level=minlevel;
4157             overflow++;
4158         }
4159
4160         block[j]= level;
4161     }
4162
4163     if(overflow && s->avctx->mb_decision == FF_MB_DECISION_SIMPLE)
4164         av_log(s->avctx, AV_LOG_INFO, "warning, clipping %d dct coefficients to %d..%d\n", overflow, minlevel, maxlevel);
4165 }
4166
4167 #endif //CONFIG_ENCODERS
4168
4169 /**
4170  * Pass a completed slice to the user via the draw_horiz_band callback, if one is set.
4171  * @param h the normal height; it will be reduced automatically if needed for the last row
4172  */
4173 void ff_draw_horiz_band(MpegEncContext *s, int y, int h){
4174     if (s->avctx->draw_horiz_band) {
4175         AVFrame *src;
4176         int offset[4];
4177
4178         if(s->picture_structure != PICT_FRAME){
4179             h <<= 1;
4180             y <<= 1;
4181             if(s->first_field  && !(s->avctx->slice_flags&SLICE_FLAG_ALLOW_FIELD)) return;
4182         }
4183
4184         h= FFMIN(h, s->avctx->height - y);
4185
4186         if(s->pict_type==B_TYPE || s->low_delay || (s->avctx->slice_flags&SLICE_FLAG_CODED_ORDER))
4187             src= (AVFrame*)s->current_picture_ptr;
4188         else if(s->last_picture_ptr)
4189             src= (AVFrame*)s->last_picture_ptr;
4190         else
4191             return;
4192
4193         if(s->pict_type==B_TYPE && s->picture_structure == PICT_FRAME && s->out_format != FMT_H264){
4194             offset[0]=
4195             offset[1]=
4196             offset[2]=
4197             offset[3]= 0;
4198         }else{
4199             offset[0]= y * s->linesize;
4200             offset[1]=
4201             offset[2]= (y >> s->chroma_y_shift) * s->uvlinesize;
4202             offset[3]= 0;
4203         }
4204
4205         emms_c();
4206
4207         s->avctx->draw_horiz_band(s->avctx, src, offset,
4208                                   y, s->picture_structure, h);
4209     }
4210 }
4211
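/* Set up block_index[] (the 8x8 block coordinates used for MV/DC prediction)
 * and the dest[] pointers for the start of the current macroblock row; they
 * are then advanced per macroblock by ff_update_block_index(). */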
4212 void ff_init_block_index(MpegEncContext *s){ //FIXME maybe rename
4213     const int linesize= s->current_picture.linesize[0]; //not s->linesize as this would be wrong for field pics
4214     const int uvlinesize= s->current_picture.linesize[1];
4215     const int mb_size= 4 - s->avctx->lowres;
4216
4217     s->block_index[0]= s->b8_stride*(s->mb_y*2    ) - 2 + s->mb_x*2;
4218     s->block_index[1]= s->b8_stride*(s->mb_y*2    ) - 1 + s->mb_x*2;
4219     s->block_index[2]= s->b8_stride*(s->mb_y*2 + 1) - 2 + s->mb_x*2;
4220     s->block_index[3]= s->b8_stride*(s->mb_y*2 + 1) - 1 + s->mb_x*2;
4221     s->block_index[4]= s->mb_stride*(s->mb_y + 1)                + s->b8_stride*s->mb_height*2 + s->mb_x - 1;
4222     s->block_index[5]= s->mb_stride*(s->mb_y + s->mb_height + 2) + s->b8_stride*s->mb_height*2 + s->mb_x - 1;
4223     //block_index is not used by mpeg2, so it is not affected by chroma_format
4224
4225     s->dest[0] = s->current_picture.data[0] + ((s->mb_x - 1) << mb_size);
4226     s->dest[1] = s->current_picture.data[1] + ((s->mb_x - 1) << (mb_size - s->chroma_x_shift));
4227     s->dest[2] = s->current_picture.data[2] + ((s->mb_x - 1) << (mb_size - s->chroma_x_shift));
4228
4229     if(!(s->pict_type==B_TYPE && s->avctx->draw_horiz_band && s->picture_structure==PICT_FRAME))
4230     {
4231         s->dest[0] += s->mb_y *   linesize << mb_size;
4232         s->dest[1] += s->mb_y * uvlinesize << (mb_size - s->chroma_y_shift);
4233         s->dest[2] += s->mb_y * uvlinesize << (mb_size - s->chroma_y_shift);
4234     }
4235 }
4236
4237 #ifdef CONFIG_ENCODERS
4238
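/* Per-pixel visual weight for quantizer noise shaping, computed from the 3x3
 * neighbourhood of each pixel of the 8x8 block:
 *   weight = 36*sqrt(count*sum(v*v) - sum(v)^2)/count
 * which works out to 36 times the local standard deviation. */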
4239 static void get_vissual_weight(int16_t *weight, uint8_t *ptr, int stride){
4240     int x, y;
4241 //FIXME optimize
4242     for(y=0; y<8; y++){
4243         for(x=0; x<8; x++){
4244             int x2, y2;
4245             int sum=0;
4246             int sqr=0;
4247             int count=0;
4248
4249             for(y2= FFMAX(y-1, 0); y2 < FFMIN(8, y+2); y2++){
4250                 for(x2= FFMAX(x-1, 0); x2 < FFMIN(8, x+2); x2++){
4251                     int v= ptr[x2 + y2*stride];
4252                     sum += v;
4253                     sqr += v*v;
4254                     count++;
4255                 }
4256             }
4257             weight[x + 8*y]= (36*ff_sqrt(count*sqr - sum*sum)) / count;
4258         }
4259     }
4260 }
4261
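/* Encode one macroblock: grab the source pixels (intra) or the motion
 * compensated difference (inter), optionally choose interlaced DCT, run the
 * DCT + quantizer with optional noise shaping and coefficient elimination,
 * and finally pass the blocks to the codec-specific bitstream writer. */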
4262 static always_inline void encode_mb_internal(MpegEncContext *s, int motion_x, int motion_y, int mb_block_height, int mb_block_count)
4263 {
4264     int16_t weight[8][64];
4265     DCTELEM orig[8][64];
4266     const int mb_x= s->mb_x;
4267     const int mb_y= s->mb_y;
4268     int i;
4269     int skip_dct[8];
4270     int dct_offset   = s->linesize*8; //default for progressive frames
4271     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
4272     int wrap_y, wrap_c;
4273
4274     for(i=0; i<mb_block_count; i++) skip_dct[i]=0;
4275
4276     if(s->adaptive_quant){
4277         const int last_qp= s->qscale;
4278         const int mb_xy= mb_x + mb_y*s->mb_stride;
4279
4280         s->lambda= s->lambda_table[mb_xy];
4281         update_qscale(s);
4282
4283         if(!(s->flags&CODEC_FLAG_QP_RD)){
4284             s->dquant= s->qscale - last_qp;
4285
4286             if(s->out_format==FMT_H263){
4287                 s->dquant= clip(s->dquant, -2, 2); //FIXME RD
4288
4289                 if(s->codec_id==CODEC_ID_MPEG4){
4290                     if(!s->mb_intra){
4291                         if(s->pict_type == B_TYPE){
4292                             if(s->dquant&1)
4293                                 s->dquant= (s->dquant/2)*2;
4294                             if(s->mv_dir&MV_DIRECT)
4295                                 s->dquant= 0;
4296                         }
4297                         if(s->mv_type==MV_TYPE_8X8)
4298                             s->dquant=0;
4299                     }
4300                 }
4301             }
4302         }
4303         ff_set_qscale(s, last_qp + s->dquant);
4304     }else if(s->flags&CODEC_FLAG_QP_RD)
4305         ff_set_qscale(s, s->qscale + s->dquant);
4306
4307     wrap_y = s->linesize;
4308     wrap_c = s->uvlinesize;
4309     ptr_y = s->new_picture.data[0] + (mb_y * 16 * wrap_y) + mb_x * 16;
4310     ptr_cb = s->new_picture.data[1] + (mb_y * mb_block_height * wrap_c) + mb_x * 8;
4311     ptr_cr = s->new_picture.data[2] + (mb_y * mb_block_height * wrap_c) + mb_x * 8;
4312
4313     if(mb_x*16+16 > s->width || mb_y*16+16 > s->height){
4314         uint8_t *ebuf= s->edge_emu_buffer + 32;
4315         ff_emulated_edge_mc(ebuf            , ptr_y , wrap_y,16,16,mb_x*16,mb_y*16, s->width   , s->height);
4316         ptr_y= ebuf;
4317         ff_emulated_edge_mc(ebuf+18*wrap_y  , ptr_cb, wrap_c, 8, mb_block_height, mb_x*8, mb_y*8, s->width>>1, s->height>>1);
4318         ptr_cb= ebuf+18*wrap_y;
4319         ff_emulated_edge_mc(ebuf+18*wrap_y+8, ptr_cr, wrap_c, 8, mb_block_height, mb_x*8, mb_y*8, s->width>>1, s->height>>1);
4320         ptr_cr= ebuf+18*wrap_y+8;
4321     }
4322
4323     if (s->mb_intra) {
4324         if(s->flags&CODEC_FLAG_INTERLACED_DCT){
4325             int progressive_score, interlaced_score;
4326
4327             s->interlaced_dct=0;
4328             progressive_score= s->dsp.ildct_cmp[4](s, ptr_y           , NULL, wrap_y, 8)
4329                               +s->dsp.ildct_cmp[4](s, ptr_y + wrap_y*8, NULL, wrap_y, 8) - 400;
4330
4331             if(progressive_score > 0){
4332                 interlaced_score = s->dsp.ildct_cmp[4](s, ptr_y           , NULL, wrap_y*2, 8)
4333                                   +s->dsp.ildct_cmp[4](s, ptr_y + wrap_y  , NULL, wrap_y*2, 8);
4334                 if(progressive_score > interlaced_score){
4335                     s->interlaced_dct=1;
4336
4337                     dct_offset= wrap_y;
4338                     wrap_y<<=1;
4339                     if (s->chroma_format == CHROMA_422)
4340                         wrap_c<<=1;
4341                 }
4342             }
4343         }
4344
4345         s->dsp.get_pixels(s->block[0], ptr_y                 , wrap_y);
4346         s->dsp.get_pixels(s->block[1], ptr_y              + 8, wrap_y);
4347         s->dsp.get_pixels(s->block[2], ptr_y + dct_offset    , wrap_y);
4348         s->dsp.get_pixels(s->block[3], ptr_y + dct_offset + 8, wrap_y);
4349
4350         if(s->flags&CODEC_FLAG_GRAY){
4351             skip_dct[4]= 1;
4352             skip_dct[5]= 1;
4353         }else{
4354             s->dsp.get_pixels(s->block[4], ptr_cb, wrap_c);
4355             s->dsp.get_pixels(s->block[5], ptr_cr, wrap_c);
4356             if(!s->chroma_y_shift){ /* 422 */
4357                 s->dsp.get_pixels(s->block[6], ptr_cb + (dct_offset>>1), wrap_c);
4358                 s->dsp.get_pixels(s->block[7], ptr_cr + (dct_offset>>1), wrap_c);
4359             }
4360         }
4361     }else{
4362         op_pixels_func (*op_pix)[4];
4363         qpel_mc_func (*op_qpix)[16];
4364         uint8_t *dest_y, *dest_cb, *dest_cr;
4365
4366         dest_y  = s->dest[0];
4367         dest_cb = s->dest[1];
4368         dest_cr = s->dest[2];
4369
4370         if ((!s->no_rounding) || s->pict_type==B_TYPE){
4371             op_pix = s->dsp.put_pixels_tab;
4372             op_qpix= s->dsp.put_qpel_pixels_tab;
4373         }else{
4374             op_pix = s->dsp.put_no_rnd_pixels_tab;
4375             op_qpix= s->dsp.put_no_rnd_qpel_pixels_tab;
4376         }
4377
4378         if (s->mv_dir & MV_DIR_FORWARD) {
4379             MPV_motion(s, dest_y, dest_cb, dest_cr, 0, s->last_picture.data, op_pix, op_qpix);
4380             op_pix = s->dsp.avg_pixels_tab;
4381             op_qpix= s->dsp.avg_qpel_pixels_tab;
4382         }
4383         if (s->mv_dir & MV_DIR_BACKWARD) {
4384             MPV_motion(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.data, op_pix, op_qpix);
4385         }
4386
4387         if(s->flags&CODEC_FLAG_INTERLACED_DCT){
4388             int progressive_score, interlaced_score;
4389
4390             s->interlaced_dct=0;
4391             progressive_score= s->dsp.ildct_cmp[0](s, dest_y           , ptr_y           , wrap_y, 8)
4392                               +s->dsp.ildct_cmp[0](s, dest_y + wrap_y*8, ptr_y + wrap_y*8, wrap_y, 8) - 400;
4393
4394             if(s->avctx->ildct_cmp == FF_CMP_VSSE) progressive_score -= 400;
4395
4396             if(progressive_score>0){
4397                 interlaced_score = s->dsp.ildct_cmp[0](s, dest_y           , ptr_y           , wrap_y*2, 8)
4398                                   +s->dsp.ildct_cmp[0](s, dest_y + wrap_y  , ptr_y + wrap_y  , wrap_y*2, 8);
4399
4400                 if(progressive_score > interlaced_score){
4401                     s->interlaced_dct=1;
4402
4403                     dct_offset= wrap_y;
4404                     wrap_y<<=1;
4405                     if (s->chroma_format == CHROMA_422)
4406                         wrap_c<<=1;
4407                 }
4408             }
4409         }
4410
4411         s->dsp.diff_pixels(s->block[0], ptr_y                 , dest_y                 , wrap_y);
4412         s->dsp.diff_pixels(s->block[1], ptr_y              + 8, dest_y              + 8, wrap_y);
4413         s->dsp.diff_pixels(s->block[2], ptr_y + dct_offset    , dest_y + dct_offset    , wrap_y);
4414         s->dsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8, dest_y + dct_offset + 8, wrap_y);
4415
4416         if(s->flags&CODEC_FLAG_GRAY){
4417             skip_dct[4]= 1;
4418             skip_dct[5]= 1;
4419         }else{
4420             s->dsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
4421             s->dsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
4422             if(!s->chroma_y_shift){ /* 422 */
4423                 s->dsp.diff_pixels(s->block[6], ptr_cb + (dct_offset>>1), dest_cb + (dct_offset>>1), wrap_c);
4424                 s->dsp.diff_pixels(s->block[7], ptr_cr + (dct_offset>>1), dest_cr + (dct_offset>>1), wrap_c);
4425             }
4426         }
4427         /* pre quantization */
4428         if(s->current_picture.mc_mb_var[s->mb_stride*mb_y+ mb_x]<2*s->qscale*s->qscale){
4429             //FIXME optimize
4430             if(s->dsp.sad[1](NULL, ptr_y               , dest_y               , wrap_y, 8) < 20*s->qscale) skip_dct[0]= 1;
4431             if(s->dsp.sad[1](NULL, ptr_y            + 8, dest_y            + 8, wrap_y, 8) < 20*s->qscale) skip_dct[1]= 1;
4432             if(s->dsp.sad[1](NULL, ptr_y +dct_offset   , dest_y +dct_offset   , wrap_y, 8) < 20*s->qscale) skip_dct[2]= 1;
4433             if(s->dsp.sad[1](NULL, ptr_y +dct_offset+ 8, dest_y +dct_offset+ 8, wrap_y, 8) < 20*s->qscale) skip_dct[3]= 1;
4434             if(s->dsp.sad[1](NULL, ptr_cb              , dest_cb              , wrap_c, 8) < 20*s->qscale) skip_dct[4]= 1;
4435             if(s->dsp.sad[1](NULL, ptr_cr              , dest_cr              , wrap_c, 8) < 20*s->qscale) skip_dct[5]= 1;
4436             if(!s->chroma_y_shift){ /* 422 */
4437                 if(s->dsp.sad[1](NULL, ptr_cb +(dct_offset>>1), dest_cb +(dct_offset>>1), wrap_c, 8) < 20*s->qscale) skip_dct[6]= 1;
4438                 if(s->dsp.sad[1](NULL, ptr_cr +(dct_offset>>1), dest_cr +(dct_offset>>1), wrap_c, 8) < 20*s->qscale) skip_dct[7]= 1;
4439             }
4440         }
4441     }
4442
4443     if(s->avctx->quantizer_noise_shaping){
4444         if(!skip_dct[0]) get_vissual_weight(weight[0], ptr_y                 , wrap_y);
4445         if(!skip_dct[1]) get_vissual_weight(weight[1], ptr_y              + 8, wrap_y);
4446         if(!skip_dct[2]) get_vissual_weight(weight[2], ptr_y + dct_offset    , wrap_y);
4447         if(!skip_dct[3]) get_vissual_weight(weight[3], ptr_y + dct_offset + 8, wrap_y);
4448         if(!skip_dct[4]) get_vissual_weight(weight[4], ptr_cb                , wrap_c);
4449         if(!skip_dct[5]) get_vissual_weight(weight[5], ptr_cr                , wrap_c);
4450         if(!s->chroma_y_shift){ /* 422 */
4451             if(!skip_dct[6]) get_vissual_weight(weight[6], ptr_cb + (dct_offset>>1), wrap_c);
4452             if(!skip_dct[7]) get_vissual_weight(weight[7], ptr_cr + (dct_offset>>1), wrap_c);
4453         }
4454         memcpy(orig[0], s->block[0], sizeof(DCTELEM)*64*mb_block_count);
4455     }
4456
4457     /* DCT & quantize */
4458     assert(s->out_format!=FMT_MJPEG || s->qscale==8);
4459     {
4460         for(i=0;i<mb_block_count;i++) {
4461             if(!skip_dct[i]){
4462                 int overflow;
4463                 s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow);
4464             // FIXME we could decide to change the quantizer instead of clipping
4465             // JS: I don't think that would be a good idea, it could lower quality instead
4466             //     of improving it. Only the INTRADC clipping deserves a change of quantizer.
4467                 if (overflow) clip_coeffs(s, s->block[i], s->block_last_index[i]);
4468             }else
4469                 s->block_last_index[i]= -1;
4470         }
4471         if(s->avctx->quantizer_noise_shaping){
4472             for(i=0;i<mb_block_count;i++) {
4473                 if(!skip_dct[i]){
4474                     s->block_last_index[i] = dct_quantize_refine(s, s->block[i], weight[i], orig[i], i, s->qscale);
4475                 }
4476             }
4477         }
4478
4479         if(s->luma_elim_threshold && !s->mb_intra)
4480             for(i=0; i<4; i++)
4481                 dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
4482         if(s->chroma_elim_threshold && !s->mb_intra)
4483             for(i=4; i<mb_block_count; i++)
4484                 dct_single_coeff_elimination(s, i, s->chroma_elim_threshold);
4485
4486         if(s->flags & CODEC_FLAG_CBP_RD){
4487             for(i=0;i<mb_block_count;i++) {
4488                 if(s->block_last_index[i] == -1)
4489                     s->coded_score[i]= INT_MAX/256;
4490             }
4491         }
4492     }
4493
4494     if((s->flags&CODEC_FLAG_GRAY) && s->mb_intra){
4495         s->block_last_index[4]=
4496         s->block_last_index[5]= 0;
4497         s->block[4][0]=
4498         s->block[5][0]= (1024 + s->c_dc_scale/2)/ s->c_dc_scale;
4499     }
4500
4501     //FIXME the non-C quantize code returns an incorrect block_last_index
4502     if(s->alternate_scan && s->dct_quantize != dct_quantize_c){
4503         for(i=0; i<mb_block_count; i++){
4504             int j;
4505             if(s->block_last_index[i]>0){
4506                 for(j=63; j>0; j--){
4507                     if(s->block[i][ s->intra_scantable.permutated[j] ]) break;
4508                 }
4509                 s->block_last_index[i]= j;
4510             }
4511         }
4512     }
4513
4514     /* huffman encode */
4515     switch(s->codec_id){ //FIXME funct ptr could be slightly faster
4516     case CODEC_ID_MPEG1VIDEO:
4517     case CODEC_ID_MPEG2VIDEO:
4518         mpeg1_encode_mb(s, s->block, motion_x, motion_y); break;
4519     case CODEC_ID_MPEG4:
4520         mpeg4_encode_mb(s, s->block, motion_x, motion_y); break;
4521     case CODEC_ID_MSMPEG4V2:
4522     case CODEC_ID_MSMPEG4V3:
4523     case CODEC_ID_WMV1:
4524         msmpeg4_encode_mb(s, s->block, motion_x, motion_y); break;
4525     case CODEC_ID_WMV2:
4526          ff_wmv2_encode_mb(s, s->block, motion_x, motion_y); break;
4527 #ifdef CONFIG_H261_ENCODER
4528     case CODEC_ID_H261:
4529         ff_h261_encode_mb(s, s->block, motion_x, motion_y); break;
4530 #endif
4531     case CODEC_ID_H263:
4532     case CODEC_ID_H263P:
4533     case CODEC_ID_FLV1:
4534     case CODEC_ID_RV10:
4535     case CODEC_ID_RV20:
4536         h263_encode_mb(s, s->block, motion_x, motion_y); break;
4537     case CODEC_ID_MJPEG:
4538         mjpeg_encode_mb(s, s->block); break;
4539     default:
4540         assert(0);
4541     }
4542 }
4543
4544 static always_inline void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
4545 {
4546     if (s->chroma_format == CHROMA_420) encode_mb_internal(s, motion_x, motion_y,  8, 6);
4547     else                                encode_mb_internal(s, motion_x, motion_y, 16, 8);
4548 }
4549
4550 #endif //CONFIG_ENCODERS
4551
4552 void ff_mpeg_flush(AVCodecContext *avctx){
4553     int i;
4554     MpegEncContext *s = avctx->priv_data;
4555
4556     if(s==NULL || s->picture==NULL)
4557         return;
4558
4559     for(i=0; i<MAX_PICTURE_COUNT; i++){
4560        if(s->picture[i].data[0] && (   s->picture[i].type == FF_BUFFER_TYPE_INTERNAL
4561                                     || s->picture[i].type == FF_BUFFER_TYPE_USER))
4562         avctx->release_buffer(avctx, (AVFrame*)&s->picture[i]);
4563     }
4564     s->current_picture_ptr = s->last_picture_ptr = s->next_picture_ptr = NULL;
4565
4566     s->mb_x= s->mb_y= 0;
4567
4568     s->parse_context.state= -1;
4569     s->parse_context.frame_start_found= 0;
4570     s->parse_context.overread= 0;
4571     s->parse_context.overread_index= 0;
4572     s->parse_context.index= 0;
4573     s->parse_context.last_index= 0;
4574     s->bitstream_buffer_size=0;
4575 }
4576
4577 #ifdef CONFIG_ENCODERS
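/* Append 'length' bits from src to the PutBitContext. Short or unaligned
 * copies go through 16-bit put_bits() calls; longer byte-aligned runs are
 * flushed and then memcpy()ed straight into the output buffer. */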
4578 void ff_copy_bits(PutBitContext *pb, uint8_t *src, int length)
4579 {
4580     const uint16_t *srcw= (uint16_t*)src;
4581     int words= length>>4;
4582     int bits= length&15;
4583     int i;
4584
4585     if(length==0) return;
4586
4587     if(words < 16){
4588         for(i=0; i<words; i++) put_bits(pb, 16, be2me_16(srcw[i]));
4589     }else if(put_bits_count(pb)&7){
4590         for(i=0; i<words; i++) put_bits(pb, 16, be2me_16(srcw[i]));
4591     }else{
4592         for(i=0; put_bits_count(pb)&31; i++)
4593             put_bits(pb, 8, src[i]);
4594         flush_put_bits(pb);
4595         memcpy(pbBufPtr(pb), src+i, 2*words-i);
4596         skip_put_bytes(pb, 2*words-i);
4597     }
4598
4599     put_bits(pb, bits, be2me_16(srcw[words])>>(16-bits));
4600 }
4601
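/* The two helpers below snapshot and restore the encoder state (last MVs, DC
 * predictors, bit counters, block pointers, ...) so that encode_mb_hq() can
 * try several candidate macroblock types on the same macroblock and keep the
 * cheapest one. */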
4602 static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int type){
4603     int i;
4604
4605     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
4606
4607     /* mpeg1 */
4608     d->mb_skip_run= s->mb_skip_run;
4609     for(i=0; i<3; i++)
4610         d->last_dc[i]= s->last_dc[i];
4611
4612     /* statistics */
4613     d->mv_bits= s->mv_bits;
4614     d->i_tex_bits= s->i_tex_bits;
4615     d->p_tex_bits= s->p_tex_bits;
4616     d->i_count= s->i_count;
4617     d->f_count= s->f_count;
4618     d->b_count= s->b_count;
4619     d->skip_count= s->skip_count;
4620     d->misc_bits= s->misc_bits;
4621     d->last_bits= 0;
4622
4623     d->mb_skipped= 0;
4624     d->qscale= s->qscale;
4625     d->dquant= s->dquant;
4626 }
4627
4628 static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){
4629     int i;
4630
4631     memcpy(d->mv, s->mv, 2*4*2*sizeof(int));
4632     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
4633
4634     /* mpeg1 */
4635     d->mb_skip_run= s->mb_skip_run;
4636     for(i=0; i<3; i++)
4637         d->last_dc[i]= s->last_dc[i];
4638
4639     /* statistics */
4640     d->mv_bits= s->mv_bits;
4641     d->i_tex_bits= s->i_tex_bits;
4642     d->p_tex_bits= s->p_tex_bits;
4643     d->i_count= s->i_count;
4644     d->f_count= s->f_count;
4645     d->b_count= s->b_count;
4646     d->skip_count= s->skip_count;
4647     d->misc_bits= s->misc_bits;
4648
4649     d->mb_intra= s->mb_intra;
4650     d->mb_skipped= s->mb_skipped;
4651     d->mv_type= s->mv_type;
4652     d->mv_dir= s->mv_dir;
4653     d->pb= s->pb;
4654     if(s->data_partitioning){
4655         d->pb2= s->pb2;
4656         d->tex_pb= s->tex_pb;
4657     }
4658     d->block= s->block;
4659     for(i=0; i<8; i++)
4660         d->block_last_index[i]= s->block_last_index[i];
4661     d->interlaced_dct= s->interlaced_dct;
4662     d->qscale= s->qscale;
4663 }
4664
4665 static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type,
4666                            PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2],
4667                            int *dmin, int *next_block, int motion_x, int motion_y)
4668 {
4669     int score;
4670     uint8_t *dest_backup[3];
4671
4672     copy_context_before_encode(s, backup, type);
4673
4674     s->block= s->blocks[*next_block];
4675     s->pb= pb[*next_block];
4676     if(s->data_partitioning){
4677         s->pb2   = pb2   [*next_block];
4678         s->tex_pb= tex_pb[*next_block];
4679     }
4680
4681     if(*next_block){
4682         memcpy(dest_backup, s->dest, sizeof(s->dest));
4683         s->dest[0] = s->rd_scratchpad;
4684         s->dest[1] = s->rd_scratchpad + 16*s->linesize;
4685         s->dest[2] = s->rd_scratchpad + 16*s->linesize + 8;
4686         assert(s->linesize >= 32); //FIXME
4687     }
4688
4689     encode_mb(s, motion_x, motion_y);
4690
4691     score= put_bits_count(&s->pb);
4692     if(s->data_partitioning){
4693         score+= put_bits_count(&s->pb2);
4694         score+= put_bits_count(&s->tex_pb);
4695     }
4696
4697     if(s->avctx->mb_decision == FF_MB_DECISION_RD){
4698         MPV_decode_mb(s, s->block);
4699
4700         score *= s->lambda2;
4701         score += sse_mb(s) << FF_LAMBDA_SHIFT;
4702     }
4703
4704     if(*next_block){
4705         memcpy(s->dest, dest_backup, sizeof(s->dest));
4706     }
4707
4708     if(score<*dmin){
4709         *dmin= score;
4710         *next_block^=1;
4711
4712         copy_context_after_encode(best, s, type);
4713     }
4714 }
4715
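/* Sum of squared errors over a w x h block; the common 16x16 and 8x8 sizes use
 * the dsputil sse functions, everything else falls back to a plain loop. */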
4716 static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride){
4717     uint32_t *sq = squareTbl + 256;
4718     int acc=0;
4719     int x,y;
4720
4721     if(w==16 && h==16)
4722         return s->dsp.sse[0](NULL, src1, src2, stride, 16);
4723     else if(w==8 && h==8)
4724         return s->dsp.sse[1](NULL, src1, src2, stride, 8);
4725
4726     for(y=0; y<h; y++){
4727         for(x=0; x<w; x++){
4728             acc+= sq[src1[x + y*stride] - src2[x + y*stride]];
4729         }
4730     }
4731
4732     assert(acc>=0);
4733
4734     return acc;
4735 }
4736
4737 static int sse_mb(MpegEncContext *s){
4738     int w= 16;
4739     int h= 16;
4740
4741     if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
4742     if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
4743
4744     if(w==16 && h==16)
4745       if(s->avctx->mb_cmp == FF_CMP_NSSE){
4746         return  s->dsp.nsse[0](s, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
4747                +s->dsp.nsse[1](s, s->new_picture.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
4748                +s->dsp.nsse[1](s, s->new_picture.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
4749       }else{
4750         return  s->dsp.sse[0](NULL, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
4751                +s->dsp.sse[1](NULL, s->new_picture.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
4752                +s->dsp.sse[1](NULL, s->new_picture.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
4753       }
4754     else
4755         return  sse(s, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], w, h, s->linesize)
4756                +sse(s, s->new_picture.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize)
4757                +sse(s, s->new_picture.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize);
4758 }
4759
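/* Motion estimation pre-pass, run per slice thread: walks the macroblocks in
 * reverse raster order with me.pre_pass set, which gives the main estimation
 * pass rough vectors to predict from. */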
4760 static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){
4761     MpegEncContext *s= arg;
4762
4763
4764     s->me.pre_pass=1;
4765     s->me.dia_size= s->avctx->pre_dia_size;
4766     s->first_slice_line=1;
4767     for(s->mb_y= s->end_mb_y-1; s->mb_y >= s->start_mb_y; s->mb_y--) {
4768         for(s->mb_x=s->mb_width-1; s->mb_x >=0 ;s->mb_x--) {
4769             ff_pre_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
4770         }
4771         s->first_slice_line=0;
4772     }
4773
4774     s->me.pre_pass=0;
4775
4776     return 0;
4777 }
4778
4779 static int estimate_motion_thread(AVCodecContext *c, void *arg){
4780     MpegEncContext *s= arg;
4781
4782     s->me.dia_size= s->avctx->dia_size;
4783     s->first_slice_line=1;
4784     for(s->mb_y= s->start_mb_y; s->mb_y < s->end_mb_y; s->mb_y++) {
4785         s->mb_x=0; //for block init below
4786         ff_init_block_index(s);
4787         for(s->mb_x=0; s->mb_x < s->mb_width; s->mb_x++) {
4788             s->block_index[0]+=2;
4789             s->block_index[1]+=2;
4790             s->block_index[2]+=2;
4791             s->block_index[3]+=2;
4792
4793             /* compute motion vector & mb_type and store in context */
4794             if(s->pict_type==B_TYPE)
4795                 ff_estimate_b_frame_motion(s, s->mb_x, s->mb_y);
4796             else
4797                 ff_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
4798         }
4799         s->first_slice_line=0;
4800     }
4801     return 0;
4802 }
4803
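/* Per-macroblock luma statistics: mb_mean is the average pixel value and
 * mb_var is roughly the variance of the 16x16 block,
 *   varc ~ (sum(v*v) - sum(v)^2/256) / 256
 * (plus small rounding terms), collected for rate control / adaptive
 * quantization. */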
4804 static int mb_var_thread(AVCodecContext *c, void *arg){
4805     MpegEncContext *s= arg;
4806     int mb_x, mb_y;
4807
4808     for(mb_y=s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
4809         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
4810             int xx = mb_x * 16;
4811             int yy = mb_y * 16;
4812             uint8_t *pix = s->new_picture.data[0] + (yy * s->linesize) + xx;
4813             int varc;
4814             int sum = s->dsp.pix_sum(pix, s->linesize);
4815
4816             varc = (s->dsp.pix_norm1(pix, s->linesize) - (((unsigned)(sum*sum))>>8) + 500 + 128)>>8;
4817
4818             s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc;
4819             s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
4820             s->me.mb_var_sum_temp    += varc;
4821         }
4822     }
4823     return 0;
4824 }
4825
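/* Finish the current slice: merge MPEG-4 data partitions if used, write the
 * codec-specific stuffing, then byte-align and flush the bitstream writer. */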
4826 static void write_slice_end(MpegEncContext *s){
4827     if(s->codec_id==CODEC_ID_MPEG4){
4828         if(s->partitioned_frame){
4829             ff_mpeg4_merge_partitions(s);
4830         }
4831
4832         ff_mpeg4_stuffing(&s->pb);
4833     }else if(s->out_format == FMT_MJPEG){
4834         ff_mjpeg_stuffing(&s->pb);
4835     }
4836
4837     align_put_bits(&s->pb);
4838     flush_put_bits(&s->pb);
4839
4840     if((s->flags&CODEC_FLAG_PASS1) && !s->partitioned_frame)
4841         s->misc_bits+= get_bits_diff(s);
4842 }
4843
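/* Main per-slice encoding loop. For every macroblock it optionally emits a
 * GOB / video packet / slice header (RTP mode), then either encodes the single
 * candidate macroblock type directly, or, when several types (or QP_RD) are
 * possible, tries each candidate with encode_mb_hq() into scratch bitstreams
 * and keeps the cheapest one. */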
4844 static int encode_thread(AVCodecContext *c, void *arg){
4845     MpegEncContext *s= arg;
4846     int mb_x, mb_y, pdif = 0;
4847     int i, j;
4848     MpegEncContext best_s, backup_s;
4849     uint8_t bit_buf[2][MAX_MB_BYTES];
4850     uint8_t bit_buf2[2][MAX_MB_BYTES];
4851     uint8_t bit_buf_tex[2][MAX_MB_BYTES];
4852     PutBitContext pb[2], pb2[2], tex_pb[2];
4853 //printf("%d->%d\n", s->resync_mb_y, s->end_mb_y);
4854
4855     for(i=0; i<2; i++){
4856         init_put_bits(&pb    [i], bit_buf    [i], MAX_MB_BYTES);
4857         init_put_bits(&pb2   [i], bit_buf2   [i], MAX_MB_BYTES);
4858         init_put_bits(&tex_pb[i], bit_buf_tex[i], MAX_MB_BYTES);
4859     }
4860
4861     s->last_bits= put_bits_count(&s->pb);
4862     s->mv_bits=0;
4863     s->misc_bits=0;
4864     s->i_tex_bits=0;
4865     s->p_tex_bits=0;
4866     s->i_count=0;
4867     s->f_count=0;
4868     s->b_count=0;
4869     s->skip_count=0;
4870
4871     for(i=0; i<3; i++){
4872         /* init last dc values */
4873         /* note: quant matrix value (8) is implied here */
4874         s->last_dc[i] = 128 << s->intra_dc_precision;
4875
4876         s->current_picture.error[i] = 0;
4877     }
4878     s->mb_skip_run = 0;
4879     memset(s->last_mv, 0, sizeof(s->last_mv));
4880
4881     s->last_mv_dir = 0;
4882
4883     switch(s->codec_id){
4884     case CODEC_ID_H263:
4885     case CODEC_ID_H263P:
4886     case CODEC_ID_FLV1:
4887         s->gob_index = ff_h263_get_gob_height(s);
4888         break;
4889     case CODEC_ID_MPEG4:
4890         if(s->partitioned_frame)
4891             ff_mpeg4_init_partitions(s);
4892         break;
4893     }
4894
4895     s->resync_mb_x=0;
4896     s->resync_mb_y=0;
4897     s->first_slice_line = 1;
4898     s->ptr_lastgob = s->pb.buf;
4899     for(mb_y= s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
4900 //    printf("row %d at %X\n", s->mb_y, (int)s);
4901         s->mb_x=0;
4902         s->mb_y= mb_y;
4903
4904         ff_set_qscale(s, s->qscale);
4905         ff_init_block_index(s);
4906
4907         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
4908             int xy= mb_y*s->mb_stride + mb_x; // removed const, H261 needs to adjust this
4909             int mb_type= s->mb_type[xy];
4910 //            int d;
4911             int dmin= INT_MAX;
4912             int dir;
4913
4914             if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < MAX_MB_BYTES){
4915                 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
4916                 return -1;
4917             }
4918             if(s->data_partitioning){
4919                 if(   s->pb2   .buf_end - s->pb2   .buf - (put_bits_count(&s->    pb2)>>3) < MAX_MB_BYTES
4920                    || s->tex_pb.buf_end - s->tex_pb.buf - (put_bits_count(&s->tex_pb )>>3) < MAX_MB_BYTES){
4921                     av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
4922                     return -1;
4923                 }
4924             }
4925
4926             s->mb_x = mb_x;
4927             s->mb_y = mb_y;  // moved into loop, can get changed by H.261
4928             ff_update_block_index(s);
4929
4930 #ifdef CONFIG_H261_ENCODER
4931             if(s->codec_id == CODEC_ID_H261){
4932                 ff_h261_reorder_mb_index(s);
4933                 xy= s->mb_y*s->mb_stride + s->mb_x;
4934                 mb_type= s->mb_type[xy];
4935             }
4936 #endif
4937
4938             /* write gob / video packet header  */
4939             if(s->rtp_mode){
4940                 int current_packet_size, is_gob_start;
4941
4942                 current_packet_size= ((put_bits_count(&s->pb)+7)>>3) - (s->ptr_lastgob - s->pb.buf);
4943
4944                 is_gob_start= s->avctx->rtp_payload_size && current_packet_size >= s->avctx->rtp_payload_size && mb_y + mb_x>0;
4945
4946                 if(s->start_mb_y == mb_y && mb_y > 0 && mb_x==0) is_gob_start=1;
4947
4948                 switch(s->codec_id){
4949                 case CODEC_ID_H263:
4950                 case CODEC_ID_H263P:
4951                     if(!s->h263_slice_structured)
4952                         if(s->mb_x || s->mb_y%s->gob_index) is_gob_start=0;
4953                     break;
4954                 case CODEC_ID_MPEG2VIDEO:
4955                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
4956                 case CODEC_ID_MPEG1VIDEO:
4957                     if(s->mb_skip_run) is_gob_start=0;
4958                     break;
4959                 }
4960
4961                 if(is_gob_start){
4962                     if(s->start_mb_y != mb_y || mb_x!=0){
4963                         write_slice_end(s);
4964
4965                         if(s->codec_id==CODEC_ID_MPEG4 && s->partitioned_frame){
4966                             ff_mpeg4_init_partitions(s);
4967                         }
4968                     }
4969
4970                     assert((put_bits_count(&s->pb)&7) == 0);
4971                     current_packet_size= pbBufPtr(&s->pb) - s->ptr_lastgob;
4972
4973                     if(s->avctx->error_rate && s->resync_mb_x + s->resync_mb_y > 0){
4974                         int r= put_bits_count(&s->pb)/8 + s->picture_number + 16 + s->mb_x + s->mb_y;
4975                         int d= 100 / s->avctx->error_rate;
4976                         if(r % d == 0){
4977                             current_packet_size=0;
4978 #ifndef ALT_BITSTREAM_WRITER
4979                             s->pb.buf_ptr= s->ptr_lastgob;
4980 #endif
4981                             assert(pbBufPtr(&s->pb) == s->ptr_lastgob);
4982                         }
4983                     }
4984
4985                     if (s->avctx->rtp_callback){
4986                         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width + mb_x - s->resync_mb_x;
4987                         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, current_packet_size, number_mb);
4988                     }
4989
4990                     switch(s->codec_id){
4991                     case CODEC_ID_MPEG4:
4992                         ff_mpeg4_encode_video_packet_header(s);
4993                         ff_mpeg4_clean_buffers(s);
4994                     break;
4995                     case CODEC_ID_MPEG1VIDEO:
4996                     case CODEC_ID_MPEG2VIDEO:
4997                         ff_mpeg1_encode_slice_header(s);
4998                         ff_mpeg1_clean_buffers(s);
4999                     break;
5000                     case CODEC_ID_H263:
5001                     case CODEC_ID_H263P:
5002                         h263_encode_gob_header(s, mb_y);
5003                     break;
5004                     }
5005
5006                     if(s->flags&CODEC_FLAG_PASS1){
5007                         int bits= put_bits_count(&s->pb);
5008                         s->misc_bits+= bits - s->last_bits;
5009                         s->last_bits= bits;
5010                     }
5011
5012                     s->ptr_lastgob += current_packet_size;
5013                     s->first_slice_line=1;
5014                     s->resync_mb_x=mb_x;
5015                     s->resync_mb_y=mb_y;
5016                 }
5017             }
5018
5019             if(  (s->resync_mb_x   == s->mb_x)
5020                && s->resync_mb_y+1 == s->mb_y){
5021                 s->first_slice_line=0;
5022             }
5023
5024             s->mb_skipped=0;
5025             s->dquant=0; //only for QP_RD
5026
5027             if(mb_type & (mb_type-1) || (s->flags & CODEC_FLAG_QP_RD)){ // more than 1 MB type possible or CODEC_FLAG_QP_RD
5028                 int next_block=0;
5029                 int pb_bits_count, pb2_bits_count, tex_pb_bits_count;
5030
5031                 copy_context_before_encode(&backup_s, s, -1);
5032                 backup_s.pb= s->pb;
5033                 best_s.data_partitioning= s->data_partitioning;
5034                 best_s.partitioned_frame= s->partitioned_frame;
5035                 if(s->data_partitioning){
5036                     backup_s.pb2= s->pb2;
5037                     backup_s.tex_pb= s->tex_pb;
5038                 }
5039
5040                 if(mb_type&CANDIDATE_MB_TYPE_INTER){
5041                     s->mv_dir = MV_DIR_FORWARD;
5042                     s->mv_type = MV_TYPE_16X16;
5043                     s->mb_intra= 0;
5044                     s->mv[0][0][0] = s->p_mv_table[xy][0];
5045                     s->mv[0][0][1] = s->p_mv_table[xy][1];
5046                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER, pb, pb2, tex_pb,
5047                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
5048                 }
5049                 if(mb_type&CANDIDATE_MB_TYPE_INTER_I){
5050                     s->mv_dir = MV_DIR_FORWARD;
5051                     s->mv_type = MV_TYPE_FIELD;
5052                     s->mb_intra= 0;
5053                     for(i=0; i<2; i++){
5054                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
5055                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
5056                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
5057                     }
5058                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER_I, pb, pb2, tex_pb,
5059                                  &dmin, &next_block, 0, 0);
5060                 }
5061                 if(mb_type&CANDIDATE_MB_TYPE_SKIPPED){
5062                     s->mv_dir = MV_DIR_FORWARD;
5063                     s->mv_type = MV_TYPE_16X16;
5064                     s->mb_intra= 0;
5065                     s->mv[0][0][0] = 0;
5066                     s->mv[0][0][1] = 0;
5067                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_SKIPPED, pb, pb2, tex_pb,
5068                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
5069                 }
5070                 if(mb_type&CANDIDATE_MB_TYPE_INTER4V){
5071                     s->mv_dir = MV_DIR_FORWARD;
5072                     s->mv_type = MV_TYPE_8X8;
5073                     s->mb_intra= 0;
5074                     for(i=0; i<4; i++){
5075                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
5076                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
5077                     }
5078                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER4V, pb, pb2, tex_pb,
5079                                  &dmin, &next_block, 0, 0);
5080                 }
5081                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD){
5082                     s->mv_dir = MV_DIR_FORWARD;
5083                     s->mv_type = MV_TYPE_16X16;
5084                     s->mb_intra= 0;
5085                     s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
5086                     s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
5087                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD, pb, pb2, tex_pb,
5088                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
5089                 }
5090                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD){
5091                     s->mv_dir = MV_DIR_BACKWARD;
5092                     s->mv_type = MV_TYPE_16X16;
5093                     s->mb_intra= 0;
5094                     s->mv[1][0][0] = s->b_back_mv_table[xy][0];
5095                     s->mv[1][0][1] = s->b_back_mv_table[xy][1];
5096                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD, pb, pb2, tex_pb,
5097                                  &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]);
5098                 }
5099                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR){
5100                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
5101                     s->mv_type = MV_TYPE_16X16;
5102                     s->mb_intra= 0;
5103                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
5104                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
5105                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
5106                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
5107                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR, pb, pb2, tex_pb,
5108                                  &dmin, &next_block, 0, 0);
5109                 }
5110                 if(mb_type&CANDIDATE_MB_TYPE_DIRECT){
5111                     int mx= s->b_direct_mv_table[xy][0];
5112                     int my= s->b_direct_mv_table[xy][1];
5113
5114                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
5115                     s->mb_intra= 0;
5116                     ff_mpeg4_set_direct_mv(s, mx, my);
5117                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
5118                                  &dmin, &next_block, mx, my);
5119                 }
5120                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD_I){
5121                     s->mv_dir = MV_DIR_FORWARD;
5122                     s->mv_type = MV_TYPE_FIELD;
5123                     s->mb_intra= 0;
5124                     for(i=0; i<2; i++){
5125                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
5126                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
5127                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
5128                     }
5129                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD_I, pb, pb2, tex_pb,
5130                                  &dmin, &next_block, 0, 0);
5131                 }
5132                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD_I){
5133                     s->mv_dir = MV_DIR_BACKWARD;
5134                     s->mv_type = MV_TYPE_FIELD;
5135                     s->mb_intra= 0;
5136                     for(i=0; i<2; i++){
5137                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
5138                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
5139                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
5140                     }
5141                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD_I, pb, pb2, tex_pb,
5142                                  &dmin, &next_block, 0, 0);
5143                 }
5144                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR_I){
5145                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
5146                     s->mv_type = MV_TYPE_FIELD;
5147                     s->mb_intra= 0;
5148                     for(dir=0; dir<2; dir++){
5149                         for(i=0; i<2; i++){
5150                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
5151                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
5152                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
5153                         }
5154                     }
5155                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR_I, pb, pb2, tex_pb,
5156                                  &dmin, &next_block, 0, 0);
5157                 }
5158                 if(mb_type&CANDIDATE_MB_TYPE_INTRA){
5159                     s->mv_dir = 0;
5160                     s->mv_type = MV_TYPE_16X16;
5161                     s->mb_intra= 1;
5162                     s->mv[0][0][0] = 0;
5163                     s->mv[0][0][1] = 0;
5164                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTRA, pb, pb2, tex_pb,
5165                                  &dmin, &next_block, 0, 0);
5166                     if(s->h263_pred || s->h263_aic){
5167                         if(best_s.mb_intra)
5168                             s->mbintra_table[mb_x + mb_y*s->mb_stride]=1;
5169                         else
5170                             ff_clean_intra_table_entries(s); //old mode?
5171                     }
5172                 }
5173
5174                 if(s->flags & CODEC_FLAG_QP_RD){
5175                     if(best_s.mv_type==MV_TYPE_16X16 && !(best_s.mv_dir&MV_DIRECT)){
5176                         const int last_qp= backup_s.qscale;
5177                         int dquant, dir, qp, dc[6];
5178                         DCTELEM ac[6][16];
5179                         const int mvdir= (best_s.mv_dir&MV_DIR_BACKWARD) ? 1 : 0;
5180
5181                         assert(backup_s.dquant == 0);
5182
5183                         //FIXME intra
5184                         s->mv_dir= best_s.mv_dir;
5185                         s->mv_type = MV_TYPE_16X16;
5186                         s->mb_intra= best_s.mb_intra;
5187                         s->mv[0][0][0] = best_s.mv[0][0][0];
5188                         s->mv[0][0][1] = best_s.mv[0][0][1];
5189                         s->mv[1][0][0] = best_s.mv[1][0][0];
5190                         s->mv[1][0][1] = best_s.mv[1][0][1];
5191
5192                         dir= s->pict_type == B_TYPE ? 2 : 1;
5193                         if(last_qp + dir > s->avctx->qmax) dir= -dir;
5194                         for(dquant= dir; dquant<=2 && dquant>=-2; dquant += dir){
5195                             qp= last_qp + dquant;
5196                             if(qp < s->avctx->qmin || qp > s->avctx->qmax)
5197                                 break;
5198                             backup_s.dquant= dquant;
5199                             if(s->mb_intra && s->dc_val[0]){
5200                                 for(i=0; i<6; i++){
5201                                     dc[i]= s->dc_val[0][ s->block_index[i] ];
5202                                     memcpy(ac[i], s->ac_val[0][s->block_index[i]], sizeof(DCTELEM)*16);
5203                                 }
5204                             }
5205
5206                             encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
5207                                          &dmin, &next_block, s->mv[mvdir][0][0], s->mv[mvdir][0][1]);
5208                             if(best_s.qscale != qp){
5209                                 if(s->mb_intra && s->dc_val[0]){
5210                                     for(i=0; i<6; i++){
5211                                         s->dc_val[0][ s->block_index[i] ]= dc[i];
5212                                         memcpy(s->ac_val[0][s->block_index[i]], ac[i], sizeof(DCTELEM)*16);
5213                                     }
5214                                 }
5215                                 if(dir > 0 && dquant==dir){
5216                                     dquant= 0;
5217                                     dir= -dir;
5218                                 }else
5219                                     break;
5220                             }
5221                         }
5222                         qp= best_s.qscale;
5223                         s->current_picture.qscale_table[xy]= qp;
5224                     }
5225                 }
5226
5227                 copy_context_after_encode(s, &best_s, -1);
5228
5229                 pb_bits_count= put_bits_count(&s->pb);
5230                 flush_put_bits(&s->pb);
5231                 ff_copy_bits(&backup_s.pb, bit_buf[next_block^1], pb_bits_count);
5232                 s->pb= backup_s.pb;
5233
5234                 if(s->data_partitioning){
5235                     pb2_bits_count= put_bits_count(&s->pb2);
5236                     flush_put_bits(&s->pb2);
5237                     ff_copy_bits(&backup_s.pb2, bit_buf2[next_block^1], pb2_bits_count);
5238                     s->pb2= backup_s.pb2;
5239
5240                     tex_pb_bits_count= put_bits_count(&s->tex_pb);
5241                     flush_put_bits(&s->tex_pb);
5242                     ff_copy_bits(&backup_s.tex_pb, bit_buf_tex[next_block^1], tex_pb_bits_count);
5243                     s->tex_pb= backup_s.tex_pb;
5244                 }
5245                 s->last_bits= put_bits_count(&s->pb);
5246
5247                 if (s->out_format == FMT_H263 && s->pict_type!=B_TYPE)
5248                     ff_h263_update_motion_val(s);
5249
5250                 if(next_block==0){ //FIXME 16 vs linesize16
5251                     s->dsp.put_pixels_tab[0][0](s->dest[0], s->rd_scratchpad                     , s->linesize  ,16);
5252                     s->dsp.put_pixels_tab[1][0](s->dest[1], s->rd_scratchpad + 16*s->linesize    , s->uvlinesize, 8);
5253                     s->dsp.put_pixels_tab[1][0](s->dest[2], s->rd_scratchpad + 16*s->linesize + 8, s->uvlinesize, 8);
5254                 }
5255
5256                 if(s->avctx->mb_decision == FF_MB_DECISION_BITS)
5257                     MPV_decode_mb(s, s->block);
5258             } else {
5259                 int motion_x, motion_y;
5260                 s->mv_type=MV_TYPE_16X16;
5261                 // only one MB-Type possible
5262
5263                 switch(mb_type){
5264                 case CANDIDATE_MB_TYPE_INTRA:
5265                     s->mv_dir = 0;
5266                     s->mb_intra= 1;
5267                     motion_x= s->mv[0][0][0] = 0;
5268                     motion_y= s->mv[0][0][1] = 0;
5269                     break;
5270                 case CANDIDATE_MB_TYPE_INTER:
5271                     s->mv_dir = MV_DIR_FORWARD;
5272                     s->mb_intra= 0;
5273                     motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
5274                     motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
5275                     break;
5276                 case CANDIDATE_MB_TYPE_INTER_I:
5277                     s->mv_dir = MV_DIR_FORWARD;
5278                     s->mv_type = MV_TYPE_FIELD;
5279                     s->mb_intra= 0;
5280                     for(i=0; i<2; i++){
5281                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
5282                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
5283                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
5284                     }
5285                     motion_x = motion_y = 0;
5286                     break;
5287                 case CANDIDATE_MB_TYPE_INTER4V:
5288                     s->mv_dir = MV_DIR_FORWARD;
5289                     s->mv_type = MV_TYPE_8X8;
5290                     s->mb_intra= 0;
5291                     for(i=0; i<4; i++){
5292                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
5293                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
5294                     }
5295                     motion_x= motion_y= 0;
5296                     break;
5297                 case CANDIDATE_MB_TYPE_DIRECT:
5298                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
5299                     s->mb_intra= 0;
5300                     motion_x=s->b_direct_mv_table[xy][0];
5301                     motion_y=s->b_direct_mv_table[xy][1];
5302                     ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
5303                     break;
5304                 case CANDIDATE_MB_TYPE_BIDIR:
5305                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
5306                     s->mb_intra= 0;
5307                     motion_x=0;
5308                     motion_y=0;
5309                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
5310                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
5311                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
5312                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
5313                     break;
5314                 case CANDIDATE_MB_TYPE_BACKWARD:
5315                     s->mv_dir = MV_DIR_BACKWARD;
5316                     s->mb_intra= 0;
5317                     motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
5318                     motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
5319                     break;
5320                 case CANDIDATE_MB_TYPE_FORWARD:
5321                     s->mv_dir = MV_DIR_FORWARD;
5322                     s->mb_intra= 0;
5323                     motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
5324                     motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
5325 //                    printf(" %d %d ", motion_x, motion_y);
5326                     break;
5327                 case CANDIDATE_MB_TYPE_FORWARD_I:
5328                     s->mv_dir = MV_DIR_FORWARD;
5329                     s->mv_type = MV_TYPE_FIELD;
5330                     s->mb_intra= 0;
5331                     for(i=0; i<2; i++){
5332                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
5333                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
5334                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
5335                     }
5336                     motion_x=motion_y=0;
5337                     break;
5338                 case CANDIDATE_MB_TYPE_BACKWARD_I:
5339                     s->mv_dir = MV_DIR_BACKWARD;
5340                     s->mv_type = MV_TYPE_FIELD;
5341                     s->mb_intra= 0;
5342                     for(i=0; i<2; i++){
5343                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
5344                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
5345                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
5346                     }
5347                     motion_x=motion_y=0;
5348                     break;
5349                 case CANDIDATE_MB_TYPE_BIDIR_I:
5350                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
5351                     s->mv_type = MV_TYPE_FIELD;
5352                     s->mb_intra= 0;
5353                     for(dir=0; dir<2; dir++){
5354                         for(i=0; i<2; i++){
5355                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
5356                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
5357                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
5358                         }
5359                     }
5360                     motion_x=motion_y=0;
5361                     break;
5362                 default:
5363                     motion_x=motion_y=0; //gcc warning fix
5364                     av_log(s->avctx, AV_LOG_ERROR, "illegal MB type\n");
5365                 }
5366
5367                 encode_mb(s, motion_x, motion_y);
5368
5369                 // RAL: Update last macroblock type
5370                 s->last_mv_dir = s->mv_dir;
5371
5372                 if (s->out_format == FMT_H263 && s->pict_type!=B_TYPE)
5373                     ff_h263_update_motion_val(s);
5374
5375                 MPV_decode_mb(s, s->block);
5376             }
5377
5378             /* clean the MV table in I/P/S-frames for direct mode in B-frames */
5379             if(s->mb_intra /* && I,P,S_TYPE */){
5380                 s->p_mv_table[xy][0]=0;
5381                 s->p_mv_table[xy][1]=0;
5382             }
5383
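            /* accumulate the per-plane sum of squared errors between the
               source (new_picture) and the reconstructed macroblock (dest)
               into current_picture.error[], so PSNR can be reported later;
               chroma uses half the width/height. */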
5384             if(s->flags&CODEC_FLAG_PSNR){
5385                 int w= 16;
5386                 int h= 16;
5387
5388                 if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
5389                 if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
5390
5391                 s->current_picture.error[0] += sse(
5392                     s, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
5393                     s->dest[0], w, h, s->linesize);
5394                 s->current_picture.error[1] += sse(
5395                     s, s->new_picture.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,
5396                     s->dest[1], w>>1, h>>1, s->uvlinesize);
5397                 s->current_picture.error[2] += sse(
5398                     s, s->new_picture    .data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,
5399                     s->dest[2], w>>1, h>>1, s->uvlinesize);
5400             }
5401             if(s->loop_filter){
5402                 if(s->out_format == FMT_H263)
5403                     ff_h263_loop_filter(s);
5404             }
5405 //printf("MB %d %d bits\n", s->mb_x+s->mb_y*s->mb_stride, put_bits_count(&s->pb));
5406         }
5407     }
5408
5409     //not pretty, but the extension header must be written before flushing, so it has to go here
5410     if (s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type == I_TYPE)
5411         msmpeg4_encode_ext_header(s);
5412
5413     write_slice_end(s);
5414
5415     /* Send the last GOB if RTP */
5416     if (s->avctx->rtp_callback) {
5417         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width - s->resync_mb_x;
5418         pdif = pbBufPtr(&s->pb) - s->ptr_lastgob;
5419         /* Call the RTP callback to send the last GOB */
5420         emms_c();
5421         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, pdif, number_mb);
5422     }
5423
5424     return 0;
5425 }
5426
5427 #define MERGE(field) dst->field += src->field; src->field=0
5428 static void merge_context_after_me(MpegEncContext *dst, MpegEncContext *src){
5429     MERGE(me.scene_change_score);
5430     MERGE(me.mc_mb_var_sum_temp);
5431     MERGE(me.mb_var_sum_temp);
5432 }
5433
5434 static void merge_context_after_encode(MpegEncContext *dst, MpegEncContext *src){
5435     int i;
5436
5437     MERGE(dct_count[0]); //note, the other dct vars are not part of the context
5438     MERGE(dct_count[1]);
5439     MERGE(mv_bits);
5440     MERGE(i_tex_bits);
5441     MERGE(p_tex_bits);
5442     MERGE(i_count);
5443     MERGE(f_count);
5444     MERGE(b_count);
5445     MERGE(skip_count);
5446     MERGE(misc_bits);
5447     MERGE(error_count);
5448     MERGE(padding_bug_score);
5449     MERGE(current_picture.error[0]);
5450     MERGE(current_picture.error[1]);
5451     MERGE(current_picture.error[2]);
5452
5453     if(dst->avctx->noise_reduction){
5454         for(i=0; i<64; i++){
5455             MERGE(dct_error_sum[0][i]);
5456             MERGE(dct_error_sum[1][i]);
5457         }
5458     }
5459
5460     assert(put_bits_count(&src->pb) % 8 ==0);
5461     assert(put_bits_count(&dst->pb) % 8 ==0);
5462     ff_copy_bits(&dst->pb, src->pb.buf, put_bits_count(&src->pb));
5463     flush_put_bits(&dst->pb);
5464 }
5465
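/* Pick the picture-level quantizer: ask the rate controller unless a fixed
   qscale is forced, and for adaptive quantization clean up the per-MB qscale
   table, since H.263/MPEG-4 only allow small qscale changes between
   macroblocks. */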
5466 static void estimate_qp(MpegEncContext *s, int dry_run){
5467     if (!s->fixed_qscale)
5468         s->current_picture_ptr->quality=
5469         s->current_picture.quality = ff_rate_estimate_qscale(s, dry_run);
5470
5471     if(s->adaptive_quant){
5472         switch(s->codec_id){
5473         case CODEC_ID_MPEG4:
5474             ff_clean_mpeg4_qscales(s);
5475             break;
5476         case CODEC_ID_H263:
5477         case CODEC_ID_H263P:
5478         case CODEC_ID_FLV1:
5479             ff_clean_h263_qscales(s);
5480             break;
5481         }
5482
5483         s->lambda= s->lambda_table[0];
5484         //FIXME broken
5485     }else
5486         s->lambda= s->current_picture.quality;
5487 //printf("%d %d\n", s->avctx->global_quality, s->current_picture.quality);
5488     update_qscale(s);
5489 }
5490
5491 static void encode_picture(MpegEncContext *s, int picture_number)
5492 {
5493     int i;
5494     int bits;
5495
5496     s->picture_number = picture_number;
5497
5498     /* Reset the average MB variance */
5499     s->me.mb_var_sum_temp    =
5500     s->me.mc_mb_var_sum_temp = 0;
5501
5502     /* we need to initialize some timing variables before we can encode B-frames */
5503     // RAL: Condition added for MPEG1VIDEO
5504     if (s->codec_id == CODEC_ID_MPEG1VIDEO || s->codec_id == CODEC_ID_MPEG2VIDEO || (s->h263_pred && !s->h263_msmpeg4))
5505         ff_set_mpeg4_time(s, s->picture_number);  //FIXME rename and use has_b_frames or similar
5506
5507     s->me.scene_change_score=0;
5508
5509 //    s->lambda= s->current_picture_ptr->quality; //FIXME qscale / ... stuff for ME ratedistoration
5510
5511     if(s->pict_type==I_TYPE){
5512         if(s->msmpeg4_version >= 3) s->no_rounding=1;
5513         else                        s->no_rounding=0;
5514     }else if(s->pict_type!=B_TYPE){
5515         if(s->flipflop_rounding || s->codec_id == CODEC_ID_H263P || s->codec_id == CODEC_ID_MPEG4)
5516             s->no_rounding ^= 1;
5517     }
5518
5519     if(s->flags & CODEC_FLAG_PASS2){
5520         estimate_qp(s, 1);
5521         ff_get_2pass_fcode(s);
5522     }else if(!(s->flags & CODEC_FLAG_QSCALE)){
5523         if(s->pict_type==B_TYPE)
5524             s->lambda= s->last_lambda_for[s->pict_type];
5525         else
5526             s->lambda= s->last_lambda_for[s->last_non_b_pict_type];
5527         update_qscale(s);
5528     }
5529
5530     s->mb_intra=0; //for the rate distortion & bit compare functions
5531     for(i=1; i<s->avctx->thread_count; i++){
5532         ff_update_duplicate_context(s->thread_context[i], s);
5533     }
5534
5535     ff_init_me(s);
5536
5537     /* Estimate motion for every MB */
5538     if(s->pict_type != I_TYPE){
5539         s->lambda = (s->lambda * s->avctx->me_penalty_compensation + 128)>>8;
5540         s->lambda2= (s->lambda2* s->avctx->me_penalty_compensation + 128)>>8;
5541         if(s->pict_type != B_TYPE && s->avctx->me_threshold==0){
5542             if((s->avctx->pre_me && s->last_non_b_pict_type==I_TYPE) || s->avctx->pre_me==2){
5543                 s->avctx->execute(s->avctx, pre_estimate_motion_thread, (void**)&(s->thread_context[0]), NULL, s->avctx->thread_count);
5544             }
5545         }
5546
5547         s->avctx->execute(s->avctx, estimate_motion_thread, (void**)&(s->thread_context[0]), NULL, s->avctx->thread_count);
5548     }else /* if(s->pict_type == I_TYPE) */{
5549         /* I-Frame */
5550         for(i=0; i<s->mb_stride*s->mb_height; i++)
5551             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
5552
5553         if(!s->fixed_qscale){
5554             /* finding spatial complexity for I-frame rate control */
5555             s->avctx->execute(s->avctx, mb_var_thread, (void**)&(s->thread_context[0]), NULL, s->avctx->thread_count);
5556         }
5557     }
5558     for(i=1; i<s->avctx->thread_count; i++){
5559         merge_context_after_me(s, s->thread_context[i]);
5560     }
5561     s->current_picture.mc_mb_var_sum= s->current_picture_ptr->mc_mb_var_sum= s->me.mc_mb_var_sum_temp;
5562     s->current_picture.   mb_var_sum= s->current_picture_ptr->   mb_var_sum= s->me.   mb_var_sum_temp;
5563     emms_c();
5564
5565     if(s->me.scene_change_score > s->avctx->scenechange_threshold && s->pict_type == P_TYPE){
5566         s->pict_type= I_TYPE;
5567         for(i=0; i<s->mb_stride*s->mb_height; i++)
5568             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
5569 //printf("Scene change detected, encoding as I Frame %d %d\n", s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
5570     }
5571
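    /* unless H.263+ unrestricted MVs are in use, pick the smallest f_code (and
       b_code for B-frames) that covers the estimated motion vectors, and let
       ff_fix_long_mvs() deal with any vectors that still fall outside that
       range. */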
5572     if(!s->umvplus){
5573         if(s->pict_type==P_TYPE || s->pict_type==S_TYPE) {
5574             s->f_code= ff_get_best_fcode(s, s->p_mv_table, CANDIDATE_MB_TYPE_INTER);
5575
5576             if(s->flags & CODEC_FLAG_INTERLACED_ME){
5577                 int a,b;
5578                 a= ff_get_best_fcode(s, s->p_field_mv_table[0][0], CANDIDATE_MB_TYPE_INTER_I); //FIXME field_select
5579                 b= ff_get_best_fcode(s, s->p_field_mv_table[1][1], CANDIDATE_MB_TYPE_INTER_I);
5580                 s->f_code= FFMAX(s->f_code, FFMAX(a,b));
5581             }
5582
5583             ff_fix_long_p_mvs(s);
5584             ff_fix_long_mvs(s, NULL, 0, s->p_mv_table, s->f_code, CANDIDATE_MB_TYPE_INTER, 0);
5585             if(s->flags & CODEC_FLAG_INTERLACED_ME){
5586                 int j;
5587                 for(i=0; i<2; i++){
5588                     for(j=0; j<2; j++)
5589                         ff_fix_long_mvs(s, s->p_field_select_table[i], j,
5590                                         s->p_field_mv_table[i][j], s->f_code, CANDIDATE_MB_TYPE_INTER_I, 0);
5591                 }
5592             }
5593         }
5594
5595         if(s->pict_type==B_TYPE){
5596             int a, b;
5597
5598             a = ff_get_best_fcode(s, s->b_forw_mv_table, CANDIDATE_MB_TYPE_FORWARD);
5599             b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, CANDIDATE_MB_TYPE_BIDIR);
5600             s->f_code = FFMAX(a, b);
5601
5602             a = ff_get_best_fcode(s, s->b_back_mv_table, CANDIDATE_MB_TYPE_BACKWARD);
5603             b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, CANDIDATE_MB_TYPE_BIDIR);
5604             s->b_code = FFMAX(a, b);
5605
5606             ff_fix_long_mvs(s, NULL, 0, s->b_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_FORWARD, 1);
5607             ff_fix_long_mvs(s, NULL, 0, s->b_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BACKWARD, 1);
5608             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_BIDIR, 1);
5609             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BIDIR, 1);
5610             if(s->flags & CODEC_FLAG_INTERLACED_ME){
5611                 int dir, j;
5612                 for(dir=0; dir<2; dir++){
5613                     for(i=0; i<2; i++){
5614                         for(j=0; j<2; j++){
5615                             int type= dir ? (CANDIDATE_MB_TYPE_BACKWARD_I|CANDIDATE_MB_TYPE_BIDIR_I)
5616                                           : (CANDIDATE_MB_TYPE_FORWARD_I |CANDIDATE_MB_TYPE_BIDIR_I);
5617                             ff_fix_long_mvs(s, s->b_field_select_table[dir][i], j,
5618                                             s->b_field_mv_table[dir][i][j], dir ? s->b_code : s->f_code, type, 1);
5619                         }
5620                     }
5621                 }
5622             }
5623         }
5624     }
5625
5626     estimate_qp(s, 0);
5627
5628     if(s->qscale < 3 && s->max_qcoeff<=128 && s->pict_type==I_TYPE && !(s->flags & CODEC_FLAG_QSCALE))
5629         s->qscale= 3; //reduce clipping problems
5630
5631     if (s->out_format == FMT_MJPEG) {
5632         /* for mjpeg, we do include qscale in the matrix */
5633         s->intra_matrix[0] = ff_mpeg1_default_intra_matrix[0];
5634         for(i=1;i<64;i++){
5635             int j= s->dsp.idct_permutation[i];
5636
5637             s->intra_matrix[j] = clip_uint8((ff_mpeg1_default_intra_matrix[i] * s->qscale) >> 3);
5638         }
5639         convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
5640                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
5641         s->qscale= 8;
5642     }
5643
5644     //FIXME var duplication
5645     s->current_picture_ptr->key_frame=
5646     s->current_picture.key_frame= s->pict_type == I_TYPE; //FIXME pic_ptr
5647     s->current_picture_ptr->pict_type=
5648     s->current_picture.pict_type= s->pict_type;
5649
5650     if(s->current_picture.key_frame)
5651         s->picture_in_gop_number=0;
5652
5653     s->last_bits= put_bits_count(&s->pb);
5654     switch(s->out_format) {
5655     case FMT_MJPEG:
5656         mjpeg_picture_header(s);
5657         break;
5658 #ifdef CONFIG_H261_ENCODER
5659     case FMT_H261:
5660         ff_h261_encode_picture_header(s, picture_number);
5661         break;
5662 #endif
5663     case FMT_H263:
5664         if (s->codec_id == CODEC_ID_WMV2)
5665             ff_wmv2_encode_picture_header(s, picture_number);
5666         else if (s->h263_msmpeg4)
5667             msmpeg4_encode_picture_header(s, picture_number);
5668         else if (s->h263_pred)
5669             mpeg4_encode_picture_header(s, picture_number);
5670 #ifdef CONFIG_RV10_ENCODER
5671         else if (s->codec_id == CODEC_ID_RV10)
5672             rv10_encode_picture_header(s, picture_number);
5673 #endif
5674 #ifdef CONFIG_RV20_ENCODER
5675         else if (s->codec_id == CODEC_ID_RV20)
5676             rv20_encode_picture_header(s, picture_number);
5677 #endif
5678         else if (s->codec_id == CODEC_ID_FLV1)
5679             ff_flv_encode_picture_header(s, picture_number);
5680         else
5681             h263_encode_picture_header(s, picture_number);
5682         break;
5683     case FMT_MPEG1:
5684         mpeg1_encode_picture_header(s, picture_number);
5685         break;
5686     case FMT_H264:
5687         break;
5688     default:
5689         assert(0);
5690     }
5691     bits= put_bits_count(&s->pb);
5692     s->header_bits= bits - s->last_bits;
5693
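    /* slice threads: each duplicate context encodes into its own byte-aligned
       bitstream buffer; merge_context_after_encode() above concatenates those
       buffers and sums the per-slice statistics back into the main context. */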
5694     for(i=1; i<s->avctx->thread_count; i++){
5695         update_duplicate_context_after_me(s->thread_context[i], s);
5696     }
5697     s->avctx->execute(s->avctx, encode_thread, (void**)&(s->thread_context[0]), NULL, s->avctx->thread_count);
5698     for(i=1; i<s->avctx->thread_count; i++){
5699         merge_context_after_encode(s, s->thread_context[i]);
5700     }
5701     emms_c();
5702 }
5703
5704 #endif //CONFIG_ENCODERS
5705
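/* DCT-domain noise reduction: dct_error_sum[] accumulates the magnitude of
   every nonzero coefficient seen so far, and dct_offset[] (derived from those
   sums elsewhere) is subtracted from each coefficient, pulling small, noisy
   values towards zero before quantization. */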
5706 static void  denoise_dct_c(MpegEncContext *s, DCTELEM *block){
5707     const int intra= s->mb_intra;
5708     int i;
5709
5710     s->dct_count[intra]++;
5711
5712     for(i=0; i<64; i++){
5713         int level= block[i];
5714
5715         if(level){
5716             if(level>0){
5717                 s->dct_error_sum[intra][i] += level;
5718                 level -= s->dct_offset[intra][i];
5719                 if(level<0) level=0;
5720             }else{
5721                 s->dct_error_sum[intra][i] -= level;
5722                 level += s->dct_offset[intra][i];
5723                 if(level>0) level=0;
5724             }
5725             block[i]= level;
5726         }
5727     }
5728 }
5729
5730 #ifdef CONFIG_ENCODERS
5731
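/* Trellis quantization: a dynamic program over the scan positions. For every
   coefficient up to two candidate levels are considered (the rounded value and
   the value one step closer to zero); each candidate is scored as
   distortion + lambda * bits, where the bit cost comes from the run/level VLC
   length tables, and only the cheapest "survivor" paths are kept. The best
   path is finally written back into block[] in permuted scan order. */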
5732 static int dct_quantize_trellis_c(MpegEncContext *s,
5733                         DCTELEM *block, int n,
5734                         int qscale, int *overflow){
5735     const int *qmat;
5736     const uint8_t *scantable= s->intra_scantable.scantable;
5737     const uint8_t *perm_scantable= s->intra_scantable.permutated;
5738     int max=0;
5739     unsigned int threshold1, threshold2;
5740     int bias=0;
5741     int run_tab[65];
5742     int level_tab[65];
5743     int score_tab[65];
5744     int survivor[65];
5745     int survivor_count;
5746     int last_run=0;
5747     int last_level=0;
5748     int last_score= 0;
5749     int last_i;
5750     int coeff[2][64];
5751     int coeff_count[64];
5752     int qmul, qadd, start_i, last_non_zero, i, dc;
5753     const int esc_length= s->ac_esc_length;
5754     uint8_t * length;
5755     uint8_t * last_length;
5756     const int lambda= s->lambda2 >> (FF_LAMBDA_SHIFT - 6);
5757
5758     s->dsp.fdct (block);
5759
5760     if(s->dct_error_sum)
5761         s->denoise_dct(s, block);
5762     qmul= qscale*16;
5763     qadd= ((qscale-1)|1)*8;
5764
5765     if (s->mb_intra) {
5766         int q;
5767         if (!s->h263_aic) {
5768             if (n < 4)
5769                 q = s->y_dc_scale;
5770             else
5771                 q = s->c_dc_scale;
5772             q = q << 3;
5773         } else{
5774             /* For AIC we skip quant/dequant of INTRADC */
5775             q = 1 << 3;
5776             qadd=0;
5777         }
5778
5779         /* note: block[0] is assumed to be positive */
5780         block[0] = (block[0] + (q >> 1)) / q;
5781         start_i = 1;
5782         last_non_zero = 0;
5783         qmat = s->q_intra_matrix[qscale];
5784         if(s->mpeg_quant || s->out_format == FMT_MPEG1)
5785             bias= 1<<(QMAT_SHIFT-1);
5786         length     = s->intra_ac_vlc_length;
5787         last_length= s->intra_ac_vlc_last_length;
5788     } else {
5789         start_i = 0;
5790         last_non_zero = -1;
5791         qmat = s->q_inter_matrix[qscale];
5792         length     = s->inter_ac_vlc_length;
5793         last_length= s->inter_ac_vlc_last_length;
5794     }
5795     last_i= start_i;
5796
5797     threshold1= (1<<QMAT_SHIFT) - bias - 1;
5798     threshold2= (threshold1<<1);
5799
5800     for(i=63; i>=start_i; i--) {
5801         const int j = scantable[i];
5802         int level = block[j] * qmat[j];
5803
5804         if(((unsigned)(level+threshold1))>threshold2){
5805             last_non_zero = i;
5806             break;
5807         }
5808     }
5809
5810     for(i=start_i; i<=last_non_zero; i++) {
5811         const int j = scantable[i];
5812         int level = block[j] * qmat[j];
5813
5814 //        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
5815 //           || bias-level >= (1<<(QMAT_SHIFT - 3))){
5816         if(((unsigned)(level+threshold1))>threshold2){
5817             if(level>0){
5818                 level= (bias + level)>>QMAT_SHIFT;
5819                 coeff[0][i]= level;
5820                 coeff[1][i]= level-1;
5821 //                coeff[2][k]= level-2;
5822             }else{
5823                 level= (bias - level)>>QMAT_SHIFT;
5824                 coeff[0][i]= -level;
5825                 coeff[1][i]= -level+1;
5826 //                coeff[2][k]= -level+2;
5827             }
5828             coeff_count[i]= FFMIN(level, 2);
5829             assert(coeff_count[i]);
5830             max |=level;
5831         }else{
5832             coeff[0][i]= (level>>31)|1;
5833             coeff_count[i]= 1;
5834         }
5835     }
5836
5837     *overflow= s->max_qcoeff < max; //overflow might have happened
5838
5839     if(last_non_zero < start_i){
5840         memset(block + start_i, 0, (64-start_i)*sizeof(DCTELEM));
5841         return last_non_zero;
5842     }
5843
5844     score_tab[start_i]= 0;
5845     survivor[0]= start_i;
5846     survivor_count= 1;
5847
5848     for(i=start_i; i<=last_non_zero; i++){
5849         int level_index, j;
5850         const int dct_coeff= ABS(block[ scantable[i] ]);
5851         const int zero_distortion= dct_coeff*dct_coeff;
5852         int best_score=256*256*256*120;
5853         for(level_index=0; level_index < coeff_count[i]; level_index++){
5854             int distortion;
5855             int level= coeff[level_index][i];
5856             const int alevel= ABS(level);
5857             int unquant_coeff;
5858
5859             assert(level);
5860
5861             if(s->out_format == FMT_H263){
5862                 unquant_coeff= alevel*qmul + qadd;
5863             }else{ //MPEG1
5864                 j= s->dsp.idct_permutation[ scantable[i] ]; //FIXME optimize
5865                 if(s->mb_intra){
5866                         unquant_coeff = (int)(  alevel  * qscale * s->intra_matrix[j]) >> 3;
5867                         unquant_coeff =   (unquant_coeff - 1) | 1;
5868                 }else{
5869                         unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[j])) >> 4;
5870                         unquant_coeff =   (unquant_coeff - 1) | 1;
5871                 }
5872                 unquant_coeff<<= 3;
5873             }
5874
5875             distortion= (unquant_coeff - dct_coeff) * (unquant_coeff - dct_coeff) - zero_distortion;
5876             level+=64;
5877             if((level&(~127)) == 0){
5878                 for(j=survivor_count-1; j>=0; j--){
5879                     int run= i - survivor[j];
5880                     int score= distortion + length[UNI_AC_ENC_INDEX(run, level)]*lambda;
5881                     score += score_tab[i-run];
5882
5883                     if(score < best_score){
5884                         best_score= score;
5885                         run_tab[i+1]= run;
5886                         level_tab[i+1]= level-64;
5887                     }
5888                 }
5889
5890                 if(s->out_format == FMT_H263){
5891                     for(j=survivor_count-1; j>=0; j--){
5892                         int run= i - survivor[j];
5893                         int score= distortion + last_length[UNI_AC_ENC_INDEX(run, level)]*lambda;
5894                         score += score_tab[i-run];
5895                         if(score < last_score){
5896                             last_score= score;
5897                             last_run= run;
5898                             last_level= level-64;
5899                             last_i= i+1;
5900                         }
5901                     }
5902                 }
5903             }else{
5904                 distortion += esc_length*lambda;
5905                 for(j=survivor_count-1; j>=0; j--){
5906                     int run= i - survivor[j];
5907                     int score= distortion + score_tab[i-run];
5908
5909                     if(score < best_score){
5910                         best_score= score;
5911                         run_tab[i+1]= run;
5912                         level_tab[i+1]= level-64;
5913                     }
5914                 }
5915
5916                 if(s->out_format == FMT_H263){
5917                     for(j=survivor_count-1; j>=0; j--){
5918                         int run= i - survivor[j];
5919                         int score= distortion + score_tab[i-run];
5920                         if(score < last_score){
5921                             last_score= score;
5922                             last_run= run;
5923                             last_level= level-64;
5924                             last_i= i+1;
5925                         }
5926                     }
5927                 }
5928             }
5929         }
5930
5931         score_tab[i+1]= best_score;
5932
5933         //Note: there is a VLC code in MPEG-4 which is 1 bit shorter than another one with a shorter run and the same level
5934         if(last_non_zero <= 27){
5935             for(; survivor_count; survivor_count--){
5936                 if(score_tab[ survivor[survivor_count-1] ] <= best_score)
5937                     break;
5938             }
5939         }else{
5940             for(; survivor_count; survivor_count--){
5941                 if(score_tab[ survivor[survivor_count-1] ] <= best_score + lambda)
5942                     break;
5943             }
5944         }
5945
5946         survivor[ survivor_count++ ]= i+1;
5947     }
5948
5949     if(s->out_format != FMT_H263){
5950         last_score= 256*256*256*120;
5951         for(i= survivor[0]; i<=last_non_zero + 1; i++){
5952             int score= score_tab[i];
5953             if(i) score += lambda*2; //FIXME more exact?
5954
5955             if(score < last_score){
5956                 last_score= score;
5957                 last_i= i;
5958                 last_level= level_tab[i];
5959                 last_run= run_tab[i];
5960             }
5961         }
5962     }
5963
5964     s->coded_score[n] = last_score;
5965
5966     dc= ABS(block[0]);
5967     last_non_zero= last_i - 1;
5968     memset(block + start_i, 0, (64-start_i)*sizeof(DCTELEM));
5969
5970     if(last_non_zero < start_i)
5971         return last_non_zero;
5972
5973     if(last_non_zero == 0 && start_i == 0){
5974         int best_level= 0;
5975         int best_score= dc * dc;
5976
5977         for(i=0; i<coeff_count[0]; i++){
5978             int level= coeff[i][0];
5979             int alevel= ABS(level);
5980             int unquant_coeff, score, distortion;
5981
5982             if(s->out_format == FMT_H263){
5983                     unquant_coeff= (alevel*qmul + qadd)>>3;
5984             }else{ //MPEG1
5985                     unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[0])) >> 4;
5986                     unquant_coeff =   (unquant_coeff - 1) | 1;
5987             }
5988             unquant_coeff = (unquant_coeff + 4) >> 3;
5989             unquant_coeff<<= 3 + 3;
5990
5991             distortion= (unquant_coeff - dc) * (unquant_coeff - dc);
5992             level+=64;
5993             if((level&(~127)) == 0) score= distortion + last_length[UNI_AC_ENC_INDEX(0, level)]*lambda;
5994             else                    score= distortion + esc_length*lambda;
5995
5996             if(score < best_score){
5997                 best_score= score;
5998                 best_level= level - 64;
5999             }
6000         }
6001         block[0]= best_level;
6002         s->coded_score[n] = best_score - dc*dc;
6003         if(best_level == 0) return -1;
6004         else                return last_non_zero;
6005     }
6006
6007     i= last_i;
6008     assert(last_level);
6009
6010     block[ perm_scantable[last_non_zero] ]= last_level;
6011     i -= last_run + 1;
6012
6013     for(; i>start_i; i -= run_tab[i] + 1){
6014         block[ perm_scantable[i-1] ]= level_tab[i];
6015     }
6016
6017     return last_non_zero;
6018 }
6019
6020 //#define REFINE_STATS 1
6021 static int16_t basis[64][64];
6022
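/* basis[k] holds the k-th 2D DCT basis function (indexed through the IDCT
   permutation), scaled by 1<<BASIS_SHIFT; dsp.try_8x8basis()/add_8x8basis()
   use it to evaluate and apply single-coefficient changes directly in the
   spatial domain. */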
6023 static void build_basis(uint8_t *perm){
6024     int i, j, x, y;
6025     emms_c();
6026     for(i=0; i<8; i++){
6027         for(j=0; j<8; j++){
6028             for(y=0; y<8; y++){
6029                 for(x=0; x<8; x++){
6030                     double s= 0.25*(1<<BASIS_SHIFT);
6031                     int index= 8*i + j;
6032                     int perm_index= perm[index];
6033                     if(i==0) s*= sqrt(0.5);
6034                     if(j==0) s*= sqrt(0.5);
6035                     basis[perm_index][8*x + y]= lrintf(s * cos((M_PI/8.0)*i*(x+0.5)) * cos((M_PI/8.0)*j*(y+0.5)));
6036                 }
6037             }
6038         }
6039     }
6040 }
6041
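/* Rate-distortion refinement of an already quantized block: rem[] holds the
   spatial-domain error of the current reconstruction, weight[] a per-pixel
   weighting supplied by the caller (remapped to the 16..63 range below).
   Each iteration tries changing every coefficient by +/-1, scores the VLC
   bit-cost delta plus the weighted spatial error (via try_8x8basis), applies
   the single best change and repeats until no change improves the score. */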
6042 static int dct_quantize_refine(MpegEncContext *s, //FIXME breaks denoise?
6043                         DCTELEM *block, int16_t *weight, DCTELEM *orig,
6044                         int n, int qscale){
6045     int16_t rem[64];
6046     DECLARE_ALIGNED_16(DCTELEM, d1[64]);
6047     const int *qmat;
6048     const uint8_t *scantable= s->intra_scantable.scantable;
6049     const uint8_t *perm_scantable= s->intra_scantable.permutated;
6050 //    unsigned int threshold1, threshold2;
6051 //    int bias=0;
6052     int run_tab[65];
6053     int prev_run=0;
6054     int prev_level=0;
6055     int qmul, qadd, start_i, last_non_zero, i, dc;
6056     uint8_t * length;
6057     uint8_t * last_length;
6058     int lambda;
6059     int rle_index, run, q, sum;
6060 #ifdef REFINE_STATS
6061 static int count=0;
6062 static int after_last=0;
6063 static int to_zero=0;
6064 static int from_zero=0;
6065 static int raise=0;
6066 static int lower=0;
6067 static int messed_sign=0;
6068 #endif
6069
6070     if(basis[0][0] == 0)
6071         build_basis(s->dsp.idct_permutation);
6072
6073     qmul= qscale*2;
6074     qadd= (qscale-1)|1;
6075     if (s->mb_intra) {
6076         if (!s->h263_aic) {
6077             if (n < 4)
6078                 q = s->y_dc_scale;
6079             else
6080                 q = s->c_dc_scale;
6081         } else{
6082             /* For AIC we skip quant/dequant of INTRADC */
6083             q = 1;
6084             qadd=0;
6085         }
6086         q <<= RECON_SHIFT-3;
6087         /* note: block[0] is assumed to be positive */
6088         dc= block[0]*q;
6089 //        block[0] = (block[0] + (q >> 1)) / q;
6090         start_i = 1;
6091         qmat = s->q_intra_matrix[qscale];
6092 //        if(s->mpeg_quant || s->out_format == FMT_MPEG1)
6093 //            bias= 1<<(QMAT_SHIFT-1);
6094         length     = s->intra_ac_vlc_length;
6095         last_length= s->intra_ac_vlc_last_length;
6096     } else {
6097         dc= 0;
6098         start_i = 0;
6099         qmat = s->q_inter_matrix[qscale];
6100         length     = s->inter_ac_vlc_length;
6101         last_length= s->inter_ac_vlc_last_length;
6102     }
6103     last_non_zero = s->block_last_index[n];
6104
6105 #ifdef REFINE_STATS
6106 {START_TIMER
6107 #endif
6108     dc += (1<<(RECON_SHIFT-1));
6109     for(i=0; i<64; i++){
6110         rem[i]= dc - (orig[i]<<RECON_SHIFT); //FIXME use orig directly instead of copying to rem[]
6111     }
6112 #ifdef REFINE_STATS
6113 STOP_TIMER("memset rem[]")}
6114 #endif
6115     sum=0;
6116     for(i=0; i<64; i++){
6117         int one= 36;
6118         int qns=4;
6119         int w;
6120
6121         w= ABS(weight[i]) + qns*one;
6122         w= 15 + (48*qns*one + w/2)/w; // 16 .. 63
6123
6124         weight[i] = w;
6125 //        w=weight[i] = (63*qns + (w/2)) / w;
6126
6127         assert(w>0);
6128         assert(w<(1<<6));
6129         sum += w*w;
6130     }
6131     lambda= sum*(uint64_t)s->lambda2 >> (FF_LAMBDA_SHIFT - 6 + 6 + 6 + 6);
6132 #ifdef REFINE_STATS
6133 {START_TIMER
6134 #endif
6135     run=0;
6136     rle_index=0;
6137     for(i=start_i; i<=last_non_zero; i++){
6138         int j= perm_scantable[i];
6139         const int level= block[j];
6140         int coeff;
6141
6142         if(level){
6143             if(level<0) coeff= qmul*level - qadd;
6144             else        coeff= qmul*level + qadd;
6145             run_tab[rle_index++]=run;
6146             run=0;
6147
6148             s->dsp.add_8x8basis(rem, basis[j], coeff);
6149         }else{
6150             run++;
6151         }
6152     }
6153 #ifdef REFINE_STATS
6154 if(last_non_zero>0){
6155 STOP_TIMER("init rem[]")
6156 }
6157 }
6158
6159 {START_TIMER
6160 #endif
6161     for(;;){
6162         int best_score=s->dsp.try_8x8basis(rem, weight, basis[0], 0);
6163         int best_coeff=0;
6164         int best_change=0;
6165         int run2, best_unquant_change=0, analyze_gradient;
6166 #ifdef REFINE_STATS
6167 {START_TIMER
6168 #endif
6169         analyze_gradient = last_non_zero > 2 || s->avctx->quantizer_noise_shaping >= 3;
6170
6171         if(analyze_gradient){
6172 #ifdef REFINE_STATS
6173 {START_TIMER
6174 #endif
6175             for(i=0; i<64; i++){
6176                 int w= weight[i];
6177
6178                 d1[i] = (rem[i]*w*w + (1<<(RECON_SHIFT+12-1)))>>(RECON_SHIFT+12);
6179             }
6180 #ifdef REFINE_STATS
6181 STOP_TIMER("rem*w*w")}
6182 {START_TIMER
6183 #endif
6184             s->dsp.fdct(d1);
6185 #ifdef REFINE_STATS
6186 STOP_TIMER("dct")}
6187 #endif
6188         }
6189
6190         if(start_i){
6191             const int level= block[0];
6192             int change, old_coeff;
6193
6194             assert(s->mb_intra);
6195
6196             old_coeff= q*level;
6197
6198             for(change=-1; change<=1; change+=2){
6199                 int new_level= level + change;
6200                 int score, new_coeff;
6201
6202                 new_coeff= q*new_level;
6203                 if(new_coeff >= 2048 || new_coeff < 0)
6204                     continue;
6205
6206                 score= s->dsp.try_8x8basis(rem, weight, basis[0], new_coeff - old_coeff);
6207                 if(score<best_score){
6208                     best_score= score;
6209                     best_coeff= 0;
6210                     best_change= change;
6211                     best_unquant_change= new_coeff - old_coeff;
6212                 }
6213             }
6214         }
6215
6216         run=0;
6217         rle_index=0;
6218         run2= run_tab[rle_index++];
6219         prev_level=0;
6220         prev_run=0;
6221
6222         for(i=start_i; i<64; i++){
6223             int j= perm_scantable[i];
6224             const int level= block[j];
6225             int change, old_coeff;
6226
6227             if(s->avctx->quantizer_noise_shaping < 3 && i > last_non_zero + 1)
6228                 break;
6229
6230             if(level){
6231                 if(level<0) old_coeff= qmul*level - qadd;
6232                 else        old_coeff= qmul*level + qadd;
6233                 run2= run_tab[rle_index++]; //FIXME ! maybe after last
6234             }else{
6235                 old_coeff=0;
6236                 run2--;
6237                 assert(run2>=0 || i >= last_non_zero );
6238             }
6239
6240             for(change=-1; change<=1; change+=2){
6241                 int new_level= level + change;
6242                 int score, new_coeff, unquant_change;
6243
6244                 score=0;
6245                 if(s->avctx->quantizer_noise_shaping < 2 && ABS(new_level) > ABS(level))
6246                    continue;
6247
6248                 if(new_level){
6249                     if(new_level<0) new_coeff= qmul*new_level - qadd;
6250                     else            new_coeff= qmul*new_level + qadd;
6251                     if(new_coeff >= 2048 || new_coeff <= -2048)
6252                         continue;
6253                     //FIXME check for overflow
6254
6255                     if(level){
6256                         if(level < 63 && level > -63){
6257                             if(i < last_non_zero)
6258                                 score +=   length[UNI_AC_ENC_INDEX(run, new_level+64)]
6259                                          - length[UNI_AC_ENC_INDEX(run, level+64)];
6260                             else
6261                                 score +=   last_length[UNI_AC_ENC_INDEX(run, new_level+64)]
6262                                          - last_length[UNI_AC_ENC_INDEX(run, level+64)];
6263                         }
6264                     }else{
6265                         assert(ABS(new_level)==1);
6266
6267                         if(analyze_gradient){
6268                             int g= d1[ scantable[i] ];
6269                             if(g && (g^new_level) >= 0)
6270                                 continue;
6271                         }
6272
6273                         if(i < last_non_zero){
6274                             int next_i= i + run2 + 1;
6275                             int next_level= block[ perm_scantable[next_i] ] + 64;
6276
6277                             if(next_level&(~127))
6278                                 next_level= 0;
6279
6280                             if(next_i < last_non_zero)
6281                                 score +=   length[UNI_AC_ENC_INDEX(run, 65)]
6282                                          + length[UNI_AC_ENC_INDEX(run2, next_level)]
6283                                          - length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
6284                             else
6285                                 score +=  length[UNI_AC_ENC_INDEX(run, 65)]
6286                                         + last_length[UNI_AC_ENC_INDEX(run2, next_level)]
6287                                         - last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
6288                         }else{
6289                             score += last_length[UNI_AC_ENC_INDEX(run, 65)];
6290                             if(prev_level){
6291                                 score +=  length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
6292                                         - last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
6293                             }
6294                         }
6295                     }
6296                 }else{
6297                     new_coeff=0;
6298                     assert(ABS(level)==1);
6299
6300                     if(i < last_non_zero){
6301                         int next_i= i + run2 + 1;
6302                         int next_level= block[ perm_scantable[next_i] ] + 64;
6303
6304                         if(next_level&(~127))
6305                             next_level= 0;
6306
6307                         if(next_i < last_non_zero)
6308                             score +=   length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
6309                                      - length[UNI_AC_ENC_INDEX(run2, next_level)]
6310                                      - length[UNI_AC_ENC_INDEX(run, 65)];
6311                         else
6312                             score +=   last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
6313                                      - last_length[UNI_AC_ENC_INDEX(run2, next_level)]
6314                                      - length[UNI_AC_ENC_INDEX(run, 65)];
6315                     }else{
6316                         score += -last_length[UNI_AC_ENC_INDEX(run, 65)];
6317                         if(prev_level){
6318                             score +=  last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
6319                                     - length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
6320                         }
6321                     }
6322                 }
6323
6324                 score *= lambda;
6325
6326                 unquant_change= new_coeff - old_coeff;
6327                 assert((score < 100*lambda && score > -100*lambda) || lambda==0);
6328
6329                 score+= s->dsp.try_8x8basis(rem, weight, basis[j], unquant_change);
6330                 if(score<best_score){
6331                     best_score= score;
6332                     best_coeff= i;
6333                     best_change= change;
6334                     best_unquant_change= unquant_change;
6335                 }
6336             }
6337             if(level){
6338                 prev_level= level + 64;
6339                 if(prev_level&(~127))
6340                     prev_level= 0;
6341                 prev_run= run;
6342                 run=0;
6343             }else{
6344                 run++;
6345             }
6346         }
6347 #ifdef REFINE_STATS
6348 STOP_TIMER("iterative step")}
6349 #endif
6350
6351         if(best_change){
6352             int j= perm_scantable[ best_coeff ];
6353
6354             block[j] += best_change;
6355
6356             if(best_coeff > last_non_zero){
6357                 last_non_zero= best_coeff;
6358                 assert(block[j]);
6359 #ifdef REFINE_STATS
6360 after_last++;
6361 #endif
6362             }else{
6363 #ifdef REFINE_STATS
6364 if(block[j]){
6365     if(block[j] - best_change){
6366         if(ABS(block[j]) > ABS(block[j] - best_change)){
6367             raise++;
6368         }else{
6369             lower++;
6370         }
6371     }else{
6372         from_zero++;
6373     }
6374 }else{
6375     to_zero++;
6376 }
6377 #endif
6378                 for(; last_non_zero>=start_i; last_non_zero--){
6379                     if(block[perm_scantable[last_non_zero]])
6380                         break;
6381                 }
6382             }
6383 #ifdef REFINE_STATS
6384 count++;
6385 if(256*256*256*64 % count == 0){
6386     printf("after_last:%d to_zero:%d from_zero:%d raise:%d lower:%d sign:%d xyp:%d/%d/%d\n", after_last, to_zero, from_zero, raise, lower, messed_sign, s->mb_x, s->mb_y, s->picture_number);
6387 }
6388 #endif
6389             run=0;
6390             rle_index=0;
6391             for(i=start_i; i<=last_non_zero; i++){
6392                 int j= perm_scantable[i];
6393                 const int level= block[j];
6394
6395                  if(level){
6396                      run_tab[rle_index++]=run;
6397                      run=0;
6398                  }else{
6399                      run++;
6400                  }
6401             }
6402
6403             s->dsp.add_8x8basis(rem, basis[j], best_unquant_change);
6404         }else{
6405             break;
6406         }
6407     }
6408 #ifdef REFINE_STATS
6409 if(last_non_zero>0){
6410 STOP_TIMER("iterative search")
6411 }
6412 }
6413 #endif
6414
6415     return last_non_zero;
6416 }
6417
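/* Plain (non-trellis) quantizer. The test
   (unsigned)(level + threshold1) > threshold2 is a single-compare way of
   checking |level| > threshold1, i.e. whether bias + |level| reaches
   1<<QMAT_SHIFT; everything inside that dead zone is quantized to 0. */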
6418 static int dct_quantize_c(MpegEncContext *s,
6419                         DCTELEM *block, int n,
6420                         int qscale, int *overflow)
6421 {
6422     int i, j, level, last_non_zero, q, start_i;
6423     const int *qmat;
6424     const uint8_t *scantable= s->intra_scantable.scantable;
6425     int bias;
6426     int max=0;
6427     unsigned int threshold1, threshold2;
6428
6429     s->dsp.fdct (block);
6430
6431     if(s->dct_error_sum)
6432         s->denoise_dct(s, block);
6433
6434     if (s->mb_intra) {
6435         if (!s->h263_aic) {
6436             if (n < 4)
6437                 q = s->y_dc_scale;
6438             else
6439                 q = s->c_dc_scale;
6440             q = q << 3;
6441         } else
6442             /* For AIC we skip quant/dequant of INTRADC */
6443             q = 1 << 3;
6444
6445         /* note: block[0] is assumed to be positive */
6446         block[0] = (block[0] + (q >> 1)) / q;
6447         start_i = 1;
6448         last_non_zero = 0;
6449         qmat = s->q_intra_matrix[qscale];
6450         bias= s->intra_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
6451     } else {
6452         start_i = 0;
6453         last_non_zero = -1;
6454         qmat = s->q_inter_matrix[qscale];
6455         bias= s->inter_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
6456     }
6457     threshold1= (1<<QMAT_SHIFT) - bias - 1;
6458     threshold2= (threshold1<<1);
6459     for(i=63;i>=start_i;i--) {
6460         j = scantable[i];
6461         level = block[j] * qmat[j];
6462
6463         if(((unsigned)(level+threshold1))>threshold2){
6464             last_non_zero = i;
6465             break;
6466         }else{
6467             block[j]=0;
6468         }
6469     }
6470     for(i=start_i; i<=last_non_zero; i++) {
6471         j = scantable[i];
6472         level = block[j] * qmat[j];
6473
6474 //        if(   bias+level >= (1<<QMAT_SHIFT)
6475 //           || bias-level >= (1<<QMAT_SHIFT)){
6476         if(((unsigned)(level+threshold1))>threshold2){
6477             if(level>0){
6478                 level= (bias + level)>>QMAT_SHIFT;
6479                 block[j]= level;
6480             }else{
6481                 level= (bias - level)>>QMAT_SHIFT;
6482                 block[j]= -level;
6483             }
6484             max |=level;
6485         }else{
6486             block[j]=0;
6487         }
6488     }
6489     *overflow= s->max_qcoeff < max; //overflow might have happened
6490
6491     /* we need this permutation so that we correct the IDCT; we only permute the nonzero elements */
6492     if (s->dsp.idct_permutation_type != FF_NO_IDCT_PERM)
6493         ff_block_permute(block, s->dsp.idct_permutation, scantable, last_non_zero);
6494
6495     return last_non_zero;
6496 }
6497
6498 #endif //CONFIG_ENCODERS
6499
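/* MPEG-1 dequantization: the (level - 1) | 1 step makes every reconstructed
   coefficient odd ("oddification"), which is how MPEG-1 limits encoder/decoder
   IDCT mismatch. */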
6500 static void dct_unquantize_mpeg1_intra_c(MpegEncContext *s,
6501                                    DCTELEM *block, int n, int qscale)
6502 {
6503     int i, level, nCoeffs;
6504     const uint16_t *quant_matrix;
6505
6506     nCoeffs= s->block_last_index[n];
6507
6508     if (n < 4)
6509         block[0] = block[0] * s->y_dc_scale;
6510     else
6511         block[0] = block[0] * s->c_dc_scale;
6512     /* XXX: only mpeg1 */
6513     quant_matrix = s->intra_matrix;
6514     for(i=1;i<=nCoeffs;i++) {
6515         int j= s->intra_scantable.permutated[i];
6516         level = block[j];
6517         if (level) {
6518             if (level < 0) {
6519                 level = -level;
6520                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
6521                 level = (level - 1) | 1;
6522                 level = -level;
6523             } else {
6524                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
6525                 level = (level - 1) | 1;
6526             }
6527             block[j] = level;
6528         }
6529     }
6530 }
6531
6532 static void dct_unquantize_mpeg1_inter_c(MpegEncContext *s,
6533                                    DCTELEM *block, int n, int qscale)
6534 {
6535     int i, level, nCoeffs;
6536     const uint16_t *quant_matrix;
6537
6538     nCoeffs= s->block_last_index[n];
6539
6540     quant_matrix = s->inter_matrix;
6541     for(i=0; i<=nCoeffs; i++) {
6542         int j= s->intra_scantable.permutated[i];
6543         level = block[j];
6544         if (level) {
6545             if (level < 0) {
6546                 level = -level;
6547                 level = (((level << 1) + 1) * qscale *
6548                          ((int) (quant_matrix[j]))) >> 4;
6549                 level = (level - 1) | 1;
6550                 level = -level;
6551             } else {
6552                 level = (((level << 1) + 1) * qscale *
6553                          ((int) (quant_matrix[j]))) >> 4;
6554                 level = (level - 1) | 1;
6555             }
6556             block[j] = level;
6557         }
6558     }
6559 }
6560
6561 static void dct_unquantize_mpeg2_intra_c(MpegEncContext *s,
6562                                    DCTELEM *block, int n, int qscale)
6563 {
6564     int i, level, nCoeffs;
6565     const uint16_t *quant_matrix;
6566
6567     if(s->alternate_scan) nCoeffs= 63;
6568     else nCoeffs= s->block_last_index[n];
6569
6570     if (n < 4)
6571         block[0] = block[0] * s->y_dc_scale;
6572     else
6573         block[0] = block[0] * s->c_dc_scale;
6574     quant_matrix = s->intra_matrix;
6575     for(i=1;i<=nCoeffs;i++) {
6576         int j= s->intra_scantable.permutated[i];
6577         level = block[j];
6578         if (level) {
6579             if (level < 0) {
6580                 level = -level;
6581                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
6582                 level = -level;
6583             } else {
6584                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
6585             }
6586             block[j] = level;
6587         }
6588     }
6589 }
6590
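/* MPEG-2 mismatch control: instead of oddification, the parity of the sum of
   all reconstructed coefficients is folded into the LSB of block[63]
   (the block[63] ^= sum & 1 at the end). */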
6591 static void dct_unquantize_mpeg2_intra_bitexact(MpegEncContext *s,
6592                                    DCTELEM *block, int n, int qscale)
6593 {
6594     int i, level, nCoeffs;
6595     const uint16_t *quant_matrix;
6596     int sum=-1;
6597
6598     if(s->alternate_scan) nCoeffs= 63;
6599     else nCoeffs= s->block_last_index[n];
6600
6601     if (n < 4)
6602         block[0] = block[0] * s->y_dc_scale;
6603     else
6604         block[0] = block[0] * s->c_dc_scale;
6605     quant_matrix = s->intra_matrix;
6606     for(i=1;i<=nCoeffs;i++) {
6607         int j= s->intra_scantable.permutated[i];
6608         level = block[j];
6609         if (level) {
6610             if (level < 0) {
6611                 level = -level;
6612                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
6613                 level = -level;
6614             } else {
6615                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
6616             }
6617             block[j] = level;
6618             sum+=level;
6619         }
6620     }
6621     block[63]^=sum&1;
6622 }
6623
6624 static void dct_unquantize_mpeg2_inter_c(MpegEncContext *s,
6625                                    DCTELEM *block, int n, int qscale)
6626 {
6627     int i, level, nCoeffs;
6628     const uint16_t *quant_matrix;
6629     int sum=-1;
6630
6631     if(s->alternate_scan) nCoeffs= 63;
6632     else nCoeffs= s->block_last_index[n];
6633
6634     quant_matrix = s->inter_matrix;
6635     for(i=0; i<=nCoeffs; i++) {
6636         int j= s->intra_scantable.permutated[i];
6637         level = block[j];
6638         if (level) {
6639             if (level < 0) {
6640                 level = -level;
6641                 level = (((level << 1) + 1) * qscale *
6642                          ((int) (quant_matrix[j]))) >> 4;
6643                 level = -level;
6644             } else {
6645                 level = (((level << 1) + 1) * qscale *
6646                          ((int) (quant_matrix[j]))) >> 4;
6647             }
6648             block[j] = level;
6649             sum+=level;
6650         }
6651     }
6652     block[63]^=sum&1;
6653 }
6654
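/* H.263-style dequantization: |level'| = 2*qscale*|level| + qadd with
   qadd = (qscale - 1) | 1, and qadd = 0 when advanced intra coding (AIC) is
   used; e.g. qscale=4, level=3 reconstructs to 2*4*3 + 3 = 27. */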
6655 static void dct_unquantize_h263_intra_c(MpegEncContext *s,
6656                                   DCTELEM *block, int n, int qscale)
6657 {
6658     int i, level, qmul, qadd;
6659     int nCoeffs;
6660
6661     assert(s->block_last_index[n]>=0);
6662
6663     qmul = qscale << 1;
6664
6665     if (!s->h263_aic) {
6666         if (n < 4)
6667             block[0] = block[0] * s->y_dc_scale;
6668         else
6669             block[0] = block[0] * s->c_dc_scale;
6670         qadd = (qscale - 1) | 1;
6671     }else{
6672         qadd = 0;
6673     }
6674     if(s->ac_pred)
6675         nCoeffs=63;
6676     else
6677         nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
6678
6679     for(i=1; i<=nCoeffs; i++) {
6680         level = block[i];
6681         if (level) {
6682             if (level < 0) {
6683                 level = level * qmul - qadd;
6684             } else {
6685                 level = level * qmul + qadd;
6686             }
6687             block[i] = level;
6688         }
6689     }
6690 }
6691
6692 static void dct_unquantize_h263_inter_c(MpegEncContext *s,
6693                                   DCTELEM *block, int n, int qscale)
6694 {
6695     int i, level, qmul, qadd;
6696     int nCoeffs;
6697
6698     assert(s->block_last_index[n]>=0);
6699
6700     qadd = (qscale - 1) | 1;
6701     qmul = qscale << 1;
6702
6703     nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
6704
6705     for(i=0; i<=nCoeffs; i++) {
6706         level = block[i];
6707         if (level) {
6708             if (level < 0) {
6709                 level = level * qmul - qadd;
6710             } else {
6711                 level = level * qmul + qadd;
6712             }
6713             block[i] = level;
6714         }
6715     }
6716 }
6717
6718 #ifdef CONFIG_ENCODERS
6719 AVCodec h263_encoder = {
6720     "h263",
6721     CODEC_TYPE_VIDEO,
6722     CODEC_ID_H263,
6723     sizeof(MpegEncContext),
6724     MPV_encode_init,
6725     MPV_encode_picture,
6726     MPV_encode_end,
6727     .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
6728 };
6729
6730 AVCodec h263p_encoder = {
6731     "h263p",
6732     CODEC_TYPE_VIDEO,
6733     CODEC_ID_H263P,
6734     sizeof(MpegEncContext),
6735     MPV_encode_init,
6736     MPV_encode_picture,
6737     MPV_encode_end,
6738     .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
6739 };
6740
6741 AVCodec flv_encoder = {
6742     "flv",
6743     CODEC_TYPE_VIDEO,
6744     CODEC_ID_FLV1,
6745     sizeof(MpegEncContext),
6746     MPV_encode_init,
6747     MPV_encode_picture,
6748     MPV_encode_end,
6749     .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
6750 };
6751
6752 AVCodec rv10_encoder = {
6753     "rv10",
6754     CODEC_TYPE_VIDEO,
6755     CODEC_ID_RV10,
6756     sizeof(MpegEncContext),
6757     MPV_encode_init,
6758     MPV_encode_picture,
6759     MPV_encode_end,
6760     .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
6761 };
6762
6763 AVCodec rv20_encoder = {
6764     "rv20",
6765     CODEC_TYPE_VIDEO,
6766     CODEC_ID_RV20,
6767     sizeof(MpegEncContext),
6768     MPV_encode_init,
6769     MPV_encode_picture,
6770     MPV_encode_end,
6771     .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
6772 };
6773
6774 AVCodec mpeg4_encoder = {
6775     "mpeg4",
6776     CODEC_TYPE_VIDEO,
6777     CODEC_ID_MPEG4,
6778     sizeof(MpegEncContext),
6779     MPV_encode_init,
6780     MPV_encode_picture,
6781     MPV_encode_end,
6782     .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
6783     .capabilities= CODEC_CAP_DELAY,
6784 };
6785
6786 AVCodec msmpeg4v1_encoder = {
6787     "msmpeg4v1",
6788     CODEC_TYPE_VIDEO,
6789     CODEC_ID_MSMPEG4V1,
6790     sizeof(MpegEncContext),
6791     MPV_encode_init,
6792     MPV_encode_picture,
6793     MPV_encode_end,
6794     .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
6795 };
6796
6797 AVCodec msmpeg4v2_encoder = {
6798     "msmpeg4v2",
6799     CODEC_TYPE_VIDEO,
6800     CODEC_ID_MSMPEG4V2,
6801     sizeof(MpegEncContext),
6802     MPV_encode_init,
6803     MPV_encode_picture,
6804     MPV_encode_end,
6805     .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
6806 };
6807
6808 AVCodec msmpeg4v3_encoder = {
6809     "msmpeg4",
6810     CODEC_TYPE_VIDEO,
6811     CODEC_ID_MSMPEG4V3,
6812     sizeof(MpegEncContext),
6813     MPV_encode_init,
6814     MPV_encode_picture,
6815     MPV_encode_end,
6816     .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
6817 };
6818
6819 AVCodec wmv1_encoder = {
6820     "wmv1",
6821     CODEC_TYPE_VIDEO,
6822     CODEC_ID_WMV1,
6823     sizeof(MpegEncContext),
6824     MPV_encode_init,
6825     MPV_encode_picture,
6826     MPV_encode_end,
6827     .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
6828 };
6829
6830 AVCodec mjpeg_encoder = {
6831     "mjpeg",
6832     CODEC_TYPE_VIDEO,
6833     CODEC_ID_MJPEG,
6834     sizeof(MpegEncContext),
6835     MPV_encode_init,
6836     MPV_encode_picture,
6837     MPV_encode_end,
6838     .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUVJ420P, -1},
6839 };
6840
6841 #endif //CONFIG_ENCODERS