libavcodec/mpegvideo.c
1 /*
2  * The simplest mpeg encoder (well, it was the simplest!)
3  * Copyright (c) 2000,2001 Fabrice Bellard.
4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5  *
6  * This library is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2 of the License, or (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with this library; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  *
20  * 4MV & hq & b-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
21  */
22
23 /**
24  * @file mpegvideo.c
25  * The simplest mpeg encoder (well, it was the simplest!).
26  */
27
28 #include "avcodec.h"
29 #include "dsputil.h"
30 #include "mpegvideo.h"
31 #include "faandct.h"
32 #include <limits.h>
33
34 #ifdef USE_FASTMEMCPY
35 #include "libvo/fastmemcpy.h"
36 #endif
37
38 //#undef NDEBUG
39 //#include <assert.h>
40
41 #ifdef CONFIG_ENCODERS
42 static int encode_picture(MpegEncContext *s, int picture_number);
43 #endif //CONFIG_ENCODERS
44 static void dct_unquantize_mpeg1_intra_c(MpegEncContext *s,
45                                    DCTELEM *block, int n, int qscale);
46 static void dct_unquantize_mpeg1_inter_c(MpegEncContext *s,
47                                    DCTELEM *block, int n, int qscale);
48 static void dct_unquantize_mpeg2_intra_c(MpegEncContext *s,
49                                    DCTELEM *block, int n, int qscale);
50 static void dct_unquantize_mpeg2_intra_bitexact(MpegEncContext *s,
51                                    DCTELEM *block, int n, int qscale);
52 static void dct_unquantize_mpeg2_inter_c(MpegEncContext *s,
53                                    DCTELEM *block, int n, int qscale);
54 static void dct_unquantize_h263_intra_c(MpegEncContext *s,
55                                   DCTELEM *block, int n, int qscale);
56 static void dct_unquantize_h263_inter_c(MpegEncContext *s,
57                                   DCTELEM *block, int n, int qscale);
58 static void draw_edges_c(uint8_t *buf, int wrap, int width, int height, int w);
59 #ifdef CONFIG_ENCODERS
60 static int dct_quantize_c(MpegEncContext *s, DCTELEM *block, int n, int qscale, int *overflow);
61 static int dct_quantize_trellis_c(MpegEncContext *s, DCTELEM *block, int n, int qscale, int *overflow);
62 static int dct_quantize_refine(MpegEncContext *s, DCTELEM *block, int16_t *weight, DCTELEM *orig, int n, int qscale);
63 static int sse_mb(MpegEncContext *s);
64 static void  denoise_dct_c(MpegEncContext *s, DCTELEM *block);
65 #endif //CONFIG_ENCODERS
66
67 #ifdef HAVE_XVMC
68 extern int  XVMC_field_start(MpegEncContext*s, AVCodecContext *avctx);
69 extern void XVMC_field_end(MpegEncContext *s);
70 extern void XVMC_decode_mb(MpegEncContext *s);
71 #endif
72
73 void (*draw_edges)(uint8_t *buf, int wrap, int width, int height, int w)= draw_edges_c;
74
75
76 /* enable all paranoid tests for rounding, overflows, etc... */
77 //#define PARANOID
78
79 //#define DEBUG
80
81
82 /* for jpeg fast DCT */
83 #define CONST_BITS 14
84
85 static const uint16_t aanscales[64] = {
86     /* precomputed values scaled up by 14 bits */
87     16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
88     22725, 31521, 29692, 26722, 22725, 17855, 12299,  6270,
89     21407, 29692, 27969, 25172, 21407, 16819, 11585,  5906,
90     19266, 26722, 25172, 22654, 19266, 15137, 10426,  5315,
91     16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
92     12873, 17855, 16819, 15137, 12873, 10114,  6967,  3552,
93     8867 , 12299, 11585, 10426,  8867,  6967,  4799,  2446,
94     4520 ,  6270,  5906,  5315,  4520,  3552,  2446,  1247
95 };
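/* These are, roughly, round(2^14 * a(u) * a(v)) with a(0) = 1 and
   a(k) = cos(k*pi/16) * sqrt(2) for k > 0, i.e. the per-coefficient
   post-scale factors of the AAN fast DCT; dividing by them undoes the
   scaling that fdct_ifast leaves in its output. */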
96
97 static const uint8_t h263_chroma_roundtab[16] = {
98 //  0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15
99     0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2,
100 };
101
102 static const uint8_t ff_default_chroma_qscale_table[32]={
103 //  0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
104     0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
105 };
106
107 #ifdef CONFIG_ENCODERS
108 static uint8_t (*default_mv_penalty)[MAX_MV*2+1]=NULL;
109 static uint8_t default_fcode_tab[MAX_MV*2+1];
110
111 enum PixelFormat ff_yuv420p_list[2]= {PIX_FMT_YUV420P, -1};
112
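/* convert_matrix(): for every qscale in [qmin, qmax] this precomputes
   fixed-point reciprocals of qscale * quant_matrix[] so the quantizer can
   use a multiply + shift instead of a division per coefficient.  For DCTs
   that keep the AAN post-scale in their output (fdct_ifast, and ff_faandct
   without FAAN_POSTSCALE) the aanscales[] factors are folded into the
   reciprocal as well.  qmat16[][0] holds a 16-bit reciprocal and
   qmat16[][1] a matching rounding bias for the 16-bit quantizer path
   (QMAT_SHIFT_MMX).  The loop at the end only checks whether the chosen
   QMAT_SHIFT can overflow 32 bits and prints a warning if so. */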
113 static void convert_matrix(DSPContext *dsp, int (*qmat)[64], uint16_t (*qmat16)[2][64],
114                            const uint16_t *quant_matrix, int bias, int qmin, int qmax, int intra)
115 {
116     int qscale;
117     int shift=0;
118
119     for(qscale=qmin; qscale<=qmax; qscale++){
120         int i;
121         if (dsp->fdct == ff_jpeg_fdct_islow
122 #ifdef FAAN_POSTSCALE
123             || dsp->fdct == ff_faandct
124 #endif
125             ) {
126             for(i=0;i<64;i++) {
127                 const int j= dsp->idct_permutation[i];
128                 /* 16 <= qscale * quant_matrix[i] <= 7905 */
129                 /* 19952         <= aanscales[i] * qscale * quant_matrix[i]           <= 249205026 */
130                 /* (1<<36)/19952 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= (1<<36)/249205026 */
131                 /* 3444240       >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= 275 */
132
133                 qmat[qscale][i] = (int)((uint64_t_C(1) << QMAT_SHIFT) /
134                                 (qscale * quant_matrix[j]));
135             }
136         } else if (dsp->fdct == fdct_ifast
137 #ifndef FAAN_POSTSCALE
138                    || dsp->fdct == ff_faandct
139 #endif
140                    ) {
141             for(i=0;i<64;i++) {
142                 const int j= dsp->idct_permutation[i];
143                 /* 16 <= qscale * quant_matrix[i] <= 7905 */
144                 /* 19952         <= aanscales[i] * qscale * quant_matrix[i]           <= 249205026 */
145                 /* (1<<36)/19952 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= (1<<36)/249205026 */
146                 /* 3444240       >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= 275 */
147
148                 qmat[qscale][i] = (int)((uint64_t_C(1) << (QMAT_SHIFT + 14)) /
149                                 (aanscales[i] * qscale * quant_matrix[j]));
150             }
151         } else {
152             for(i=0;i<64;i++) {
153                 const int j= dsp->idct_permutation[i];
154                 /* We can safely suppose that 16 <= quant_matrix[i] <= 255
155                    So 16           <= qscale * quant_matrix[i]             <= 7905
156                    so (1<<19) / 16 >= (1<<19) / (qscale * quant_matrix[i]) >= (1<<19) / 7905
157                    so 32768        >= (1<<19) / (qscale * quant_matrix[i]) >= 67
158                 */
159                 qmat[qscale][i] = (int)((uint64_t_C(1) << QMAT_SHIFT) / (qscale * quant_matrix[j]));
160 //                qmat  [qscale][i] = (1 << QMAT_SHIFT_MMX) / (qscale * quant_matrix[i]);
161                 qmat16[qscale][0][i] = (1 << QMAT_SHIFT_MMX) / (qscale * quant_matrix[j]);
162
163                 if(qmat16[qscale][0][i]==0 || qmat16[qscale][0][i]==128*256) qmat16[qscale][0][i]=128*256-1;
164                 qmat16[qscale][1][i]= ROUNDED_DIV(bias<<(16-QUANT_BIAS_SHIFT), qmat16[qscale][0][i]);
165             }
166         }
167
168         for(i=intra; i<64; i++){
169             int64_t max= 8191;
170             if (dsp->fdct == fdct_ifast
171 #ifndef FAAN_POSTSCALE
172                    || dsp->fdct == ff_faandct
173 #endif
174                    ) {
175                 max= (8191LL*aanscales[i]) >> 14;
176             }
177             while(((max * qmat[qscale][i]) >> shift) > INT_MAX){
178                 shift++;
179             }
180         }
181     }
182     if(shift){
183         av_log(NULL, AV_LOG_INFO, "Warning, QMAT_SHIFT is larger than %d, overflows possible\n", QMAT_SHIFT - shift);
184     }
185 }
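/* Example of how the tables above are meant to be used (a sketch, with
   QMAT_SHIFT == 22 as the (1<<36) comments above imply): for qscale == 2
   and quant_matrix[j] == 16, qmat[2][i] == (1<<22) / 32 == 131072, so the
   quantizer can compute roughly
       level = (coeff * 131072 + rounding_bias) >> 22;    which is ~ coeff / 32
   instead of dividing by qscale * quant_matrix[j] for every coefficient. */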
186
187 static inline void update_qscale(MpegEncContext *s){
188     s->qscale= (s->lambda*139 + FF_LAMBDA_SCALE*64) >> (FF_LAMBDA_SHIFT + 7);
189     s->qscale= clip(s->qscale, s->avctx->qmin, s->avctx->qmax);
190
191     s->lambda2= (s->lambda*s->lambda + FF_LAMBDA_SCALE/2) >> FF_LAMBDA_SHIFT;
192 }
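/* update_qscale(): since 139 / (1 << (FF_LAMBDA_SHIFT + 7)) is about
   1/117.9, this is essentially qscale = lambda / FF_QP2LAMBDA with rounding,
   clipped to the user qmin/qmax range (approximate reading, assuming the
   usual FF_LAMBDA_SHIFT == 7 / FF_QP2LAMBDA == 118 definitions).  lambda2 is
   lambda squared, rescaled back into lambda units, for the RD decisions. */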
193 #endif //CONFIG_ENCODERS
194
195 void ff_init_scantable(uint8_t *permutation, ScanTable *st, const uint8_t *src_scantable){
196     int i;
197     int end;
198
199     st->scantable= src_scantable;
200
201     for(i=0; i<64; i++){
202         int j;
203         j = src_scantable[i];
204         st->permutated[i] = permutation[j];
205 #ifdef ARCH_POWERPC
206         st->inverse[j] = i;
207 #endif
208     }
209
210     end=-1;
211     for(i=0; i<64; i++){
212         int j;
213         j = st->permutated[i];
214         if(j>end) end=j;
215         st->raster_end[i]= end;
216     }
217 }
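/* After this, st->permutated[i] gives the coefficient index at scan
   position i, already run through the IDCT's permutation, and
   st->raster_end[i] is the highest permuted index seen up to position i
   (useful, e.g., to bound how much of a block has to be cleared or
   processed).  st->inverse[] maps a raster coefficient index back to its
   scan position and is only filled on PowerPC. */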
218
219 #ifdef CONFIG_ENCODERS
220 void ff_write_quant_matrix(PutBitContext *pb, uint16_t *matrix){
221     int i;
222
223     if(matrix){
224         put_bits(pb, 1, 1);
225         for(i=0;i<64;i++) {
226             put_bits(pb, 8, matrix[ ff_zigzag_direct[i] ]);
227         }
228     }else
229         put_bits(pb, 1, 0);
230 }
231 #endif //CONFIG_ENCODERS
232
233 const uint8_t *ff_find_start_code(const uint8_t * restrict p, const uint8_t *end, uint32_t * restrict state){
234     int i;
235
236     assert(p<=end);
237     if(p>=end)
238         return end;
239
240     for(i=0; i<3; i++){
241         uint32_t tmp= *state << 8;
242         *state= tmp + *(p++);
243         if(tmp == 0x100 || p==end)
244             return p;
245     }
246
247     while(p<end){
248         if     (p[-1] > 1      ) p+= 3;
249         else if(p[-2]          ) p+= 2;
250         else if(p[-3]|(p[-1]-1)) p++;
251         else{
252             p++;
253             break;
254         }
255     }
256
257     p= FFMIN(p, end)-4;
258     *state=  be2me_32(unaligned32(p));
259
260     return p+4;
261 }
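/* ff_find_start_code() scans for an MPEG-style start code prefix
   (00 00 01 xx).  *state carries the last four bytes seen across calls, so
   a start code that straddles two buffers is still found; the return value
   points just past the byte following the prefix, or to 'end' if nothing
   was found.  Typical usage is roughly (sketch, handle_start_code() is a
   hypothetical handler):

       uint32_t state = -1;                 // no bytes seen yet
       p = ff_find_start_code(p, end, &state);
       if ((state & 0xFFFFFF00) == 0x100)   // found 00 00 01 xx
           handle_start_code(state & 0xFF);
*/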
262
263 /* init common dct for both encoder and decoder */
264 int DCT_common_init(MpegEncContext *s)
265 {
266     s->dct_unquantize_h263_intra = dct_unquantize_h263_intra_c;
267     s->dct_unquantize_h263_inter = dct_unquantize_h263_inter_c;
268     s->dct_unquantize_mpeg1_intra = dct_unquantize_mpeg1_intra_c;
269     s->dct_unquantize_mpeg1_inter = dct_unquantize_mpeg1_inter_c;
270     s->dct_unquantize_mpeg2_intra = dct_unquantize_mpeg2_intra_c;
271     if(s->flags & CODEC_FLAG_BITEXACT)
272         s->dct_unquantize_mpeg2_intra = dct_unquantize_mpeg2_intra_bitexact;
273     s->dct_unquantize_mpeg2_inter = dct_unquantize_mpeg2_inter_c;
274
275 #ifdef CONFIG_ENCODERS
276     s->dct_quantize= dct_quantize_c;
277     s->denoise_dct= denoise_dct_c;
278 #endif //CONFIG_ENCODERS
279
280 #ifdef HAVE_MMX
281     MPV_common_init_mmx(s);
282 #endif
283 #ifdef ARCH_ALPHA
284     MPV_common_init_axp(s);
285 #endif
286 #ifdef HAVE_MLIB
287     MPV_common_init_mlib(s);
288 #endif
289 #ifdef HAVE_MMI
290     MPV_common_init_mmi(s);
291 #endif
292 #ifdef ARCH_ARMV4L
293     MPV_common_init_armv4l(s);
294 #endif
295 #ifdef ARCH_POWERPC
296     MPV_common_init_ppc(s);
297 #endif
298
299 #ifdef CONFIG_ENCODERS
300     s->fast_dct_quantize= s->dct_quantize;
301
302     if(s->flags&CODEC_FLAG_TRELLIS_QUANT){
303         s->dct_quantize= dct_quantize_trellis_c; //move before MPV_common_init_*
304     }
305
306 #endif //CONFIG_ENCODERS
307
308     /* load & permute scantables
309        note: only wmv uses different ones
310     */
311     if(s->alternate_scan){
312         ff_init_scantable(s->dsp.idct_permutation, &s->inter_scantable  , ff_alternate_vertical_scan);
313         ff_init_scantable(s->dsp.idct_permutation, &s->intra_scantable  , ff_alternate_vertical_scan);
314     }else{
315         ff_init_scantable(s->dsp.idct_permutation, &s->inter_scantable  , ff_zigzag_direct);
316         ff_init_scantable(s->dsp.idct_permutation, &s->intra_scantable  , ff_zigzag_direct);
317     }
318     ff_init_scantable(s->dsp.idct_permutation, &s->intra_h_scantable, ff_alternate_horizontal_scan);
319     ff_init_scantable(s->dsp.idct_permutation, &s->intra_v_scantable, ff_alternate_vertical_scan);
320
321     return 0;
322 }
323
324 static void copy_picture(Picture *dst, Picture *src){
325     *dst = *src;
326     dst->type= FF_BUFFER_TYPE_COPY;
327 }
328
329 #ifdef CONFIG_ENCODERS
330 static void copy_picture_attributes(MpegEncContext *s, AVFrame *dst, AVFrame *src){
331     int i;
332
333     dst->pict_type              = src->pict_type;
334     dst->quality                = src->quality;
335     dst->coded_picture_number   = src->coded_picture_number;
336     dst->display_picture_number = src->display_picture_number;
337 //    dst->reference              = src->reference;
338     dst->pts                    = src->pts;
339     dst->interlaced_frame       = src->interlaced_frame;
340     dst->top_field_first        = src->top_field_first;
341
342     if(s->avctx->me_threshold){
343         if(!src->motion_val[0])
344             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.motion_val not set!\n");
345         if(!src->mb_type)
346             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.mb_type not set!\n");
347         if(!src->ref_index[0])
348             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.ref_index not set!\n");
349         if(src->motion_subsample_log2 != dst->motion_subsample_log2)
350             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.motion_subsample_log2 doesn't match! (%d!=%d)\n",
351             src->motion_subsample_log2, dst->motion_subsample_log2);
352
353         memcpy(dst->mb_type, src->mb_type, s->mb_stride * s->mb_height * sizeof(dst->mb_type[0]));
354
355         for(i=0; i<2; i++){
356             int stride= ((16*s->mb_width )>>src->motion_subsample_log2) + 1;
357             int height= ((16*s->mb_height)>>src->motion_subsample_log2);
358
359             if(src->motion_val[i] && src->motion_val[i] != dst->motion_val[i]){
360                 memcpy(dst->motion_val[i], src->motion_val[i], 2*stride*height*sizeof(int16_t));
361             }
362             if(src->ref_index[i] && src->ref_index[i] != dst->ref_index[i]){
363                 memcpy(dst->ref_index[i], src->ref_index[i], s->b8_stride*2*s->mb_height*sizeof(int8_t));
364             }
365         }
366     }
367 }
368 #endif
369
370 /**
371  * allocates a Picture
372  * The pixels are allocated/set by calling get_buffer() if shared=0
373  */
374 static int alloc_picture(MpegEncContext *s, Picture *pic, int shared){
375     const int big_mb_num= s->mb_stride*(s->mb_height+1) + 1; //the +1 is needed so memset(,,stride*height) doesn't sig11
376     const int mb_array_size= s->mb_stride*s->mb_height;
377     const int b8_array_size= s->b8_stride*s->mb_height*2;
378     const int b4_array_size= s->b4_stride*s->mb_height*4;
379     int i;
380
381     if(shared){
382         assert(pic->data[0]);
383         assert(pic->type == 0 || pic->type == FF_BUFFER_TYPE_SHARED);
384         pic->type= FF_BUFFER_TYPE_SHARED;
385     }else{
386         int r;
387
388         assert(!pic->data[0]);
389
390         r= s->avctx->get_buffer(s->avctx, (AVFrame*)pic);
391
392         if(r<0 || !pic->age || !pic->type || !pic->data[0]){
393             av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed (%d %d %d %p)\n", r, pic->age, pic->type, pic->data[0]);
394             return -1;
395         }
396
397         if(s->linesize && (s->linesize != pic->linesize[0] || s->uvlinesize != pic->linesize[1])){
398             av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed (stride changed)\n");
399             return -1;
400         }
401
402         if(pic->linesize[1] != pic->linesize[2]){
403             av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed (uv stride mismatch)\n");
404             return -1;
405         }
406
407         s->linesize  = pic->linesize[0];
408         s->uvlinesize= pic->linesize[1];
409     }
410
411     if(pic->qscale_table==NULL){
412         if (s->encoding) {
413             CHECKED_ALLOCZ(pic->mb_var   , mb_array_size * sizeof(int16_t))
414             CHECKED_ALLOCZ(pic->mc_mb_var, mb_array_size * sizeof(int16_t))
415             CHECKED_ALLOCZ(pic->mb_mean  , mb_array_size * sizeof(int8_t))
416         }
417
418         CHECKED_ALLOCZ(pic->mbskip_table , mb_array_size * sizeof(uint8_t)+2) //the +2 is for the slice end check
419         CHECKED_ALLOCZ(pic->qscale_table , mb_array_size * sizeof(uint8_t))
420         CHECKED_ALLOCZ(pic->mb_type_base , big_mb_num    * sizeof(uint32_t))
421         pic->mb_type= pic->mb_type_base + s->mb_stride+1;
422         if(s->out_format == FMT_H264){
423             for(i=0; i<2; i++){
424                 CHECKED_ALLOCZ(pic->motion_val_base[i], 2 * (b4_array_size+4)  * sizeof(int16_t))
425                 pic->motion_val[i]= pic->motion_val_base[i]+4;
426                 CHECKED_ALLOCZ(pic->ref_index[i], b8_array_size * sizeof(uint8_t))
427             }
428             pic->motion_subsample_log2= 2;
429         }else if(s->out_format == FMT_H263 || s->encoding || (s->avctx->debug&FF_DEBUG_MV) || (s->avctx->debug_mv)){
430             for(i=0; i<2; i++){
431                 CHECKED_ALLOCZ(pic->motion_val_base[i], 2 * (b8_array_size+4) * sizeof(int16_t))
432                 pic->motion_val[i]= pic->motion_val_base[i]+4;
433                 CHECKED_ALLOCZ(pic->ref_index[i], b8_array_size * sizeof(uint8_t))
434             }
435             pic->motion_subsample_log2= 3;
436         }
437         if(s->avctx->debug&FF_DEBUG_DCT_COEFF) {
438             CHECKED_ALLOCZ(pic->dct_coeff, 64 * mb_array_size * sizeof(DCTELEM)*6)
439         }
440         pic->qstride= s->mb_stride;
441         CHECKED_ALLOCZ(pic->pan_scan , 1 * sizeof(AVPanScan))
442     }
443
444     //it might be nicer if the application would keep track of these but it would require an API change
445     memmove(s->prev_pict_types+1, s->prev_pict_types, PREV_PICT_TYPES_BUFFER_SIZE-1);
446     s->prev_pict_types[0]= s->pict_type;
447     if(pic->age < PREV_PICT_TYPES_BUFFER_SIZE && s->prev_pict_types[pic->age] == B_TYPE)
448         pic->age= INT_MAX; // skipped MBs in b frames are quite rare in mpeg1/2 and it's a bit tricky to skip them anyway
449
450     return 0;
451 fail: //for the CHECKED_ALLOCZ macro
452     return -1;
453 }
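/* Note on the mb_type_base / mb_type split above: the table is allocated
   with an extra row and column and mb_type points s->mb_stride + 1 entries
   into it, so reads of the left/top neighbours of the first macroblock row
   and column stay inside the allocation instead of needing special cases.
   A similar +4 offset is applied to motion_val. */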
454
455 /**
456  * deallocates a picture
457  */
458 static void free_picture(MpegEncContext *s, Picture *pic){
459     int i;
460
461     if(pic->data[0] && pic->type!=FF_BUFFER_TYPE_SHARED){
462         s->avctx->release_buffer(s->avctx, (AVFrame*)pic);
463     }
464
465     av_freep(&pic->mb_var);
466     av_freep(&pic->mc_mb_var);
467     av_freep(&pic->mb_mean);
468     av_freep(&pic->mbskip_table);
469     av_freep(&pic->qscale_table);
470     av_freep(&pic->mb_type_base);
471     av_freep(&pic->dct_coeff);
472     av_freep(&pic->pan_scan);
473     pic->mb_type= NULL;
474     for(i=0; i<2; i++){
475         av_freep(&pic->motion_val_base[i]);
476         av_freep(&pic->ref_index[i]);
477     }
478
479     if(pic->type == FF_BUFFER_TYPE_SHARED){
480         for(i=0; i<4; i++){
481             pic->base[i]=
482             pic->data[i]= NULL;
483         }
484         pic->type= 0;
485     }
486 }
487
488 static int init_duplicate_context(MpegEncContext *s, MpegEncContext *base){
489     int i;
490
491     // edge emu needs blocksize + filter length - 1 (=17x17 for halfpel / 21x21 for h264)
492     CHECKED_ALLOCZ(s->allocated_edge_emu_buffer, (s->width+64)*2*21*2); //(width + edge + align)*interlaced*MBsize*tolerance
493     s->edge_emu_buffer= s->allocated_edge_emu_buffer + (s->width+64)*2*21;
494
495      //FIXME should be linesize instead of s->width*2 but that isn't known before get_buffer()
496     CHECKED_ALLOCZ(s->me.scratchpad,  (s->width+64)*4*16*2*sizeof(uint8_t))
497     s->rd_scratchpad=   s->me.scratchpad;
498     s->b_scratchpad=    s->me.scratchpad;
499     s->obmc_scratchpad= s->me.scratchpad + 16;
500     if (s->encoding) {
501         CHECKED_ALLOCZ(s->me.map      , ME_MAP_SIZE*sizeof(uint32_t))
502         CHECKED_ALLOCZ(s->me.score_map, ME_MAP_SIZE*sizeof(uint32_t))
503         if(s->avctx->noise_reduction){
504             CHECKED_ALLOCZ(s->dct_error_sum, 2 * 64 * sizeof(int))
505         }
506     }
507     CHECKED_ALLOCZ(s->blocks, 64*12*2 * sizeof(DCTELEM))
508     s->block= s->blocks[0];
509
510     for(i=0;i<12;i++){
511         s->pblocks[i] = (short *)(&s->block[i]);
512     }
513     return 0;
514 fail:
515     return -1; //free() through MPV_common_end()
516 }
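/* init_duplicate_context() allocates the per-slice-thread scratch state:
   the edge emulation buffer, motion estimation maps and the DCT block
   array.  rd_scratchpad, b_scratchpad and obmc_scratchpad are carved out of
   the single me.scratchpad allocation rather than allocated separately. */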
517
518 static void free_duplicate_context(MpegEncContext *s){
519     if(s==NULL) return;
520
521     av_freep(&s->allocated_edge_emu_buffer); s->edge_emu_buffer= NULL;
522     av_freep(&s->me.scratchpad);
523     s->rd_scratchpad=
524     s->b_scratchpad=
525     s->obmc_scratchpad= NULL;
526
527     av_freep(&s->dct_error_sum);
528     av_freep(&s->me.map);
529     av_freep(&s->me.score_map);
530     av_freep(&s->blocks);
531     s->block= NULL;
532 }
533
534 static void backup_duplicate_context(MpegEncContext *bak, MpegEncContext *src){
535 #define COPY(a) bak->a= src->a
536     COPY(allocated_edge_emu_buffer);
537     COPY(edge_emu_buffer);
538     COPY(me.scratchpad);
539     COPY(rd_scratchpad);
540     COPY(b_scratchpad);
541     COPY(obmc_scratchpad);
542     COPY(me.map);
543     COPY(me.score_map);
544     COPY(blocks);
545     COPY(block);
546     COPY(start_mb_y);
547     COPY(end_mb_y);
548     COPY(me.map_generation);
549     COPY(pb);
550     COPY(dct_error_sum);
551     COPY(dct_count[0]);
552     COPY(dct_count[1]);
553 #undef COPY
554 }
555
556 void ff_update_duplicate_context(MpegEncContext *dst, MpegEncContext *src){
557     MpegEncContext bak;
558     int i;
559     //FIXME copy only needed parts
560 //START_TIMER
561     backup_duplicate_context(&bak, dst);
562     memcpy(dst, src, sizeof(MpegEncContext));
563     backup_duplicate_context(dst, &bak);
564     for(i=0;i<12;i++){
565         dst->pblocks[i] = (short *)(&dst->block[i]);
566     }
567 //STOP_TIMER("update_duplicate_context") //about 10k cycles / 0.01 sec for 1000frames on 1ghz with 2 threads
568 }
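/* The backup/memcpy/restore sequence above copies the master context into a
   slice-thread context while preserving the thread's own pointers (scratch
   buffers, bit writer, block arrays) listed in backup_duplicate_context();
   pblocks[] is then re-derived so it points into the thread's own block
   array rather than the master's. */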
569
570 #ifdef CONFIG_ENCODERS
571 static void update_duplicate_context_after_me(MpegEncContext *dst, MpegEncContext *src){
572 #define COPY(a) dst->a= src->a
573     COPY(pict_type);
574     COPY(current_picture);
575     COPY(f_code);
576     COPY(b_code);
577     COPY(qscale);
578     COPY(lambda);
579     COPY(lambda2);
580     COPY(picture_in_gop_number);
581     COPY(gop_picture_number);
582     COPY(frame_pred_frame_dct); //FIXME don't set in encode_header
583     COPY(progressive_frame); //FIXME don't set in encode_header
584     COPY(partitioned_frame); //FIXME don't set in encode_header
585 #undef COPY
586 }
587 #endif
588
589 /**
590  * sets the given MpegEncContext to common defaults (same for encoding and decoding).
591  * the changed fields will not depend upon the prior state of the MpegEncContext.
592  */
593 static void MPV_common_defaults(MpegEncContext *s){
594     s->y_dc_scale_table=
595     s->c_dc_scale_table= ff_mpeg1_dc_scale_table;
596     s->chroma_qscale_table= ff_default_chroma_qscale_table;
597     s->progressive_frame= 1;
598     s->progressive_sequence= 1;
599     s->picture_structure= PICT_FRAME;
600
601     s->coded_picture_number = 0;
602     s->picture_number = 0;
603     s->input_picture_number = 0;
604
605     s->picture_in_gop_number = 0;
606
607     s->f_code = 1;
608     s->b_code = 1;
609 }
610
611 /**
612  * sets the given MpegEncContext to defaults for decoding.
613  * the changed fields will not depend upon the prior state of the MpegEncContext.
614  */
615 void MPV_decode_defaults(MpegEncContext *s){
616     MPV_common_defaults(s);
617 }
618
619 /**
620  * sets the given MpegEncContext to defaults for encoding.
621  * the changed fields will not depend upon the prior state of the MpegEncContext.
622  */
623
624 #ifdef CONFIG_ENCODERS
625 static void MPV_encode_defaults(MpegEncContext *s){
626     static int done=0;
627
628     MPV_common_defaults(s);
629
630     if(!done){
631         int i;
632         done=1;
633
634         default_mv_penalty= av_mallocz( sizeof(uint8_t)*(MAX_FCODE+1)*(2*MAX_MV+1) );
635         memset(default_fcode_tab , 0, sizeof(uint8_t)*(2*MAX_MV+1));
636
637         for(i=-16; i<16; i++){
638             default_fcode_tab[i + MAX_MV]= 1;
639         }
640     }
641     s->me.mv_penalty= default_mv_penalty;
642     s->fcode_tab= default_fcode_tab;
643 }
644 #endif //CONFIG_ENCODERS
645
646 /**
647  * init common structure for both encoder and decoder.
648  * this assumes that some variables like width/height are already set
649  */
650 int MPV_common_init(MpegEncContext *s)
651 {
652     int y_size, c_size, yc_size, i, mb_array_size, mv_table_size, x, y;
653
654     s->mb_height = (s->height + 15) / 16;
655
656     if(s->avctx->thread_count > MAX_THREADS || (s->avctx->thread_count > s->mb_height && s->mb_height)){
657         av_log(s->avctx, AV_LOG_ERROR, "too many threads\n");
658         return -1;
659     }
660
661     if((s->width || s->height) && avcodec_check_dimensions(s->avctx, s->width, s->height))
662         return -1;
663
664     dsputil_init(&s->dsp, s->avctx);
665     DCT_common_init(s);
666
667     s->flags= s->avctx->flags;
668     s->flags2= s->avctx->flags2;
669
670     s->mb_width  = (s->width  + 15) / 16;
671     s->mb_stride = s->mb_width + 1;
672     s->b8_stride = s->mb_width*2 + 1;
673     s->b4_stride = s->mb_width*4 + 1;
674     mb_array_size= s->mb_height * s->mb_stride;
675     mv_table_size= (s->mb_height+2) * s->mb_stride + 1;
676
677     /* set chroma shifts */
678     avcodec_get_chroma_sub_sample(s->avctx->pix_fmt,&(s->chroma_x_shift),
679                                                     &(s->chroma_y_shift) );
680
681     /* set default edge pos, will be overridden in decode_header if needed */
682     s->h_edge_pos= s->mb_width*16;
683     s->v_edge_pos= s->mb_height*16;
684
685     s->mb_num = s->mb_width * s->mb_height;
686
687     s->block_wrap[0]=
688     s->block_wrap[1]=
689     s->block_wrap[2]=
690     s->block_wrap[3]= s->b8_stride;
691     s->block_wrap[4]=
692     s->block_wrap[5]= s->mb_stride;
693
694     y_size = s->b8_stride * (2 * s->mb_height + 1);
695     c_size = s->mb_stride * (s->mb_height + 1);
696     yc_size = y_size + 2 * c_size;
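    /* y_size / c_size / yc_size are, roughly, counts of 8x8 luma blocks and
       of macroblocks (one chroma block per MB and plane), each with one
       guard row; combined with the b8_stride+1 / mb_stride+1 pointer
       offsets below they let DC/AC prediction read "above" and "left of"
       the first block.  They size the dc_val and ac_val arrays. */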
697
698     /* convert fourcc to upper case */
699     s->avctx->codec_tag=   toupper( s->avctx->codec_tag     &0xFF)
700                         + (toupper((s->avctx->codec_tag>>8 )&0xFF)<<8 )
701                         + (toupper((s->avctx->codec_tag>>16)&0xFF)<<16)
702                         + (toupper((s->avctx->codec_tag>>24)&0xFF)<<24);
703
704     s->avctx->stream_codec_tag=   toupper( s->avctx->stream_codec_tag     &0xFF)
705                                + (toupper((s->avctx->stream_codec_tag>>8 )&0xFF)<<8 )
706                                + (toupper((s->avctx->stream_codec_tag>>16)&0xFF)<<16)
707                                + (toupper((s->avctx->stream_codec_tag>>24)&0xFF)<<24);
708
709     s->avctx->coded_frame= (AVFrame*)&s->current_picture;
710
711     CHECKED_ALLOCZ(s->mb_index2xy, (s->mb_num+1)*sizeof(int)) //error resilience code looks cleaner with this
712     for(y=0; y<s->mb_height; y++){
713         for(x=0; x<s->mb_width; x++){
714             s->mb_index2xy[ x + y*s->mb_width ] = x + y*s->mb_stride;
715         }
716     }
717     s->mb_index2xy[ s->mb_height*s->mb_width ] = (s->mb_height-1)*s->mb_stride + s->mb_width; //FIXME really needed?
718
719     if (s->encoding) {
720         /* Allocate MV tables */
721         CHECKED_ALLOCZ(s->p_mv_table_base            , mv_table_size * 2 * sizeof(int16_t))
722         CHECKED_ALLOCZ(s->b_forw_mv_table_base       , mv_table_size * 2 * sizeof(int16_t))
723         CHECKED_ALLOCZ(s->b_back_mv_table_base       , mv_table_size * 2 * sizeof(int16_t))
724         CHECKED_ALLOCZ(s->b_bidir_forw_mv_table_base , mv_table_size * 2 * sizeof(int16_t))
725         CHECKED_ALLOCZ(s->b_bidir_back_mv_table_base , mv_table_size * 2 * sizeof(int16_t))
726         CHECKED_ALLOCZ(s->b_direct_mv_table_base     , mv_table_size * 2 * sizeof(int16_t))
727         s->p_mv_table           = s->p_mv_table_base            + s->mb_stride + 1;
728         s->b_forw_mv_table      = s->b_forw_mv_table_base       + s->mb_stride + 1;
729         s->b_back_mv_table      = s->b_back_mv_table_base       + s->mb_stride + 1;
730         s->b_bidir_forw_mv_table= s->b_bidir_forw_mv_table_base + s->mb_stride + 1;
731         s->b_bidir_back_mv_table= s->b_bidir_back_mv_table_base + s->mb_stride + 1;
732         s->b_direct_mv_table    = s->b_direct_mv_table_base     + s->mb_stride + 1;
733
734         if(s->msmpeg4_version){
735             CHECKED_ALLOCZ(s->ac_stats, 2*2*(MAX_LEVEL+1)*(MAX_RUN+1)*2*sizeof(int));
736         }
737         CHECKED_ALLOCZ(s->avctx->stats_out, 256);
738
739         /* Allocate MB type table */
740         CHECKED_ALLOCZ(s->mb_type  , mb_array_size * sizeof(uint16_t)) //needed for encoding
741
742         CHECKED_ALLOCZ(s->lambda_table, mb_array_size * sizeof(int))
743
744         CHECKED_ALLOCZ(s->q_intra_matrix, 64*32 * sizeof(int))
745         CHECKED_ALLOCZ(s->q_inter_matrix, 64*32 * sizeof(int))
746         CHECKED_ALLOCZ(s->q_intra_matrix16, 64*32*2 * sizeof(uint16_t))
747         CHECKED_ALLOCZ(s->q_inter_matrix16, 64*32*2 * sizeof(uint16_t))
748         CHECKED_ALLOCZ(s->input_picture, MAX_PICTURE_COUNT * sizeof(Picture*))
749         CHECKED_ALLOCZ(s->reordered_input_picture, MAX_PICTURE_COUNT * sizeof(Picture*))
750
751         if(s->avctx->noise_reduction){
752             CHECKED_ALLOCZ(s->dct_offset, 2 * 64 * sizeof(uint16_t))
753         }
754     }
755     CHECKED_ALLOCZ(s->picture, MAX_PICTURE_COUNT * sizeof(Picture))
756
757     CHECKED_ALLOCZ(s->error_status_table, mb_array_size*sizeof(uint8_t))
758
759     if(s->codec_id==CODEC_ID_MPEG4 || (s->flags & CODEC_FLAG_INTERLACED_ME)){
760         /* interlaced direct mode decoding tables */
761             for(i=0; i<2; i++){
762                 int j, k;
763                 for(j=0; j<2; j++){
764                     for(k=0; k<2; k++){
765                         CHECKED_ALLOCZ(s->b_field_mv_table_base[i][j][k]     , mv_table_size * 2 * sizeof(int16_t))
766                         s->b_field_mv_table[i][j][k]    = s->b_field_mv_table_base[i][j][k]     + s->mb_stride + 1;
767                     }
768                     CHECKED_ALLOCZ(s->b_field_select_table[i][j]     , mb_array_size * 2 * sizeof(uint8_t))
769                     CHECKED_ALLOCZ(s->p_field_mv_table_base[i][j]     , mv_table_size * 2 * sizeof(int16_t))
770                     s->p_field_mv_table[i][j]    = s->p_field_mv_table_base[i][j]     + s->mb_stride + 1;
771                 }
772                 CHECKED_ALLOCZ(s->p_field_select_table[i]      , mb_array_size * 2 * sizeof(uint8_t))
773             }
774     }
775     if (s->out_format == FMT_H263) {
776         /* ac values */
777         CHECKED_ALLOCZ(s->ac_val_base, yc_size * sizeof(int16_t) * 16);
778         s->ac_val[0] = s->ac_val_base + s->b8_stride + 1;
779         s->ac_val[1] = s->ac_val_base + y_size + s->mb_stride + 1;
780         s->ac_val[2] = s->ac_val[1] + c_size;
781
782         /* cbp values */
783         CHECKED_ALLOCZ(s->coded_block_base, y_size);
784         s->coded_block= s->coded_block_base + s->b8_stride + 1;
785
786         /* cbp, ac_pred, pred_dir */
787         CHECKED_ALLOCZ(s->cbp_table  , mb_array_size * sizeof(uint8_t))
788         CHECKED_ALLOCZ(s->pred_dir_table, mb_array_size * sizeof(uint8_t))
789     }
790
791     if (s->h263_pred || s->h263_plus || !s->encoding) {
792         /* dc values */
793         //MN: we need these for error resilience of intra-frames
794         CHECKED_ALLOCZ(s->dc_val_base, yc_size * sizeof(int16_t));
795         s->dc_val[0] = s->dc_val_base + s->b8_stride + 1;
796         s->dc_val[1] = s->dc_val_base + y_size + s->mb_stride + 1;
797         s->dc_val[2] = s->dc_val[1] + c_size;
798         for(i=0;i<yc_size;i++)
799             s->dc_val_base[i] = 1024;
800     }
801
802     /* which mb is an intra block */
803     CHECKED_ALLOCZ(s->mbintra_table, mb_array_size);
804     memset(s->mbintra_table, 1, mb_array_size);
805
806     /* init macroblock skip table */
807     CHECKED_ALLOCZ(s->mbskip_table, mb_array_size+2);
808     //Note the +1 is for a quicker mpeg4 slice_end detection
809     CHECKED_ALLOCZ(s->prev_pict_types, PREV_PICT_TYPES_BUFFER_SIZE);
810
811     s->parse_context.state= -1;
812     if((s->avctx->debug&(FF_DEBUG_VIS_QP|FF_DEBUG_VIS_MB_TYPE)) || (s->avctx->debug_mv)){
813        s->visualization_buffer[0] = av_malloc((s->mb_width*16 + 2*EDGE_WIDTH) * s->mb_height*16 + 2*EDGE_WIDTH);
814        s->visualization_buffer[1] = av_malloc((s->mb_width*8 + EDGE_WIDTH) * s->mb_height*8 + EDGE_WIDTH);
815        s->visualization_buffer[2] = av_malloc((s->mb_width*8 + EDGE_WIDTH) * s->mb_height*8 + EDGE_WIDTH);
816     }
817
818     s->context_initialized = 1;
819
820     s->thread_context[0]= s;
821     for(i=1; i<s->avctx->thread_count; i++){
822         s->thread_context[i]= av_malloc(sizeof(MpegEncContext));
823         memcpy(s->thread_context[i], s, sizeof(MpegEncContext));
824     }
825
826     for(i=0; i<s->avctx->thread_count; i++){
827         if(init_duplicate_context(s->thread_context[i], s) < 0)
828            goto fail;
829         s->thread_context[i]->start_mb_y= (s->mb_height*(i  ) + s->avctx->thread_count/2) / s->avctx->thread_count;
830         s->thread_context[i]->end_mb_y  = (s->mb_height*(i+1) + s->avctx->thread_count/2) / s->avctx->thread_count;
831     }
832
833     return 0;
834  fail:
835     MPV_common_end(s);
836     return -1;
837 }
838
839 /* free common structure for both encoder and decoder */
840 void MPV_common_end(MpegEncContext *s)
841 {
842     int i, j, k;
843
844     for(i=0; i<s->avctx->thread_count; i++){
845         free_duplicate_context(s->thread_context[i]);
846     }
847     for(i=1; i<s->avctx->thread_count; i++){
848         av_freep(&s->thread_context[i]);
849     }
850
851     av_freep(&s->parse_context.buffer);
852     s->parse_context.buffer_size=0;
853
854     av_freep(&s->mb_type);
855     av_freep(&s->p_mv_table_base);
856     av_freep(&s->b_forw_mv_table_base);
857     av_freep(&s->b_back_mv_table_base);
858     av_freep(&s->b_bidir_forw_mv_table_base);
859     av_freep(&s->b_bidir_back_mv_table_base);
860     av_freep(&s->b_direct_mv_table_base);
861     s->p_mv_table= NULL;
862     s->b_forw_mv_table= NULL;
863     s->b_back_mv_table= NULL;
864     s->b_bidir_forw_mv_table= NULL;
865     s->b_bidir_back_mv_table= NULL;
866     s->b_direct_mv_table= NULL;
867     for(i=0; i<2; i++){
868         for(j=0; j<2; j++){
869             for(k=0; k<2; k++){
870                 av_freep(&s->b_field_mv_table_base[i][j][k]);
871                 s->b_field_mv_table[i][j][k]=NULL;
872             }
873             av_freep(&s->b_field_select_table[i][j]);
874             av_freep(&s->p_field_mv_table_base[i][j]);
875             s->p_field_mv_table[i][j]=NULL;
876         }
877         av_freep(&s->p_field_select_table[i]);
878     }
879
880     av_freep(&s->dc_val_base);
881     av_freep(&s->ac_val_base);
882     av_freep(&s->coded_block_base);
883     av_freep(&s->mbintra_table);
884     av_freep(&s->cbp_table);
885     av_freep(&s->pred_dir_table);
886
887     av_freep(&s->mbskip_table);
888     av_freep(&s->prev_pict_types);
889     av_freep(&s->bitstream_buffer);
890     s->allocated_bitstream_buffer_size=0;
891
892     av_freep(&s->avctx->stats_out);
893     av_freep(&s->ac_stats);
894     av_freep(&s->error_status_table);
895     av_freep(&s->mb_index2xy);
896     av_freep(&s->lambda_table);
897     av_freep(&s->q_intra_matrix);
898     av_freep(&s->q_inter_matrix);
899     av_freep(&s->q_intra_matrix16);
900     av_freep(&s->q_inter_matrix16);
901     av_freep(&s->input_picture);
902     av_freep(&s->reordered_input_picture);
903     av_freep(&s->dct_offset);
904
905     if(s->picture){
906         for(i=0; i<MAX_PICTURE_COUNT; i++){
907             free_picture(s, &s->picture[i]);
908         }
909     }
910     av_freep(&s->picture);
911     s->context_initialized = 0;
912     s->last_picture_ptr=
913     s->next_picture_ptr=
914     s->current_picture_ptr= NULL;
915     s->linesize= s->uvlinesize= 0;
916
917     for(i=0; i<3; i++)
918         av_freep(&s->visualization_buffer[i]);
919
920     avcodec_default_free_buffers(s->avctx);
921 }
922
923 #ifdef CONFIG_ENCODERS
924
925 /* init video encoder */
926 int MPV_encode_init(AVCodecContext *avctx)
927 {
928     MpegEncContext *s = avctx->priv_data;
929     int i;
930     int chroma_h_shift, chroma_v_shift;
931
932     MPV_encode_defaults(s);
933
934     switch (avctx->codec_id) {
935     case CODEC_ID_MPEG2VIDEO:
936         if(avctx->pix_fmt != PIX_FMT_YUV420P && avctx->pix_fmt != PIX_FMT_YUV422P){
937             av_log(avctx, AV_LOG_ERROR, "only YUV420 and YUV422 are supported\n");
938             return -1;
939         }
940         break;
941     case CODEC_ID_LJPEG:
942     case CODEC_ID_MJPEG:
943         if(avctx->pix_fmt != PIX_FMT_YUVJ420P && (avctx->pix_fmt != PIX_FMT_YUV420P || avctx->strict_std_compliance>FF_COMPLIANCE_INOFFICIAL)){
944             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in jpeg\n");
945             return -1;
946         }
947         break;
948     default:
949         if(avctx->pix_fmt != PIX_FMT_YUV420P){
950             av_log(avctx, AV_LOG_ERROR, "only YUV420 is supported\n");
951             return -1;
952         }
953     }
954
955     switch (avctx->pix_fmt) {
956     case PIX_FMT_YUVJ422P:
957     case PIX_FMT_YUV422P:
958         s->chroma_format = CHROMA_422;
959         break;
960     case PIX_FMT_YUVJ420P:
961     case PIX_FMT_YUV420P:
962     default:
963         s->chroma_format = CHROMA_420;
964         break;
965     }
966
967     s->bit_rate = avctx->bit_rate;
968     s->width = avctx->width;
969     s->height = avctx->height;
970     if(avctx->gop_size > 600 && avctx->strict_std_compliance>FF_COMPLIANCE_EXPERIMENTAL){
971         av_log(avctx, AV_LOG_ERROR, "Warning, keyframe interval too large! Reducing it ...\n");
972         avctx->gop_size=600;
973     }
974     s->gop_size = avctx->gop_size;
975     s->avctx = avctx;
976     s->flags= avctx->flags;
977     s->flags2= avctx->flags2;
978     s->max_b_frames= avctx->max_b_frames;
979     s->codec_id= avctx->codec->id;
980     s->luma_elim_threshold  = avctx->luma_elim_threshold;
981     s->chroma_elim_threshold= avctx->chroma_elim_threshold;
982     s->strict_std_compliance= avctx->strict_std_compliance;
983     s->data_partitioning= avctx->flags & CODEC_FLAG_PART;
984     s->quarter_sample= (avctx->flags & CODEC_FLAG_QPEL)!=0;
985     s->mpeg_quant= avctx->mpeg_quant;
986     s->rtp_mode= !!avctx->rtp_payload_size;
987     s->intra_dc_precision= avctx->intra_dc_precision;
988     s->user_specified_pts = AV_NOPTS_VALUE;
989
990     if (s->gop_size <= 1) {
991         s->intra_only = 1;
992         s->gop_size = 12;
993     } else {
994         s->intra_only = 0;
995     }
996
997     s->me_method = avctx->me_method;
998
999     /* Fixed QSCALE */
1000     s->fixed_qscale = !!(avctx->flags & CODEC_FLAG_QSCALE);
1001
1002     s->adaptive_quant= (   s->avctx->lumi_masking
1003                         || s->avctx->dark_masking
1004                         || s->avctx->temporal_cplx_masking
1005                         || s->avctx->spatial_cplx_masking
1006                         || s->avctx->p_masking
1007                         || s->avctx->border_masking
1008                         || (s->flags&CODEC_FLAG_QP_RD))
1009                        && !s->fixed_qscale;
1010
1011     s->obmc= !!(s->flags & CODEC_FLAG_OBMC);
1012     s->loop_filter= !!(s->flags & CODEC_FLAG_LOOP_FILTER);
1013     s->alternate_scan= !!(s->flags & CODEC_FLAG_ALT_SCAN);
1014     s->intra_vlc_format= !!(s->flags2 & CODEC_FLAG2_INTRA_VLC);
1015
1016     if(avctx->rc_max_rate && !avctx->rc_buffer_size){
1017         av_log(avctx, AV_LOG_ERROR, "a vbv buffer size is needed for encoding with a maximum bitrate\n");
1018         return -1;
1019     }
1020
1021     if(avctx->rc_min_rate && avctx->rc_max_rate != avctx->rc_min_rate){
1022         av_log(avctx, AV_LOG_INFO, "Warning min_rate > 0 but min_rate != max_rate isn't recommended!\n");
1023     }
1024
1025     if(avctx->rc_min_rate && avctx->rc_min_rate > avctx->bit_rate){
1026         av_log(avctx, AV_LOG_INFO, "bitrate below min bitrate\n");
1027         return -1;
1028     }
1029
1030     if(avctx->rc_max_rate && avctx->rc_max_rate < avctx->bit_rate){
1031         av_log(avctx, AV_LOG_INFO, "bitrate above max bitrate\n");
1032         return -1;
1033     }
1034
1035     if(   s->avctx->rc_max_rate && s->avctx->rc_min_rate == s->avctx->rc_max_rate
1036        && (s->codec_id == CODEC_ID_MPEG1VIDEO || s->codec_id == CODEC_ID_MPEG2VIDEO)
1037        && 90000LL * (avctx->rc_buffer_size-1) > s->avctx->rc_max_rate*0xFFFFLL){
1038
1039         av_log(avctx, AV_LOG_INFO, "Warning vbv_delay will be set to 0xFFFF (=VBR) as the specified vbv buffer is too large for the given bitrate!\n");
1040     }
1041
1042     if((s->flags & CODEC_FLAG_4MV) && s->codec_id != CODEC_ID_MPEG4
1043        && s->codec_id != CODEC_ID_H263 && s->codec_id != CODEC_ID_H263P && s->codec_id != CODEC_ID_FLV1){
1044         av_log(avctx, AV_LOG_ERROR, "4MV not supported by codec\n");
1045         return -1;
1046     }
1047
1048     if(s->obmc && s->avctx->mb_decision != FF_MB_DECISION_SIMPLE){
1049         av_log(avctx, AV_LOG_ERROR, "OBMC is only supported with simple mb decision\n");
1050         return -1;
1051     }
1052
1053     if(s->obmc && s->codec_id != CODEC_ID_H263 && s->codec_id != CODEC_ID_H263P){
1054         av_log(avctx, AV_LOG_ERROR, "OBMC is only supported with H263(+)\n");
1055         return -1;
1056     }
1057
1058     if(s->quarter_sample && s->codec_id != CODEC_ID_MPEG4){
1059         av_log(avctx, AV_LOG_ERROR, "qpel not supported by codec\n");
1060         return -1;
1061     }
1062
1063     if(s->data_partitioning && s->codec_id != CODEC_ID_MPEG4){
1064         av_log(avctx, AV_LOG_ERROR, "data partitioning not supported by codec\n");
1065         return -1;
1066     }
1067
1068     if(s->max_b_frames && s->codec_id != CODEC_ID_MPEG4 && s->codec_id != CODEC_ID_MPEG1VIDEO && s->codec_id != CODEC_ID_MPEG2VIDEO){
1069         av_log(avctx, AV_LOG_ERROR, "b frames not supported by codec\n");
1070         return -1;
1071     }
1072
1073     if((s->flags & (CODEC_FLAG_INTERLACED_DCT|CODEC_FLAG_INTERLACED_ME|CODEC_FLAG_ALT_SCAN))
1074        && s->codec_id != CODEC_ID_MPEG4 && s->codec_id != CODEC_ID_MPEG2VIDEO){
1075         av_log(avctx, AV_LOG_ERROR, "interlacing not supported by codec\n");
1076         return -1;
1077     }
1078
1079     if(s->mpeg_quant && s->codec_id != CODEC_ID_MPEG4){ //FIXME mpeg2 uses that too
1080         av_log(avctx, AV_LOG_ERROR, "mpeg2 style quantization not supported by codec\n");
1081         return -1;
1082     }
1083
1084     if((s->flags & CODEC_FLAG_CBP_RD) && !(s->flags & CODEC_FLAG_TRELLIS_QUANT)){
1085         av_log(avctx, AV_LOG_ERROR, "CBP RD needs trellis quant\n");
1086         return -1;
1087     }
1088
1089     if((s->flags & CODEC_FLAG_QP_RD) && s->avctx->mb_decision != FF_MB_DECISION_RD){
1090         av_log(avctx, AV_LOG_ERROR, "QP RD needs mbd=2\n");
1091         return -1;
1092     }
1093
1094     if(s->avctx->scenechange_threshold < 1000000000 && (s->flags & CODEC_FLAG_CLOSED_GOP)){
1095         av_log(avctx, AV_LOG_ERROR, "closed gop with scene change detection isn't supported yet\n");
1096         return -1;
1097     }
1098
1099     if((s->flags2 & CODEC_FLAG2_INTRA_VLC) && s->codec_id != CODEC_ID_MPEG2VIDEO){
1100         av_log(avctx, AV_LOG_ERROR, "intra vlc table not supported by codec\n");
1101         return -1;
1102     }
1103
1104     if(s->avctx->thread_count > 1 && s->codec_id != CODEC_ID_MPEG4
1105        && s->codec_id != CODEC_ID_MPEG1VIDEO && s->codec_id != CODEC_ID_MPEG2VIDEO
1106        && (s->codec_id != CODEC_ID_H263P || !(s->flags & CODEC_FLAG_H263P_SLICE_STRUCT))){
1107         av_log(avctx, AV_LOG_ERROR, "multi threaded encoding not supported by codec\n");
1108         return -1;
1109     }
1110
1111     if(s->avctx->thread_count > 1)
1112         s->rtp_mode= 1;
1113
1114     if(!avctx->time_base.den || !avctx->time_base.num){
1115         av_log(avctx, AV_LOG_ERROR, "framerate not set\n");
1116         return -1;
1117     }
1118
1119     i= (INT_MAX/2+128)>>8;
1120     if(avctx->me_threshold >= i){
1121         av_log(avctx, AV_LOG_ERROR, "me_threshold too large, max is %d\n", i - 1);
1122         return -1;
1123     }
1124     if(avctx->mb_threshold >= i){
1125         av_log(avctx, AV_LOG_ERROR, "mb_threshold too large, max is %d\n", i - 1);
1126         return -1;
1127     }
1128
1129     if(avctx->b_frame_strategy && (avctx->flags&CODEC_FLAG_PASS2)){
1130         av_log(avctx, AV_LOG_INFO, "notice: b_frame_strategy only affects the first pass\n");
1131         avctx->b_frame_strategy = 0;
1132     }
1133
1134     i= ff_gcd(avctx->time_base.den, avctx->time_base.num);
1135     if(i > 1){
1136         av_log(avctx, AV_LOG_INFO, "removing common factors from framerate\n");
1137         avctx->time_base.den /= i;
1138         avctx->time_base.num /= i;
1139 //        return -1;
1140     }
1141
1142     if(s->codec_id==CODEC_ID_MJPEG){
1143         s->intra_quant_bias= 1<<(QUANT_BIAS_SHIFT-1); //(a + x/2)/x
1144         s->inter_quant_bias= 0;
1145     }else if(s->mpeg_quant || s->codec_id==CODEC_ID_MPEG1VIDEO || s->codec_id==CODEC_ID_MPEG2VIDEO){
1146         s->intra_quant_bias= 3<<(QUANT_BIAS_SHIFT-3); //(a + x*3/8)/x
1147         s->inter_quant_bias= 0;
1148     }else{
1149         s->intra_quant_bias=0;
1150         s->inter_quant_bias=-(1<<(QUANT_BIAS_SHIFT-2)); //(a - x/4)/x
1151     }
1152
1153     if(avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
1154         s->intra_quant_bias= avctx->intra_quant_bias;
1155     if(avctx->inter_quant_bias != FF_DEFAULT_QUANT_BIAS)
1156         s->inter_quant_bias= avctx->inter_quant_bias;
1157
1158     avcodec_get_chroma_sub_sample(avctx->pix_fmt, &chroma_h_shift, &chroma_v_shift);
1159
1160     if(avctx->codec_id == CODEC_ID_MPEG4 && s->avctx->time_base.den > (1<<16)-1){
1161         av_log(avctx, AV_LOG_ERROR, "timebase not supported by mpeg 4 standard\n");
1162         return -1;
1163     }
1164     s->time_increment_bits = av_log2(s->avctx->time_base.den - 1) + 1;
1165
1166     switch(avctx->codec->id) {
1167     case CODEC_ID_MPEG1VIDEO:
1168         s->out_format = FMT_MPEG1;
1169         s->low_delay= 0; //s->max_b_frames ? 0 : 1;
1170         avctx->delay= s->low_delay ? 0 : (s->max_b_frames + 1);
1171         break;
1172     case CODEC_ID_MPEG2VIDEO:
1173         s->out_format = FMT_MPEG1;
1174         s->low_delay= 0; //s->max_b_frames ? 0 : 1;
1175         avctx->delay= s->low_delay ? 0 : (s->max_b_frames + 1);
1176         s->rtp_mode= 1;
1177         break;
1178     case CODEC_ID_LJPEG:
1179     case CODEC_ID_JPEGLS:
1180     case CODEC_ID_MJPEG:
1181         s->out_format = FMT_MJPEG;
1182         s->intra_only = 1; /* force intra only for jpeg */
1183         s->mjpeg_write_tables = avctx->codec->id != CODEC_ID_JPEGLS;
1184         s->mjpeg_data_only_frames = 0; /* write all the needed headers */
1185         s->mjpeg_vsample[0] = 1<<chroma_v_shift;
1186         s->mjpeg_vsample[1] = 1;
1187         s->mjpeg_vsample[2] = 1;
1188         s->mjpeg_hsample[0] = 1<<chroma_h_shift;
1189         s->mjpeg_hsample[1] = 1;
1190         s->mjpeg_hsample[2] = 1;
1191         if (mjpeg_init(s) < 0)
1192             return -1;
1193         avctx->delay=0;
1194         s->low_delay=1;
1195         break;
1196     case CODEC_ID_H261:
1197         s->out_format = FMT_H261;
1198         avctx->delay=0;
1199         s->low_delay=1;
1200         break;
1201     case CODEC_ID_H263:
1202         if (h263_get_picture_format(s->width, s->height) == 7) {
1203             av_log(avctx, AV_LOG_INFO, "The specified picture size of %dx%d is not valid for the H.263 codec.\nValid sizes are 128x96, 176x144, 352x288, 704x576, and 1408x1152. Try H.263+.\n", s->width, s->height);
1204             return -1;
1205         }
1206         s->out_format = FMT_H263;
1207         s->obmc= (avctx->flags & CODEC_FLAG_OBMC) ? 1:0;
1208         avctx->delay=0;
1209         s->low_delay=1;
1210         break;
1211     case CODEC_ID_H263P:
1212         s->out_format = FMT_H263;
1213         s->h263_plus = 1;
1214         /* Fx */
1215         s->umvplus = (avctx->flags & CODEC_FLAG_H263P_UMV) ? 1:0;
1216         s->h263_aic= (avctx->flags & CODEC_FLAG_H263P_AIC) ? 1:0;
1217         s->modified_quant= s->h263_aic;
1218         s->alt_inter_vlc= (avctx->flags & CODEC_FLAG_H263P_AIV) ? 1:0;
1219         s->obmc= (avctx->flags & CODEC_FLAG_OBMC) ? 1:0;
1220         s->loop_filter= (avctx->flags & CODEC_FLAG_LOOP_FILTER) ? 1:0;
1221         s->unrestricted_mv= s->obmc || s->loop_filter || s->umvplus;
1222         s->h263_slice_structured= (s->flags & CODEC_FLAG_H263P_SLICE_STRUCT) ? 1:0;
1223
1224         /* /Fx */
1225         /* These are just to be sure */
1226         avctx->delay=0;
1227         s->low_delay=1;
1228         break;
1229     case CODEC_ID_FLV1:
1230         s->out_format = FMT_H263;
1231         s->h263_flv = 2; /* format = 1; 11-bit codes */
1232         s->unrestricted_mv = 1;
1233         s->rtp_mode=0; /* don't allow GOB */
1234         avctx->delay=0;
1235         s->low_delay=1;
1236         break;
1237     case CODEC_ID_RV10:
1238         s->out_format = FMT_H263;
1239         avctx->delay=0;
1240         s->low_delay=1;
1241         break;
1242     case CODEC_ID_RV20:
1243         s->out_format = FMT_H263;
1244         avctx->delay=0;
1245         s->low_delay=1;
1246         s->modified_quant=1;
1247         s->h263_aic=1;
1248         s->h263_plus=1;
1249         s->loop_filter=1;
1250         s->unrestricted_mv= s->obmc || s->loop_filter || s->umvplus;
1251         break;
1252     case CODEC_ID_MPEG4:
1253         s->out_format = FMT_H263;
1254         s->h263_pred = 1;
1255         s->unrestricted_mv = 1;
1256         s->low_delay= s->max_b_frames ? 0 : 1;
1257         avctx->delay= s->low_delay ? 0 : (s->max_b_frames + 1);
1258         break;
1259     case CODEC_ID_MSMPEG4V1:
1260         s->out_format = FMT_H263;
1261         s->h263_msmpeg4 = 1;
1262         s->h263_pred = 1;
1263         s->unrestricted_mv = 1;
1264         s->msmpeg4_version= 1;
1265         avctx->delay=0;
1266         s->low_delay=1;
1267         break;
1268     case CODEC_ID_MSMPEG4V2:
1269         s->out_format = FMT_H263;
1270         s->h263_msmpeg4 = 1;
1271         s->h263_pred = 1;
1272         s->unrestricted_mv = 1;
1273         s->msmpeg4_version= 2;
1274         avctx->delay=0;
1275         s->low_delay=1;
1276         break;
1277     case CODEC_ID_MSMPEG4V3:
1278         s->out_format = FMT_H263;
1279         s->h263_msmpeg4 = 1;
1280         s->h263_pred = 1;
1281         s->unrestricted_mv = 1;
1282         s->msmpeg4_version= 3;
1283         s->flipflop_rounding=1;
1284         avctx->delay=0;
1285         s->low_delay=1;
1286         break;
1287     case CODEC_ID_WMV1:
1288         s->out_format = FMT_H263;
1289         s->h263_msmpeg4 = 1;
1290         s->h263_pred = 1;
1291         s->unrestricted_mv = 1;
1292         s->msmpeg4_version= 4;
1293         s->flipflop_rounding=1;
1294         avctx->delay=0;
1295         s->low_delay=1;
1296         break;
1297     case CODEC_ID_WMV2:
1298         s->out_format = FMT_H263;
1299         s->h263_msmpeg4 = 1;
1300         s->h263_pred = 1;
1301         s->unrestricted_mv = 1;
1302         s->msmpeg4_version= 5;
1303         s->flipflop_rounding=1;
1304         avctx->delay=0;
1305         s->low_delay=1;
1306         break;
1307     default:
1308         return -1;
1309     }
1310
1311     avctx->has_b_frames= !s->low_delay;
1312
1313     s->encoding = 1;
1314
1315     /* init */
1316     if (MPV_common_init(s) < 0)
1317         return -1;
1318
1319     if(s->modified_quant)
1320         s->chroma_qscale_table= ff_h263_chroma_qscale_table;
1321     s->progressive_frame=
1322     s->progressive_sequence= !(avctx->flags & (CODEC_FLAG_INTERLACED_DCT|CODEC_FLAG_INTERLACED_ME));
1323     s->quant_precision=5;
1324
1325     ff_set_cmp(&s->dsp, s->dsp.ildct_cmp, s->avctx->ildct_cmp);
1326     ff_set_cmp(&s->dsp, s->dsp.frame_skip_cmp, s->avctx->frame_skip_cmp);
1327
1328 #ifdef CONFIG_H261_ENCODER
1329     if (s->out_format == FMT_H261)
1330         ff_h261_encode_init(s);
1331 #endif
1332     if (s->out_format == FMT_H263)
1333         h263_encode_init(s);
1334     if(s->msmpeg4_version)
1335         ff_msmpeg4_encode_init(s);
1336     if (s->out_format == FMT_MPEG1)
1337         ff_mpeg1_encode_init(s);
1338
1339     /* init q matrix */
1340     for(i=0;i<64;i++) {
1341         int j= s->dsp.idct_permutation[i];
1342         if(s->codec_id==CODEC_ID_MPEG4 && s->mpeg_quant){
1343             s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
1344             s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
1345         }else if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
1346             s->intra_matrix[j] =
1347             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
1348         }else
1349         { /* mpeg1/2 */
1350             s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
1351             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
1352         }
1353         if(s->avctx->intra_matrix)
1354             s->intra_matrix[j] = s->avctx->intra_matrix[i];
1355         if(s->avctx->inter_matrix)
1356             s->inter_matrix[j] = s->avctx->inter_matrix[i];
1357     }
1358
1359     /* precompute matrix */
1360     /* for mjpeg, we do include qscale in the matrix */
1361     if (s->out_format != FMT_MJPEG) {
1362         convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
1363                        s->intra_matrix, s->intra_quant_bias, avctx->qmin, 31, 1);
1364         convert_matrix(&s->dsp, s->q_inter_matrix, s->q_inter_matrix16,
1365                        s->inter_matrix, s->inter_quant_bias, avctx->qmin, 31, 0);
1366     }
1367
1368     if(ff_rate_control_init(s) < 0)
1369         return -1;
1370
1371     return 0;
1372 }
1373
1374 int MPV_encode_end(AVCodecContext *avctx)
1375 {
1376     MpegEncContext *s = avctx->priv_data;
1377
1378     ff_rate_control_uninit(s);
1379
1380     MPV_common_end(s);
1381     if (s->out_format == FMT_MJPEG)
1382         mjpeg_close(s);
1383
1384     av_freep(&avctx->extradata);
1385
1386     return 0;
1387 }
1388
1389 #endif //CONFIG_ENCODERS
1390
1391 void init_rl(RLTable *rl, int use_static)
1392 {
1393     int8_t max_level[MAX_RUN+1], max_run[MAX_LEVEL+1];
1394     uint8_t index_run[MAX_RUN+1];
1395     int last, run, level, start, end, i;
1396
1397     /* If table is static, we can quit if rl->max_level[0] is not NULL */
1398     if(use_static && rl->max_level[0])
1399         return;
1400
1401     /* compute max_level[], max_run[] and index_run[] */
1402     for(last=0;last<2;last++) {
1403         if (last == 0) {
1404             start = 0;
1405             end = rl->last;
1406         } else {
1407             start = rl->last;
1408             end = rl->n;
1409         }
1410
1411         memset(max_level, 0, MAX_RUN + 1);
1412         memset(max_run, 0, MAX_LEVEL + 1);
1413         memset(index_run, rl->n, MAX_RUN + 1);
1414         for(i=start;i<end;i++) {
1415             run = rl->table_run[i];
1416             level = rl->table_level[i];
1417             if (index_run[run] == rl->n)
1418                 index_run[run] = i;
1419             if (level > max_level[run])
1420                 max_level[run] = level;
1421             if (run > max_run[level])
1422                 max_run[level] = run;
1423         }
1424         if(use_static)
1425             rl->max_level[last] = av_mallocz_static(MAX_RUN + 1);
1426         else
1427             rl->max_level[last] = av_malloc(MAX_RUN + 1);
1428         memcpy(rl->max_level[last], max_level, MAX_RUN + 1);
1429         if(use_static)
1430             rl->max_run[last] = av_mallocz_static(MAX_LEVEL + 1);
1431         else
1432             rl->max_run[last] = av_malloc(MAX_LEVEL + 1);
1433         memcpy(rl->max_run[last], max_run, MAX_LEVEL + 1);
1434         if(use_static)
1435             rl->index_run[last] = av_mallocz_static(MAX_RUN + 1);
1436         else
1437             rl->index_run[last] = av_malloc(MAX_RUN + 1);
1438         memcpy(rl->index_run[last], index_run, MAX_RUN + 1);
1439     }
1440 }
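
/* A minimal sketch of how the tables built above are typically consumed when
 * coding a (run, level, last) triple; the helper name below is only
 * illustrative, not a reference to an existing function:
 *
 *     static inline int example_rl_index(RLTable *rl, int last, int run, int level){
 *         int index= rl->index_run[last][run];        // first table slot with this run
 *         if(index >= rl->n || level > rl->max_level[last][run])
 *             return rl->n;                           // not in the table -> escape code
 *         return index + level - 1;                   // levels of one run are consecutive
 *     }
 */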
1441
1442 /* draw the edges of width 'w' of an image of size width x height by replicating the border samples */
1443 //FIXME check that this is ok for mpeg4 interlaced
1444 static void draw_edges_c(uint8_t *buf, int wrap, int width, int height, int w)
1445 {
1446     uint8_t *ptr, *last_line;
1447     int i;
1448
1449     last_line = buf + (height - 1) * wrap;
1450     for(i=0;i<w;i++) {
1451         /* top and bottom */
1452         memcpy(buf - (i + 1) * wrap, buf, width);
1453         memcpy(last_line + (i + 1) * wrap, last_line, width);
1454     }
1455     /* left and right */
1456     ptr = buf;
1457     for(i=0;i<height;i++) {
1458         memset(ptr - w, ptr[0], w);
1459         memset(ptr + width, ptr[width-1], w);
1460         ptr += wrap;
1461     }
1462     /* corners */
1463     for(i=0;i<w;i++) {
1464         memset(buf - (i + 1) * wrap - w, buf[0], w); /* top left */
1465         memset(buf - (i + 1) * wrap + width, buf[width-1], w); /* top right */
1466         memset(last_line + (i + 1) * wrap - w, last_line[0], w); /* bottom left */
1467         memset(last_line + (i + 1) * wrap + width, last_line[width-1], w); /* bottom right */
1468     }
1469 }
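
/* Rough sketch of the result (illustration only): around the width x height
 * picture, w extra rows on top and bottom repeat the first/last line, w extra
 * columns on the left and right repeat the first/last sample of each line, and
 * the four w x w corners repeat the corresponding corner sample. This lets the
 * motion compensation read up to w samples outside the coded area without
 * extra clipping.
 */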
1470
1471 int ff_find_unused_picture(MpegEncContext *s, int shared){
1472     int i;
1473
1474     if(shared){
1475         for(i=0; i<MAX_PICTURE_COUNT; i++){
1476             if(s->picture[i].data[0]==NULL && s->picture[i].type==0) return i;
1477         }
1478     }else{
1479         for(i=0; i<MAX_PICTURE_COUNT; i++){
1480             if(s->picture[i].data[0]==NULL && s->picture[i].type!=0) return i; //FIXME
1481         }
1482         for(i=0; i<MAX_PICTURE_COUNT; i++){
1483             if(s->picture[i].data[0]==NULL) return i;
1484         }
1485     }
1486
1487     assert(0);
1488     return -1;
1489 }
1490
1491 static void update_noise_reduction(MpegEncContext *s){
1492     int intra, i;
1493
1494     for(intra=0; intra<2; intra++){
1495         if(s->dct_count[intra] > (1<<16)){
1496             for(i=0; i<64; i++){
1497                 s->dct_error_sum[intra][i] >>=1;
1498             }
1499             s->dct_count[intra] >>= 1;
1500         }
1501
1502         for(i=0; i<64; i++){
1503             s->dct_offset[intra][i]= (s->avctx->noise_reduction * s->dct_count[intra] + s->dct_error_sum[intra][i]/2) / (s->dct_error_sum[intra][i]+1);
1504         }
1505     }
1506 }
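
/* Rough numeric example for the offset formula above (values made up):
 * with avctx->noise_reduction = 256, dct_count[intra] = 1000 and
 * dct_error_sum[intra][i] = 64000 we get
 *     dct_offset = (256*1000 + 64000/2) / (64000 + 1) = 288000/64001 = 4
 * i.e. roughly noise_reduction * dct_count / dct_error_sum, so coefficients
 * with a large accumulated error receive a smaller denoising offset.
 */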
1507
1508 /**
1509  * generic function for encode/decode called after coding/decoding the header and before a frame is coded/decoded
1510  */
1511 int MPV_frame_start(MpegEncContext *s, AVCodecContext *avctx)
1512 {
1513     int i;
1514     AVFrame *pic;
1515     s->mb_skipped = 0;
1516
1517     assert(s->last_picture_ptr==NULL || s->out_format != FMT_H264 || s->codec_id == CODEC_ID_SVQ3);
1518
1519     /* mark&release old frames */
1520     if (s->pict_type != B_TYPE && s->last_picture_ptr && s->last_picture_ptr != s->next_picture_ptr && s->last_picture_ptr->data[0]) {
1521         avctx->release_buffer(avctx, (AVFrame*)s->last_picture_ptr);
1522
1523         /* release forgotten pictures */
1524         /* if(mpeg124/h263) */
1525         if(!s->encoding){
1526             for(i=0; i<MAX_PICTURE_COUNT; i++){
1527                 if(s->picture[i].data[0] && &s->picture[i] != s->next_picture_ptr && s->picture[i].reference){
1528                     av_log(avctx, AV_LOG_ERROR, "releasing zombie picture\n");
1529                     avctx->release_buffer(avctx, (AVFrame*)&s->picture[i]);
1530                 }
1531             }
1532         }
1533     }
1534 alloc:
1535     if(!s->encoding){
1536         /* release non-reference frames */
1537         for(i=0; i<MAX_PICTURE_COUNT; i++){
1538             if(s->picture[i].data[0] && !s->picture[i].reference /*&& s->picture[i].type!=FF_BUFFER_TYPE_SHARED*/){
1539                 s->avctx->release_buffer(s->avctx, (AVFrame*)&s->picture[i]);
1540             }
1541         }
1542
1543         if(s->current_picture_ptr && s->current_picture_ptr->data[0]==NULL)
1544             pic= (AVFrame*)s->current_picture_ptr; //we already have an unused image (maybe it was set before reading the header)
1545         else{
1546             i= ff_find_unused_picture(s, 0);
1547             pic= (AVFrame*)&s->picture[i];
1548         }
1549
1550         pic->reference= (s->pict_type != B_TYPE || s->codec_id == CODEC_ID_H264)
1551                         && !s->dropable ? 3 : 0;
1552
1553         pic->coded_picture_number= s->coded_picture_number++;
1554
1555         if( alloc_picture(s, (Picture*)pic, 0) < 0)
1556             return -1;
1557
1558         s->current_picture_ptr= (Picture*)pic;
1559         s->current_picture_ptr->top_field_first= s->top_field_first; //FIXME use only the vars from current_pic
1560         s->current_picture_ptr->interlaced_frame= !s->progressive_frame && !s->progressive_sequence;
1561     }
1562
1563     s->current_picture_ptr->pict_type= s->pict_type;
1564 //    if(s->flags && CODEC_FLAG_QSCALE)
1565   //      s->current_picture_ptr->quality= s->new_picture_ptr->quality;
1566     s->current_picture_ptr->key_frame= s->pict_type == I_TYPE;
1567
1568     copy_picture(&s->current_picture, s->current_picture_ptr);
1569
1570   if(s->out_format != FMT_H264 || s->codec_id == CODEC_ID_SVQ3){
1571     if (s->pict_type != B_TYPE) {
1572         s->last_picture_ptr= s->next_picture_ptr;
1573         if(!s->dropable)
1574             s->next_picture_ptr= s->current_picture_ptr;
1575     }
1576 /*    av_log(s->avctx, AV_LOG_DEBUG, "L%p N%p C%p L%p N%p C%p type:%d drop:%d\n", s->last_picture_ptr, s->next_picture_ptr,s->current_picture_ptr,
1577         s->last_picture_ptr    ? s->last_picture_ptr->data[0] : NULL,
1578         s->next_picture_ptr    ? s->next_picture_ptr->data[0] : NULL,
1579         s->current_picture_ptr ? s->current_picture_ptr->data[0] : NULL,
1580         s->pict_type, s->dropable);*/
1581
1582     if(s->last_picture_ptr) copy_picture(&s->last_picture, s->last_picture_ptr);
1583     if(s->next_picture_ptr) copy_picture(&s->next_picture, s->next_picture_ptr);
1584
1585     if(s->pict_type != I_TYPE && (s->last_picture_ptr==NULL || s->last_picture_ptr->data[0]==NULL)){
1586         av_log(avctx, AV_LOG_ERROR, "warning: first frame is no keyframe\n");
1587         assert(s->pict_type != B_TYPE); //these should have been dropped if we don't have a reference
1588         goto alloc;
1589     }
1590
1591     assert(s->pict_type == I_TYPE || (s->last_picture_ptr && s->last_picture_ptr->data[0]));
1592
1593     if(s->picture_structure!=PICT_FRAME){
1594         int i;
1595         for(i=0; i<4; i++){
1596             if(s->picture_structure == PICT_BOTTOM_FIELD){
1597                  s->current_picture.data[i] += s->current_picture.linesize[i];
1598             }
1599             s->current_picture.linesize[i] *= 2;
1600             s->last_picture.linesize[i] *=2;
1601             s->next_picture.linesize[i] *=2;
1602         }
1603     }
1604   }
1605
1606     s->hurry_up= s->avctx->hurry_up;
1607     s->error_resilience= avctx->error_resilience;
1608
1609     /* set dequantizer, we can't do it during init as it might change for mpeg4
1610        and we can't do it in the header decode as init isn't called for mpeg4 there yet */
1611     if(s->mpeg_quant || s->codec_id == CODEC_ID_MPEG2VIDEO){
1612         s->dct_unquantize_intra = s->dct_unquantize_mpeg2_intra;
1613         s->dct_unquantize_inter = s->dct_unquantize_mpeg2_inter;
1614     }else if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
1615         s->dct_unquantize_intra = s->dct_unquantize_h263_intra;
1616         s->dct_unquantize_inter = s->dct_unquantize_h263_inter;
1617     }else{
1618         s->dct_unquantize_intra = s->dct_unquantize_mpeg1_intra;
1619         s->dct_unquantize_inter = s->dct_unquantize_mpeg1_inter;
1620     }
1621
1622     if(s->dct_error_sum){
1623         assert(s->avctx->noise_reduction && s->encoding);
1624
1625         update_noise_reduction(s);
1626     }
1627
1628 #ifdef HAVE_XVMC
1629     if(s->avctx->xvmc_acceleration)
1630         return XVMC_field_start(s, avctx);
1631 #endif
1632     return 0;
1633 }
1634
1635 /* generic function for encode/decode called after a frame has been coded/decoded */
1636 void MPV_frame_end(MpegEncContext *s)
1637 {
1638     int i;
1639     /* draw edge for correct motion prediction if outside */
1640 #ifdef HAVE_XVMC
1641 //just to make sure that all data is rendered.
1642     if(s->avctx->xvmc_acceleration){
1643         XVMC_field_end(s);
1644     }else
1645 #endif
1646     if(s->unrestricted_mv && s->current_picture.reference && !s->intra_only && !(s->flags&CODEC_FLAG_EMU_EDGE)) {
1647             draw_edges(s->current_picture.data[0], s->linesize  , s->h_edge_pos   , s->v_edge_pos   , EDGE_WIDTH  );
1648             draw_edges(s->current_picture.data[1], s->uvlinesize, s->h_edge_pos>>1, s->v_edge_pos>>1, EDGE_WIDTH/2);
1649             draw_edges(s->current_picture.data[2], s->uvlinesize, s->h_edge_pos>>1, s->v_edge_pos>>1, EDGE_WIDTH/2);
1650     }
1651     emms_c();
1652
1653     s->last_pict_type    = s->pict_type;
1654     s->last_lambda_for[s->pict_type]= s->current_picture_ptr->quality;
1655     if(s->pict_type!=B_TYPE){
1656         s->last_non_b_pict_type= s->pict_type;
1657     }
1658 #if 0
1659         /* copy back current_picture variables */
1660     for(i=0; i<MAX_PICTURE_COUNT; i++){
1661         if(s->picture[i].data[0] == s->current_picture.data[0]){
1662             s->picture[i]= s->current_picture;
1663             break;
1664         }
1665     }
1666     assert(i<MAX_PICTURE_COUNT);
1667 #endif
1668
1669     if(s->encoding){
1670         /* release non-reference frames */
1671         for(i=0; i<MAX_PICTURE_COUNT; i++){
1672             if(s->picture[i].data[0] && !s->picture[i].reference /*&& s->picture[i].type!=FF_BUFFER_TYPE_SHARED*/){
1673                 s->avctx->release_buffer(s->avctx, (AVFrame*)&s->picture[i]);
1674             }
1675         }
1676     }
1677     // clear copies, to avoid confusion
1678 #if 0
1679     memset(&s->last_picture, 0, sizeof(Picture));
1680     memset(&s->next_picture, 0, sizeof(Picture));
1681     memset(&s->current_picture, 0, sizeof(Picture));
1682 #endif
1683     s->avctx->coded_frame= (AVFrame*)s->current_picture_ptr;
1684 }
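
/* Minimal sketch of how MPV_frame_start()/MPV_frame_end() bracket the
 * per-frame work in a codec built on this framework (illustration only, the
 * macroblock loop is codec specific):
 *
 *     if(MPV_frame_start(s, avctx) < 0)
 *         return -1;
 *     ... decode or encode all macroblocks of the frame ...
 *     MPV_frame_end(s);
 */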
1685
1686 /**
1687  * Draws a line from (ex, ey) to (sx, sy).
1688  * @param w width of the image
1689  * @param h height of the image
1690  * @param stride stride/linesize of the image
1691  * @param color color of the line
1692  */
1693 static void draw_line(uint8_t *buf, int sx, int sy, int ex, int ey, int w, int h, int stride, int color){
1694     int x, y, fr, f;
1695
1696     sx= clip(sx, 0, w-1);
1697     sy= clip(sy, 0, h-1);
1698     ex= clip(ex, 0, w-1);
1699     ey= clip(ey, 0, h-1);
1700
1701     buf[sy*stride + sx]+= color;
1702
1703     if(ABS(ex - sx) > ABS(ey - sy)){
1704         if(sx > ex){
1705             SWAP(int, sx, ex);
1706             SWAP(int, sy, ey);
1707         }
1708         buf+= sx + sy*stride;
1709         ex-= sx;
1710         f= ((ey-sy)<<16)/ex;
1711         for(x= 0; x <= ex; x++){
1712             y = (x*f)>>16;
1713             fr= (x*f)&0xFFFF;
1714             buf[ y   *stride + x]+= (color*(0x10000-fr))>>16;
1715             buf[(y+1)*stride + x]+= (color*         fr )>>16;
1716         }
1717     }else{
1718         if(sy > ey){
1719             SWAP(int, sx, ex);
1720             SWAP(int, sy, ey);
1721         }
1722         buf+= sx + sy*stride;
1723         ey-= sy;
1724         if(ey) f= ((ex-sx)<<16)/ey;
1725         else   f= 0;
1726         for(y= 0; y <= ey; y++){
1727             x = (y*f)>>16;
1728             fr= (y*f)&0xFFFF;
1729             buf[y*stride + x  ]+= (color*(0x10000-fr))>>16;
1730             buf[y*stride + x+1]+= (color*         fr )>>16;
1731         }
1732     }
1733 }
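
/* Worked example for the 16.16 fixed point interpolation above (numbers are
 * arbitrary): with sx=0, sy=0, ex=4, ey=1 we get f = (1<<16)/4 = 16384; at
 * x=2, x*f = 32768, so y=0 and fr=0x8000, and half of 'color' is added to the
 * pixel in row 0 and half to the pixel in row 1 - a cheap anti-aliasing along
 * the minor axis.
 */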
1734
1735 /**
1736  * Draws an arrow from (ex, ey) to (sx, sy).
1737  * @param w width of the image
1738  * @param h height of the image
1739  * @param stride stride/linesize of the image
1740  * @param color color of the arrow
1741  */
1742 static void draw_arrow(uint8_t *buf, int sx, int sy, int ex, int ey, int w, int h, int stride, int color){
1743     int dx,dy;
1744
1745     sx= clip(sx, -100, w+100);
1746     sy= clip(sy, -100, h+100);
1747     ex= clip(ex, -100, w+100);
1748     ey= clip(ey, -100, h+100);
1749
1750     dx= ex - sx;
1751     dy= ey - sy;
1752
1753     if(dx*dx + dy*dy > 3*3){
1754         int rx=  dx + dy;
1755         int ry= -dx + dy;
1756         int length= ff_sqrt((rx*rx + ry*ry)<<8);
1757
1758         //FIXME subpixel accuracy
1759         rx= ROUNDED_DIV(rx*3<<4, length);
1760         ry= ROUNDED_DIV(ry*3<<4, length);
1761
1762         draw_line(buf, sx, sy, sx + rx, sy + ry, w, h, stride, color);
1763         draw_line(buf, sx, sy, sx - ry, sy + rx, w, h, stride, color);
1764     }
1765     draw_line(buf, sx, sy, ex, ey, w, h, stride, color);
1766 }
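
/* The two extra strokes above form the arrow head: (rx, ry) = (dx+dy, dy-dx)
 * is the line direction rotated by 45 degrees and is then scaled to a length
 * of roughly 3 pixels; the second stroke uses (-ry, rx), so the head is two
 * short strokes at +-45 degrees around (sx, sy). For example (dx, dy) = (4, 0)
 * gives head strokes of about (2, -2) and (2, 2).
 */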
1767
1768 /**
1769  * Prints debugging info for the given picture.
1770  */
1771 void ff_print_debug_info(MpegEncContext *s, AVFrame *pict){
1772
1773     if(!pict || !pict->mb_type) return;
1774
1775     if(s->avctx->debug&(FF_DEBUG_SKIP | FF_DEBUG_QP | FF_DEBUG_MB_TYPE)){
1776         int x,y;
1777
1778         av_log(s->avctx,AV_LOG_DEBUG,"New frame, type: ");
1779         switch (pict->pict_type) {
1780             case FF_I_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"I\n"); break;
1781             case FF_P_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"P\n"); break;
1782             case FF_B_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"B\n"); break;
1783             case FF_S_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"S\n"); break;
1784             case FF_SI_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"SI\n"); break;
1785             case FF_SP_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"SP\n"); break;
1786         }
1787         for(y=0; y<s->mb_height; y++){
1788             for(x=0; x<s->mb_width; x++){
1789                 if(s->avctx->debug&FF_DEBUG_SKIP){
1790                     int count= s->mbskip_table[x + y*s->mb_stride];
1791                     if(count>9) count=9;
1792                     av_log(s->avctx, AV_LOG_DEBUG, "%1d", count);
1793                 }
1794                 if(s->avctx->debug&FF_DEBUG_QP){
1795                     av_log(s->avctx, AV_LOG_DEBUG, "%2d", pict->qscale_table[x + y*s->mb_stride]);
1796                 }
1797                 if(s->avctx->debug&FF_DEBUG_MB_TYPE){
1798                     int mb_type= pict->mb_type[x + y*s->mb_stride];
1799                     //Type & MV direction
1800                     if(IS_PCM(mb_type))
1801                         av_log(s->avctx, AV_LOG_DEBUG, "P");
1802                     else if(IS_INTRA(mb_type) && IS_ACPRED(mb_type))
1803                         av_log(s->avctx, AV_LOG_DEBUG, "A");
1804                     else if(IS_INTRA4x4(mb_type))
1805                         av_log(s->avctx, AV_LOG_DEBUG, "i");
1806                     else if(IS_INTRA16x16(mb_type))
1807                         av_log(s->avctx, AV_LOG_DEBUG, "I");
1808                     else if(IS_DIRECT(mb_type) && IS_SKIP(mb_type))
1809                         av_log(s->avctx, AV_LOG_DEBUG, "d");
1810                     else if(IS_DIRECT(mb_type))
1811                         av_log(s->avctx, AV_LOG_DEBUG, "D");
1812                     else if(IS_GMC(mb_type) && IS_SKIP(mb_type))
1813                         av_log(s->avctx, AV_LOG_DEBUG, "g");
1814                     else if(IS_GMC(mb_type))
1815                         av_log(s->avctx, AV_LOG_DEBUG, "G");
1816                     else if(IS_SKIP(mb_type))
1817                         av_log(s->avctx, AV_LOG_DEBUG, "S");
1818                     else if(!USES_LIST(mb_type, 1))
1819                         av_log(s->avctx, AV_LOG_DEBUG, ">");
1820                     else if(!USES_LIST(mb_type, 0))
1821                         av_log(s->avctx, AV_LOG_DEBUG, "<");
1822                     else{
1823                         assert(USES_LIST(mb_type, 0) && USES_LIST(mb_type, 1));
1824                         av_log(s->avctx, AV_LOG_DEBUG, "X");
1825                     }
1826
1827                     //segmentation
1828                     if(IS_8X8(mb_type))
1829                         av_log(s->avctx, AV_LOG_DEBUG, "+");
1830                     else if(IS_16X8(mb_type))
1831                         av_log(s->avctx, AV_LOG_DEBUG, "-");
1832                     else if(IS_8X16(mb_type))
1833                         av_log(s->avctx, AV_LOG_DEBUG, "|");
1834                     else if(IS_INTRA(mb_type) || IS_16X16(mb_type))
1835                         av_log(s->avctx, AV_LOG_DEBUG, " ");
1836                     else
1837                         av_log(s->avctx, AV_LOG_DEBUG, "?");
1838
1839
1840                     if(IS_INTERLACED(mb_type) && s->codec_id == CODEC_ID_H264)
1841                         av_log(s->avctx, AV_LOG_DEBUG, "=");
1842                     else
1843                         av_log(s->avctx, AV_LOG_DEBUG, " ");
1844                 }
1845 //                av_log(s->avctx, AV_LOG_DEBUG, " ");
1846             }
1847             av_log(s->avctx, AV_LOG_DEBUG, "\n");
1848         }
1849     }
1850
1851     if((s->avctx->debug&(FF_DEBUG_VIS_QP|FF_DEBUG_VIS_MB_TYPE)) || (s->avctx->debug_mv)){
1852         const int shift= 1 + s->quarter_sample;
1853         int mb_y;
1854         uint8_t *ptr;
1855         int i;
1856         int h_chroma_shift, v_chroma_shift;
1857         const int width = s->avctx->width;
1858         const int height= s->avctx->height;
1859         const int mv_sample_log2= 4 - pict->motion_subsample_log2;
1860         const int mv_stride= (s->mb_width << mv_sample_log2) + (s->codec_id == CODEC_ID_H264 ? 0 : 1);
1861         s->low_delay=0; //needed to see the vectors without trashing the buffers
1862
1863         avcodec_get_chroma_sub_sample(s->avctx->pix_fmt, &h_chroma_shift, &v_chroma_shift);
1864         for(i=0; i<3; i++){
1865             memcpy(s->visualization_buffer[i], pict->data[i], (i==0) ? pict->linesize[i]*height:pict->linesize[i]*height >> v_chroma_shift);
1866             pict->data[i]= s->visualization_buffer[i];
1867         }
1868         pict->type= FF_BUFFER_TYPE_COPY;
1869         ptr= pict->data[0];
1870
1871         for(mb_y=0; mb_y<s->mb_height; mb_y++){
1872             int mb_x;
1873             for(mb_x=0; mb_x<s->mb_width; mb_x++){
1874                 const int mb_index= mb_x + mb_y*s->mb_stride;
1875                 if((s->avctx->debug_mv) && pict->motion_val){
1876                   int type;
1877                   for(type=0; type<3; type++){
1878                     int direction = 0;
1879                     switch (type) {
1880                       case 0: if ((!(s->avctx->debug_mv&FF_DEBUG_VIS_MV_P_FOR)) || (pict->pict_type!=FF_P_TYPE))
1881                                 continue;
1882                               direction = 0;
1883                               break;
1884                       case 1: if ((!(s->avctx->debug_mv&FF_DEBUG_VIS_MV_B_FOR)) || (pict->pict_type!=FF_B_TYPE))
1885                                 continue;
1886                               direction = 0;
1887                               break;
1888                       case 2: if ((!(s->avctx->debug_mv&FF_DEBUG_VIS_MV_B_BACK)) || (pict->pict_type!=FF_B_TYPE))
1889                                 continue;
1890                               direction = 1;
1891                               break;
1892                     }
1893                     if(!USES_LIST(pict->mb_type[mb_index], direction))
1894                         continue;
1895
1896                     if(IS_8X8(pict->mb_type[mb_index])){
1897                       int i;
1898                       for(i=0; i<4; i++){
1899                         int sx= mb_x*16 + 4 + 8*(i&1);
1900                         int sy= mb_y*16 + 4 + 8*(i>>1);
1901                         int xy= (mb_x*2 + (i&1) + (mb_y*2 + (i>>1))*mv_stride) << (mv_sample_log2-1);
1902                         int mx= (pict->motion_val[direction][xy][0]>>shift) + sx;
1903                         int my= (pict->motion_val[direction][xy][1]>>shift) + sy;
1904                         draw_arrow(ptr, sx, sy, mx, my, width, height, s->linesize, 100);
1905                       }
1906                     }else if(IS_16X8(pict->mb_type[mb_index])){
1907                       int i;
1908                       for(i=0; i<2; i++){
1909                         int sx=mb_x*16 + 8;
1910                         int sy=mb_y*16 + 4 + 8*i;
1911                         int xy= (mb_x*2 + (mb_y*2 + i)*mv_stride) << (mv_sample_log2-1);
1912                         int mx=(pict->motion_val[direction][xy][0]>>shift);
1913                         int my=(pict->motion_val[direction][xy][1]>>shift);
1914
1915                         if(IS_INTERLACED(pict->mb_type[mb_index]))
1916                             my*=2;
1917
1918                         draw_arrow(ptr, sx, sy, mx+sx, my+sy, width, height, s->linesize, 100);
1919                       }
1920                     }else if(IS_8X16(pict->mb_type[mb_index])){
1921                       int i;
1922                       for(i=0; i<2; i++){
1923                         int sx=mb_x*16 + 4 + 8*i;
1924                         int sy=mb_y*16 + 8;
1925                         int xy= (mb_x*2 + i + mb_y*2*mv_stride) << (mv_sample_log2-1);
1926                         int mx=(pict->motion_val[direction][xy][0]>>shift);
1927                         int my=(pict->motion_val[direction][xy][1]>>shift);
1928
1929                         if(IS_INTERLACED(pict->mb_type[mb_index]))
1930                             my*=2;
1931
1932                         draw_arrow(ptr, sx, sy, mx+sx, my+sy, width, height, s->linesize, 100);
1933                       }
1934                     }else{
1935                       int sx= mb_x*16 + 8;
1936                       int sy= mb_y*16 + 8;
1937                       int xy= (mb_x + mb_y*mv_stride) << mv_sample_log2;
1938                       int mx= (pict->motion_val[direction][xy][0]>>shift) + sx;
1939                       int my= (pict->motion_val[direction][xy][1]>>shift) + sy;
1940                       draw_arrow(ptr, sx, sy, mx, my, width, height, s->linesize, 100);
1941                     }
1942                   }
1943                 }
1944                 if((s->avctx->debug&FF_DEBUG_VIS_QP) && pict->motion_val){
1945                     uint64_t c= (pict->qscale_table[mb_index]*128/31) * 0x0101010101010101ULL;
1946                     int y;
1947                     for(y=0; y<8; y++){
1948                         *(uint64_t*)(pict->data[1] + 8*mb_x + (8*mb_y + y)*pict->linesize[1])= c;
1949                         *(uint64_t*)(pict->data[2] + 8*mb_x + (8*mb_y + y)*pict->linesize[2])= c;
1950                     }
1951                 }
1952                 if((s->avctx->debug&FF_DEBUG_VIS_MB_TYPE) && pict->motion_val){
1953                     int mb_type= pict->mb_type[mb_index];
1954                     uint64_t u,v;
1955                     int y;
1956 #define COLOR(theta, r)\
1957 u= (int)(128 + r*cos(theta*3.141592/180));\
1958 v= (int)(128 + r*sin(theta*3.141592/180));
1959
1960
1961                     u=v=128;
1962                     if(IS_PCM(mb_type)){
1963                         COLOR(120,48)
1964                     }else if((IS_INTRA(mb_type) && IS_ACPRED(mb_type)) || IS_INTRA16x16(mb_type)){
1965                         COLOR(30,48)
1966                     }else if(IS_INTRA4x4(mb_type)){
1967                         COLOR(90,48)
1968                     }else if(IS_DIRECT(mb_type) && IS_SKIP(mb_type)){
1969 //                        COLOR(120,48)
1970                     }else if(IS_DIRECT(mb_type)){
1971                         COLOR(150,48)
1972                     }else if(IS_GMC(mb_type) && IS_SKIP(mb_type)){
1973                         COLOR(170,48)
1974                     }else if(IS_GMC(mb_type)){
1975                         COLOR(190,48)
1976                     }else if(IS_SKIP(mb_type)){
1977 //                        COLOR(180,48)
1978                     }else if(!USES_LIST(mb_type, 1)){
1979                         COLOR(240,48)
1980                     }else if(!USES_LIST(mb_type, 0)){
1981                         COLOR(0,48)
1982                     }else{
1983                         assert(USES_LIST(mb_type, 0) && USES_LIST(mb_type, 1));
1984                         COLOR(300,48)
1985                     }
1986
1987                     u*= 0x0101010101010101ULL;
1988                     v*= 0x0101010101010101ULL;
1989                     for(y=0; y<8; y++){
1990                         *(uint64_t*)(pict->data[1] + 8*mb_x + (8*mb_y + y)*pict->linesize[1])= u;
1991                         *(uint64_t*)(pict->data[2] + 8*mb_x + (8*mb_y + y)*pict->linesize[2])= v;
1992                     }
1993
1994                     //segmentation
1995                     if(IS_8X8(mb_type) || IS_16X8(mb_type)){
1996                         *(uint64_t*)(pict->data[0] + 16*mb_x + 0 + (16*mb_y + 8)*pict->linesize[0])^= 0x8080808080808080ULL;
1997                         *(uint64_t*)(pict->data[0] + 16*mb_x + 8 + (16*mb_y + 8)*pict->linesize[0])^= 0x8080808080808080ULL;
1998                     }
1999                     if(IS_8X8(mb_type) || IS_8X16(mb_type)){
2000                         for(y=0; y<16; y++)
2001                             pict->data[0][16*mb_x + 8 + (16*mb_y + y)*pict->linesize[0]]^= 0x80;
2002                     }
2003                     if(IS_8X8(mb_type) && mv_sample_log2 >= 2){
2004                         int dm= 1 << (mv_sample_log2-2);
2005                         for(i=0; i<4; i++){
2006                             int sx= mb_x*16 + 8*(i&1);
2007                             int sy= mb_y*16 + 8*(i>>1);
2008                             int xy= (mb_x*2 + (i&1) + (mb_y*2 + (i>>1))*mv_stride) << (mv_sample_log2-1);
2009                             //FIXME bidir
2010                             int32_t *mv = (int32_t*)&pict->motion_val[0][xy];
2011                             if(mv[0] != mv[dm] || mv[dm*mv_stride] != mv[dm*(mv_stride+1)])
2012                                 for(y=0; y<8; y++)
2013                                     pict->data[0][sx + 4 + (sy + y)*pict->linesize[0]]^= 0x80;
2014                             if(mv[0] != mv[dm*mv_stride] || mv[dm] != mv[dm*(mv_stride+1)])
2015                                 *(uint64_t*)(pict->data[0] + sx + (sy + 4)*pict->linesize[0])^= 0x8080808080808080ULL;
2016                         }
2017                     }
2018
2019                     if(IS_INTERLACED(mb_type) && s->codec_id == CODEC_ID_H264){
2020                         // hmm
2021                     }
2022                 }
2023                 s->mbskip_table[mb_index]=0;
2024             }
2025         }
2026     }
2027 }
2028
2029 #ifdef CONFIG_ENCODERS
2030
2031 static int get_sae(uint8_t *src, int ref, int stride){
2032     int x,y;
2033     int acc=0;
2034
2035     for(y=0; y<16; y++){
2036         for(x=0; x<16; x++){
2037             acc+= ABS(src[x+y*stride] - ref);
2038         }
2039     }
2040
2041     return acc;
2042 }
2043
2044 static int get_intra_count(MpegEncContext *s, uint8_t *src, uint8_t *ref, int stride){
2045     int x, y, w, h;
2046     int acc=0;
2047
2048     w= s->width &~15;
2049     h= s->height&~15;
2050
2051     for(y=0; y<h; y+=16){
2052         for(x=0; x<w; x+=16){
2053             int offset= x + y*stride;
2054             int sad = s->dsp.sad[0](NULL, src + offset, ref + offset, stride, 16);
2055             int mean= (s->dsp.pix_sum(src + offset, stride) + 128)>>8;
2056             int sae = get_sae(src + offset, mean, stride);
2057
2058             acc+= sae + 500 < sad;
2059         }
2060     }
2061     return acc;
2062 }
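
/* Descriptive note: for every 16x16 block the SAD against the previous input
 * frame approximates the inter coding cost while the SAE against the block
 * mean approximates the intra cost, so the returned value is roughly the
 * number of macroblocks that look cheaper to code as intra (the +500 is a
 * fixed bias). b_frame_strategy==1 below uses this to score frames.
 */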
2063
2064
2065 static int load_input_picture(MpegEncContext *s, AVFrame *pic_arg){
2066     AVFrame *pic=NULL;
2067     int64_t pts;
2068     int i;
2069     const int encoding_delay= s->max_b_frames;
2070     int direct=1;
2071
2072     if(pic_arg){
2073         pts= pic_arg->pts;
2074         pic_arg->display_picture_number= s->input_picture_number++;
2075
2076         if(pts != AV_NOPTS_VALUE){
2077             if(s->user_specified_pts != AV_NOPTS_VALUE){
2078                 int64_t time= pts;
2079                 int64_t last= s->user_specified_pts;
2080
2081                 if(time <= last){
2082                     av_log(s->avctx, AV_LOG_ERROR, "Error, Invalid timestamp=%"PRId64", last=%"PRId64"\n", pts, s->user_specified_pts);
2083                     return -1;
2084                 }
2085             }
2086             s->user_specified_pts= pts;
2087         }else{
2088             if(s->user_specified_pts != AV_NOPTS_VALUE){
2089                 s->user_specified_pts=
2090                 pts= s->user_specified_pts + 1;
2091                 av_log(s->avctx, AV_LOG_INFO, "Warning: AVFrame.pts=? trying to guess (%"PRId64")\n", pts);
2092             }else{
2093                 pts= pic_arg->display_picture_number;
2094             }
2095         }
2096     }
2097
2098   if(pic_arg){
2099     if(encoding_delay && !(s->flags&CODEC_FLAG_INPUT_PRESERVED)) direct=0;
2100     if(pic_arg->linesize[0] != s->linesize) direct=0;
2101     if(pic_arg->linesize[1] != s->uvlinesize) direct=0;
2102     if(pic_arg->linesize[2] != s->uvlinesize) direct=0;
2103
2104 //    av_log(AV_LOG_DEBUG, "%d %d %d %d\n",pic_arg->linesize[0], pic_arg->linesize[1], s->linesize, s->uvlinesize);
2105
2106     if(direct){
2107         i= ff_find_unused_picture(s, 1);
2108
2109         pic= (AVFrame*)&s->picture[i];
2110         pic->reference= 3;
2111
2112         for(i=0; i<4; i++){
2113             pic->data[i]= pic_arg->data[i];
2114             pic->linesize[i]= pic_arg->linesize[i];
2115         }
2116         alloc_picture(s, (Picture*)pic, 1);
2117     }else{
2118         i= ff_find_unused_picture(s, 0);
2119
2120         pic= (AVFrame*)&s->picture[i];
2121         pic->reference= 3;
2122
2123         alloc_picture(s, (Picture*)pic, 0);
2124
2125         if(   pic->data[0] + INPLACE_OFFSET == pic_arg->data[0]
2126            && pic->data[1] + INPLACE_OFFSET == pic_arg->data[1]
2127            && pic->data[2] + INPLACE_OFFSET == pic_arg->data[2]){
2128        // empty
2129         }else{
2130             int h_chroma_shift, v_chroma_shift;
2131             avcodec_get_chroma_sub_sample(s->avctx->pix_fmt, &h_chroma_shift, &v_chroma_shift);
2132
2133             for(i=0; i<3; i++){
2134                 int src_stride= pic_arg->linesize[i];
2135                 int dst_stride= i ? s->uvlinesize : s->linesize;
2136                 int h_shift= i ? h_chroma_shift : 0;
2137                 int v_shift= i ? v_chroma_shift : 0;
2138                 int w= s->width >>h_shift;
2139                 int h= s->height>>v_shift;
2140                 uint8_t *src= pic_arg->data[i];
2141                 uint8_t *dst= pic->data[i] + INPLACE_OFFSET;
2142
2143                 if(src_stride==dst_stride)
2144                     memcpy(dst, src, src_stride*h);
2145                 else{
2146                     while(h--){
2147                         memcpy(dst, src, w);
2148                         dst += dst_stride;
2149                         src += src_stride;
2150                     }
2151                 }
2152             }
2153         }
2154     }
2155     copy_picture_attributes(s, pic, pic_arg);
2156     pic->pts= pts; //we set this here to avoid modifying pic_arg
2157   }
2158
2159     /* shift buffer entries */
2160     for(i=1; i<MAX_PICTURE_COUNT /*s->encoding_delay+1*/; i++)
2161         s->input_picture[i-1]= s->input_picture[i];
2162
2163     s->input_picture[encoding_delay]= (Picture*)pic;
2164
2165     return 0;
2166 }
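
/* Summary of the two paths above: if the caller's buffer has matching
 * linesizes and may be kept around (no reordering delay, or
 * CODEC_FLAG_INPUT_PRESERVED is set), it is referenced directly as a shared
 * picture; otherwise the frame is copied into an internal picture at
 * INPLACE_OFFSET so that later stages are free to modify it.
 */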
2167
2168 static int skip_check(MpegEncContext *s, Picture *p, Picture *ref){
2169     int x, y, plane;
2170     int score=0;
2171     int64_t score64=0;
2172
2173     for(plane=0; plane<3; plane++){
2174         const int stride= p->linesize[plane];
2175         const int bw= plane ? 1 : 2;
2176         for(y=0; y<s->mb_height*bw; y++){
2177             for(x=0; x<s->mb_width*bw; x++){
2178                 int off= p->type == FF_BUFFER_TYPE_SHARED ? 0: 16;
2179                 int v= s->dsp.frame_skip_cmp[1](s, p->data[plane] + 8*(x + y*stride)+off, ref->data[plane] + 8*(x + y*stride), stride, 8);
2180
2181                 switch(s->avctx->frame_skip_exp){
2182                     case 0: score= FFMAX(score, v); break;
2183                     case 1: score+= ABS(v);break;
2184                     case 2: score+= v*v;break;
2185                     case 3: score64+= ABS(v*v*(int64_t)v);break;
2186                     case 4: score64+= v*v*(int64_t)(v*v);break;
2187                 }
2188             }
2189         }
2190     }
2191
2192     if(score) score64= score;
2193
2194     if(score64 < s->avctx->frame_skip_threshold)
2195         return 1;
2196     if(score64 < ((s->avctx->frame_skip_factor * (int64_t)s->lambda)>>8))
2197         return 1;
2198     return 0;
2199 }
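
/* Descriptive note: frame_skip_exp selects the norm used to accumulate the
 * per-8x8 differences (0: maximum, 1: sum of |v|, 2: sum of v*v, 3: sum of
 * |v|^3, 4: sum of v^4) and the result is compared against
 * frame_skip_threshold and against frame_skip_factor*lambda/256, so frame
 * skipping becomes more aggressive at higher lambda (lower quality).
 */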
2200
2201 static int estimate_best_b_count(MpegEncContext *s){
2202     AVCodec *codec= avcodec_find_encoder(s->avctx->codec_id);
2203     AVCodecContext *c= avcodec_alloc_context();
2204     AVFrame input[FF_MAX_B_FRAMES+2];
2205     const int scale= s->avctx->brd_scale;
2206     int i, j, out_size, p_lambda, b_lambda, lambda2;
2207     int outbuf_size= s->width * s->height; //FIXME
2208     uint8_t *outbuf= av_malloc(outbuf_size);
2209     int64_t best_rd= INT64_MAX;
2210     int best_b_count= -1;
2211
2212     assert(scale>=0 && scale <=3);
2213
2214 //    emms_c();
2215     p_lambda= s->last_lambda_for[P_TYPE]; //s->next_picture_ptr->quality;
2216     b_lambda= s->last_lambda_for[B_TYPE]; //p_lambda *ABS(s->avctx->b_quant_factor) + s->avctx->b_quant_offset;
2217     if(!b_lambda) b_lambda= p_lambda; //FIXME we should do this somewhere else
2218     lambda2= (b_lambda*b_lambda + (1<<FF_LAMBDA_SHIFT)/2 ) >> FF_LAMBDA_SHIFT;
2219
2220     c->width = s->width >> scale;
2221     c->height= s->height>> scale;
2222     c->flags= CODEC_FLAG_QSCALE | CODEC_FLAG_PSNR | CODEC_FLAG_INPUT_PRESERVED /*| CODEC_FLAG_EMU_EDGE*/;
2223     c->flags|= s->avctx->flags & CODEC_FLAG_QPEL;
2224     c->mb_decision= s->avctx->mb_decision;
2225     c->me_cmp= s->avctx->me_cmp;
2226     c->mb_cmp= s->avctx->mb_cmp;
2227     c->me_sub_cmp= s->avctx->me_sub_cmp;
2228     c->pix_fmt = PIX_FMT_YUV420P;
2229     c->time_base= s->avctx->time_base;
2230     c->max_b_frames= s->max_b_frames;
2231
2232     if (avcodec_open(c, codec) < 0)
2233         return -1;
2234
2235     for(i=0; i<s->max_b_frames+2; i++){
2236         int ysize= c->width*c->height;
2237         int csize= (c->width/2)*(c->height/2);
2238         Picture pre_input, *pre_input_ptr= i ? s->input_picture[i-1] : s->next_picture_ptr;
2239
2240         if(pre_input_ptr)
2241             pre_input= *pre_input_ptr;
2242
2243         if(pre_input.type != FF_BUFFER_TYPE_SHARED && i){
2244             pre_input.data[0]+=INPLACE_OFFSET;
2245             pre_input.data[1]+=INPLACE_OFFSET;
2246             pre_input.data[2]+=INPLACE_OFFSET;
2247         }
2248
2249         avcodec_get_frame_defaults(&input[i]);
2250         input[i].data[0]= av_malloc(ysize + 2*csize);
2251         input[i].data[1]= input[i].data[0] + ysize;
2252         input[i].data[2]= input[i].data[1] + csize;
2253         input[i].linesize[0]= c->width;
2254         input[i].linesize[1]=
2255         input[i].linesize[2]= c->width/2;
2256
2257         if(!i || s->input_picture[i-1]){
2258             s->dsp.shrink[scale](input[i].data[0], input[i].linesize[0], pre_input.data[0], pre_input.linesize[0], c->width, c->height);
2259             s->dsp.shrink[scale](input[i].data[1], input[i].linesize[1], pre_input.data[1], pre_input.linesize[1], c->width>>1, c->height>>1);
2260             s->dsp.shrink[scale](input[i].data[2], input[i].linesize[2], pre_input.data[2], pre_input.linesize[2], c->width>>1, c->height>>1);
2261         }
2262     }
2263
2264     for(j=0; j<s->max_b_frames+1; j++){
2265         int64_t rd=0;
2266
2267         if(!s->input_picture[j])
2268             break;
2269
2270         c->error[0]= c->error[1]= c->error[2]= 0;
2271
2272         input[0].pict_type= I_TYPE;
2273         input[0].quality= 1 * FF_QP2LAMBDA;
2274         out_size = avcodec_encode_video(c, outbuf, outbuf_size, &input[0]);
2275 //        rd += (out_size * lambda2) >> FF_LAMBDA_SHIFT;
2276
2277         for(i=0; i<s->max_b_frames+1; i++){
2278             int is_p= i % (j+1) == j || i==s->max_b_frames;
2279
2280             input[i+1].pict_type= is_p ? P_TYPE : B_TYPE;
2281             input[i+1].quality= is_p ? p_lambda : b_lambda;
2282             out_size = avcodec_encode_video(c, outbuf, outbuf_size, &input[i+1]);
2283             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
2284         }
2285
2286         /* get the delayed frames */
2287         while(out_size){
2288             out_size = avcodec_encode_video(c, outbuf, outbuf_size, NULL);
2289             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
2290         }
2291
2292         rd += c->error[0] + c->error[1] + c->error[2];
2293
2294         if(rd < best_rd){
2295             best_rd= rd;
2296             best_b_count= j;
2297         }
2298     }
2299
2300     av_freep(&outbuf);
2301     avcodec_close(c);
2302     av_freep(&c);
2303
2304     for(i=0; i<s->max_b_frames+2; i++){
2305         av_freep(&input[i].data[0]);
2306     }
2307
2308     return best_b_count;
2309 }
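
/* Descriptive note: for b_frame_strategy==2 the next max_b_frames+2 input
 * pictures are downscaled by brd_scale and re-encoded with every candidate
 * number of B frames (j = 0..max_b_frames) between references; the candidate
 * with the smallest accumulated bits*lambda2 + SSE cost is returned. This is
 * more expensive than strategy 1, but it measures actual coding cost instead
 * of relying on a heuristic.
 */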
2310
2311 static void select_input_picture(MpegEncContext *s){
2312     int i;
2313
2314     for(i=1; i<MAX_PICTURE_COUNT; i++)
2315         s->reordered_input_picture[i-1]= s->reordered_input_picture[i];
2316     s->reordered_input_picture[MAX_PICTURE_COUNT-1]= NULL;
2317
2318     /* set next picture type & ordering */
2319     if(s->reordered_input_picture[0]==NULL && s->input_picture[0]){
2320         if(/*s->picture_in_gop_number >= s->gop_size ||*/ s->next_picture_ptr==NULL || s->intra_only){
2321             s->reordered_input_picture[0]= s->input_picture[0];
2322             s->reordered_input_picture[0]->pict_type= I_TYPE;
2323             s->reordered_input_picture[0]->coded_picture_number= s->coded_picture_number++;
2324         }else{
2325             int b_frames;
2326
2327             if(s->avctx->frame_skip_threshold || s->avctx->frame_skip_factor){
2328                 if(s->picture_in_gop_number < s->gop_size && skip_check(s, s->input_picture[0], s->next_picture_ptr)){
2329                 //FIXME check that the gop check above is +-1 correct
2330 //av_log(NULL, AV_LOG_DEBUG, "skip %p %Ld\n", s->input_picture[0]->data[0], s->input_picture[0]->pts);
2331
2332                     if(s->input_picture[0]->type == FF_BUFFER_TYPE_SHARED){
2333                         for(i=0; i<4; i++)
2334                             s->input_picture[0]->data[i]= NULL;
2335                         s->input_picture[0]->type= 0;
2336                     }else{
2337                         assert(   s->input_picture[0]->type==FF_BUFFER_TYPE_USER
2338                                || s->input_picture[0]->type==FF_BUFFER_TYPE_INTERNAL);
2339
2340                         s->avctx->release_buffer(s->avctx, (AVFrame*)s->input_picture[0]);
2341                     }
2342
2343                     emms_c();
2344                     ff_vbv_update(s, 0);
2345
2346                     goto no_output_pic;
2347                 }
2348             }
2349
2350             if(s->flags&CODEC_FLAG_PASS2){
2351                 for(i=0; i<s->max_b_frames+1; i++){
2352                     int pict_num= s->input_picture[0]->display_picture_number + i;
2353
2354                     if(pict_num >= s->rc_context.num_entries)
2355                         break;
2356                     if(!s->input_picture[i]){
2357                         s->rc_context.entry[pict_num-1].new_pict_type = P_TYPE;
2358                         break;
2359                     }
2360
2361                     s->input_picture[i]->pict_type=
2362                         s->rc_context.entry[pict_num].new_pict_type;
2363                 }
2364             }
2365
2366             if(s->avctx->b_frame_strategy==0){
2367                 b_frames= s->max_b_frames;
2368                 while(b_frames && !s->input_picture[b_frames]) b_frames--;
2369             }else if(s->avctx->b_frame_strategy==1){
2370                 for(i=1; i<s->max_b_frames+1; i++){
2371                     if(s->input_picture[i] && s->input_picture[i]->b_frame_score==0){
2372                         s->input_picture[i]->b_frame_score=
2373                             get_intra_count(s, s->input_picture[i  ]->data[0],
2374                                                s->input_picture[i-1]->data[0], s->linesize) + 1;
2375                     }
2376                 }
2377                 for(i=0; i<s->max_b_frames+1; i++){
2378                     if(s->input_picture[i]==NULL || s->input_picture[i]->b_frame_score - 1 > s->mb_num/s->avctx->b_sensitivity) break;
2379                 }
2380
2381                 b_frames= FFMAX(0, i-1);
2382
2383                 /* reset scores */
2384                 for(i=0; i<b_frames+1; i++){
2385                     s->input_picture[i]->b_frame_score=0;
2386                 }
2387             }else if(s->avctx->b_frame_strategy==2){
2388                 b_frames= estimate_best_b_count(s);
2389             }else{
2390                 av_log(s->avctx, AV_LOG_ERROR, "illegal b frame strategy\n");
2391                 b_frames=0;
2392             }
2393
2394             emms_c();
2395 //static int b_count=0;
2396 //b_count+= b_frames;
2397 //av_log(s->avctx, AV_LOG_DEBUG, "b_frames: %d\n", b_count);
2398
2399             for(i= b_frames - 1; i>=0; i--){
2400                 int type= s->input_picture[i]->pict_type;
2401                 if(type && type != B_TYPE)
2402                     b_frames= i;
2403             }
2404             if(s->input_picture[b_frames]->pict_type == B_TYPE && b_frames == s->max_b_frames){
2405                 av_log(s->avctx, AV_LOG_ERROR, "warning, too many b frames in a row\n");
2406             }
2407
2408             if(s->picture_in_gop_number + b_frames >= s->gop_size){
2409               if((s->flags2 & CODEC_FLAG2_STRICT_GOP) && s->gop_size > s->picture_in_gop_number){
2410                     b_frames= s->gop_size - s->picture_in_gop_number - 1;
2411               }else{
2412                 if(s->flags & CODEC_FLAG_CLOSED_GOP)
2413                     b_frames=0;
2414                 s->input_picture[b_frames]->pict_type= I_TYPE;
2415               }
2416             }
2417
2418             if(   (s->flags & CODEC_FLAG_CLOSED_GOP)
2419                && b_frames
2420                && s->input_picture[b_frames]->pict_type== I_TYPE)
2421                 b_frames--;
2422
2423             s->reordered_input_picture[0]= s->input_picture[b_frames];
2424             if(s->reordered_input_picture[0]->pict_type != I_TYPE)
2425                 s->reordered_input_picture[0]->pict_type= P_TYPE;
2426             s->reordered_input_picture[0]->coded_picture_number= s->coded_picture_number++;
2427             for(i=0; i<b_frames; i++){
2428                 s->reordered_input_picture[i+1]= s->input_picture[i];
2429                 s->reordered_input_picture[i+1]->pict_type= B_TYPE;
2430                 s->reordered_input_picture[i+1]->coded_picture_number= s->coded_picture_number++;
2431             }
2432         }
2433     }
2434 no_output_pic:
2435     if(s->reordered_input_picture[0]){
2436         s->reordered_input_picture[0]->reference= s->reordered_input_picture[0]->pict_type!=B_TYPE ? 3 : 0;
2437
2438         copy_picture(&s->new_picture, s->reordered_input_picture[0]);
2439
2440         if(s->reordered_input_picture[0]->type == FF_BUFFER_TYPE_SHARED){
2441             // input is a shared pix, so we can't modify it -> alloc a new one & ensure that the shared one is reusable
2442
2443             int i= ff_find_unused_picture(s, 0);
2444             Picture *pic= &s->picture[i];
2445
2446             /* mark us unused / free shared pic */
2447             for(i=0; i<4; i++)
2448                 s->reordered_input_picture[0]->data[i]= NULL;
2449             s->reordered_input_picture[0]->type= 0;
2450
2451             pic->reference              = s->reordered_input_picture[0]->reference;
2452
2453             alloc_picture(s, pic, 0);
2454
2455             copy_picture_attributes(s, (AVFrame*)pic, (AVFrame*)s->reordered_input_picture[0]);
2456
2457             s->current_picture_ptr= pic;
2458         }else{
2459             // input is not a shared pix -> reuse buffer for current_pix
2460
2461             assert(   s->reordered_input_picture[0]->type==FF_BUFFER_TYPE_USER
2462                    || s->reordered_input_picture[0]->type==FF_BUFFER_TYPE_INTERNAL);
2463
2464             s->current_picture_ptr= s->reordered_input_picture[0];
2465             for(i=0; i<4; i++){
2466                 s->new_picture.data[i]+= INPLACE_OFFSET;
2467             }
2468         }
2469         copy_picture(&s->current_picture, s->current_picture_ptr);
2470
2471         s->picture_number= s->new_picture.display_picture_number;
2472 //printf("dpn:%d\n", s->picture_number);
2473     }else{
2474        memset(&s->new_picture, 0, sizeof(Picture));
2475     }
2476 }
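
/* Descriptive note: the function above decides the coding type of the next
 * picture(s) and reorders them into coding order: the future reference (I or
 * P) frame is moved in front of the B frames that precede it in display
 * order, and coded_picture_number is assigned in that coding order.
 */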
2477
2478 int MPV_encode_picture(AVCodecContext *avctx,
2479                        unsigned char *buf, int buf_size, void *data)
2480 {
2481     MpegEncContext *s = avctx->priv_data;
2482     AVFrame *pic_arg = data;
2483     int i, stuffing_count;
2484
2485     for(i=0; i<avctx->thread_count; i++){
2486         int start_y= s->thread_context[i]->start_mb_y;
2487         int   end_y= s->thread_context[i]->  end_mb_y;
2488         int h= s->mb_height;
2489         uint8_t *start= buf + (size_t)(((int64_t) buf_size)*start_y/h);
2490         uint8_t *end  = buf + (size_t)(((int64_t) buf_size)*  end_y/h);
2491
2492         init_put_bits(&s->thread_context[i]->pb, start, end - start);
2493     }
2494
2495     s->picture_in_gop_number++;
2496
2497     if(load_input_picture(s, pic_arg) < 0)
2498         return -1;
2499
2500     select_input_picture(s);
2501
2502     /* output? */
2503     if(s->new_picture.data[0]){
2504         s->pict_type= s->new_picture.pict_type;
2505 //emms_c();
2506 //printf("qs:%f %f %d\n", s->new_picture.quality, s->current_picture.quality, s->qscale);
2507         MPV_frame_start(s, avctx);
2508
2509         if (encode_picture(s, s->picture_number) < 0)
2510             return -1;
2511
2512         avctx->real_pict_num  = s->picture_number;
2513         avctx->header_bits = s->header_bits;
2514         avctx->mv_bits     = s->mv_bits;
2515         avctx->misc_bits   = s->misc_bits;
2516         avctx->i_tex_bits  = s->i_tex_bits;
2517         avctx->p_tex_bits  = s->p_tex_bits;
2518         avctx->i_count     = s->i_count;
2519         avctx->p_count     = s->mb_num - s->i_count - s->skip_count; //FIXME f/b_count in avctx
2520         avctx->skip_count  = s->skip_count;
2521
2522         MPV_frame_end(s);
2523
2524         if (s->out_format == FMT_MJPEG)
2525             mjpeg_picture_trailer(s);
2526
2527         if(s->flags&CODEC_FLAG_PASS1)
2528             ff_write_pass1_stats(s);
2529
2530         for(i=0; i<4; i++){
2531             s->current_picture_ptr->error[i]= s->current_picture.error[i];
2532             avctx->error[i] += s->current_picture_ptr->error[i];
2533         }
2534
2535         if(s->flags&CODEC_FLAG_PASS1)
2536             assert(avctx->header_bits + avctx->mv_bits + avctx->misc_bits + avctx->i_tex_bits + avctx->p_tex_bits == put_bits_count(&s->pb));
2537         flush_put_bits(&s->pb);
2538         s->frame_bits  = put_bits_count(&s->pb);
2539
2540         stuffing_count= ff_vbv_update(s, s->frame_bits);
2541         if(stuffing_count){
2542             if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < stuffing_count + 50){
2543                 av_log(s->avctx, AV_LOG_ERROR, "stuffing too large\n");
2544                 return -1;
2545             }
2546
2547             switch(s->codec_id){
2548             case CODEC_ID_MPEG1VIDEO:
2549             case CODEC_ID_MPEG2VIDEO:
2550                 while(stuffing_count--){
2551                     put_bits(&s->pb, 8, 0);
2552                 }
2553             break;
2554             case CODEC_ID_MPEG4:
2555                 put_bits(&s->pb, 16, 0);
2556                 put_bits(&s->pb, 16, 0x1C3);
2557                 stuffing_count -= 4;
2558                 while(stuffing_count--){
2559                     put_bits(&s->pb, 8, 0xFF);
2560                 }
2561             break;
2562             default:
2563                 av_log(s->avctx, AV_LOG_ERROR, "vbv buffer overflow\n");
2564             }
2565             flush_put_bits(&s->pb);
2566             s->frame_bits  = put_bits_count(&s->pb);
2567         }
2568
2569         /* update mpeg1/2 vbv_delay for CBR */
2570         if(s->avctx->rc_max_rate && s->avctx->rc_min_rate == s->avctx->rc_max_rate && s->out_format == FMT_MPEG1
2571            && 90000LL * (avctx->rc_buffer_size-1) <= s->avctx->rc_max_rate*0xFFFFLL){
2572             int vbv_delay;
2573
2574             assert(s->repeat_first_field==0);
2575
2576             vbv_delay= lrintf(90000 * s->rc_context.buffer_index / s->avctx->rc_max_rate);
2577             assert(vbv_delay < 0xFFFF);
2578
2579             s->vbv_delay_ptr[0] &= 0xF8;
2580             s->vbv_delay_ptr[0] |= vbv_delay>>13;
2581             s->vbv_delay_ptr[1]  = vbv_delay>>5;
2582             s->vbv_delay_ptr[2] &= 0x07;
2583             s->vbv_delay_ptr[2] |= vbv_delay<<3;
2584         }
2585         s->total_bits += s->frame_bits;
2586         avctx->frame_bits  = s->frame_bits;
2587     }else{
2588         assert((pbBufPtr(&s->pb) == s->pb.buf));
2589         s->frame_bits=0;
2590     }
2591     assert((s->frame_bits&7)==0);
2592
2593     return s->frame_bits/8;
2594 }
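
/* Note on the vbv_delay patching above: it rewrites the 16 bit vbv_delay
 * field that was written earlier into the picture header; s->vbv_delay_ptr
 * points at the byte holding its top bits, so bits 15..13 go into the low 3
 * bits of byte 0, bits 12..5 into byte 1 and bits 4..0 into the high 5 bits
 * of byte 2. For example vbv_delay = 0xABCD stores 0b101 in byte 0, 0x5E in
 * byte 1 and 0b01101 in byte 2.
 */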
2595
2596 #endif //CONFIG_ENCODERS
2597
2598 static inline void gmc1_motion(MpegEncContext *s,
2599                                uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2600                                uint8_t **ref_picture)
2601 {
2602     uint8_t *ptr;
2603     int offset, src_x, src_y, linesize, uvlinesize;
2604     int motion_x, motion_y;
2605     int emu=0;
2606
2607     motion_x= s->sprite_offset[0][0];
2608     motion_y= s->sprite_offset[0][1];
2609     src_x = s->mb_x * 16 + (motion_x >> (s->sprite_warping_accuracy+1));
2610     src_y = s->mb_y * 16 + (motion_y >> (s->sprite_warping_accuracy+1));
2611     motion_x<<=(3-s->sprite_warping_accuracy);
2612     motion_y<<=(3-s->sprite_warping_accuracy);
2613     src_x = clip(src_x, -16, s->width);
2614     if (src_x == s->width)
2615         motion_x =0;
2616     src_y = clip(src_y, -16, s->height);
2617     if (src_y == s->height)
2618         motion_y =0;
2619
2620     linesize = s->linesize;
2621     uvlinesize = s->uvlinesize;
2622
2623     ptr = ref_picture[0] + (src_y * linesize) + src_x;
2624
2625     if(s->flags&CODEC_FLAG_EMU_EDGE){
2626         if(   (unsigned)src_x >= s->h_edge_pos - 17
2627            || (unsigned)src_y >= s->v_edge_pos - 17){
2628             ff_emulated_edge_mc(s->edge_emu_buffer, ptr, linesize, 17, 17, src_x, src_y, s->h_edge_pos, s->v_edge_pos);
2629             ptr= s->edge_emu_buffer;
2630         }
2631     }
2632
2633     if((motion_x|motion_y)&7){
2634         s->dsp.gmc1(dest_y  , ptr  , linesize, 16, motion_x&15, motion_y&15, 128 - s->no_rounding);
2635         s->dsp.gmc1(dest_y+8, ptr+8, linesize, 16, motion_x&15, motion_y&15, 128 - s->no_rounding);
2636     }else{
2637         int dxy;
2638
2639         dxy= ((motion_x>>3)&1) | ((motion_y>>2)&2);
2640         if (s->no_rounding){
2641             s->dsp.put_no_rnd_pixels_tab[0][dxy](dest_y, ptr, linesize, 16);
2642         }else{
2643             s->dsp.put_pixels_tab       [0][dxy](dest_y, ptr, linesize, 16);
2644         }
2645     }
2646
2647     if(s->flags&CODEC_FLAG_GRAY) return;
2648
2649     motion_x= s->sprite_offset[1][0];
2650     motion_y= s->sprite_offset[1][1];
2651     src_x = s->mb_x * 8 + (motion_x >> (s->sprite_warping_accuracy+1));
2652     src_y = s->mb_y * 8 + (motion_y >> (s->sprite_warping_accuracy+1));
2653     motion_x<<=(3-s->sprite_warping_accuracy);
2654     motion_y<<=(3-s->sprite_warping_accuracy);
2655     src_x = clip(src_x, -8, s->width>>1);
2656     if (src_x == s->width>>1)
2657         motion_x =0;
2658     src_y = clip(src_y, -8, s->height>>1);
2659     if (src_y == s->height>>1)
2660         motion_y =0;
2661
2662     offset = (src_y * uvlinesize) + src_x;
2663     ptr = ref_picture[1] + offset;
2664     if(s->flags&CODEC_FLAG_EMU_EDGE){
2665         if(   (unsigned)src_x >= (s->h_edge_pos>>1) - 9
2666            || (unsigned)src_y >= (s->v_edge_pos>>1) - 9){
2667             ff_emulated_edge_mc(s->edge_emu_buffer, ptr, uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
2668             ptr= s->edge_emu_buffer;
2669             emu=1;
2670         }
2671     }
2672     s->dsp.gmc1(dest_cb, ptr, uvlinesize, 8, motion_x&15, motion_y&15, 128 - s->no_rounding);
2673
2674     ptr = ref_picture[2] + offset;
2675     if(emu){
2676         ff_emulated_edge_mc(s->edge_emu_buffer, ptr, uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
2677         ptr= s->edge_emu_buffer;
2678     }
2679     s->dsp.gmc1(dest_cr, ptr, uvlinesize, 8, motion_x&15, motion_y&15, 128 - s->no_rounding);
2680
2681     return;
2682 }
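
/* Note on the offset arithmetic above: the sprite offset is split into an
 * integer pixel position and a 1/16 pel phase for dsp.gmc1(). For example
 * with sprite_warping_accuracy==3 and sprite_offset[0][0]==35, src_x gains
 * 35>>4 = 2 full pixels and motion_x&15 == 3 selects the 3/16 pel
 * interpolation phase; with accuracy 2 the offset is shifted left by one
 * first, so the phase is still expressed in sixteenths.
 */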
2683
2684 static inline void gmc_motion(MpegEncContext *s,
2685                                uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2686                                uint8_t **ref_picture)
2687 {
2688     uint8_t *ptr;
2689     int linesize, uvlinesize;
2690     const int a= s->sprite_warping_accuracy;
2691     int ox, oy;
2692
2693     linesize = s->linesize;
2694     uvlinesize = s->uvlinesize;
2695
2696     ptr = ref_picture[0];
2697
2698     ox= s->sprite_offset[0][0] + s->sprite_delta[0][0]*s->mb_x*16 + s->sprite_delta[0][1]*s->mb_y*16;
2699     oy= s->sprite_offset[0][1] + s->sprite_delta[1][0]*s->mb_x*16 + s->sprite_delta[1][1]*s->mb_y*16;
2700
2701     s->dsp.gmc(dest_y, ptr, linesize, 16,
2702            ox,
2703            oy,
2704            s->sprite_delta[0][0], s->sprite_delta[0][1],
2705            s->sprite_delta[1][0], s->sprite_delta[1][1],
2706            a+1, (1<<(2*a+1)) - s->no_rounding,
2707            s->h_edge_pos, s->v_edge_pos);
2708     s->dsp.gmc(dest_y+8, ptr, linesize, 16,
2709            ox + s->sprite_delta[0][0]*8,
2710            oy + s->sprite_delta[1][0]*8,
2711            s->sprite_delta[0][0], s->sprite_delta[0][1],
2712            s->sprite_delta[1][0], s->sprite_delta[1][1],
2713            a+1, (1<<(2*a+1)) - s->no_rounding,
2714            s->h_edge_pos, s->v_edge_pos);
2715
2716     if(s->flags&CODEC_FLAG_GRAY) return;
2717
2718     ox= s->sprite_offset[1][0] + s->sprite_delta[0][0]*s->mb_x*8 + s->sprite_delta[0][1]*s->mb_y*8;
2719     oy= s->sprite_offset[1][1] + s->sprite_delta[1][0]*s->mb_x*8 + s->sprite_delta[1][1]*s->mb_y*8;
2720
2721     ptr = ref_picture[1];
2722     s->dsp.gmc(dest_cb, ptr, uvlinesize, 8,
2723            ox,
2724            oy,
2725            s->sprite_delta[0][0], s->sprite_delta[0][1],
2726            s->sprite_delta[1][0], s->sprite_delta[1][1],
2727            a+1, (1<<(2*a+1)) - s->no_rounding,
2728            s->h_edge_pos>>1, s->v_edge_pos>>1);
2729
2730     ptr = ref_picture[2];
2731     s->dsp.gmc(dest_cr, ptr, uvlinesize, 8,
2732            ox,
2733            oy,
2734            s->sprite_delta[0][0], s->sprite_delta[0][1],
2735            s->sprite_delta[1][0], s->sprite_delta[1][1],
2736            a+1, (1<<(2*a+1)) - s->no_rounding,
2737            s->h_edge_pos>>1, s->v_edge_pos>>1);
2738 }
2739
2740 /**
2741  * Copies a rectangular area of samples to a temporary buffer and replicates the border samples.
2742  * @param buf destination buffer
2743  * @param src source buffer
2744  * @param linesize number of bytes between 2 vertically adjacent samples in both the source and destination buffers
2745  * @param block_w width of block
2746  * @param block_h height of block
2747  * @param src_x x coordinate of the top left sample of the block in the source buffer
2748  * @param src_y y coordinate of the top left sample of the block in the source buffer
2749  * @param w width of the source buffer
2750  * @param h height of the source buffer
2751  */
2752 void ff_emulated_edge_mc(uint8_t *buf, uint8_t *src, int linesize, int block_w, int block_h,
2753                                     int src_x, int src_y, int w, int h){
2754     int x, y;
2755     int start_y, start_x, end_y, end_x;
2756
2757     if(src_y>= h){
2758         src+= (h-1-src_y)*linesize;
2759         src_y=h-1;
2760     }else if(src_y<=-block_h){
2761         src+= (1-block_h-src_y)*linesize;
2762         src_y=1-block_h;
2763     }
2764     if(src_x>= w){
2765         src+= (w-1-src_x);
2766         src_x=w-1;
2767     }else if(src_x<=-block_w){
2768         src+= (1-block_w-src_x);
2769         src_x=1-block_w;
2770     }
2771
2772     start_y= FFMAX(0, -src_y);
2773     start_x= FFMAX(0, -src_x);
2774     end_y= FFMIN(block_h, h-src_y);
2775     end_x= FFMIN(block_w, w-src_x);
2776
2777     // copy existing part
2778     for(y=start_y; y<end_y; y++){
2779         for(x=start_x; x<end_x; x++){
2780             buf[x + y*linesize]= src[x + y*linesize];
2781         }
2782     }
2783
2784     //top
2785     for(y=0; y<start_y; y++){
2786         for(x=start_x; x<end_x; x++){
2787             buf[x + y*linesize]= buf[x + start_y*linesize];
2788         }
2789     }
2790
2791     //bottom
2792     for(y=end_y; y<block_h; y++){
2793         for(x=start_x; x<end_x; x++){
2794             buf[x + y*linesize]= buf[x + (end_y-1)*linesize];
2795         }
2796     }
2797
2798     for(y=0; y<block_h; y++){
2799        //left
2800         for(x=0; x<start_x; x++){
2801             buf[x + y*linesize]= buf[start_x + y*linesize];
2802         }
2803
2804        //right
2805         for(x=end_x; x<block_w; x++){
2806             buf[x + y*linesize]= buf[end_x - 1 + y*linesize];
2807         }
2808     }
2809 }
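/* Illustrative example (hypothetical values): with block_w = block_h = 9,
 * w = 88, h = 72, src_x = 86, src_y = 4, only end_x = FFMIN(9, 88-86) = 2
 * columns exist in the source; columns 0..1 are copied and columns 2..8 are
 * filled by replicating column 1, while no vertical padding is needed since
 * start_y = 0 and end_y = 9. */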
2810
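/**
 * half-pel motion compensation of one block.
 * dxy = ((motion_y & 1) << 1) | (motion_x & 1) selects one of the four
 * pix_op entries: bit 0 requests horizontal half-pel interpolation,
 * bit 1 vertical half-pel interpolation.
 */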
2811 static inline int hpel_motion(MpegEncContext *s,
2812                                   uint8_t *dest, uint8_t *src,
2813                                   int field_based, int field_select,
2814                                   int src_x, int src_y,
2815                                   int width, int height, int stride,
2816                                   int h_edge_pos, int v_edge_pos,
2817                                   int w, int h, op_pixels_func *pix_op,
2818                                   int motion_x, int motion_y)
2819 {
2820     int dxy;
2821     int emu=0;
2822
2823     dxy = ((motion_y & 1) << 1) | (motion_x & 1);
2824     src_x += motion_x >> 1;
2825     src_y += motion_y >> 1;
2826
2827     /* WARNING: do not forget half pels */
2828     src_x = clip(src_x, -16, width); //FIXME unneeded for emu?
2829     if (src_x == width)
2830         dxy &= ~1;
2831     src_y = clip(src_y, -16, height);
2832     if (src_y == height)
2833         dxy &= ~2;
2834     src += src_y * stride + src_x;
2835
2836     if(s->unrestricted_mv && (s->flags&CODEC_FLAG_EMU_EDGE)){
2837         if(   (unsigned)src_x > h_edge_pos - (motion_x&1) - w
2838            || (unsigned)src_y > v_edge_pos - (motion_y&1) - h){
2839             ff_emulated_edge_mc(s->edge_emu_buffer, src, s->linesize, w+1, (h+1)<<field_based,
2840                              src_x, src_y<<field_based, h_edge_pos, s->v_edge_pos);
2841             src= s->edge_emu_buffer;
2842             emu=1;
2843         }
2844     }
2845     if(field_select)
2846         src += s->linesize;
2847     pix_op[dxy](dest, src, stride, h);
2848     return emu;
2849 }
2850
2851 static inline int hpel_motion_lowres(MpegEncContext *s,
2852                                   uint8_t *dest, uint8_t *src,
2853                                   int field_based, int field_select,
2854                                   int src_x, int src_y,
2855                                   int width, int height, int stride,
2856                                   int h_edge_pos, int v_edge_pos,
2857                                   int w, int h, h264_chroma_mc_func *pix_op,
2858                                   int motion_x, int motion_y)
2859 {
2860     const int lowres= s->avctx->lowres;
2861     const int s_mask= (2<<lowres)-1;
2862     int emu=0;
2863     int sx, sy;
2864
2865     if(s->quarter_sample){
2866         motion_x/=2;
2867         motion_y/=2;
2868     }
2869
2870     sx= motion_x & s_mask;
2871     sy= motion_y & s_mask;
2872     src_x += motion_x >> (lowres+1);
2873     src_y += motion_y >> (lowres+1);
2874
2875     src += src_y * stride + src_x;
2876
2877     if(   (unsigned)src_x > h_edge_pos                 - (!!sx) - w
2878        || (unsigned)src_y >(v_edge_pos >> field_based) - (!!sy) - h){
2879         ff_emulated_edge_mc(s->edge_emu_buffer, src, s->linesize, w+1, (h+1)<<field_based,
2880                             src_x, src_y<<field_based, h_edge_pos, v_edge_pos);
2881         src= s->edge_emu_buffer;
2882         emu=1;
2883     }
2884
2885     sx <<= 2 - lowres;
2886     sy <<= 2 - lowres;
2887     if(field_select)
2888         src += s->linesize;
2889     pix_op[lowres](dest, src, stride, h, sx, sy);
2890     return emu;
2891 }
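/* Note: in the lowres path the vector keeps lowres+1 fractional bits
 * (s_mask = (2<<lowres)-1); sx/sy are then rescaled by << (2 - lowres) so
 * that they land on the 0..7 eighth-pel grid expected by the
 * h264_chroma_mc_func bilinear filters used for lowres interpolation. */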
2892
2893 /* apply one mpeg motion vector to the three components */
2894 static always_inline void mpeg_motion(MpegEncContext *s,
2895                                uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2896                                int field_based, int bottom_field, int field_select,
2897                                uint8_t **ref_picture, op_pixels_func (*pix_op)[4],
2898                                int motion_x, int motion_y, int h)
2899 {
2900     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
2901     int dxy, uvdxy, mx, my, src_x, src_y, uvsrc_x, uvsrc_y, v_edge_pos, uvlinesize, linesize;
2902
2903 #if 0
2904 if(s->quarter_sample)
2905 {
2906     motion_x>>=1;
2907     motion_y>>=1;
2908 }
2909 #endif
2910
2911     v_edge_pos = s->v_edge_pos >> field_based;
2912     linesize   = s->current_picture.linesize[0] << field_based;
2913     uvlinesize = s->current_picture.linesize[1] << field_based;
2914
2915     dxy = ((motion_y & 1) << 1) | (motion_x & 1);
2916     src_x = s->mb_x* 16               + (motion_x >> 1);
2917     src_y =(s->mb_y<<(4-field_based)) + (motion_y >> 1);
2918
2919     if (s->out_format == FMT_H263) {
2920         if((s->workaround_bugs & FF_BUG_HPEL_CHROMA) && field_based){
2921             mx = (motion_x>>1)|(motion_x&1);
2922             my = motion_y >>1;
2923             uvdxy = ((my & 1) << 1) | (mx & 1);
2924             uvsrc_x = s->mb_x* 8               + (mx >> 1);
2925             uvsrc_y = (s->mb_y<<(3-field_based)) + (my >> 1);
2926         }else{
2927             uvdxy = dxy | (motion_y & 2) | ((motion_x & 2) >> 1);
2928             uvsrc_x = src_x>>1;
2929             uvsrc_y = src_y>>1;
2930         }
2931     }else if(s->out_format == FMT_H261){// even chroma MVs are full-pel in H.261
2932         mx = motion_x / 4;
2933         my = motion_y / 4;
2934         uvdxy = 0;
2935         uvsrc_x = s->mb_x*8 + mx;
2936         uvsrc_y = s->mb_y*8 + my;
2937     } else {
2938         if(s->chroma_y_shift){
2939             mx = motion_x / 2;
2940             my = motion_y / 2;
2941             uvdxy = ((my & 1) << 1) | (mx & 1);
2942             uvsrc_x = s->mb_x* 8               + (mx >> 1);
2943             uvsrc_y = (s->mb_y<<(3-field_based)) + (my >> 1);
2944         } else {
2945             if(s->chroma_x_shift){
2946             //Chroma422
2947                 mx = motion_x / 2;
2948                 uvdxy = ((motion_y & 1) << 1) | (mx & 1);
2949                 uvsrc_x = s->mb_x* 8           + (mx >> 1);
2950                 uvsrc_y = src_y;
2951             } else {
2952             //Chroma444
2953                 uvdxy = dxy;
2954                 uvsrc_x = src_x;
2955                 uvsrc_y = src_y;
2956             }
2957         }
2958     }
2959
2960     ptr_y  = ref_picture[0] + src_y * linesize + src_x;
2961     ptr_cb = ref_picture[1] + uvsrc_y * uvlinesize + uvsrc_x;
2962     ptr_cr = ref_picture[2] + uvsrc_y * uvlinesize + uvsrc_x;
2963
2964     if(   (unsigned)src_x > s->h_edge_pos - (motion_x&1) - 16
2965        || (unsigned)src_y >    v_edge_pos - (motion_y&1) - h){
2966             if(s->codec_id == CODEC_ID_MPEG2VIDEO ||
2967                s->codec_id == CODEC_ID_MPEG1VIDEO){
2968                 av_log(s->avctx,AV_LOG_DEBUG,"MPEG motion vector out of boundary\n");
2969                 return ;
2970             }
2971             ff_emulated_edge_mc(s->edge_emu_buffer, ptr_y, s->linesize, 17, 17+field_based,
2972                              src_x, src_y<<field_based, s->h_edge_pos, s->v_edge_pos);
2973             ptr_y = s->edge_emu_buffer;
2974             if(!(s->flags&CODEC_FLAG_GRAY)){
2975                 uint8_t *uvbuf= s->edge_emu_buffer+18*s->linesize;
2976                 ff_emulated_edge_mc(uvbuf  , ptr_cb, s->uvlinesize, 9, 9+field_based,
2977                                  uvsrc_x, uvsrc_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
2978                 ff_emulated_edge_mc(uvbuf+16, ptr_cr, s->uvlinesize, 9, 9+field_based,
2979                                  uvsrc_x, uvsrc_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
2980                 ptr_cb= uvbuf;
2981                 ptr_cr= uvbuf+16;
2982             }
2983     }
2984
2985     if(bottom_field){ //FIXME use this for field pix too instead of the obnoxious hack which changes picture.data
2986         dest_y += s->linesize;
2987         dest_cb+= s->uvlinesize;
2988         dest_cr+= s->uvlinesize;
2989     }
2990
2991     if(field_select){
2992         ptr_y += s->linesize;
2993         ptr_cb+= s->uvlinesize;
2994         ptr_cr+= s->uvlinesize;
2995     }
2996
2997     pix_op[0][dxy](dest_y, ptr_y, linesize, h);
2998
2999     if(!(s->flags&CODEC_FLAG_GRAY)){
3000         pix_op[s->chroma_x_shift][uvdxy](dest_cb, ptr_cb, uvlinesize, h >> s->chroma_y_shift);
3001         pix_op[s->chroma_x_shift][uvdxy](dest_cr, ptr_cr, uvlinesize, h >> s->chroma_y_shift);
3002     }
3003 #if defined(CONFIG_H261_ENCODER) || defined(CONFIG_H261_DECODER)
3004     if(s->out_format == FMT_H261){
3005         ff_h261_loop_filter(s);
3006     }
3007 #endif
3008 }
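/* Note: the chroma sub-pel derivation above distinguishes three sampling
 * cases: chroma_y_shift set (4:2:0, both chroma axes halved),
 * chroma_x_shift only (4:2:2, horizontal halving only), and neither
 * (4:4:4, chroma reuses the luma dxy and source position), plus the
 * H.263 and H.261 special rules for rounding the chroma vector. */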
3009
3010 /* apply one mpeg motion vector to the three components */
3011 static always_inline void mpeg_motion_lowres(MpegEncContext *s,
3012                                uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
3013                                int field_based, int bottom_field, int field_select,
3014                                uint8_t **ref_picture, h264_chroma_mc_func *pix_op,
3015                                int motion_x, int motion_y, int h)
3016 {
3017     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
3018     int mx, my, src_x, src_y, uvsrc_x, uvsrc_y, uvlinesize, linesize, sx, sy, uvsx, uvsy;
3019     const int lowres= s->avctx->lowres;
3020     const int block_s= 8>>lowres;
3021     const int s_mask= (2<<lowres)-1;
3022     const int h_edge_pos = s->h_edge_pos >> lowres;
3023     const int v_edge_pos = s->v_edge_pos >> lowres;
3024     linesize   = s->current_picture.linesize[0] << field_based;
3025     uvlinesize = s->current_picture.linesize[1] << field_based;
3026
3027     if(s->quarter_sample){ //FIXME obviously not perfect but qpel won't work in lowres anyway
3028         motion_x/=2;
3029         motion_y/=2;
3030     }
3031
3032     if(field_based){
3033         motion_y += (bottom_field - field_select)*((1<<lowres)-1);
3034     }
3035
3036     sx= motion_x & s_mask;
3037     sy= motion_y & s_mask;
3038     src_x = s->mb_x*2*block_s               + (motion_x >> (lowres+1));
3039     src_y =(s->mb_y*2*block_s>>field_based) + (motion_y >> (lowres+1));
3040
3041     if (s->out_format == FMT_H263) {
3042         uvsx = ((motion_x>>1) & s_mask) | (sx&1);
3043         uvsy = ((motion_y>>1) & s_mask) | (sy&1);
3044         uvsrc_x = src_x>>1;
3045         uvsrc_y = src_y>>1;
3046     }else if(s->out_format == FMT_H261){// even chroma MVs are full-pel in H.261
3047         mx = motion_x / 4;
3048         my = motion_y / 4;
3049         uvsx = (2*mx) & s_mask;
3050         uvsy = (2*my) & s_mask;
3051         uvsrc_x = s->mb_x*block_s               + (mx >> lowres);
3052         uvsrc_y = s->mb_y*block_s               + (my >> lowres);
3053     } else {
3054         mx = motion_x / 2;
3055         my = motion_y / 2;
3056         uvsx = mx & s_mask;
3057         uvsy = my & s_mask;
3058         uvsrc_x = s->mb_x*block_s               + (mx >> (lowres+1));
3059         uvsrc_y =(s->mb_y*block_s>>field_based) + (my >> (lowres+1));
3060     }
3061
3062     ptr_y  = ref_picture[0] + src_y * linesize + src_x;
3063     ptr_cb = ref_picture[1] + uvsrc_y * uvlinesize + uvsrc_x;
3064     ptr_cr = ref_picture[2] + uvsrc_y * uvlinesize + uvsrc_x;
3065
3066     if(   (unsigned)src_x > h_edge_pos                 - (!!sx) - 2*block_s
3067        || (unsigned)src_y >(v_edge_pos >> field_based) - (!!sy) - h){
3068             ff_emulated_edge_mc(s->edge_emu_buffer, ptr_y, s->linesize, 17, 17+field_based,
3069                              src_x, src_y<<field_based, h_edge_pos, v_edge_pos);
3070             ptr_y = s->edge_emu_buffer;
3071             if(!(s->flags&CODEC_FLAG_GRAY)){
3072                 uint8_t *uvbuf= s->edge_emu_buffer+18*s->linesize;
3073                 ff_emulated_edge_mc(uvbuf  , ptr_cb, s->uvlinesize, 9, 9+field_based,
3074                                  uvsrc_x, uvsrc_y<<field_based, h_edge_pos>>1, v_edge_pos>>1);
3075                 ff_emulated_edge_mc(uvbuf+16, ptr_cr, s->uvlinesize, 9, 9+field_based,
3076                                  uvsrc_x, uvsrc_y<<field_based, h_edge_pos>>1, v_edge_pos>>1);
3077                 ptr_cb= uvbuf;
3078                 ptr_cr= uvbuf+16;
3079             }
3080     }
3081
3082     if(bottom_field){ //FIXME use this for field pix too instead of the obnoxious hack which changes picture.data
3083         dest_y += s->linesize;
3084         dest_cb+= s->uvlinesize;
3085         dest_cr+= s->uvlinesize;
3086     }
3087
3088     if(field_select){
3089         ptr_y += s->linesize;
3090         ptr_cb+= s->uvlinesize;
3091         ptr_cr+= s->uvlinesize;
3092     }
3093
3094     sx <<= 2 - lowres;
3095     sy <<= 2 - lowres;
3096     pix_op[lowres-1](dest_y, ptr_y, linesize, h, sx, sy);
3097
3098     if(!(s->flags&CODEC_FLAG_GRAY)){
3099         uvsx <<= 2 - lowres;
3100         uvsy <<= 2 - lowres;
3101         pix_op[lowres](dest_cb, ptr_cb, uvlinesize, h >> s->chroma_y_shift, uvsx, uvsy);
3102         pix_op[lowres](dest_cr, ptr_cr, uvlinesize, h >> s->chroma_y_shift, uvsx, uvsy);
3103     }
3104     //FIXME h261 lowres loop filter
3105 }
3106
3107 //FIXME move to dsputil, avg variant, 16x16 version
3108 static inline void put_obmc(uint8_t *dst, uint8_t *src[5], int stride){
3109     int x;
3110     uint8_t * const top   = src[1];
3111     uint8_t * const left  = src[2];
3112     uint8_t * const mid   = src[0];
3113     uint8_t * const right = src[3];
3114     uint8_t * const bottom= src[4];
3115 #define OBMC_FILTER(x, t, l, m, r, b)\
3116     dst[x]= (t*top[x] + l*left[x] + m*mid[x] + r*right[x] + b*bottom[x] + 4)>>3
3117 #define OBMC_FILTER4(x, t, l, m, r, b)\
3118     OBMC_FILTER(x         , t, l, m, r, b);\
3119     OBMC_FILTER(x+1       , t, l, m, r, b);\
3120     OBMC_FILTER(x  +stride, t, l, m, r, b);\
3121     OBMC_FILTER(x+1+stride, t, l, m, r, b);
3122
3123     x=0;
3124     OBMC_FILTER (x  , 2, 2, 4, 0, 0);
3125     OBMC_FILTER (x+1, 2, 1, 5, 0, 0);
3126     OBMC_FILTER4(x+2, 2, 1, 5, 0, 0);
3127     OBMC_FILTER4(x+4, 2, 0, 5, 1, 0);
3128     OBMC_FILTER (x+6, 2, 0, 5, 1, 0);
3129     OBMC_FILTER (x+7, 2, 0, 4, 2, 0);
3130     x+= stride;
3131     OBMC_FILTER (x  , 1, 2, 5, 0, 0);
3132     OBMC_FILTER (x+1, 1, 2, 5, 0, 0);
3133     OBMC_FILTER (x+6, 1, 0, 5, 2, 0);
3134     OBMC_FILTER (x+7, 1, 0, 5, 2, 0);
3135     x+= stride;
3136     OBMC_FILTER4(x  , 1, 2, 5, 0, 0);
3137     OBMC_FILTER4(x+2, 1, 1, 6, 0, 0);
3138     OBMC_FILTER4(x+4, 1, 0, 6, 1, 0);
3139     OBMC_FILTER4(x+6, 1, 0, 5, 2, 0);
3140     x+= 2*stride;
3141     OBMC_FILTER4(x  , 0, 2, 5, 0, 1);
3142     OBMC_FILTER4(x+2, 0, 1, 6, 0, 1);
3143     OBMC_FILTER4(x+4, 0, 0, 6, 1, 1);
3144     OBMC_FILTER4(x+6, 0, 0, 5, 2, 1);
3145     x+= 2*stride;
3146     OBMC_FILTER (x  , 0, 2, 5, 0, 1);
3147     OBMC_FILTER (x+1, 0, 2, 5, 0, 1);
3148     OBMC_FILTER4(x+2, 0, 1, 5, 0, 2);
3149     OBMC_FILTER4(x+4, 0, 0, 5, 1, 2);
3150     OBMC_FILTER (x+6, 0, 0, 5, 2, 1);
3151     OBMC_FILTER (x+7, 0, 0, 5, 2, 1);
3152     x+= stride;
3153     OBMC_FILTER (x  , 0, 2, 4, 0, 2);
3154     OBMC_FILTER (x+1, 0, 1, 5, 0, 2);
3155     OBMC_FILTER (x+6, 0, 0, 5, 1, 2);
3156     OBMC_FILTER (x+7, 0, 0, 4, 2, 2);
3157 }
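/* Note: each OBMC_FILTER weight set (t, l, m, r, b) sums to 8, so the output
 * is a weighted average of the mid prediction and its four neighbours,
 * rounded by the +4 >> 3 term; e.g. (2, 2, 4, 0, 0) at the top-left corner
 * mixes half of the mid prediction with a quarter each of the top and left
 * predictions. */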
3158
3159 /* OBMC for one 8x8 luma block */
3160 static inline void obmc_motion(MpegEncContext *s,
3161                                uint8_t *dest, uint8_t *src,
3162                                int src_x, int src_y,
3163                                op_pixels_func *pix_op,
3164                                int16_t mv[5][2]/* mid top left right bottom*/)
3165 #define MID    0
3166 {
3167     int i;
3168     uint8_t *ptr[5];
3169
3170     assert(s->quarter_sample==0);
3171
3172     for(i=0; i<5; i++){
3173         if(i && mv[i][0]==mv[MID][0] && mv[i][1]==mv[MID][1]){
3174             ptr[i]= ptr[MID];
3175         }else{
3176             ptr[i]= s->obmc_scratchpad + 8*(i&1) + s->linesize*8*(i>>1);
3177             hpel_motion(s, ptr[i], src, 0, 0,
3178                         src_x, src_y,
3179                         s->width, s->height, s->linesize,
3180                         s->h_edge_pos, s->v_edge_pos,
3181                         8, 8, pix_op,
3182                         mv[i][0], mv[i][1]);
3183         }
3184     }
3185
3186     put_obmc(dest, ptr, s->linesize);
3187 }
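/* Note: neighbours whose MV equals the MID vector simply reuse the MID
 * prediction pointer, so identical vectors cost no extra half-pel MC calls;
 * the five 8x8 predictions are then blended by put_obmc(). */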
3188
3189 static inline void qpel_motion(MpegEncContext *s,
3190                                uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
3191                                int field_based, int bottom_field, int field_select,
3192                                uint8_t **ref_picture, op_pixels_func (*pix_op)[4],
3193                                qpel_mc_func (*qpix_op)[16],
3194                                int motion_x, int motion_y, int h)
3195 {
3196     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
3197     int dxy, uvdxy, mx, my, src_x, src_y, uvsrc_x, uvsrc_y, v_edge_pos, linesize, uvlinesize;
3198
3199     dxy = ((motion_y & 3) << 2) | (motion_x & 3);
3200     src_x = s->mb_x *  16                 + (motion_x >> 2);
3201     src_y = s->mb_y * (16 >> field_based) + (motion_y >> 2);
3202
3203     v_edge_pos = s->v_edge_pos >> field_based;
3204     linesize = s->linesize << field_based;
3205     uvlinesize = s->uvlinesize << field_based;
3206
3207     if(field_based){
3208         mx= motion_x/2;
3209         my= motion_y>>1;
3210     }else if(s->workaround_bugs&FF_BUG_QPEL_CHROMA2){
3211         static const int rtab[8]= {0,0,1,1,0,0,0,1};
3212         mx= (motion_x>>1) + rtab[motion_x&7];
3213         my= (motion_y>>1) + rtab[motion_y&7];
3214     }else if(s->workaround_bugs&FF_BUG_QPEL_CHROMA){
3215         mx= (motion_x>>1)|(motion_x&1);
3216         my= (motion_y>>1)|(motion_y&1);
3217     }else{
3218         mx= motion_x/2;
3219         my= motion_y/2;
3220     }
3221     mx= (mx>>1)|(mx&1);
3222     my= (my>>1)|(my&1);
3223
3224     uvdxy= (mx&1) | ((my&1)<<1);
3225     mx>>=1;
3226     my>>=1;
3227
3228     uvsrc_x = s->mb_x *  8                 + mx;
3229     uvsrc_y = s->mb_y * (8 >> field_based) + my;
3230
3231     ptr_y  = ref_picture[0] +   src_y *   linesize +   src_x;
3232     ptr_cb = ref_picture[1] + uvsrc_y * uvlinesize + uvsrc_x;
3233     ptr_cr = ref_picture[2] + uvsrc_y * uvlinesize + uvsrc_x;
3234
3235     if(   (unsigned)src_x > s->h_edge_pos - (motion_x&3) - 16
3236        || (unsigned)src_y >    v_edge_pos - (motion_y&3) - h  ){
3237         ff_emulated_edge_mc(s->edge_emu_buffer, ptr_y, s->linesize, 17, 17+field_based,
3238                          src_x, src_y<<field_based, s->h_edge_pos, s->v_edge_pos);
3239         ptr_y= s->edge_emu_buffer;
3240         if(!(s->flags&CODEC_FLAG_GRAY)){
3241             uint8_t *uvbuf= s->edge_emu_buffer + 18*s->linesize;
3242             ff_emulated_edge_mc(uvbuf, ptr_cb, s->uvlinesize, 9, 9 + field_based,
3243                              uvsrc_x, uvsrc_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
3244             ff_emulated_edge_mc(uvbuf + 16, ptr_cr, s->uvlinesize, 9, 9 + field_based,
3245                              uvsrc_x, uvsrc_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
3246             ptr_cb= uvbuf;
3247             ptr_cr= uvbuf + 16;
3248         }
3249     }
3250
3251     if(!field_based)
3252         qpix_op[0][dxy](dest_y, ptr_y, linesize);
3253     else{
3254         if(bottom_field){
3255             dest_y += s->linesize;
3256             dest_cb+= s->uvlinesize;
3257             dest_cr+= s->uvlinesize;
3258         }
3259
3260         if(field_select){
3261             ptr_y  += s->linesize;
3262             ptr_cb += s->uvlinesize;
3263             ptr_cr += s->uvlinesize;
3264         }
3265         //damn interlaced mode
3266         //FIXME boundary mirroring is not exactly correct here
3267         qpix_op[1][dxy](dest_y  , ptr_y  , linesize);
3268         qpix_op[1][dxy](dest_y+8, ptr_y+8, linesize);
3269     }
3270     if(!(s->flags&CODEC_FLAG_GRAY)){
3271         pix_op[1][uvdxy](dest_cr, ptr_cr, uvlinesize, h >> 1);
3272         pix_op[1][uvdxy](dest_cb, ptr_cb, uvlinesize, h >> 1);
3273     }
3274 }
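/* Note: for quarter-pel luma, dxy = ((motion_y & 3) << 2) | (motion_x & 3)
 * indexes the 16-entry qpix_op table (low two bits: horizontal quarter-pel
 * phase, next two bits: vertical phase); chroma stays half-pel and goes
 * through the pix_op[1] (8x8) functions with the uvdxy index. */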
3275
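/**
 * Derives a chroma motion vector component with the special H.263 rounding:
 * approximately value/8, with h263_chroma_roundtab biasing the result toward
 * half-pel positions, applied symmetrically for negative values. In the 4MV
 * chroma code below it is applied to the sum of the four luma vectors.
 */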
3276 inline int ff_h263_round_chroma(int x){
3277     if (x >= 0)
3278         return  (h263_chroma_roundtab[x & 0xf] + ((x >> 3) & ~1));
3279     else {
3280         x = -x;
3281         return -(h263_chroma_roundtab[x & 0xf] + ((x >> 3) & ~1));
3282     }
3283 }
3284
3285 /**
3286  * H.263 chroma 4MV motion compensation.
3287  */
3288 static inline void chroma_4mv_motion(MpegEncContext *s,
3289                                      uint8_t *dest_cb, uint8_t *dest_cr,
3290                                      uint8_t **ref_picture,
3291                                      op_pixels_func *pix_op,
3292                                      int mx, int my){
3293     int dxy, emu=0, src_x, src_y, offset;
3294     uint8_t *ptr;
3295
3296     /* In case of 8X8, we construct a single chroma motion vector
3297        with a special rounding */
3298     mx= ff_h263_round_chroma(mx);
3299     my= ff_h263_round_chroma(my);
3300
3301     dxy = ((my & 1) << 1) | (mx & 1);
3302     mx >>= 1;
3303     my >>= 1;
3304
3305     src_x = s->mb_x * 8 + mx;
3306     src_y = s->mb_y * 8 + my;
3307     src_x = clip(src_x, -8, s->width/2);
3308     if (src_x == s->width/2)
3309         dxy &= ~1;
3310     src_y = clip(src_y, -8, s->height/2);
3311     if (src_y == s->height/2)
3312         dxy &= ~2;
3313
3314     offset = (src_y * (s->uvlinesize)) + src_x;
3315     ptr = ref_picture[1] + offset;
3316     if(s->flags&CODEC_FLAG_EMU_EDGE){
3317         if(   (unsigned)src_x > (s->h_edge_pos>>1) - (dxy &1) - 8
3318            || (unsigned)src_y > (s->v_edge_pos>>1) - (dxy>>1) - 8){
3319             ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
3320             ptr= s->edge_emu_buffer;
3321             emu=1;
3322         }
3323     }
3324     pix_op[dxy](dest_cb, ptr, s->uvlinesize, 8);
3325
3326     ptr = ref_picture[2] + offset;
3327     if(emu){
3328         ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
3329         ptr= s->edge_emu_buffer;
3330     }
3331     pix_op[dxy](dest_cr, ptr, s->uvlinesize, 8);
3332 }
3333
3334 static inline void chroma_4mv_motion_lowres(MpegEncContext *s,
3335                                      uint8_t *dest_cb, uint8_t *dest_cr,
3336                                      uint8_t **ref_picture,
3337                                      h264_chroma_mc_func *pix_op,
3338                                      int mx, int my){
3339     const int lowres= s->avctx->lowres;
3340     const int block_s= 8>>lowres;
3341     const int s_mask= (2<<lowres)-1;
3342     const int h_edge_pos = s->h_edge_pos >> (lowres+1);
3343     const int v_edge_pos = s->v_edge_pos >> (lowres+1);
3344     int emu=0, src_x, src_y, offset, sx, sy;
3345     uint8_t *ptr;
3346
3347     if(s->quarter_sample){
3348         mx/=2;
3349         my/=2;
3350     }
3351
3352     /* In case of 8X8, we construct a single chroma motion vector
3353        with a special rounding */
3354     mx= ff_h263_round_chroma(mx);
3355     my= ff_h263_round_chroma(my);
3356
3357     sx= mx & s_mask;
3358     sy= my & s_mask;
3359     src_x = s->mb_x*block_s + (mx >> (lowres+1));
3360     src_y = s->mb_y*block_s + (my >> (lowres+1));
3361
3362     offset = src_y * s->uvlinesize + src_x;
3363     ptr = ref_picture[1] + offset;
3364     if(s->flags&CODEC_FLAG_EMU_EDGE){
3365         if(   (unsigned)src_x > h_edge_pos - (!!sx) - block_s
3366            || (unsigned)src_y > v_edge_pos - (!!sy) - block_s){
3367             ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, src_x, src_y, h_edge_pos, v_edge_pos);
3368             ptr= s->edge_emu_buffer;
3369             emu=1;
3370         }
3371     }
3372     sx <<= 2 - lowres;
3373     sy <<= 2 - lowres;
3374     pix_op[lowres](dest_cb, ptr, s->uvlinesize, block_s, sx, sy);
3375
3376     ptr = ref_picture[2] + offset;
3377     if(emu){
3378         ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, src_x, src_y, h_edge_pos, v_edge_pos);
3379         ptr= s->edge_emu_buffer;
3380     }
3381     pix_op[lowres](dest_cr, ptr, s->uvlinesize, block_s, sx, sy);
3382 }
3383
3384 static inline void prefetch_motion(MpegEncContext *s, uint8_t **pix, int dir){
3385     /* fetch pixels for the estimated MV 4 macroblocks ahead;
3386      * optimized for 64-byte cache lines */
3387     const int shift = s->quarter_sample ? 2 : 1;
3388     const int mx= (s->mv[dir][0][0]>>shift) + 16*s->mb_x + 8;
3389     const int my= (s->mv[dir][0][1]>>shift) + 16*s->mb_y;
3390     int off= mx + (my + (s->mb_x&3)*4)*s->linesize + 64;
3391     s->dsp.prefetch(pix[0]+off, s->linesize, 4);
3392     off= (mx>>1) + ((my>>1) + (s->mb_x&7))*s->uvlinesize + 64;
3393     s->dsp.prefetch(pix[1]+off, pix[2]-pix[1], 2);
3394 }
3395
3396 /**
3397  * motion compensation of a single macroblock
3398  * @param s context
3399  * @param dest_y luma destination pointer
3400  * @param dest_cb chroma cb/u destination pointer
3401  * @param dest_cr chroma cr/v destination pointer
3402  * @param dir direction (0->forward, 1->backward)
3403  * @param ref_picture array[3] of pointers to the 3 planes of the reference picture
3404  * @param pix_op halfpel motion compensation function (average or put normally)
3405  * @param qpix_op qpel motion compensation function (average or put normally)
3406  * the motion vectors are taken from s->mv and the MV type from s->mv_type
3407  */
3408 static inline void MPV_motion(MpegEncContext *s,
3409                               uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
3410                               int dir, uint8_t **ref_picture,
3411                               op_pixels_func (*pix_op)[4], qpel_mc_func (*qpix_op)[16])
3412 {
3413     int dxy, mx, my, src_x, src_y, motion_x, motion_y;
3414     int mb_x, mb_y, i;
3415     uint8_t *ptr, *dest;
3416
3417     mb_x = s->mb_x;
3418     mb_y = s->mb_y;
3419
3420     prefetch_motion(s, ref_picture, dir);
3421
3422     if(s->obmc && s->pict_type != B_TYPE){
3423         int16_t mv_cache[4][4][2];
3424         const int xy= s->mb_x + s->mb_y*s->mb_stride;
3425         const int mot_stride= s->b8_stride;
3426         const int mot_xy= mb_x*2 + mb_y*2*mot_stride;
3427
3428         assert(!s->mb_skipped);
3429
3430         memcpy(mv_cache[1][1], s->current_picture.motion_val[0][mot_xy           ], sizeof(int16_t)*4);
3431         memcpy(mv_cache[2][1], s->current_picture.motion_val[0][mot_xy+mot_stride], sizeof(int16_t)*4);
3432         memcpy(mv_cache[3][1], s->current_picture.motion_val[0][mot_xy+mot_stride], sizeof(int16_t)*4);
3433
3434         if(mb_y==0 || IS_INTRA(s->current_picture.mb_type[xy-s->mb_stride])){
3435             memcpy(mv_cache[0][1], mv_cache[1][1], sizeof(int16_t)*4);
3436         }else{
3437             memcpy(mv_cache[0][1], s->current_picture.motion_val[0][mot_xy-mot_stride], sizeof(int16_t)*4);
3438         }
3439
3440         if(mb_x==0 || IS_INTRA(s->current_picture.mb_type[xy-1])){
3441             *(int32_t*)mv_cache[1][0]= *(int32_t*)mv_cache[1][1];
3442             *(int32_t*)mv_cache[2][0]= *(int32_t*)mv_cache[2][1];
3443         }else{
3444             *(int32_t*)mv_cache[1][0]= *(int32_t*)s->current_picture.motion_val[0][mot_xy-1];
3445             *(int32_t*)mv_cache[2][0]= *(int32_t*)s->current_picture.motion_val[0][mot_xy-1+mot_stride];
3446         }
3447
3448         if(mb_x+1>=s->mb_width || IS_INTRA(s->current_picture.mb_type[xy+1])){
3449             *(int32_t*)mv_cache[1][3]= *(int32_t*)mv_cache[1][2];
3450             *(int32_t*)mv_cache[2][3]= *(int32_t*)mv_cache[2][2];
3451         }else{
3452             *(int32_t*)mv_cache[1][3]= *(int32_t*)s->current_picture.motion_val[0][mot_xy+2];
3453             *(int32_t*)mv_cache[2][3]= *(int32_t*)s->current_picture.motion_val[0][mot_xy+2+mot_stride];
3454         }
3455
3456         mx = 0;
3457         my = 0;
3458         for(i=0;i<4;i++) {
3459             const int x= (i&1)+1;
3460             const int y= (i>>1)+1;
3461             int16_t mv[5][2]= {
3462                 {mv_cache[y][x  ][0], mv_cache[y][x  ][1]},
3463                 {mv_cache[y-1][x][0], mv_cache[y-1][x][1]},
3464                 {mv_cache[y][x-1][0], mv_cache[y][x-1][1]},
3465                 {mv_cache[y][x+1][0], mv_cache[y][x+1][1]},
3466                 {mv_cache[y+1][x][0], mv_cache[y+1][x][1]}};
3467             //FIXME cleanup
3468             obmc_motion(s, dest_y + ((i & 1) * 8) + (i >> 1) * 8 * s->linesize,
3469                         ref_picture[0],
3470                         mb_x * 16 + (i & 1) * 8, mb_y * 16 + (i >>1) * 8,
3471                         pix_op[1],
3472                         mv);
3473
3474             mx += mv[0][0];
3475             my += mv[0][1];
3476         }
3477         if(!(s->flags&CODEC_FLAG_GRAY))
3478             chroma_4mv_motion(s, dest_cb, dest_cr, ref_picture, pix_op[1], mx, my);
3479
3480         return;
3481     }
3482
3483     switch(s->mv_type) {
3484     case MV_TYPE_16X16:
3485         if(s->mcsel){
3486             if(s->real_sprite_warping_points==1){
3487                 gmc1_motion(s, dest_y, dest_cb, dest_cr,
3488                             ref_picture);
3489             }else{
3490                 gmc_motion(s, dest_y, dest_cb, dest_cr,
3491                             ref_picture);
3492             }
3493         }else if(s->quarter_sample){
3494             qpel_motion(s, dest_y, dest_cb, dest_cr,
3495                         0, 0, 0,
3496                         ref_picture, pix_op, qpix_op,
3497                         s->mv[dir][0][0], s->mv[dir][0][1], 16);
3498         }else if(s->mspel){
3499             ff_mspel_motion(s, dest_y, dest_cb, dest_cr,
3500                         ref_picture, pix_op,
3501                         s->mv[dir][0][0], s->mv[dir][0][1], 16);
3502         }else
3503         {
3504             mpeg_motion(s, dest_y, dest_cb, dest_cr,
3505                         0, 0, 0,
3506                         ref_picture, pix_op,
3507                         s->mv[dir][0][0], s->mv[dir][0][1], 16);
3508         }
3509         break;
3510     case MV_TYPE_8X8:
3511         mx = 0;
3512         my = 0;
3513         if(s->quarter_sample){
3514             for(i=0;i<4;i++) {
3515                 motion_x = s->mv[dir][i][0];
3516                 motion_y = s->mv[dir][i][1];
3517
3518                 dxy = ((motion_y & 3) << 2) | (motion_x & 3);
3519                 src_x = mb_x * 16 + (motion_x >> 2) + (i & 1) * 8;
3520                 src_y = mb_y * 16 + (motion_y >> 2) + (i >>1) * 8;
3521
3522                 /* WARNING: do not forget half pels */
3523                 src_x = clip(src_x, -16, s->width);
3524                 if (src_x == s->width)
3525                     dxy &= ~3;
3526                 src_y = clip(src_y, -16, s->height);
3527                 if (src_y == s->height)
3528                     dxy &= ~12;
3529
3530                 ptr = ref_picture[0] + (src_y * s->linesize) + (src_x);
3531                 if(s->flags&CODEC_FLAG_EMU_EDGE){
3532                     if(   (unsigned)src_x > s->h_edge_pos - (motion_x&3) - 8
3533                        || (unsigned)src_y > s->v_edge_pos - (motion_y&3) - 8 ){
3534                         ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->linesize, 9, 9, src_x, src_y, s->h_edge_pos, s->v_edge_pos);
3535                         ptr= s->edge_emu_buffer;
3536                     }
3537                 }
3538                 dest = dest_y + ((i & 1) * 8) + (i >> 1) * 8 * s->linesize;
3539                 qpix_op[1][dxy](dest, ptr, s->linesize);
3540
3541                 mx += s->mv[dir][i][0]/2;
3542                 my += s->mv[dir][i][1]/2;
3543             }
3544         }else{
3545             for(i=0;i<4;i++) {
3546                 hpel_motion(s, dest_y + ((i & 1) * 8) + (i >> 1) * 8 * s->linesize,
3547                             ref_picture[0], 0, 0,
3548                             mb_x * 16 + (i & 1) * 8, mb_y * 16 + (i >>1) * 8,
3549                             s->width, s->height, s->linesize,
3550                             s->h_edge_pos, s->v_edge_pos,
3551                             8, 8, pix_op[1],
3552                             s->mv[dir][i][0], s->mv[dir][i][1]);
3553
3554                 mx += s->mv[dir][i][0];
3555                 my += s->mv[dir][i][1];
3556             }
3557         }
3558
3559         if(!(s->flags&CODEC_FLAG_GRAY))
3560             chroma_4mv_motion(s, dest_cb, dest_cr, ref_picture, pix_op[1], mx, my);
3561         break;
3562     case MV_TYPE_FIELD:
3563         if (s->picture_structure == PICT_FRAME) {
3564             if(s->quarter_sample){
3565                 for(i=0; i<2; i++){
3566                     qpel_motion(s, dest_y, dest_cb, dest_cr,
3567                                 1, i, s->field_select[dir][i],
3568                                 ref_picture, pix_op, qpix_op,
3569                                 s->mv[dir][i][0], s->mv[dir][i][1], 8);
3570                 }
3571             }else{
3572                 /* top field */
3573                 mpeg_motion(s, dest_y, dest_cb, dest_cr,
3574                             1, 0, s->field_select[dir][0],
3575                             ref_picture, pix_op,
3576                             s->mv[dir][0][0], s->mv[dir][0][1], 8);
3577                 /* bottom field */
3578                 mpeg_motion(s, dest_y, dest_cb, dest_cr,
3579                             1, 1, s->field_select[dir][1],
3580                             ref_picture, pix_op,
3581                             s->mv[dir][1][0], s->mv[dir][1][1], 8);
3582             }
3583         } else {
3584             if(s->picture_structure != s->field_select[dir][0] + 1 && s->pict_type != B_TYPE && !s->first_field){
3585                 ref_picture= s->current_picture_ptr->data;
3586             }
3587
3588             mpeg_motion(s, dest_y, dest_cb, dest_cr,
3589                         0, 0, s->field_select[dir][0],
3590                         ref_picture, pix_op,
3591                         s->mv[dir][0][0], s->mv[dir][0][1], 16);
3592         }
3593         break;
3594     case MV_TYPE_16X8:
3595         for(i=0; i<2; i++){
3596             uint8_t ** ref2picture;
3597
3598             if(s->picture_structure == s->field_select[dir][i] + 1 || s->pict_type == B_TYPE || s->first_field){
3599                 ref2picture= ref_picture;
3600             }else{
3601                 ref2picture= s->current_picture_ptr->data;
3602             }
3603
3604             mpeg_motion(s, dest_y, dest_cb, dest_cr,
3605                         0, 0, s->field_select[dir][i],
3606                         ref2picture, pix_op,
3607                         s->mv[dir][i][0], s->mv[dir][i][1] + 16*i, 8);
3608
3609             dest_y += 16*s->linesize;
3610             dest_cb+= (16>>s->chroma_y_shift)*s->uvlinesize;
3611             dest_cr+= (16>>s->chroma_y_shift)*s->uvlinesize;
3612         }
3613         break;
3614     case MV_TYPE_DMV:
3615         if(s->picture_structure == PICT_FRAME){
3616             for(i=0; i<2; i++){
3617                 int j;
3618                 for(j=0; j<2; j++){
3619                     mpeg_motion(s, dest_y, dest_cb, dest_cr,
3620                                 1, j, j^i,
3621                                 ref_picture, pix_op,
3622                                 s->mv[dir][2*i + j][0], s->mv[dir][2*i + j][1], 8);
3623                 }
3624                 pix_op = s->dsp.avg_pixels_tab;
3625             }
3626         }else{
3627             for(i=0; i<2; i++){
3628                 mpeg_motion(s, dest_y, dest_cb, dest_cr,
3629                             0, 0, s->picture_structure != i+1,
3630                             ref_picture, pix_op,
3631                             s->mv[dir][2*i][0],s->mv[dir][2*i][1],16);
3632
3633                 // after put we make avg of the same block
3634                 pix_op=s->dsp.avg_pixels_tab;
3635
3636                 //opposite parity is always in the same frame if this is second field
3637                 if(!s->first_field){
3638                     ref_picture = s->current_picture_ptr->data;
3639                 }
3640             }
3641         }
3642     break;
3643     default: assert(0);
3644     }
3645 }
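/* Note: MPV_motion dispatches on s->mv_type: 16X16 covers GMC (gmc1/gmc),
 * qpel, mspel and plain half-pel MC; 8X8 does four 8x8 luma predictions plus
 * one chroma prediction from the rounded vector sum; FIELD and 16X8 handle
 * the interlaced frame and field cases; DMV (dual prime) combines the
 * same- and opposite-parity predictions via a put pass followed by avg. */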
3646
3647 /**
3648  * motion compensation of a single macroblock
3649  * @param s context
3650  * @param dest_y luma destination pointer
3651  * @param dest_cb chroma cb/u destination pointer
3652  * @param dest_cr chroma cr/v destination pointer
3653  * @param dir direction (0->forward, 1->backward)
3654  * @param ref_picture array[3] of pointers to the 3 planes of the reference picture
3655  * @param pix_op halfpel motion compensation function (average or put normally)
3656  * the motion vectors are taken from s->mv and the MV type from s->mv_type
3657  */
3658 static inline void MPV_motion_lowres(MpegEncContext *s,
3659                               uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
3660                               int dir, uint8_t **ref_picture,
3661                               h264_chroma_mc_func *pix_op)
3662 {
3663     int mx, my;
3664     int mb_x, mb_y, i;
3665     const int lowres= s->avctx->lowres;
3666     const int block_s= 8>>lowres;
3667
3668     mb_x = s->mb_x;
3669     mb_y = s->mb_y;
3670
3671     switch(s->mv_type) {
3672     case MV_TYPE_16X16:
3673         mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
3674                     0, 0, 0,
3675                     ref_picture, pix_op,
3676                     s->mv[dir][0][0], s->mv[dir][0][1], 2*block_s);
3677         break;
3678     case MV_TYPE_8X8:
3679         mx = 0;
3680         my = 0;
3681             for(i=0;i<4;i++) {
3682                 hpel_motion_lowres(s, dest_y + ((i & 1) + (i >> 1) * s->linesize)*block_s,
3683                             ref_picture[0], 0, 0,
3684                             (2*mb_x + (i & 1))*block_s, (2*mb_y + (i >>1))*block_s,
3685                             s->width, s->height, s->linesize,
3686                             s->h_edge_pos >> lowres, s->v_edge_pos >> lowres,
3687                             block_s, block_s, pix_op,
3688                             s->mv[dir][i][0], s->mv[dir][i][1]);
3689
3690                 mx += s->mv[dir][i][0];
3691                 my += s->mv[dir][i][1];
3692             }
3693
3694         if(!(s->flags&CODEC_FLAG_GRAY))
3695             chroma_4mv_motion_lowres(s, dest_cb, dest_cr, ref_picture, pix_op, mx, my);
3696         break;
3697     case MV_TYPE_FIELD:
3698         if (s->picture_structure == PICT_FRAME) {
3699             /* top field */
3700             mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
3701                         1, 0, s->field_select[dir][0],
3702                         ref_picture, pix_op,
3703                         s->mv[dir][0][0], s->mv[dir][0][1], block_s);
3704             /* bottom field */
3705             mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
3706                         1, 1, s->field_select[dir][1],
3707                         ref_picture, pix_op,
3708                         s->mv[dir][1][0], s->mv[dir][1][1], block_s);
3709         } else {
3710             if(s->picture_structure != s->field_select[dir][0] + 1 && s->pict_type != B_TYPE && !s->first_field){
3711                 ref_picture= s->current_picture_ptr->data;
3712             }
3713
3714             mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
3715                         0, 0, s->field_select[dir][0],
3716                         ref_picture, pix_op,
3717                         s->mv[dir][0][0], s->mv[dir][0][1], 2*block_s);
3718         }
3719         break;
3720     case MV_TYPE_16X8:
3721         for(i=0; i<2; i++){
3722             uint8_t ** ref2picture;
3723
3724             if(s->picture_structure == s->field_select[dir][i] + 1 || s->pict_type == B_TYPE || s->first_field){
3725                 ref2picture= ref_picture;
3726             }else{
3727                 ref2picture= s->current_picture_ptr->data;
3728             }
3729
3730             mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
3731                         0, 0, s->field_select[dir][i],
3732                         ref2picture, pix_op,
3733                         s->mv[dir][i][0], s->mv[dir][i][1] + 2*block_s*i, block_s);
3734
3735             dest_y += 2*block_s*s->linesize;
3736             dest_cb+= (2*block_s>>s->chroma_y_shift)*s->uvlinesize;
3737             dest_cr+= (2*block_s>>s->chroma_y_shift)*s->uvlinesize;
3738         }
3739         break;
3740     case MV_TYPE_DMV:
3741         if(s->picture_structure == PICT_FRAME){
3742             for(i=0; i<2; i++){
3743                 int j;
3744                 for(j=0; j<2; j++){
3745                     mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
3746                                 1, j, j^i,
3747                                 ref_picture, pix_op,
3748                                 s->mv[dir][2*i + j][0], s->mv[dir][2*i + j][1], block_s);
3749                 }
3750                 pix_op = s->dsp.avg_h264_chroma_pixels_tab;
3751             }
3752         }else{
3753             for(i=0; i<2; i++){
3754                 mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
3755                             0, 0, s->picture_structure != i+1,
3756                             ref_picture, pix_op,
3757                             s->mv[dir][2*i][0],s->mv[dir][2*i][1],2*block_s);
3758
3759                 // after put we make avg of the same block
3760                 pix_op = s->dsp.avg_h264_chroma_pixels_tab;
3761
3762                 //opposite parity is always in the same frame if this is second field
3763                 if(!s->first_field){
3764                     ref_picture = s->current_picture_ptr->data;
3765                 }
3766             }
3767         }
3768     break;
3769     default: assert(0);
3770     }
3771 }
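/* Note: the lowres variant mirrors MPV_motion but interpolates every plane
 * with the h264_chroma_mc_func bilinear filters; OBMC, GMC and true qpel are
 * not handled here (quarter-sample vectors are simply halved). */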
3772
3773 /* put block[] to dest[] */
3774 static inline void put_dct(MpegEncContext *s,
3775                            DCTELEM *block, int i, uint8_t *dest, int line_size, int qscale)
3776 {
3777     s->dct_unquantize_intra(s, block, i, qscale);
3778     s->dsp.idct_put (dest, line_size, block);
3779 }
3780
3781 /* add block[] to dest[] */
3782 static inline void add_dct(MpegEncContext *s,
3783                            DCTELEM *block, int i, uint8_t *dest, int line_size)
3784 {
3785     if (s->block_last_index[i] >= 0) {
3786         s->dsp.idct_add (dest, line_size, block);
3787     }
3788 }
3789
3790 static inline void add_dequant_dct(MpegEncContext *s,
3791                            DCTELEM *block, int i, uint8_t *dest, int line_size, int qscale)
3792 {
3793     if (s->block_last_index[i] >= 0) {
3794         s->dct_unquantize_inter(s, block, i, qscale);
3795
3796         s->dsp.idct_add (dest, line_size, block);
3797     }
3798 }
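/* Note: intra blocks are written with put_dct() (dequantize + idct_put),
 * while inter residues are added on top of the prediction with idct_add;
 * add_dct()/add_dequant_dct() skip blocks whose block_last_index is < 0,
 * i.e. blocks with no coded coefficients. */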
3799
3800 /**
3801  * cleans dc, ac and coded_block for the current non-intra MB
3802  */
3803 void ff_clean_intra_table_entries(MpegEncContext *s)
3804 {
3805     int wrap = s->b8_stride;
3806     int xy = s->block_index[0];
3807
3808     s->dc_val[0][xy           ] =
3809     s->dc_val[0][xy + 1       ] =
3810     s->dc_val[0][xy     + wrap] =
3811     s->dc_val[0][xy + 1 + wrap] = 1024;
3812     /* ac pred */
3813     memset(s->ac_val[0][xy       ], 0, 32 * sizeof(int16_t));
3814     memset(s->ac_val[0][xy + wrap], 0, 32 * sizeof(int16_t));
3815     if (s->msmpeg4_version>=3) {
3816         s->coded_block[xy           ] =
3817         s->coded_block[xy + 1       ] =
3818         s->coded_block[xy     + wrap] =
3819         s->coded_block[xy + 1 + wrap] = 0;
3820     }
3821     /* chroma */
3822     wrap = s->mb_stride;
3823     xy = s->mb_x + s->mb_y * wrap;
3824     s->dc_val[1][xy] =
3825     s->dc_val[2][xy] = 1024;
3826     /* ac pred */
3827     memset(s->ac_val[1][xy], 0, 16 * sizeof(int16_t));
3828     memset(s->ac_val[2][xy], 0, 16 * sizeof(int16_t));
3829
3830     s->mbintra_table[xy]= 0;
3831 }
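/* Note: 1024 is the neutral value the H.263/MPEG-4 style DC prediction is
 * reset to; the AC prediction arrays and (for MSMPEG4 v3+) the coded_block
 * bits are cleared as well, and mbintra_table marks the MB as non-intra. */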
3832
3833 /* generic function called after a macroblock has been parsed by the
3834    decoder or after it has been encoded by the encoder.
3835
3836    Important variables used:
3837    s->mb_intra : true if intra macroblock
3838    s->mv_dir   : motion vector direction
3839    s->mv_type  : motion vector type
3840    s->mv       : motion vector
3841    s->interlaced_dct : true if interlaced dct used (mpeg2)
3842  */
3843 static always_inline void MPV_decode_mb_internal(MpegEncContext *s, DCTELEM block[12][64], int lowres_flag)
3844 {
3845     int mb_x, mb_y;
3846     const int mb_xy = s->mb_y * s->mb_stride + s->mb_x;
3847 #ifdef HAVE_XVMC
3848     if(s->avctx->xvmc_acceleration){
3849         XVMC_decode_mb(s);//xvmc uses pblocks
3850         return;
3851     }
3852 #endif
3853
3854     mb_x = s->mb_x;
3855     mb_y = s->mb_y;
3856
3857     if(s->avctx->debug&FF_DEBUG_DCT_COEFF) {
3858        /* save DCT coefficients */
3859        int i,j;
3860        DCTELEM *dct = &s->current_picture.dct_coeff[mb_xy*64*6];
3861        for(i=0; i<6; i++)
3862            for(j=0; j<64; j++)
3863                *dct++ = block[i][s->dsp.idct_permutation[j]];
3864     }
3865
3866     s->current_picture.qscale_table[mb_xy]= s->qscale;
3867
3868     /* update DC predictors for P macroblocks */
3869     if (!s->mb_intra) {
3870         if (s->h263_pred || s->h263_aic) {
3871             if(s->mbintra_table[mb_xy])
3872                 ff_clean_intra_table_entries(s);
3873         } else {
3874             s->last_dc[0] =
3875             s->last_dc[1] =
3876             s->last_dc[2] = 128 << s->intra_dc_precision;
3877         }
3878     }
3879     else if (s->h263_pred || s->h263_aic)
3880         s->mbintra_table[mb_xy]=1;
3881
3882     if ((s->flags&CODEC_FLAG_PSNR) || !(s->encoding && (s->intra_only || s->pict_type==B_TYPE))) { //FIXME precalc
3883         uint8_t *dest_y, *dest_cb, *dest_cr;
3884         int dct_linesize, dct_offset;
3885         op_pixels_func (*op_pix)[4];
3886         qpel_mc_func (*op_qpix)[16];
3887         const int linesize= s->current_picture.linesize[0]; //not s->linesize as this would be wrong for field pics
3888         const int uvlinesize= s->current_picture.linesize[1];
3889         const int readable= s->pict_type != B_TYPE || s->encoding || s->avctx->draw_horiz_band || lowres_flag;
3890         const int block_size= lowres_flag ? 8>>s->avctx->lowres : 8;
3891
3892         /* avoid copy if macroblock skipped in last frame too */
3893         /* skip only during decoding, since during encoding we might trash the buffers a bit */
3894         if(!s->encoding){
3895             uint8_t *mbskip_ptr = &s->mbskip_table[mb_xy];
3896             const int age= s->current_picture.age;
3897
3898             assert(age);
3899
3900             if (s->mb_skipped) {
3901                 s->mb_skipped= 0;
3902                 assert(s->pict_type!=I_TYPE);
3903
3904                 (*mbskip_ptr) ++; /* indicate that this time we skipped it */
3905                 if(*mbskip_ptr >99) *mbskip_ptr= 99;
3906
3907                 /* if the previous MB was skipped too, then there is nothing to do! */
3908                 if (*mbskip_ptr >= age && s->current_picture.reference){
3909                     return;
3910                 }
3911             } else if(!s->current_picture.reference){
3912                 (*mbskip_ptr) ++; /* increase counter so the age can be compared cleanly */
3913                 if(*mbskip_ptr >99) *mbskip_ptr= 99;
3914             } else{
3915                 *mbskip_ptr = 0; /* not skipped */
3916             }
3917         }
3918
3919         dct_linesize = linesize << s->interlaced_dct;
3920         dct_offset =(s->interlaced_dct)? linesize : linesize*block_size;
3921
3922         if(readable){
3923             dest_y=  s->dest[0];
3924             dest_cb= s->dest[1];
3925             dest_cr= s->dest[2];
3926         }else{
3927             dest_y = s->b_scratchpad;
3928             dest_cb= s->b_scratchpad+16*linesize;
3929             dest_cr= s->b_scratchpad+32*linesize;
3930         }
3931
3932         if (!s->mb_intra) {
3933             /* motion handling */
3934             /* decoding or more than one mb_type (MC was already done otherwise) */
3935             if(!s->encoding){
3936                 if(lowres_flag){
3937                     h264_chroma_mc_func *op_pix = s->dsp.put_h264_chroma_pixels_tab;
3938
3939                     if (s->mv_dir & MV_DIR_FORWARD) {
3940                         MPV_motion_lowres(s, dest_y, dest_cb, dest_cr, 0, s->last_picture.data, op_pix);
3941                         op_pix = s->dsp.avg_h264_chroma_pixels_tab;
3942                     }
3943                     if (s->mv_dir & MV_DIR_BACKWARD) {
3944                         MPV_motion_lowres(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.data, op_pix);
3945                     }
3946                 }else{
3947                     op_qpix= s->me.qpel_put;
3948                     if ((!s->no_rounding) || s->pict_type==B_TYPE){
3949                         op_pix = s->dsp.put_pixels_tab;
3950                     }else{
3951                         op_pix = s->dsp.put_no_rnd_pixels_tab;
3952                     }
3953                     if (s->mv_dir & MV_DIR_FORWARD) {
3954                         MPV_motion(s, dest_y, dest_cb, dest_cr, 0, s->last_picture.data, op_pix, op_qpix);
3955                         op_pix = s->dsp.avg_pixels_tab;
3956                         op_qpix= s->me.qpel_avg;
3957                     }
3958                     if (s->mv_dir & MV_DIR_BACKWARD) {
3959                         MPV_motion(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.data, op_pix, op_qpix);
3960                     }
3961                 }
3962             }
3963
3964             /* skip dequant / idct if we are really late ;) */
3965             if(s->hurry_up>1) goto skip_idct;
3966             if(s->avctx->skip_idct){
3967                 if(  (s->avctx->skip_idct >= AVDISCARD_NONREF && s->pict_type == B_TYPE)
3968                    ||(s->avctx->skip_idct >= AVDISCARD_NONKEY && s->pict_type != I_TYPE)
3969                    || s->avctx->skip_idct >= AVDISCARD_ALL)
3970                     goto skip_idct;
3971             }
3972
3973             /* add dct residue */
3974             if(s->encoding || !(   s->h263_msmpeg4 || s->codec_id==CODEC_ID_MPEG1VIDEO || s->codec_id==CODEC_ID_MPEG2VIDEO
3975                                 || (s->codec_id==CODEC_ID_MPEG4 && !s->mpeg_quant))){
3976                 add_dequant_dct(s, block[0], 0, dest_y                          , dct_linesize, s->qscale);
3977                 add_dequant_dct(s, block[1], 1, dest_y              + block_size, dct_linesize, s->qscale);
3978                 add_dequant_dct(s, block[2], 2, dest_y + dct_offset             , dct_linesize, s->qscale);
3979                 add_dequant_dct(s, block[3], 3, dest_y + dct_offset + block_size, dct_linesize, s->qscale);
3980
3981                 if(!(s->flags&CODEC_FLAG_GRAY)){
3982                     if (s->chroma_y_shift){
3983                         add_dequant_dct(s, block[4], 4, dest_cb, uvlinesize, s->chroma_qscale);
3984                         add_dequant_dct(s, block[5], 5, dest_cr, uvlinesize, s->chroma_qscale);
3985                     }else{
3986                         dct_linesize >>= 1;
3987                         dct_offset >>=1;
3988                         add_dequant_dct(s, block[4], 4, dest_cb,              dct_linesize, s->chroma_qscale);
3989                         add_dequant_dct(s, block[5], 5, dest_cr,              dct_linesize, s->chroma_qscale);
3990                         add_dequant_dct(s, block[6], 6, dest_cb + dct_offset, dct_linesize, s->chroma_qscale);
3991                         add_dequant_dct(s, block[7], 7, dest_cr + dct_offset, dct_linesize, s->chroma_qscale);
3992                     }
3993                 }
3994             } else if(s->codec_id != CODEC_ID_WMV2){
3995                 add_dct(s, block[0], 0, dest_y                          , dct_linesize);
3996                 add_dct(s, block[1], 1, dest_y              + block_size, dct_linesize);
3997                 add_dct(s, block[2], 2, dest_y + dct_offset             , dct_linesize);
3998                 add_dct(s, block[3], 3, dest_y + dct_offset + block_size, dct_linesize);
3999
4000                 if(!(s->flags&CODEC_FLAG_GRAY)){
4001                     if(s->chroma_y_shift){//Chroma420
4002                         add_dct(s, block[4], 4, dest_cb, uvlinesize);
4003                         add_dct(s, block[5], 5, dest_cr, uvlinesize);
4004                     }else{
4005                         //chroma422
4006                         dct_linesize = uvlinesize << s->interlaced_dct;
4007                         dct_offset =(s->interlaced_dct)? uvlinesize : uvlinesize*8;
4008
4009                         add_dct(s, block[4], 4, dest_cb, dct_linesize);
4010                         add_dct(s, block[5], 5, dest_cr, dct_linesize);
4011                         add_dct(s, block[6], 6, dest_cb+dct_offset, dct_linesize);
4012                         add_dct(s, block[7], 7, dest_cr+dct_offset, dct_linesize);
4013                         if(!s->chroma_x_shift){//Chroma444
4014                             add_dct(s, block[8], 8, dest_cb+8, dct_linesize);
4015                             add_dct(s, block[9], 9, dest_cr+8, dct_linesize);
4016                             add_dct(s, block[10], 10, dest_cb+8+dct_offset, dct_linesize);
4017                             add_dct(s, block[11], 11, dest_cr+8+dct_offset, dct_linesize);
4018                         }
4019                     }
4020                 }//fi gray
4021             }
4022             else{
4023                 ff_wmv2_add_mb(s, block, dest_y, dest_cb, dest_cr);
4024             }
4025         } else {
4026             /* dct only in intra block */
4027             if(s->encoding || !(s->codec_id==CODEC_ID_MPEG1VIDEO || s->codec_id==CODEC_ID_MPEG2VIDEO)){
4028                 put_dct(s, block[0], 0, dest_y                          , dct_linesize, s->qscale);
4029                 put_dct(s, block[1], 1, dest_y              + block_size, dct_linesize, s->qscale);
4030                 put_dct(s, block[2], 2, dest_y + dct_offset             , dct_linesize, s->qscale);
4031                 put_dct(s, block[3], 3, dest_y + dct_offset + block_size, dct_linesize, s->qscale);
4032
4033                 if(!(s->flags&CODEC_FLAG_GRAY)){
4034                     if(s->chroma_y_shift){
4035                         put_dct(s, block[4], 4, dest_cb, uvlinesize, s->chroma_qscale);
4036                         put_dct(s, block[5], 5, dest_cr, uvlinesize, s->chroma_qscale);
4037                     }else{
4038                         dct_offset >>=1;
4039                         dct_linesize >>=1;
4040                         put_dct(s, block[4], 4, dest_cb,              dct_linesize, s->chroma_qscale);
4041                         put_dct(s, block[5], 5, dest_cr,              dct_linesize, s->chroma_qscale);
4042                         put_dct(s, block[6], 6, dest_cb + dct_offset, dct_linesize, s->chroma_qscale);
4043                         put_dct(s, block[7], 7, dest_cr + dct_offset, dct_linesize, s->chroma_qscale);
4044                     }
4045                 }
4046             }else{
4047                 s->dsp.idct_put(dest_y                          , dct_linesize, block[0]);
4048                 s->dsp.idct_put(dest_y              + block_size, dct_linesize, block[1]);
4049                 s->dsp.idct_put(dest_y + dct_offset             , dct_linesize, block[2]);
4050                 s->dsp.idct_put(dest_y + dct_offset + block_size, dct_linesize, block[3]);
4051
4052                 if(!(s->flags&CODEC_FLAG_GRAY)){
4053                     if(s->chroma_y_shift){
4054                         s->dsp.idct_put(dest_cb, uvlinesize, block[4]);
4055                         s->dsp.idct_put(dest_cr, uvlinesize, block[5]);
4056                     }else{
4057
4058                         dct_linesize = uvlinesize << s->interlaced_dct;
4059                         dct_offset =(s->interlaced_dct)? uvlinesize : uvlinesize*8;
4060
4061                         s->dsp.idct_put(dest_cb,              dct_linesize, block[4]);
4062                         s->dsp.idct_put(dest_cr,              dct_linesize, block[5]);
4063                         s->dsp.idct_put(dest_cb + dct_offset, dct_linesize, block[6]);
4064                         s->dsp.idct_put(dest_cr + dct_offset, dct_linesize, block[7]);
4065                         if(!s->chroma_x_shift){//Chroma444
4066                             s->dsp.idct_put(dest_cb + 8,              dct_linesize, block[8]);
4067                             s->dsp.idct_put(dest_cr + 8,              dct_linesize, block[9]);
4068                             s->dsp.idct_put(dest_cb + 8 + dct_offset, dct_linesize, block[10]);
4069                             s->dsp.idct_put(dest_cr + 8 + dct_offset, dct_linesize, block[11]);
4070                         }
4071                     }
4072                 }//gray
4073             }
4074         }
4075 skip_idct:
4076         if(!readable){
4077             s->dsp.put_pixels_tab[0][0](s->dest[0], dest_y ,   linesize,16);
4078             s->dsp.put_pixels_tab[s->chroma_x_shift][0](s->dest[1], dest_cb, uvlinesize,16 >> s->chroma_y_shift);
4079             s->dsp.put_pixels_tab[s->chroma_x_shift][0](s->dest[2], dest_cr, uvlinesize,16 >> s->chroma_y_shift);
4080         }
4081     }
4082 }
4083
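/* Reconstruct one macroblock, dispatching to the lowres or the full-resolution path. */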
4084 void MPV_decode_mb(MpegEncContext *s, DCTELEM block[12][64]){
4085     if(s->avctx->lowres) MPV_decode_mb_internal(s, block, 1);
4086     else                  MPV_decode_mb_internal(s, block, 0);
4087 }
4088
4089 #ifdef CONFIG_ENCODERS
4090
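/* Zero out a block whose only nonzero coefficients are isolated +-1 levels: if no
   coefficient exceeds 1 in magnitude and the run-length-weighted score of the ones
   stays below the threshold, the block is cleared (a negative threshold allows the
   DC coefficient to be eliminated as well) and block_last_index is updated. */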
4091 static inline void dct_single_coeff_elimination(MpegEncContext *s, int n, int threshold)
4092 {
4093     static const char tab[64]=
4094         {3,2,2,1,1,1,1,1,
4095          1,1,1,1,1,1,1,1,
4096          1,1,1,1,1,1,1,1,
4097          0,0,0,0,0,0,0,0,
4098          0,0,0,0,0,0,0,0,
4099          0,0,0,0,0,0,0,0,
4100          0,0,0,0,0,0,0,0,
4101          0,0,0,0,0,0,0,0};
4102     int score=0;
4103     int run=0;
4104     int i;
4105     DCTELEM *block= s->block[n];
4106     const int last_index= s->block_last_index[n];
4107     int skip_dc;
4108
4109     if(threshold<0){
4110         skip_dc=0;
4111         threshold= -threshold;
4112     }else
4113         skip_dc=1;
4114
4115     /* are all the coefficients which we could set to zero already zero? */
4116     if(last_index<=skip_dc - 1) return;
4117
4118     for(i=0; i<=last_index; i++){
4119         const int j = s->intra_scantable.permutated[i];
4120         const int level = ABS(block[j]);
4121         if(level==1){
4122             if(skip_dc && i==0) continue;
4123             score+= tab[run];
4124             run=0;
4125         }else if(level>1){
4126             return;
4127         }else{
4128             run++;
4129         }
4130     }
4131     if(score >= threshold) return;
4132     for(i=skip_dc; i<=last_index; i++){
4133         const int j = s->intra_scantable.permutated[i];
4134         block[j]=0;
4135     }
4136     if(block[0]) s->block_last_index[n]= 0;
4137     else         s->block_last_index[n]= -1;
4138 }
4139
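/* Clamp the quantized coefficients of one block into [min_qcoeff, max_qcoeff],
   leaving the intra DC coefficient untouched. */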
4140 static inline void clip_coeffs(MpegEncContext *s, DCTELEM *block, int last_index)
4141 {
4142     int i;
4143     const int maxlevel= s->max_qcoeff;
4144     const int minlevel= s->min_qcoeff;
4145     int overflow=0;
4146
4147     if(s->mb_intra){
4148         i=1; //skip clipping of intra dc
4149     }else
4150         i=0;
4151
4152     for(;i<=last_index; i++){
4153         const int j= s->intra_scantable.permutated[i];
4154         int level = block[j];
4155
4156         if     (level>maxlevel){
4157             level=maxlevel;
4158             overflow++;
4159         }else if(level<minlevel){
4160             level=minlevel;
4161             overflow++;
4162         }
4163
4164         block[j]= level;
4165     }
4166
4167     if(overflow && s->avctx->mb_decision == FF_MB_DECISION_SIMPLE)
4168         av_log(s->avctx, AV_LOG_INFO, "warning, clipping %d dct coefficients to %d..%d\n", overflow, minlevel, maxlevel);
4169 }
4170
4171 #endif //CONFIG_ENCODERS
4172
4173 /**
4174  * Pass a horizontal band of the reconstructed picture to the user's draw_horiz_band callback, if one is set.
4175  * @param h is the normal height; it will be reduced automatically if needed for the last row
4176  */
4177 void ff_draw_horiz_band(MpegEncContext *s, int y, int h){
4178     if (s->avctx->draw_horiz_band) {
4179         AVFrame *src;
4180         int offset[4];
4181
4182         if(s->picture_structure != PICT_FRAME){
4183             h <<= 1;
4184             y <<= 1;
4185             if(s->first_field  && !(s->avctx->slice_flags&SLICE_FLAG_ALLOW_FIELD)) return;
4186         }
4187
4188         h= FFMIN(h, s->avctx->height - y);
4189
4190         if(s->pict_type==B_TYPE || s->low_delay || (s->avctx->slice_flags&SLICE_FLAG_CODED_ORDER))
4191             src= (AVFrame*)s->current_picture_ptr;
4192         else if(s->last_picture_ptr)
4193             src= (AVFrame*)s->last_picture_ptr;
4194         else
4195             return;
4196
4197         if(s->pict_type==B_TYPE && s->picture_structure == PICT_FRAME && s->out_format != FMT_H264){
4198             offset[0]=
4199             offset[1]=
4200             offset[2]=
4201             offset[3]= 0;
4202         }else{
4203             offset[0]= y * s->linesize;
4204             offset[1]=
4205             offset[2]= (y >> s->chroma_y_shift) * s->uvlinesize;
4206             offset[3]= 0;
4207         }
4208
4209         emms_c();
4210
4211         s->avctx->draw_horiz_band(s->avctx, src, offset,
4212                                   y, s->picture_structure, h);
4213     }
4214 }
4215
4216 void ff_init_block_index(MpegEncContext *s){ //FIXME maybe rename
4217     const int linesize= s->current_picture.linesize[0]; //not s->linesize as this would be wrong for field pics
4218     const int uvlinesize= s->current_picture.linesize[1];
4219     const int mb_size= 4 - s->avctx->lowres;
4220
4221     s->block_index[0]= s->b8_stride*(s->mb_y*2    ) - 2 + s->mb_x*2;
4222     s->block_index[1]= s->b8_stride*(s->mb_y*2    ) - 1 + s->mb_x*2;
4223     s->block_index[2]= s->b8_stride*(s->mb_y*2 + 1) - 2 + s->mb_x*2;
4224     s->block_index[3]= s->b8_stride*(s->mb_y*2 + 1) - 1 + s->mb_x*2;
4225     s->block_index[4]= s->mb_stride*(s->mb_y + 1)                + s->b8_stride*s->mb_height*2 + s->mb_x - 1;
4226     s->block_index[5]= s->mb_stride*(s->mb_y + s->mb_height + 2) + s->b8_stride*s->mb_height*2 + s->mb_x - 1;
4227     //block_index is not used by mpeg2, so it is not affected by chroma_format
4228
4229     s->dest[0] = s->current_picture.data[0] + ((s->mb_x - 1) << mb_size);
4230     s->dest[1] = s->current_picture.data[1] + ((s->mb_x - 1) << (mb_size - s->chroma_x_shift));
4231     s->dest[2] = s->current_picture.data[2] + ((s->mb_x - 1) << (mb_size - s->chroma_x_shift));
4232
4233     if(!(s->pict_type==B_TYPE && s->avctx->draw_horiz_band && s->picture_structure==PICT_FRAME))
4234     {
4235         s->dest[0] += s->mb_y *   linesize << mb_size;
4236         s->dest[1] += s->mb_y * uvlinesize << (mb_size - s->chroma_y_shift);
4237         s->dest[2] += s->mb_y * uvlinesize << (mb_size - s->chroma_y_shift);
4238     }
4239 }
4240
4241 #ifdef CONFIG_ENCODERS
4242
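/* Compute a per-pixel weight for quantization noise shaping: each value is
   roughly proportional to the standard deviation of the pixel's (clipped)
   3x3 neighbourhood. */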
4243 static void get_vissual_weight(int16_t *weight, uint8_t *ptr, int stride){
4244     int x, y;
4245 //FIXME optimize
4246     for(y=0; y<8; y++){
4247         for(x=0; x<8; x++){
4248             int x2, y2;
4249             int sum=0;
4250             int sqr=0;
4251             int count=0;
4252
4253             for(y2= FFMAX(y-1, 0); y2 < FFMIN(8, y+2); y2++){
4254                 for(x2= FFMAX(x-1, 0); x2 < FFMIN(8, x+2); x2++){
4255                     int v= ptr[x2 + y2*stride];
4256                     sum += v;
4257                     sqr += v*v;
4258                     count++;
4259                 }
4260             }
4261             weight[x + 8*y]= (36*ff_sqrt(count*sqr - sum*sum)) / count;
4262         }
4263     }
4264 }
4265
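/* Transform, quantize and entropy-code one macroblock (intra or motion-compensated
   inter); mb_block_height/mb_block_count are 8/6 for 4:2:0 and 16/8 otherwise. */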
4266 static always_inline void encode_mb_internal(MpegEncContext *s, int motion_x, int motion_y, int mb_block_height, int mb_block_count)
4267 {
4268     int16_t weight[8][64];
4269     DCTELEM orig[8][64];
4270     const int mb_x= s->mb_x;
4271     const int mb_y= s->mb_y;
4272     int i;
4273     int skip_dct[8];
4274     int dct_offset   = s->linesize*8; //default for progressive frames
4275     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
4276     int wrap_y, wrap_c;
4277
4278     for(i=0; i<mb_block_count; i++) skip_dct[i]=0;
4279
4280     if(s->adaptive_quant){
4281         const int last_qp= s->qscale;
4282         const int mb_xy= mb_x + mb_y*s->mb_stride;
4283
4284         s->lambda= s->lambda_table[mb_xy];
4285         update_qscale(s);
4286
4287         if(!(s->flags&CODEC_FLAG_QP_RD)){
4288             s->dquant= s->qscale - last_qp;
4289
4290             if(s->out_format==FMT_H263){
4291                 s->dquant= clip(s->dquant, -2, 2); //FIXME RD
4292
4293                 if(s->codec_id==CODEC_ID_MPEG4){
4294                     if(!s->mb_intra){
4295                         if(s->pict_type == B_TYPE){
4296                             if(s->dquant&1)
4297                                 s->dquant= (s->dquant/2)*2;
4298                             if(s->mv_dir&MV_DIRECT)
4299                                 s->dquant= 0;
4300                         }
4301                         if(s->mv_type==MV_TYPE_8X8)
4302                             s->dquant=0;
4303                     }
4304                 }
4305             }
4306         }
4307         ff_set_qscale(s, last_qp + s->dquant);
4308     }else if(s->flags&CODEC_FLAG_QP_RD)
4309         ff_set_qscale(s, s->qscale + s->dquant);
4310
4311     wrap_y = s->linesize;
4312     wrap_c = s->uvlinesize;
4313     ptr_y = s->new_picture.data[0] + (mb_y * 16 * wrap_y) + mb_x * 16;
4314     ptr_cb = s->new_picture.data[1] + (mb_y * mb_block_height * wrap_c) + mb_x * 8;
4315     ptr_cr = s->new_picture.data[2] + (mb_y * mb_block_height * wrap_c) + mb_x * 8;
4316
4317     if(mb_x*16+16 > s->width || mb_y*16+16 > s->height){
4318         uint8_t *ebuf= s->edge_emu_buffer + 32;
4319         ff_emulated_edge_mc(ebuf            , ptr_y , wrap_y,16,16,mb_x*16,mb_y*16, s->width   , s->height);
4320         ptr_y= ebuf;
4321         ff_emulated_edge_mc(ebuf+18*wrap_y  , ptr_cb, wrap_c, 8, mb_block_height, mb_x*8, mb_y*8, s->width>>1, s->height>>1);
4322         ptr_cb= ebuf+18*wrap_y;
4323         ff_emulated_edge_mc(ebuf+18*wrap_y+8, ptr_cr, wrap_c, 8, mb_block_height, mb_x*8, mb_y*8, s->width>>1, s->height>>1);
4324         ptr_cr= ebuf+18*wrap_y+8;
4325     }
4326
4327     if (s->mb_intra) {
4328         if(s->flags&CODEC_FLAG_INTERLACED_DCT){
4329             int progressive_score, interlaced_score;
4330
4331             s->interlaced_dct=0;
4332             progressive_score= s->dsp.ildct_cmp[4](s, ptr_y           , NULL, wrap_y, 8)
4333                               +s->dsp.ildct_cmp[4](s, ptr_y + wrap_y*8, NULL, wrap_y, 8) - 400;
4334
4335             if(progressive_score > 0){
4336                 interlaced_score = s->dsp.ildct_cmp[4](s, ptr_y           , NULL, wrap_y*2, 8)
4337                                   +s->dsp.ildct_cmp[4](s, ptr_y + wrap_y  , NULL, wrap_y*2, 8);
4338                 if(progressive_score > interlaced_score){
4339                     s->interlaced_dct=1;
4340
4341                     dct_offset= wrap_y;
4342                     wrap_y<<=1;
4343                     if (s->chroma_format == CHROMA_422)
4344                         wrap_c<<=1;
4345                 }
4346             }
4347         }
4348
4349         s->dsp.get_pixels(s->block[0], ptr_y                 , wrap_y);
4350         s->dsp.get_pixels(s->block[1], ptr_y              + 8, wrap_y);
4351         s->dsp.get_pixels(s->block[2], ptr_y + dct_offset    , wrap_y);
4352         s->dsp.get_pixels(s->block[3], ptr_y + dct_offset + 8, wrap_y);
4353
4354         if(s->flags&CODEC_FLAG_GRAY){
4355             skip_dct[4]= 1;
4356             skip_dct[5]= 1;
4357         }else{
4358             s->dsp.get_pixels(s->block[4], ptr_cb, wrap_c);
4359             s->dsp.get_pixels(s->block[5], ptr_cr, wrap_c);
4360             if(!s->chroma_y_shift){ /* 422 */
4361                 s->dsp.get_pixels(s->block[6], ptr_cb + (dct_offset>>1), wrap_c);
4362                 s->dsp.get_pixels(s->block[7], ptr_cr + (dct_offset>>1), wrap_c);
4363             }
4364         }
4365     }else{
4366         op_pixels_func (*op_pix)[4];
4367         qpel_mc_func (*op_qpix)[16];
4368         uint8_t *dest_y, *dest_cb, *dest_cr;
4369
4370         dest_y  = s->dest[0];
4371         dest_cb = s->dest[1];
4372         dest_cr = s->dest[2];
4373
4374         if ((!s->no_rounding) || s->pict_type==B_TYPE){
4375             op_pix = s->dsp.put_pixels_tab;
4376             op_qpix= s->dsp.put_qpel_pixels_tab;
4377         }else{
4378             op_pix = s->dsp.put_no_rnd_pixels_tab;
4379             op_qpix= s->dsp.put_no_rnd_qpel_pixels_tab;
4380         }
4381
4382         if (s->mv_dir & MV_DIR_FORWARD) {
4383             MPV_motion(s, dest_y, dest_cb, dest_cr, 0, s->last_picture.data, op_pix, op_qpix);
4384             op_pix = s->dsp.avg_pixels_tab;
4385             op_qpix= s->dsp.avg_qpel_pixels_tab;
4386         }
4387         if (s->mv_dir & MV_DIR_BACKWARD) {
4388             MPV_motion(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.data, op_pix, op_qpix);
4389         }
4390
4391         if(s->flags&CODEC_FLAG_INTERLACED_DCT){
4392             int progressive_score, interlaced_score;
4393
4394             s->interlaced_dct=0;
4395             progressive_score= s->dsp.ildct_cmp[0](s, dest_y           , ptr_y           , wrap_y, 8)
4396                               +s->dsp.ildct_cmp[0](s, dest_y + wrap_y*8, ptr_y + wrap_y*8, wrap_y, 8) - 400;
4397
4398             if(s->avctx->ildct_cmp == FF_CMP_VSSE) progressive_score -= 400;
4399
4400             if(progressive_score>0){
4401                 interlaced_score = s->dsp.ildct_cmp[0](s, dest_y           , ptr_y           , wrap_y*2, 8)
4402                                   +s->dsp.ildct_cmp[0](s, dest_y + wrap_y  , ptr_y + wrap_y  , wrap_y*2, 8);
4403
4404                 if(progressive_score > interlaced_score){
4405                     s->interlaced_dct=1;
4406
4407                     dct_offset= wrap_y;
4408                     wrap_y<<=1;
4409                     if (s->chroma_format == CHROMA_422)
4410                         wrap_c<<=1;
4411                 }
4412             }
4413         }
4414
4415         s->dsp.diff_pixels(s->block[0], ptr_y                 , dest_y                 , wrap_y);
4416         s->dsp.diff_pixels(s->block[1], ptr_y              + 8, dest_y              + 8, wrap_y);
4417         s->dsp.diff_pixels(s->block[2], ptr_y + dct_offset    , dest_y + dct_offset    , wrap_y);
4418         s->dsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8, dest_y + dct_offset + 8, wrap_y);
4419
4420         if(s->flags&CODEC_FLAG_GRAY){
4421             skip_dct[4]= 1;
4422             skip_dct[5]= 1;
4423         }else{
4424             s->dsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
4425             s->dsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
4426             if(!s->chroma_y_shift){ /* 422 */
4427                 s->dsp.diff_pixels(s->block[6], ptr_cb + (dct_offset>>1), dest_cb + (dct_offset>>1), wrap_c);
4428                 s->dsp.diff_pixels(s->block[7], ptr_cr + (dct_offset>>1), dest_cr + (dct_offset>>1), wrap_c);
4429             }
4430         }
4431         /* pre quantization */
4432         if(s->current_picture.mc_mb_var[s->mb_stride*mb_y+ mb_x]<2*s->qscale*s->qscale){
4433             //FIXME optimize
4434             if(s->dsp.sad[1](NULL, ptr_y               , dest_y               , wrap_y, 8) < 20*s->qscale) skip_dct[0]= 1;
4435             if(s->dsp.sad[1](NULL, ptr_y            + 8, dest_y            + 8, wrap_y, 8) < 20*s->qscale) skip_dct[1]= 1;
4436             if(s->dsp.sad[1](NULL, ptr_y +dct_offset   , dest_y +dct_offset   , wrap_y, 8) < 20*s->qscale) skip_dct[2]= 1;
4437             if(s->dsp.sad[1](NULL, ptr_y +dct_offset+ 8, dest_y +dct_offset+ 8, wrap_y, 8) < 20*s->qscale) skip_dct[3]= 1;
4438             if(s->dsp.sad[1](NULL, ptr_cb              , dest_cb              , wrap_c, 8) < 20*s->qscale) skip_dct[4]= 1;
4439             if(s->dsp.sad[1](NULL, ptr_cr              , dest_cr              , wrap_c, 8) < 20*s->qscale) skip_dct[5]= 1;
4440             if(!s->chroma_y_shift){ /* 422 */
4441                 if(s->dsp.sad[1](NULL, ptr_cb +(dct_offset>>1), dest_cb +(dct_offset>>1), wrap_c, 8) < 20*s->qscale) skip_dct[6]= 1;
4442                 if(s->dsp.sad[1](NULL, ptr_cr +(dct_offset>>1), dest_cr +(dct_offset>>1), wrap_c, 8) < 20*s->qscale) skip_dct[7]= 1;
4443             }
4444         }
4445     }
4446
4447     if(s->avctx->quantizer_noise_shaping){
4448         if(!skip_dct[0]) get_vissual_weight(weight[0], ptr_y                 , wrap_y);
4449         if(!skip_dct[1]) get_vissual_weight(weight[1], ptr_y              + 8, wrap_y);
4450         if(!skip_dct[2]) get_vissual_weight(weight[2], ptr_y + dct_offset    , wrap_y);
4451         if(!skip_dct[3]) get_vissual_weight(weight[3], ptr_y + dct_offset + 8, wrap_y);
4452         if(!skip_dct[4]) get_vissual_weight(weight[4], ptr_cb                , wrap_c);
4453         if(!skip_dct[5]) get_vissual_weight(weight[5], ptr_cr                , wrap_c);
4454         if(!s->chroma_y_shift){ /* 422 */
4455             if(!skip_dct[6]) get_vissual_weight(weight[6], ptr_cb + (dct_offset>>1), wrap_c);
4456             if(!skip_dct[7]) get_vissual_weight(weight[7], ptr_cr + (dct_offset>>1), wrap_c);
4457         }
4458         memcpy(orig[0], s->block[0], sizeof(DCTELEM)*64*mb_block_count);
4459     }
4460
4461     /* DCT & quantize */
4462     assert(s->out_format!=FMT_MJPEG || s->qscale==8);
4463     {
4464         for(i=0;i<mb_block_count;i++) {
4465             if(!skip_dct[i]){
4466                 int overflow;
4467                 s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow);
4468             // FIXME we could decide to change the quantizer instead of clipping
4469             // JS: I don't think that would be a good idea, it could lower quality instead
4470             //     of improving it. Only INTRADC clipping deserves changes in the quantizer.
4471                 if (overflow) clip_coeffs(s, s->block[i], s->block_last_index[i]);
4472             }else
4473                 s->block_last_index[i]= -1;
4474         }
4475         if(s->avctx->quantizer_noise_shaping){
4476             for(i=0;i<mb_block_count;i++) {
4477                 if(!skip_dct[i]){
4478                     s->block_last_index[i] = dct_quantize_refine(s, s->block[i], weight[i], orig[i], i, s->qscale);
4479                 }
4480             }
4481         }
4482
4483         if(s->luma_elim_threshold && !s->mb_intra)
4484             for(i=0; i<4; i++)
4485                 dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
4486         if(s->chroma_elim_threshold && !s->mb_intra)
4487             for(i=4; i<mb_block_count; i++)
4488                 dct_single_coeff_elimination(s, i, s->chroma_elim_threshold);
4489
4490         if(s->flags & CODEC_FLAG_CBP_RD){
4491             for(i=0;i<mb_block_count;i++) {
4492                 if(s->block_last_index[i] == -1)
4493                     s->coded_score[i]= INT_MAX/256;
4494             }
4495         }
4496     }
4497
4498     if((s->flags&CODEC_FLAG_GRAY) && s->mb_intra){
4499         s->block_last_index[4]=
4500         s->block_last_index[5]= 0;
4501         s->block[4][0]=
4502         s->block[5][0]= (1024 + s->c_dc_scale/2)/ s->c_dc_scale;
4503     }
4504
4505     //FIXME: the non-C quantize code returns an incorrect block_last_index
4506     if(s->alternate_scan && s->dct_quantize != dct_quantize_c){
4507         for(i=0; i<mb_block_count; i++){
4508             int j;
4509             if(s->block_last_index[i]>0){
4510                 for(j=63; j>0; j--){
4511                     if(s->block[i][ s->intra_scantable.permutated[j] ]) break;
4512                 }
4513                 s->block_last_index[i]= j;
4514             }
4515         }
4516     }
4517
4518     /* huffman encode */
4519     switch(s->codec_id){ //FIXME a function pointer could be slightly faster
4520     case CODEC_ID_MPEG1VIDEO:
4521     case CODEC_ID_MPEG2VIDEO:
4522         mpeg1_encode_mb(s, s->block, motion_x, motion_y); break;
4523     case CODEC_ID_MPEG4:
4524         mpeg4_encode_mb(s, s->block, motion_x, motion_y); break;
4525     case CODEC_ID_MSMPEG4V2:
4526     case CODEC_ID_MSMPEG4V3:
4527     case CODEC_ID_WMV1:
4528         msmpeg4_encode_mb(s, s->block, motion_x, motion_y); break;
4529     case CODEC_ID_WMV2:
4530          ff_wmv2_encode_mb(s, s->block, motion_x, motion_y); break;
4531 #ifdef CONFIG_H261_ENCODER
4532     case CODEC_ID_H261:
4533         ff_h261_encode_mb(s, s->block, motion_x, motion_y); break;
4534 #endif
4535     case CODEC_ID_H263:
4536     case CODEC_ID_H263P:
4537     case CODEC_ID_FLV1:
4538     case CODEC_ID_RV10:
4539     case CODEC_ID_RV20:
4540         h263_encode_mb(s, s->block, motion_x, motion_y); break;
4541     case CODEC_ID_MJPEG:
4542         mjpeg_encode_mb(s, s->block); break;
4543     default:
4544         assert(0);
4545     }
4546 }
4547
4548 static always_inline void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
4549 {
4550     if (s->chroma_format == CHROMA_420) encode_mb_internal(s, motion_x, motion_y,  8, 6);
4551     else                                encode_mb_internal(s, motion_x, motion_y, 16, 8);
4552 }
4553
4554 #endif //CONFIG_ENCODERS
4555
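/* Release all internally or user-allocated picture buffers and reset the reference
   pointers and the parse context, e.g. when the decoder is flushed for a seek. */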
4556 void ff_mpeg_flush(AVCodecContext *avctx){
4557     int i;
4558     MpegEncContext *s = avctx->priv_data;
4559
4560     if(s==NULL || s->picture==NULL)
4561         return;
4562
4563     for(i=0; i<MAX_PICTURE_COUNT; i++){
4564        if(s->picture[i].data[0] && (   s->picture[i].type == FF_BUFFER_TYPE_INTERNAL
4565                                     || s->picture[i].type == FF_BUFFER_TYPE_USER))
4566         avctx->release_buffer(avctx, (AVFrame*)&s->picture[i]);
4567     }
4568     s->current_picture_ptr = s->last_picture_ptr = s->next_picture_ptr = NULL;
4569
4570     s->mb_x= s->mb_y= 0;
4571
4572     s->parse_context.state= -1;
4573     s->parse_context.frame_start_found= 0;
4574     s->parse_context.overread= 0;
4575     s->parse_context.overread_index= 0;
4576     s->parse_context.index= 0;
4577     s->parse_context.last_index= 0;
4578     s->bitstream_buffer_size=0;
4579 }
4580
4581 #ifdef CONFIG_ENCODERS
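/* Append 'length' bits from src to pb. Short or unaligned data is written with
   16-bit put_bits() calls; otherwise the destination is aligned to 32 bits and the
   bulk of the words is copied with memcpy(), with the trailing bits written last. */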
4582 void ff_copy_bits(PutBitContext *pb, uint8_t *src, int length)
4583 {
4584     const uint16_t *srcw= (uint16_t*)src;
4585     int words= length>>4;
4586     int bits= length&15;
4587     int i;
4588
4589     if(length==0) return;
4590
4591     if(words < 16){
4592         for(i=0; i<words; i++) put_bits(pb, 16, be2me_16(srcw[i]));
4593     }else if(put_bits_count(pb)&7){
4594         for(i=0; i<words; i++) put_bits(pb, 16, be2me_16(srcw[i]));
4595     }else{
4596         for(i=0; put_bits_count(pb)&31; i++)
4597             put_bits(pb, 8, src[i]);
4598         flush_put_bits(pb);
4599         memcpy(pbBufPtr(pb), src+i, 2*words-i);
4600         skip_put_bytes(pb, 2*words-i);
4601     }
4602
4603     put_bits(pb, bits, be2me_16(srcw[words])>>(16-bits));
4604 }
4605
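/* copy_context_before/after_encode save and restore the parts of the encoder context
   that encoding one candidate macroblock mode may modify: last MVs, DC predictors,
   bit statistics and (after encoding) the chosen mode, bitstream writers and blocks. */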
4606 static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int type){
4607     int i;
4608
4609     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
4610
4611     /* mpeg1 */
4612     d->mb_skip_run= s->mb_skip_run;
4613     for(i=0; i<3; i++)
4614         d->last_dc[i]= s->last_dc[i];
4615
4616     /* statistics */
4617     d->mv_bits= s->mv_bits;
4618     d->i_tex_bits= s->i_tex_bits;
4619     d->p_tex_bits= s->p_tex_bits;
4620     d->i_count= s->i_count;
4621     d->f_count= s->f_count;
4622     d->b_count= s->b_count;
4623     d->skip_count= s->skip_count;
4624     d->misc_bits= s->misc_bits;
4625     d->last_bits= 0;
4626
4627     d->mb_skipped= 0;
4628     d->qscale= s->qscale;
4629     d->dquant= s->dquant;
4630 }
4631
4632 static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){
4633     int i;
4634
4635     memcpy(d->mv, s->mv, 2*4*2*sizeof(int));
4636     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
4637
4638     /* mpeg1 */
4639     d->mb_skip_run= s->mb_skip_run;
4640     for(i=0; i<3; i++)
4641         d->last_dc[i]= s->last_dc[i];
4642
4643     /* statistics */
4644     d->mv_bits= s->mv_bits;
4645     d->i_tex_bits= s->i_tex_bits;
4646     d->p_tex_bits= s->p_tex_bits;
4647     d->i_count= s->i_count;
4648     d->f_count= s->f_count;
4649     d->b_count= s->b_count;
4650     d->skip_count= s->skip_count;
4651     d->misc_bits= s->misc_bits;
4652
4653     d->mb_intra= s->mb_intra;
4654     d->mb_skipped= s->mb_skipped;
4655     d->mv_type= s->mv_type;
4656     d->mv_dir= s->mv_dir;
4657     d->pb= s->pb;
4658     if(s->data_partitioning){
4659         d->pb2= s->pb2;
4660         d->tex_pb= s->tex_pb;
4661     }
4662     d->block= s->block;
4663     for(i=0; i<8; i++)
4664         d->block_last_index[i]= s->block_last_index[i];
4665     d->interlaced_dct= s->interlaced_dct;
4666     d->qscale= s->qscale;
4667 }
4668
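/* Encode the macroblock with one candidate mode into a scratch bitstream (and, for
   full RD decision, reconstruct it into the scratchpad); if the resulting bit count
   or rate-distortion score beats *dmin, the context is saved into *best and the
   scratch buffers are swapped via *next_block. */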
4669 static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type,
4670                            PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2],
4671                            int *dmin, int *next_block, int motion_x, int motion_y)
4672 {
4673     int score;
4674     uint8_t *dest_backup[3];
4675
4676     copy_context_before_encode(s, backup, type);
4677
4678     s->block= s->blocks[*next_block];
4679     s->pb= pb[*next_block];
4680     if(s->data_partitioning){
4681         s->pb2   = pb2   [*next_block];
4682         s->tex_pb= tex_pb[*next_block];
4683     }
4684
4685     if(*next_block){
4686         memcpy(dest_backup, s->dest, sizeof(s->dest));
4687         s->dest[0] = s->rd_scratchpad;
4688         s->dest[1] = s->rd_scratchpad + 16*s->linesize;
4689         s->dest[2] = s->rd_scratchpad + 16*s->linesize + 8;
4690         assert(s->linesize >= 32); //FIXME
4691     }
4692
4693     encode_mb(s, motion_x, motion_y);
4694
4695     score= put_bits_count(&s->pb);
4696     if(s->data_partitioning){
4697         score+= put_bits_count(&s->pb2);
4698         score+= put_bits_count(&s->tex_pb);
4699     }
4700
4701     if(s->avctx->mb_decision == FF_MB_DECISION_RD){
4702         MPV_decode_mb(s, s->block);
4703
4704         score *= s->lambda2;
4705         score += sse_mb(s) << FF_LAMBDA_SHIFT;
4706     }
4707
4708     if(*next_block){
4709         memcpy(s->dest, dest_backup, sizeof(s->dest));
4710     }
4711
4712     if(score<*dmin){
4713         *dmin= score;
4714         *next_block^=1;
4715
4716         copy_context_after_encode(best, s, type);
4717     }
4718 }
4719
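/* Sum of squared differences of two w x h blocks, using the DSP fast paths for the
   common 16x16 and 8x8 sizes. */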
4720 static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride){
4721     uint32_t *sq = squareTbl + 256;
4722     int acc=0;
4723     int x,y;
4724
4725     if(w==16 && h==16)
4726         return s->dsp.sse[0](NULL, src1, src2, stride, 16);
4727     else if(w==8 && h==8)
4728         return s->dsp.sse[1](NULL, src1, src2, stride, 8);
4729
4730     for(y=0; y<h; y++){
4731         for(x=0; x<w; x++){
4732             acc+= sq[src1[x + y*stride] - src2[x + y*stride]];
4733         }
4734     }
4735
4736     assert(acc>=0);
4737
4738     return acc;
4739 }
4740
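/* Distortion of the current macroblock: reconstructed pixels (s->dest) against the
   source picture, using NSSE if mb_cmp requests it, clipped at the picture border. */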
4741 static int sse_mb(MpegEncContext *s){
4742     int w= 16;
4743     int h= 16;
4744
4745     if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
4746     if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
4747
4748     if(w==16 && h==16)
4749       if(s->avctx->mb_cmp == FF_CMP_NSSE){
4750         return  s->dsp.nsse[0](s, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
4751                +s->dsp.nsse[1](s, s->new_picture.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
4752                +s->dsp.nsse[1](s, s->new_picture.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
4753       }else{
4754         return  s->dsp.sse[0](NULL, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
4755                +s->dsp.sse[1](NULL, s->new_picture.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
4756                +s->dsp.sse[1](NULL, s->new_picture.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
4757       }
4758     else
4759         return  sse(s, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], w, h, s->linesize)
4760                +sse(s, s->new_picture.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize)
4761                +sse(s, s->new_picture.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize);
4762 }
4763
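/* Slice-thread entry point for the motion-estimation pre-pass; macroblocks are
   scanned bottom-up, right-to-left with the pre_dia_size diamond. */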
4764 static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){
4765     MpegEncContext *s= arg;
4766
4767
4768     s->me.pre_pass=1;
4769     s->me.dia_size= s->avctx->pre_dia_size;
4770     s->first_slice_line=1;
4771     for(s->mb_y= s->end_mb_y-1; s->mb_y >= s->start_mb_y; s->mb_y--) {
4772         for(s->mb_x=s->mb_width-1; s->mb_x >=0 ;s->mb_x--) {
4773             ff_pre_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
4774         }
4775         s->first_slice_line=0;
4776     }
4777
4778     s->me.pre_pass=0;
4779
4780     return 0;
4781 }
4782
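/* Slice-thread entry point for the main motion-estimation pass; the chosen motion
   vectors and candidate macroblock types are stored in the context. */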
4783 static int estimate_motion_thread(AVCodecContext *c, void *arg){
4784     MpegEncContext *s= arg;
4785
4786     s->me.dia_size= s->avctx->dia_size;
4787     s->first_slice_line=1;
4788     for(s->mb_y= s->start_mb_y; s->mb_y < s->end_mb_y; s->mb_y++) {
4789         s->mb_x=0; //for block init below
4790         ff_init_block_index(s);
4791         for(s->mb_x=0; s->mb_x < s->mb_width; s->mb_x++) {
4792             s->block_index[0]+=2;
4793             s->block_index[1]+=2;
4794             s->block_index[2]+=2;
4795             s->block_index[3]+=2;
4796
4797             /* compute motion vector & mb_type and store in context */
4798             if(s->pict_type==B_TYPE)
4799                 ff_estimate_b_frame_motion(s, s->mb_x, s->mb_y);
4800             else
4801                 ff_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
4802         }
4803         s->first_slice_line=0;
4804     }
4805     return 0;
4806 }
4807
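/* Compute the luma variance and mean of every source macroblock in the slice and
   accumulate the variance sum in me.mb_var_sum_temp. */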
4808 static int mb_var_thread(AVCodecContext *c, void *arg){
4809     MpegEncContext *s= arg;
4810     int mb_x, mb_y;
4811
4812     for(mb_y=s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
4813         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
4814             int xx = mb_x * 16;
4815             int yy = mb_y * 16;
4816             uint8_t *pix = s->new_picture.data[0] + (yy * s->linesize) + xx;
4817             int varc;
4818             int sum = s->dsp.pix_sum(pix, s->linesize);
4819
4820             varc = (s->dsp.pix_norm1(pix, s->linesize) - (((unsigned)(sum*sum))>>8) + 500 + 128)>>8;
4821
4822             s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc;
4823             s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
4824             s->me.mb_var_sum_temp    += varc;
4825         }
4826     }
4827     return 0;
4828 }
4829
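/* Finish the current slice: merge MPEG-4 data partitions if used, write
   codec-specific stuffing, then byte-align and flush the slice bitstream. */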
4830 static void write_slice_end(MpegEncContext *s){
4831     if(s->codec_id==CODEC_ID_MPEG4){
4832         if(s->partitioned_frame){
4833             ff_mpeg4_merge_partitions(s);
4834         }
4835
4836         ff_mpeg4_stuffing(&s->pb);
4837     }else if(s->out_format == FMT_MJPEG){
4838         ff_mjpeg_stuffing(&s->pb);
4839     }
4840
4841     align_put_bits(&s->pb);
4842     flush_put_bits(&s->pb);
4843
4844     if((s->flags&CODEC_FLAG_PASS1) && !s->partitioned_frame)
4845         s->misc_bits+= get_bits_diff(s);
4846 }
4847
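/* Slice-thread entry point of the encoder main loop: writes resync/GOB/slice headers
   where needed, encodes each macroblock (trying every candidate mode via encode_mb_hq
   when more than one is possible or QP-RD is enabled) and gathers bit statistics. */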
4848 static int encode_thread(AVCodecContext *c, void *arg){
4849     MpegEncContext *s= arg;
4850     int mb_x, mb_y, pdif = 0;
4851     int i, j;
4852     MpegEncContext best_s, backup_s;
4853     uint8_t bit_buf[2][MAX_MB_BYTES];
4854     uint8_t bit_buf2[2][MAX_MB_BYTES];
4855     uint8_t bit_buf_tex[2][MAX_MB_BYTES];
4856     PutBitContext pb[2], pb2[2], tex_pb[2];
4857 //printf("%d->%d\n", s->resync_mb_y, s->end_mb_y);
4858
4859     for(i=0; i<2; i++){
4860         init_put_bits(&pb    [i], bit_buf    [i], MAX_MB_BYTES);
4861         init_put_bits(&pb2   [i], bit_buf2   [i], MAX_MB_BYTES);
4862         init_put_bits(&tex_pb[i], bit_buf_tex[i], MAX_MB_BYTES);
4863     }
4864
4865     s->last_bits= put_bits_count(&s->pb);
4866     s->mv_bits=0;
4867     s->misc_bits=0;
4868     s->i_tex_bits=0;
4869     s->p_tex_bits=0;
4870     s->i_count=0;
4871     s->f_count=0;
4872     s->b_count=0;
4873     s->skip_count=0;
4874
4875     for(i=0; i<3; i++){
4876         /* init last dc values */
4877         /* note: quant matrix value (8) is implied here */
4878         s->last_dc[i] = 128 << s->intra_dc_precision;
4879
4880         s->current_picture.error[i] = 0;
4881     }
4882     s->mb_skip_run = 0;
4883     memset(s->last_mv, 0, sizeof(s->last_mv));
4884
4885     s->last_mv_dir = 0;
4886
4887     switch(s->codec_id){
4888     case CODEC_ID_H263:
4889     case CODEC_ID_H263P:
4890     case CODEC_ID_FLV1:
4891         s->gob_index = ff_h263_get_gob_height(s);
4892         break;
4893     case CODEC_ID_MPEG4:
4894         if(s->partitioned_frame)
4895             ff_mpeg4_init_partitions(s);
4896         break;
4897     }
4898
4899     s->resync_mb_x=0;
4900     s->resync_mb_y=0;
4901     s->first_slice_line = 1;
4902     s->ptr_lastgob = s->pb.buf;
4903     for(mb_y= s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
4904 //    printf("row %d at %X\n", s->mb_y, (int)s);
4905         s->mb_x=0;
4906         s->mb_y= mb_y;
4907
4908         ff_set_qscale(s, s->qscale);
4909         ff_init_block_index(s);
4910
4911         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
4912             int xy= mb_y*s->mb_stride + mb_x; // removed const, H261 needs to adjust this
4913             int mb_type= s->mb_type[xy];
4914 //            int d;
4915             int dmin= INT_MAX;
4916             int dir;
4917
4918             if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < MAX_MB_BYTES){
4919                 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
4920                 return -1;
4921             }
4922             if(s->data_partitioning){
4923                 if(   s->pb2   .buf_end - s->pb2   .buf - (put_bits_count(&s->    pb2)>>3) < MAX_MB_BYTES
4924                    || s->tex_pb.buf_end - s->tex_pb.buf - (put_bits_count(&s->tex_pb )>>3) < MAX_MB_BYTES){
4925                     av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
4926                     return -1;
4927                 }
4928             }
4929
4930             s->mb_x = mb_x;
4931             s->mb_y = mb_y;  // moved into loop, can get changed by H.261
4932             ff_update_block_index(s);
4933
4934 #ifdef CONFIG_H261_ENCODER
4935             if(s->codec_id == CODEC_ID_H261){
4936                 ff_h261_reorder_mb_index(s);
4937                 xy= s->mb_y*s->mb_stride + s->mb_x;
4938                 mb_type= s->mb_type[xy];
4939             }
4940 #endif
4941
4942             /* write gob / video packet header  */
4943             if(s->rtp_mode){
4944                 int current_packet_size, is_gob_start;
4945
4946                 current_packet_size= ((put_bits_count(&s->pb)+7)>>3) - (s->ptr_lastgob - s->pb.buf);
4947
4948                 is_gob_start= s->avctx->rtp_payload_size && current_packet_size >= s->avctx->rtp_payload_size && mb_y + mb_x>0;
4949
4950                 if(s->start_mb_y == mb_y && mb_y > 0 && mb_x==0) is_gob_start=1;
4951
4952                 switch(s->codec_id){
4953                 case CODEC_ID_H263:
4954                 case CODEC_ID_H263P:
4955                     if(!s->h263_slice_structured)
4956                         if(s->mb_x || s->mb_y%s->gob_index) is_gob_start=0;
4957                     break;
4958                 case CODEC_ID_MPEG2VIDEO:
4959                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
4960                 case CODEC_ID_MPEG1VIDEO:
4961                     if(s->mb_skip_run) is_gob_start=0;
4962                     break;
4963                 }
4964
4965                 if(is_gob_start){
4966                     if(s->start_mb_y != mb_y || mb_x!=0){
4967                         write_slice_end(s);
4968
4969                         if(s->codec_id==CODEC_ID_MPEG4 && s->partitioned_frame){
4970                             ff_mpeg4_init_partitions(s);
4971                         }
4972                     }
4973
4974                     assert((put_bits_count(&s->pb)&7) == 0);
4975                     current_packet_size= pbBufPtr(&s->pb) - s->ptr_lastgob;
4976
4977                     if(s->avctx->error_rate && s->resync_mb_x + s->resync_mb_y > 0){
4978                         int r= put_bits_count(&s->pb)/8 + s->picture_number + 16 + s->mb_x + s->mb_y;
4979                         int d= 100 / s->avctx->error_rate;
4980                         if(r % d == 0){
4981                             current_packet_size=0;
4982 #ifndef ALT_BITSTREAM_WRITER
4983                             s->pb.buf_ptr= s->ptr_lastgob;
4984 #endif
4985                             assert(pbBufPtr(&s->pb) == s->ptr_lastgob);
4986                         }
4987                     }
4988
4989                     if (s->avctx->rtp_callback){
4990                         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width + mb_x - s->resync_mb_x;
4991                         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, current_packet_size, number_mb);
4992                     }
4993
4994                     switch(s->codec_id){
4995                     case CODEC_ID_MPEG4:
4996                         ff_mpeg4_encode_video_packet_header(s);
4997                         ff_mpeg4_clean_buffers(s);
4998                     break;
4999                     case CODEC_ID_MPEG1VIDEO:
5000                     case CODEC_ID_MPEG2VIDEO:
5001                         ff_mpeg1_encode_slice_header(s);
5002                         ff_mpeg1_clean_buffers(s);
5003                     break;
5004                     case CODEC_ID_H263:
5005                     case CODEC_ID_H263P:
5006                         h263_encode_gob_header(s, mb_y);
5007                     break;
5008                     }
5009
5010                     if(s->flags&CODEC_FLAG_PASS1){
5011                         int bits= put_bits_count(&s->pb);
5012                         s->misc_bits+= bits - s->last_bits;
5013                         s->last_bits= bits;
5014                     }
5015
5016                     s->ptr_lastgob += current_packet_size;
5017                     s->first_slice_line=1;
5018                     s->resync_mb_x=mb_x;
5019                     s->resync_mb_y=mb_y;
5020                 }
5021             }
5022
5023             if(  (s->resync_mb_x   == s->mb_x)
5024                && s->resync_mb_y+1 == s->mb_y){
5025                 s->first_slice_line=0;
5026             }
5027
5028             s->mb_skipped=0;
5029             s->dquant=0; //only for QP_RD
5030
5031             if(mb_type & (mb_type-1) || (s->flags & CODEC_FLAG_QP_RD)){ // more than 1 MB type possible or CODEC_FLAG_QP_RD
5032                 int next_block=0;
5033                 int pb_bits_count, pb2_bits_count, tex_pb_bits_count;
5034
5035                 copy_context_before_encode(&backup_s, s, -1);
5036                 backup_s.pb= s->pb;
5037                 best_s.data_partitioning= s->data_partitioning;
5038                 best_s.partitioned_frame= s->partitioned_frame;
5039                 if(s->data_partitioning){
5040                     backup_s.pb2= s->pb2;
5041                     backup_s.tex_pb= s->tex_pb;
5042                 }
5043
5044                 if(mb_type&CANDIDATE_MB_TYPE_INTER){
5045                     s->mv_dir = MV_DIR_FORWARD;
5046                     s->mv_type = MV_TYPE_16X16;
5047                     s->mb_intra= 0;
5048                     s->mv[0][0][0] = s->p_mv_table[xy][0];
5049                     s->mv[0][0][1] = s->p_mv_table[xy][1];
5050                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER, pb, pb2, tex_pb,
5051                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
5052                 }
5053                 if(mb_type&CANDIDATE_MB_TYPE_INTER_I){
5054                     s->mv_dir = MV_DIR_FORWARD;
5055                     s->mv_type = MV_TYPE_FIELD;
5056                     s->mb_intra= 0;
5057                     for(i=0; i<2; i++){
5058                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
5059                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
5060                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
5061                     }
5062                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER_I, pb, pb2, tex_pb,
5063                                  &dmin, &next_block, 0, 0);
5064                 }
5065                 if(mb_type&CANDIDATE_MB_TYPE_SKIPPED){
5066                     s->mv_dir = MV_DIR_FORWARD;
5067                     s->mv_type = MV_TYPE_16X16;
5068                     s->mb_intra= 0;
5069                     s->mv[0][0][0] = 0;
5070                     s->mv[0][0][1] = 0;
5071                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_SKIPPED, pb, pb2, tex_pb,
5072                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
5073                 }
5074                 if(mb_type&CANDIDATE_MB_TYPE_INTER4V){
5075                     s->mv_dir = MV_DIR_FORWARD;
5076                     s->mv_type = MV_TYPE_8X8;
5077                     s->mb_intra= 0;
5078                     for(i=0; i<4; i++){
5079                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
5080                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
5081                     }
5082                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER4V, pb, pb2, tex_pb,
5083                                  &dmin, &next_block, 0, 0);
5084                 }
5085                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD){
5086                     s->mv_dir = MV_DIR_FORWARD;
5087                     s->mv_type = MV_TYPE_16X16;
5088                     s->mb_intra= 0;
5089                     s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
5090                     s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
5091                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD, pb, pb2, tex_pb,
5092                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
5093                 }
5094                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD){
5095                     s->mv_dir = MV_DIR_BACKWARD;
5096                     s->mv_type = MV_TYPE_16X16;
5097                     s->mb_intra= 0;
5098                     s->mv[1][0][0] = s->b_back_mv_table[xy][0];
5099                     s->mv[1][0][1] = s->b_back_mv_table[xy][1];
5100                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD, pb, pb2, tex_pb,
5101                                  &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]);
5102                 }
5103                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR){
5104                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
5105                     s->mv_type = MV_TYPE_16X16;
5106                     s->mb_intra= 0;
5107                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
5108                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
5109                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
5110                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
5111                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR, pb, pb2, tex_pb,
5112                                  &dmin, &next_block, 0, 0);
5113                 }
5114                 if(mb_type&CANDIDATE_MB_TYPE_DIRECT){
5115                     int mx= s->b_direct_mv_table[xy][0];
5116                     int my= s->b_direct_mv_table[xy][1];
5117
5118                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
5119                     s->mb_intra= 0;
5120                     ff_mpeg4_set_direct_mv(s, mx, my);
5121                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
5122                                  &dmin, &next_block, mx, my);
5123                 }
5124                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD_I){
5125                     s->mv_dir = MV_DIR_FORWARD;
5126                     s->mv_type = MV_TYPE_FIELD;
5127                     s->mb_intra= 0;
5128                     for(i=0; i<2; i++){
5129                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
5130                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
5131                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
5132                     }
5133                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD_I, pb, pb2, tex_pb,
5134                                  &dmin, &next_block, 0, 0);
5135                 }
5136                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD_I){
5137                     s->mv_dir = MV_DIR_BACKWARD;
5138                     s->mv_type = MV_TYPE_FIELD;
5139                     s->mb_intra= 0;
5140                     for(i=0; i<2; i++){
5141                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
5142                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
5143                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
5144                     }
5145                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD_I, pb, pb2, tex_pb,
5146                                  &dmin, &next_block, 0, 0);
5147                 }
5148                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR_I){
5149                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
5150                     s->mv_type = MV_TYPE_FIELD;
5151                     s->mb_intra= 0;
5152                     for(dir=0; dir<2; dir++){
5153                         for(i=0; i<2; i++){
5154                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
5155                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
5156                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
5157                         }
5158                     }
5159                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR_I, pb, pb2, tex_pb,
5160                                  &dmin, &next_block, 0, 0);
5161                 }
5162                 if(mb_type&CANDIDATE_MB_TYPE_INTRA){
5163                     s->mv_dir = 0;
5164                     s->mv_type = MV_TYPE_16X16;
5165                     s->mb_intra= 1;
5166                     s->mv[0][0][0] = 0;
5167                     s->mv[0][0][1] = 0;
5168                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTRA, pb, pb2, tex_pb,
5169                                  &dmin, &next_block, 0, 0);
5170                     if(s->h263_pred || s->h263_aic){
5171                         if(best_s.mb_intra)
5172                             s->mbintra_table[mb_x + mb_y*s->mb_stride]=1;
5173                         else
5174                             ff_clean_intra_table_entries(s); //old mode?
5175                     }
5176                 }
5177
5178                 if(s->flags & CODEC_FLAG_QP_RD){
5179                     if(best_s.mv_type==MV_TYPE_16X16 && !(best_s.mv_dir&MV_DIRECT)){
5180                         const int last_qp= backup_s.qscale;
5181                         int dquant, dir, qp, dc[6];
5182                         DCTELEM ac[6][16];
5183                         const int mvdir= (best_s.mv_dir&MV_DIR_BACKWARD) ? 1 : 0;
5184
5185                         assert(backup_s.dquant == 0);
5186
5187                         //FIXME intra
5188                         s->mv_dir= best_s.mv_dir;
5189                         s->mv_type = MV_TYPE_16X16;
5190                         s->mb_intra= best_s.mb_intra;
5191                         s->mv[0][0][0] = best_s.mv[0][0][0];
5192                         s->mv[0][0][1] = best_s.mv[0][0][1];
5193                         s->mv[1][0][0] = best_s.mv[1][0][0];
5194                         s->mv[1][0][1] = best_s.mv[1][0][1];
5195
5196                         dir= s->pict_type == B_TYPE ? 2 : 1;
5197                         if(last_qp + dir > s->avctx->qmax) dir= -dir;
5198                         for(dquant= dir; dquant<=2 && dquant>=-2; dquant += dir){
5199                             qp= last_qp + dquant;
5200                             if(qp < s->avctx->qmin || qp > s->avctx->qmax)
5201                                 break;
5202                             backup_s.dquant= dquant;
5203                             if(s->mb_intra && s->dc_val[0]){
5204                                 for(i=0; i<6; i++){
5205                                     dc[i]= s->dc_val[0][ s->block_index[i] ];
5206                                     memcpy(ac[i], s->ac_val[0][s->block_index[i]], sizeof(DCTELEM)*16);
5207                                 }
5208                             }
5209
5210                             encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
5211                                          &dmin, &next_block, s->mv[mvdir][0][0], s->mv[mvdir][0][1]);
5212                             if(best_s.qscale != qp){
5213                                 if(s->mb_intra && s->dc_val[0]){
5214                                     for(i=0; i<6; i++){
5215                                         s->dc_val[0][ s->block_index[i] ]= dc[i];
5216                                         memcpy(s->ac_val[0][s->block_index[i]], ac[i], sizeof(DCTELEM)*16);
5217                                     }
5218                                 }
5219                                 if(dir > 0 && dquant==dir){
5220                                     dquant= 0;
5221                                     dir= -dir;
5222                                 }else
5223                                     break;
5224                             }
5225                         }
5226                         qp= best_s.qscale;
5227                         s->current_picture.qscale_table[xy]= qp;
5228                     }
5229                 }
5230
5231                 copy_context_after_encode(s, &best_s, -1);
5232
5233                 pb_bits_count= put_bits_count(&s->pb);
5234                 flush_put_bits(&s->pb);
5235                 ff_copy_bits(&backup_s.pb, bit_buf[next_block^1], pb_bits_count);
5236                 s->pb= backup_s.pb;
5237
5238                 if(s->data_partitioning){
5239                     pb2_bits_count= put_bits_count(&s->pb2);
5240                     flush_put_bits(&s->pb2);
5241                     ff_copy_bits(&backup_s.pb2, bit_buf2[next_block^1], pb2_bits_count);
5242                     s->pb2= backup_s.pb2;
5243
5244                     tex_pb_bits_count= put_bits_count(&s->tex_pb);
5245                     flush_put_bits(&s->tex_pb);
5246                     ff_copy_bits(&backup_s.tex_pb, bit_buf_tex[next_block^1], tex_pb_bits_count);
5247                     s->tex_pb= backup_s.tex_pb;
5248                 }
5249                 s->last_bits= put_bits_count(&s->pb);
5250
5251                 if (s->out_format == FMT_H263 && s->pict_type!=B_TYPE)
5252                     ff_h263_update_motion_val(s);
5253
5254                 if(next_block==0){ //FIXME 16 vs linesize16
5255                     s->dsp.put_pixels_tab[0][0](s->dest[0], s->rd_scratchpad                     , s->linesize  ,16);
5256                     s->dsp.put_pixels_tab[1][0](s->dest[1], s->rd_scratchpad + 16*s->linesize    , s->uvlinesize, 8);
5257                     s->dsp.put_pixels_tab[1][0](s->dest[2], s->rd_scratchpad + 16*s->linesize + 8, s->uvlinesize, 8);
5258                 }
5259
5260                 if(s->avctx->mb_decision == FF_MB_DECISION_BITS)
5261                     MPV_decode_mb(s, s->block);
5262             } else {
5263                 int motion_x, motion_y;
5264                 s->mv_type=MV_TYPE_16X16;
5265                 // only one MB-Type possible
5266
5267                 switch(mb_type){
5268                 case CANDIDATE_MB_TYPE_INTRA:
5269                     s->mv_dir = 0;
5270                     s->mb_intra= 1;
5271                     motion_x= s->mv[0][0][0] = 0;
5272                     motion_y= s->mv[0][0][1] = 0;
5273                     break;
5274                 case CANDIDATE_MB_TYPE_INTER:
5275                     s->mv_dir = MV_DIR_FORWARD;
5276                     s->mb_intra= 0;
5277                     motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
5278                     motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
5279                     break;
5280                 case CANDIDATE_MB_TYPE_INTER_I:
5281                     s->mv_dir = MV_DIR_FORWARD;
5282                     s->mv_type = MV_TYPE_FIELD;
5283                     s->mb_intra= 0;
5284                     for(i=0; i<2; i++){
5285                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
5286                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
5287                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
5288                     }
5289                     motion_x = motion_y = 0;
5290                     break;
5291                 case CANDIDATE_MB_TYPE_INTER4V:
5292                     s->mv_dir = MV_DIR_FORWARD;
5293                     s->mv_type = MV_TYPE_8X8;
5294                     s->mb_intra= 0;
5295                     for(i=0; i<4; i++){
5296                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
5297                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
5298                     }
5299                     motion_x= motion_y= 0;
5300                     break;
5301                 case CANDIDATE_MB_TYPE_DIRECT:
5302                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
5303                     s->mb_intra= 0;
5304                     motion_x=s->b_direct_mv_table[xy][0];
5305                     motion_y=s->b_direct_mv_table[xy][1];
5306                     ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
5307                     break;
5308                 case CANDIDATE_MB_TYPE_BIDIR:
5309                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
5310                     s->mb_intra= 0;
5311                     motion_x=0;
5312                     motion_y=0;
5313                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
5314                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
5315                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
5316                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
5317                     break;
5318                 case CANDIDATE_MB_TYPE_BACKWARD:
5319                     s->mv_dir = MV_DIR_BACKWARD;
5320                     s->mb_intra= 0;
5321                     motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
5322                     motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
5323                     break;
5324                 case CANDIDATE_MB_TYPE_FORWARD:
5325                     s->mv_dir = MV_DIR_FORWARD;
5326                     s->mb_intra= 0;
5327                     motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
5328                     motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
5329 //                    printf(" %d %d ", motion_x, motion_y);
5330                     break;
5331                 case CANDIDATE_MB_TYPE_FORWARD_I:
5332                     s->mv_dir = MV_DIR_FORWARD;
5333                     s->mv_type = MV_TYPE_FIELD;
5334                     s->mb_intra= 0;
5335                     for(i=0; i<2; i++){
5336                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
5337                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
5338                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
5339                     }
5340                     motion_x=motion_y=0;
5341                     break;
5342                 case CANDIDATE_MB_TYPE_BACKWARD_I:
5343                     s->mv_dir = MV_DIR_BACKWARD;
5344                     s->mv_type = MV_TYPE_FIELD;
5345                     s->mb_intra= 0;
5346                     for(i=0; i<2; i++){
5347                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
5348                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
5349                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
5350                     }
5351                     motion_x=motion_y=0;
5352                     break;
5353                 case CANDIDATE_MB_TYPE_BIDIR_I:
5354                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
5355                     s->mv_type = MV_TYPE_FIELD;
5356                     s->mb_intra= 0;
5357                     for(dir=0; dir<2; dir++){
5358                         for(i=0; i<2; i++){
5359                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
5360                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
5361                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
5362                         }
5363                     }
5364                     motion_x=motion_y=0;
5365                     break;
5366                 default:
5367                     motion_x=motion_y=0; //gcc warning fix
5368                     av_log(s->avctx, AV_LOG_ERROR, "illegal MB type\n");
5369                 }
5370
5371                 encode_mb(s, motion_x, motion_y);
5372
5373                 // RAL: Update last macroblock type
5374                 s->last_mv_dir = s->mv_dir;
5375
5376                 if (s->out_format == FMT_H263 && s->pict_type!=B_TYPE)
5377                     ff_h263_update_motion_val(s);
5378
5379                 MPV_decode_mb(s, s->block);
5380             }
5381
5382             /* clean the MV table in I/P/S-frames for direct mode in B-frames */
5383             if(s->mb_intra /* && I,P,S_TYPE */){
5384                 s->p_mv_table[xy][0]=0;
5385                 s->p_mv_table[xy][1]=0;
5386             }
5387
5388             if(s->flags&CODEC_FLAG_PSNR){
5389                 int w= 16;
5390                 int h= 16;
5391
5392                 if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
5393                 if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
5394
5395                 s->current_picture.error[0] += sse(
5396                     s, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
5397                     s->dest[0], w, h, s->linesize);
5398                 s->current_picture.error[1] += sse(
5399                     s, s->new_picture.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,
5400                     s->dest[1], w>>1, h>>1, s->uvlinesize);
5401                 s->current_picture.error[2] += sse(
5402                     s, s->new_picture.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,
5403                     s->dest[2], w>>1, h>>1, s->uvlinesize);
5404             }
5405             if(s->loop_filter){
5406                 if(s->out_format == FMT_H263)
5407                     ff_h263_loop_filter(s);
5408             }
5409 //printf("MB %d %d bits\n", s->mb_x+s->mb_y*s->mb_stride, put_bits_count(&s->pb));
5410         }
5411     }
5412
5413     //not beautiful, but this must be written before flushing, so it has to be here
5414     if (s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type == I_TYPE)
5415         msmpeg4_encode_ext_header(s);
5416
5417     write_slice_end(s);
5418
5419     /* Send the last GOB if RTP */
5420     if (s->avctx->rtp_callback) {
5421         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width - s->resync_mb_x;
5422         pdif = pbBufPtr(&s->pb) - s->ptr_lastgob;
5423         /* Call the RTP callback to send the last GOB */
5424         emms_c();
5425         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, pdif, number_mb);
5426     }
5427
5428     return 0;
5429 }
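
/* The macroblock loop above either encodes the single candidate type left by
 * motion estimation directly, or, when mb_decision requests a bit/RD
 * comparison, encodes candidates speculatively into scratch PutBitContexts
 * and keeps the cheapest one (the bit_buf[]/backup_s machinery).  A sketch of
 * that compare-and-keep idea in isolation, with hypothetical names:
 */
#if 0
PutBitContext trial;
uint8_t trial_buf[3000];
int trial_bits;

init_put_bits(&trial, trial_buf, sizeof(trial_buf));
/* ... encode one candidate macroblock into &trial ... */
trial_bits= put_bits_count(&trial);
if(trial_bits < best_bits){        /* keep the cheaper variant */
    best_bits= trial_bits;
    flush_put_bits(&trial);
    /* the winning bits are later appended with ff_copy_bits(&s->pb, trial_buf, best_bits) */
}
#endif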
5430
5431 #define MERGE(field) dst->field += src->field; src->field=0
5432 static void merge_context_after_me(MpegEncContext *dst, MpegEncContext *src){
5433     MERGE(me.scene_change_score);
5434     MERGE(me.mc_mb_var_sum_temp);
5435     MERGE(me.mb_var_sum_temp);
5436 }
5437
5438 static void merge_context_after_encode(MpegEncContext *dst, MpegEncContext *src){
5439     int i;
5440
5441     MERGE(dct_count[0]); //note, the other dct vars are not part of the context
5442     MERGE(dct_count[1]);
5443     MERGE(mv_bits);
5444     MERGE(i_tex_bits);
5445     MERGE(p_tex_bits);
5446     MERGE(i_count);
5447     MERGE(f_count);
5448     MERGE(b_count);
5449     MERGE(skip_count);
5450     MERGE(misc_bits);
5451     MERGE(error_count);
5452     MERGE(padding_bug_score);
5453     MERGE(current_picture.error[0]);
5454     MERGE(current_picture.error[1]);
5455     MERGE(current_picture.error[2]);
5456
5457     if(dst->avctx->noise_reduction){
5458         for(i=0; i<64; i++){
5459             MERGE(dct_error_sum[0][i]);
5460             MERGE(dct_error_sum[1][i]);
5461         }
5462     }
5463
5464     assert(put_bits_count(&src->pb) % 8 ==0);
5465     assert(put_bits_count(&dst->pb) % 8 ==0);
5466     ff_copy_bits(&dst->pb, src->pb.buf, put_bits_count(&src->pb));
5467     flush_put_bits(&dst->pb);
5468 }
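
/* The MERGE() pattern above moves each per-slice-thread statistic into the
 * main context and zeroes the source, so nothing is counted twice, and then
 * appends the (byte aligned) slice bitstream with ff_copy_bits().  The same
 * accumulate-and-clear idea on a hypothetical plain struct:
 */
#if 0
struct slice_stats{ int bits; int skip_count; };

static void merge_stats(struct slice_stats *dst, struct slice_stats *src){
    dst->bits       += src->bits;       src->bits       = 0;
    dst->skip_count += src->skip_count; src->skip_count = 0;
}
#endif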
5469
5470 static int estimate_qp(MpegEncContext *s, int dry_run){
5471     if (!s->fixed_qscale) {
5472         s->current_picture_ptr->quality=
5473         s->current_picture.quality = ff_rate_estimate_qscale(s, dry_run);
5474         if (s->current_picture.quality < 0)
5475             return -1;
5476     }
5477
5478     if(s->adaptive_quant){
5479         switch(s->codec_id){
5480         case CODEC_ID_MPEG4:
5481             ff_clean_mpeg4_qscales(s);
5482             break;
5483         case CODEC_ID_H263:
5484         case CODEC_ID_H263P:
5485         case CODEC_ID_FLV1:
5486             ff_clean_h263_qscales(s);
5487             break;
5488         }
5489
5490         s->lambda= s->lambda_table[0];
5491         //FIXME broken
5492     }else
5493         s->lambda= s->current_picture.quality;
5494 //printf("%d %d\n", s->avctx->global_quality, s->current_picture.quality);
5495     update_qscale(s);
5496     return 0;
5497 }
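
/* estimate_qp() works on the lambda scale: the per-picture quality returned
 * by the rate controller is (roughly) qscale*FF_QP2LAMBDA, and update_qscale()
 * (defined earlier in this file) converts the lambda back into a clipped
 * qscale.  A rough sketch of that conversion, ignoring the exact rounding and
 * clipping that update_qscale() really uses:
 */
#if 0
static int lambda_to_qscale_approx(int lambda, int qmin, int qmax){
    int qscale= (lambda + FF_QP2LAMBDA/2) / FF_QP2LAMBDA;
    if(qscale < qmin) qscale= qmin;
    if(qscale > qmax) qscale= qmax;
    return qscale;
}
#endif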
5498
5499 static int encode_picture(MpegEncContext *s, int picture_number)
5500 {
5501     int i;
5502     int bits;
5503
5504     s->picture_number = picture_number;
5505
5506     /* Reset the average MB variance */
5507     s->me.mb_var_sum_temp    =
5508     s->me.mc_mb_var_sum_temp = 0;
5509
5510     /* we need to initialize some time vars before we can encode b-frames */
5511     // RAL: Condition added for MPEG1VIDEO
5512     if (s->codec_id == CODEC_ID_MPEG1VIDEO || s->codec_id == CODEC_ID_MPEG2VIDEO || (s->h263_pred && !s->h263_msmpeg4))
5513         ff_set_mpeg4_time(s, s->picture_number);  //FIXME rename and use has_b_frames or similar
5514
5515     s->me.scene_change_score=0;
5516
5517 //    s->lambda= s->current_picture_ptr->quality; //FIXME qscale / ... stuff for ME rate distortion
5518
5519     if(s->pict_type==I_TYPE){
5520         if(s->msmpeg4_version >= 3) s->no_rounding=1;
5521         else                        s->no_rounding=0;
5522     }else if(s->pict_type!=B_TYPE){
5523         if(s->flipflop_rounding || s->codec_id == CODEC_ID_H263P || s->codec_id == CODEC_ID_MPEG4)
5524             s->no_rounding ^= 1;
5525     }
5526
5527     if(s->flags & CODEC_FLAG_PASS2){
5528         if (estimate_qp(s,1) < 0)
5529             return -1;
5530         ff_get_2pass_fcode(s);
5531     }else if(!(s->flags & CODEC_FLAG_QSCALE)){
5532         if(s->pict_type==B_TYPE)
5533             s->lambda= s->last_lambda_for[s->pict_type];
5534         else
5535             s->lambda= s->last_lambda_for[s->last_non_b_pict_type];
5536         update_qscale(s);
5537     }
5538
5539     s->mb_intra=0; //for the rate distortion & bit compare functions
5540     for(i=1; i<s->avctx->thread_count; i++){
5541         ff_update_duplicate_context(s->thread_context[i], s);
5542     }
5543
5544     ff_init_me(s);
5545
5546     /* Estimate motion for every MB */
5547     if(s->pict_type != I_TYPE){
5548         s->lambda = (s->lambda * s->avctx->me_penalty_compensation + 128)>>8;
5549         s->lambda2= (s->lambda2* s->avctx->me_penalty_compensation + 128)>>8;
5550         if(s->pict_type != B_TYPE && s->avctx->me_threshold==0){
5551             if((s->avctx->pre_me && s->last_non_b_pict_type==I_TYPE) || s->avctx->pre_me==2){
5552                 s->avctx->execute(s->avctx, pre_estimate_motion_thread, (void**)&(s->thread_context[0]), NULL, s->avctx->thread_count);
5553             }
5554         }
5555
5556         s->avctx->execute(s->avctx, estimate_motion_thread, (void**)&(s->thread_context[0]), NULL, s->avctx->thread_count);
5557     }else /* if(s->pict_type == I_TYPE) */{
5558         /* I-Frame */
5559         for(i=0; i<s->mb_stride*s->mb_height; i++)
5560             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
5561
5562         if(!s->fixed_qscale){
5563             /* finding spatial complexity for I-frame rate control */
5564             s->avctx->execute(s->avctx, mb_var_thread, (void**)&(s->thread_context[0]), NULL, s->avctx->thread_count);
5565         }
5566     }
5567     for(i=1; i<s->avctx->thread_count; i++){
5568         merge_context_after_me(s, s->thread_context[i]);
5569     }
5570     s->current_picture.mc_mb_var_sum= s->current_picture_ptr->mc_mb_var_sum= s->me.mc_mb_var_sum_temp;
5571     s->current_picture.   mb_var_sum= s->current_picture_ptr->   mb_var_sum= s->me.   mb_var_sum_temp;
5572     emms_c();
5573
5574     if(s->me.scene_change_score > s->avctx->scenechange_threshold && s->pict_type == P_TYPE){
5575         s->pict_type= I_TYPE;
5576         for(i=0; i<s->mb_stride*s->mb_height; i++)
5577             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
5578 //printf("Scene change detected, encoding as I Frame %d %d\n", s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
5579     }
5580
5581     if(!s->umvplus){
5582         if(s->pict_type==P_TYPE || s->pict_type==S_TYPE) {
5583             s->f_code= ff_get_best_fcode(s, s->p_mv_table, CANDIDATE_MB_TYPE_INTER);
5584
5585             if(s->flags & CODEC_FLAG_INTERLACED_ME){
5586                 int a,b;
5587                 a= ff_get_best_fcode(s, s->p_field_mv_table[0][0], CANDIDATE_MB_TYPE_INTER_I); //FIXME field_select
5588                 b= ff_get_best_fcode(s, s->p_field_mv_table[1][1], CANDIDATE_MB_TYPE_INTER_I);
5589                 s->f_code= FFMAX(s->f_code, FFMAX(a,b));
5590             }
5591
5592             ff_fix_long_p_mvs(s);
5593             ff_fix_long_mvs(s, NULL, 0, s->p_mv_table, s->f_code, CANDIDATE_MB_TYPE_INTER, 0);
5594             if(s->flags & CODEC_FLAG_INTERLACED_ME){
5595                 int j;
5596                 for(i=0; i<2; i++){
5597                     for(j=0; j<2; j++)
5598                         ff_fix_long_mvs(s, s->p_field_select_table[i], j,
5599                                         s->p_field_mv_table[i][j], s->f_code, CANDIDATE_MB_TYPE_INTER_I, 0);
5600                 }
5601             }
5602         }
5603
5604         if(s->pict_type==B_TYPE){
5605             int a, b;
5606
5607             a = ff_get_best_fcode(s, s->b_forw_mv_table, CANDIDATE_MB_TYPE_FORWARD);
5608             b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, CANDIDATE_MB_TYPE_BIDIR);
5609             s->f_code = FFMAX(a, b);
5610
5611             a = ff_get_best_fcode(s, s->b_back_mv_table, CANDIDATE_MB_TYPE_BACKWARD);
5612             b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, CANDIDATE_MB_TYPE_BIDIR);
5613             s->b_code = FFMAX(a, b);
5614
5615             ff_fix_long_mvs(s, NULL, 0, s->b_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_FORWARD, 1);
5616             ff_fix_long_mvs(s, NULL, 0, s->b_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BACKWARD, 1);
5617             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_BIDIR, 1);
5618             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BIDIR, 1);
5619             if(s->flags & CODEC_FLAG_INTERLACED_ME){
5620                 int dir, j;
5621                 for(dir=0; dir<2; dir++){
5622                     for(i=0; i<2; i++){
5623                         for(j=0; j<2; j++){
5624                             int type= dir ? (CANDIDATE_MB_TYPE_BACKWARD_I|CANDIDATE_MB_TYPE_BIDIR_I)
5625                                           : (CANDIDATE_MB_TYPE_FORWARD_I |CANDIDATE_MB_TYPE_BIDIR_I);
5626                             ff_fix_long_mvs(s, s->b_field_select_table[dir][i], j,
5627                                             s->b_field_mv_table[dir][i][j], dir ? s->b_code : s->f_code, type, 1);
5628                         }
5629                     }
5630                 }
5631             }
5632         }
5633     }
5634
5635     if (estimate_qp(s, 0) < 0)
5636         return -1;
5637
5638     if(s->qscale < 3 && s->max_qcoeff<=128 && s->pict_type==I_TYPE && !(s->flags & CODEC_FLAG_QSCALE))
5639         s->qscale= 3; //reduce clipping problems
5640
5641     if (s->out_format == FMT_MJPEG) {
5642         /* for mjpeg, we do include qscale in the matrix */
5643         s->intra_matrix[0] = ff_mpeg1_default_intra_matrix[0];
5644         for(i=1;i<64;i++){
5645             int j= s->dsp.idct_permutation[i];
5646
5647             s->intra_matrix[j] = clip_uint8((ff_mpeg1_default_intra_matrix[i] * s->qscale) >> 3);
5648         }
5649         convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
5650                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
5651         s->qscale= 8;
5652     }
5653
5654     //FIXME var duplication
5655     s->current_picture_ptr->key_frame=
5656     s->current_picture.key_frame= s->pict_type == I_TYPE; //FIXME pic_ptr
5657     s->current_picture_ptr->pict_type=
5658     s->current_picture.pict_type= s->pict_type;
5659
5660     if(s->current_picture.key_frame)
5661         s->picture_in_gop_number=0;
5662
5663     s->last_bits= put_bits_count(&s->pb);
5664     switch(s->out_format) {
5665     case FMT_MJPEG:
5666         mjpeg_picture_header(s);
5667         break;
5668 #ifdef CONFIG_H261_ENCODER
5669     case FMT_H261:
5670         ff_h261_encode_picture_header(s, picture_number);
5671         break;
5672 #endif
5673     case FMT_H263:
5674         if (s->codec_id == CODEC_ID_WMV2)
5675             ff_wmv2_encode_picture_header(s, picture_number);
5676         else if (s->h263_msmpeg4)
5677             msmpeg4_encode_picture_header(s, picture_number);
5678         else if (s->h263_pred)
5679             mpeg4_encode_picture_header(s, picture_number);
5680 #ifdef CONFIG_RV10_ENCODER
5681         else if (s->codec_id == CODEC_ID_RV10)
5682             rv10_encode_picture_header(s, picture_number);
5683 #endif
5684 #ifdef CONFIG_RV20_ENCODER
5685         else if (s->codec_id == CODEC_ID_RV20)
5686             rv20_encode_picture_header(s, picture_number);
5687 #endif
5688         else if (s->codec_id == CODEC_ID_FLV1)
5689             ff_flv_encode_picture_header(s, picture_number);
5690         else
5691             h263_encode_picture_header(s, picture_number);
5692         break;
5693     case FMT_MPEG1:
5694         mpeg1_encode_picture_header(s, picture_number);
5695         break;
5696     case FMT_H264:
5697         break;
5698     default:
5699         assert(0);
5700     }
5701     bits= put_bits_count(&s->pb);
5702     s->header_bits= bits - s->last_bits;
5703
5704     for(i=1; i<s->avctx->thread_count; i++){
5705         update_duplicate_context_after_me(s->thread_context[i], s);
5706     }
5707     s->avctx->execute(s->avctx, encode_thread, (void**)&(s->thread_context[0]), NULL, s->avctx->thread_count);
5708     for(i=1; i<s->avctx->thread_count; i++){
5709         merge_context_after_encode(s, s->thread_context[i]);
5710     }
5711     emms_c();
5712     return 0;
5713 }
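
/* encode_picture() parallelizes over slice contexts through avctx->execute(),
 * which runs the given worker once per thread context; a serial fallback would
 * behave roughly like the sketch below (hypothetical name, the details of the
 * real default implementation may differ):
 */
#if 0
static int execute_serial(AVCodecContext *c, int (*func)(AVCodecContext *c2, void *arg),
                          void **arg, int *ret, int count){
    int i;
    for(i=0; i<count; i++){
        int r= func(c, arg[i]);
        if(ret) ret[i]= r;
    }
    return 0;
}
#endif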
5714
5715 static void  denoise_dct_c(MpegEncContext *s, DCTELEM *block){
5716     const int intra= s->mb_intra;
5717     int i;
5718
5719     s->dct_count[intra]++;
5720
5721     for(i=0; i<64; i++){
5722         int level= block[i];
5723
5724         if(level){
5725             if(level>0){
5726                 s->dct_error_sum[intra][i] += level;
5727                 level -= s->dct_offset[intra][i];
5728                 if(level<0) level=0;
5729             }else{
5730                 s->dct_error_sum[intra][i] -= level;
5731                 level += s->dct_offset[intra][i];
5732                 if(level>0) level=0;
5733             }
5734             block[i]= level;
5735         }
5736     }
5737 }
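
/* denoise_dct_c() above is a dead-zone style shrinkage: every nonzero
 * coefficient is pulled towards zero by dct_offset[][] (updated elsewhere from
 * the accumulated dct_error_sum statistics), but never past zero.  On a single
 * value the operation reduces to:
 */
#if 0
static int shrink_towards_zero(int level, int offset){
    if(level > 0) return FFMAX(level - offset, 0);
    if(level < 0) return FFMIN(level + offset, 0);
    return 0;
}
#endif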
5738
5739 static int dct_quantize_trellis_c(MpegEncContext *s,
5740                         DCTELEM *block, int n,
5741                         int qscale, int *overflow){
5742     const int *qmat;
5743     const uint8_t *scantable= s->intra_scantable.scantable;
5744     const uint8_t *perm_scantable= s->intra_scantable.permutated;
5745     int max=0;
5746     unsigned int threshold1, threshold2;
5747     int bias=0;
5748     int run_tab[65];
5749     int level_tab[65];
5750     int score_tab[65];
5751     int survivor[65];
5752     int survivor_count;
5753     int last_run=0;
5754     int last_level=0;
5755     int last_score= 0;
5756     int last_i;
5757     int coeff[2][64];
5758     int coeff_count[64];
5759     int qmul, qadd, start_i, last_non_zero, i, dc;
5760     const int esc_length= s->ac_esc_length;
5761     uint8_t * length;
5762     uint8_t * last_length;
5763     const int lambda= s->lambda2 >> (FF_LAMBDA_SHIFT - 6);
5764
5765     s->dsp.fdct (block);
5766
5767     if(s->dct_error_sum)
5768         s->denoise_dct(s, block);
5769     qmul= qscale*16;
5770     qadd= ((qscale-1)|1)*8;
5771
5772     if (s->mb_intra) {
5773         int q;
5774         if (!s->h263_aic) {
5775             if (n < 4)
5776                 q = s->y_dc_scale;
5777             else
5778                 q = s->c_dc_scale;
5779             q = q << 3;
5780         } else{
5781             /* For AIC we skip quant/dequant of INTRADC */
5782             q = 1 << 3;
5783             qadd=0;
5784         }
5785
5786         /* note: block[0] is assumed to be positive */
5787         block[0] = (block[0] + (q >> 1)) / q;
5788         start_i = 1;
5789         last_non_zero = 0;
5790         qmat = s->q_intra_matrix[qscale];
5791         if(s->mpeg_quant || s->out_format == FMT_MPEG1)
5792             bias= 1<<(QMAT_SHIFT-1);
5793         length     = s->intra_ac_vlc_length;
5794         last_length= s->intra_ac_vlc_last_length;
5795     } else {
5796         start_i = 0;
5797         last_non_zero = -1;
5798         qmat = s->q_inter_matrix[qscale];
5799         length     = s->inter_ac_vlc_length;
5800         last_length= s->inter_ac_vlc_last_length;
5801     }
5802     last_i= start_i;
5803
5804     threshold1= (1<<QMAT_SHIFT) - bias - 1;
5805     threshold2= (threshold1<<1);
5806
5807     for(i=63; i>=start_i; i--) {
5808         const int j = scantable[i];
5809         int level = block[j] * qmat[j];
5810
5811         if(((unsigned)(level+threshold1))>threshold2){
5812             last_non_zero = i;
5813             break;
5814         }
5815     }
5816
5817     for(i=start_i; i<=last_non_zero; i++) {
5818         const int j = scantable[i];
5819         int level = block[j] * qmat[j];
5820
5821 //        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
5822 //           || bias-level >= (1<<(QMAT_SHIFT - 3))){
5823         if(((unsigned)(level+threshold1))>threshold2){
5824             if(level>0){
5825                 level= (bias + level)>>QMAT_SHIFT;
5826                 coeff[0][i]= level;
5827                 coeff[1][i]= level-1;
5828 //                coeff[2][k]= level-2;
5829             }else{
5830                 level= (bias - level)>>QMAT_SHIFT;
5831                 coeff[0][i]= -level;
5832                 coeff[1][i]= -level+1;
5833 //                coeff[2][k]= -level+2;
5834             }
5835             coeff_count[i]= FFMIN(level, 2);
5836             assert(coeff_count[i]);
5837             max |=level;
5838         }else{
5839             coeff[0][i]= (level>>31)|1;
5840             coeff_count[i]= 1;
5841         }
5842     }
5843
5844     *overflow= s->max_qcoeff < max; //overflow might have happened
5845
5846     if(last_non_zero < start_i){
5847         memset(block + start_i, 0, (64-start_i)*sizeof(DCTELEM));
5848         return last_non_zero;
5849     }
5850
5851     score_tab[start_i]= 0;
5852     survivor[0]= start_i;
5853     survivor_count= 1;
5854
5855     for(i=start_i; i<=last_non_zero; i++){
5856         int level_index, j;
5857         const int dct_coeff= ABS(block[ scantable[i] ]);
5858                         const int zero_distortion= dct_coeff*dct_coeff;
5859         int best_score=256*256*256*120;
5860         for(level_index=0; level_index < coeff_count[i]; level_index++){
5861             int distortion;
5862             int level= coeff[level_index][i];
5863             const int alevel= ABS(level);
5864             int unquant_coeff;
5865
5866             assert(level);
5867
5868             if(s->out_format == FMT_H263){
5869                 unquant_coeff= alevel*qmul + qadd;
5870             }else{ //MPEG1
5871                 j= s->dsp.idct_permutation[ scantable[i] ]; //FIXME optimize
5872                 if(s->mb_intra){
5873                         unquant_coeff = (int)(  alevel  * qscale * s->intra_matrix[j]) >> 3;
5874                         unquant_coeff =   (unquant_coeff - 1) | 1;
5875                 }else{
5876                         unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[j])) >> 4;
5877                         unquant_coeff =   (unquant_coeff - 1) | 1;
5878                 }
5879                 unquant_coeff<<= 3;
5880             }
5881
5882             distortion= (unquant_coeff - dct_coeff) * (unquant_coeff - dct_coeff) - zero_distortion;
5883             level+=64;
5884             if((level&(~127)) == 0){
5885                 for(j=survivor_count-1; j>=0; j--){
5886                     int run= i - survivor[j];
5887                     int score= distortion + length[UNI_AC_ENC_INDEX(run, level)]*lambda;
5888                     score += score_tab[i-run];
5889
5890                     if(score < best_score){
5891                         best_score= score;
5892                         run_tab[i+1]= run;
5893                         level_tab[i+1]= level-64;
5894                     }
5895                 }
5896
5897                 if(s->out_format == FMT_H263){
5898                     for(j=survivor_count-1; j>=0; j--){
5899                         int run= i - survivor[j];
5900                         int score= distortion + last_length[UNI_AC_ENC_INDEX(run, level)]*lambda;
5901                         score += score_tab[i-run];
5902                         if(score < last_score){
5903                             last_score= score;
5904                             last_run= run;
5905                             last_level= level-64;
5906                             last_i= i+1;
5907                         }
5908                     }
5909                 }
5910             }else{
5911                 distortion += esc_length*lambda;
5912                 for(j=survivor_count-1; j>=0; j--){
5913                     int run= i - survivor[j];
5914                     int score= distortion + score_tab[i-run];
5915
5916                     if(score < best_score){
5917                         best_score= score;
5918                         run_tab[i+1]= run;
5919                         level_tab[i+1]= level-64;
5920                     }
5921                 }
5922
5923                 if(s->out_format == FMT_H263){
5924                     for(j=survivor_count-1; j>=0; j--){
5925                         int run= i - survivor[j];
5926                         int score= distortion + score_tab[i-run];
5927                         if(score < last_score){
5928                             last_score= score;
5929                             last_run= run;
5930                             last_level= level-64;
5931                             last_i= i+1;
5932                         }
5933                     }
5934                 }
5935             }
5936         }
5937
5938         score_tab[i+1]= best_score;
5939
5940                 //Note: there is a VLC code in MPEG-4 which is 1 bit shorter than another one with a shorter run and the same level
5941         if(last_non_zero <= 27){
5942             for(; survivor_count; survivor_count--){
5943                 if(score_tab[ survivor[survivor_count-1] ] <= best_score)
5944                     break;
5945             }
5946         }else{
5947             for(; survivor_count; survivor_count--){
5948                 if(score_tab[ survivor[survivor_count-1] ] <= best_score + lambda)
5949                     break;
5950             }
5951         }
5952
5953         survivor[ survivor_count++ ]= i+1;
5954     }
5955
5956     if(s->out_format != FMT_H263){
5957         last_score= 256*256*256*120;
5958         for(i= survivor[0]; i<=last_non_zero + 1; i++){
5959             int score= score_tab[i];
5960             if(i) score += lambda*2; //FIXME be more exact?
5961
5962             if(score < last_score){
5963                 last_score= score;
5964                 last_i= i;
5965                 last_level= level_tab[i];
5966                 last_run= run_tab[i];
5967             }
5968         }
5969     }
5970
5971     s->coded_score[n] = last_score;
5972
5973     dc= ABS(block[0]);
5974     last_non_zero= last_i - 1;
5975     memset(block + start_i, 0, (64-start_i)*sizeof(DCTELEM));
5976
5977     if(last_non_zero < start_i)
5978         return last_non_zero;
5979
5980     if(last_non_zero == 0 && start_i == 0){
5981         int best_level= 0;
5982         int best_score= dc * dc;
5983
5984         for(i=0; i<coeff_count[0]; i++){
5985             int level= coeff[i][0];
5986             int alevel= ABS(level);
5987             int unquant_coeff, score, distortion;
5988
5989             if(s->out_format == FMT_H263){
5990                     unquant_coeff= (alevel*qmul + qadd)>>3;
5991             }else{ //MPEG1
5992                     unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[0])) >> 4;
5993                     unquant_coeff =   (unquant_coeff - 1) | 1;
5994             }
5995             unquant_coeff = (unquant_coeff + 4) >> 3;
5996             unquant_coeff<<= 3 + 3;
5997
5998             distortion= (unquant_coeff - dc) * (unquant_coeff - dc);
5999             level+=64;
6000             if((level&(~127)) == 0) score= distortion + last_length[UNI_AC_ENC_INDEX(0, level)]*lambda;
6001             else                    score= distortion + esc_length*lambda;
6002
6003             if(score < best_score){
6004                 best_score= score;
6005                 best_level= level - 64;
6006             }
6007         }
6008         block[0]= best_level;
6009         s->coded_score[n] = best_score - dc*dc;
6010         if(best_level == 0) return -1;
6011         else                return last_non_zero;
6012     }
6013
6014     i= last_i;
6015     assert(last_level);
6016
6017     block[ perm_scantable[last_non_zero] ]= last_level;
6018     i -= last_run + 1;
6019
6020     for(; i>start_i; i -= run_tab[i] + 1){
6021         block[ perm_scantable[i-1] ]= level_tab[i];
6022     }
6023
6024     return last_non_zero;
6025 }
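
/* dct_quantize_trellis_c() above is a Viterbi-style search over the scan
 * positions: score_tab[i+1] holds the cheapest rate/distortion cost of coding
 * everything up to position i, run_tab[]/level_tab[] remember the decision
 * that achieved it, and survivor[] prunes predecessors that can no longer win.
 * The core recurrence, reusing the local names from above and a hypothetical
 * rd_cost() helper (distortion + lambda*bits for one run/level pair); only the
 * first candidate level coeff[0][i] is shown, the real code also tries
 * coeff[1][i]:
 */
#if 0
for(i=start_i; i<=last_non_zero; i++){
    int best= INT_MAX, k;
    for(k=0; k<survivor_count; k++){
        int run  = i - survivor[k];
        int score= score_tab[i-run] + rd_cost(run, coeff[0][i]);
        if(score < best){
            best= score;
            run_tab[i+1]  = run;
            level_tab[i+1]= coeff[0][i];
        }
    }
    score_tab[i+1]= best;
}
#endif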
6026
6027 //#define REFINE_STATS 1
6028 static int16_t basis[64][64];
6029
6030 static void build_basis(uint8_t *perm){
6031     int i, j, x, y;
6032     emms_c();
6033     for(i=0; i<8; i++){
6034         for(j=0; j<8; j++){
6035             for(y=0; y<8; y++){
6036                 for(x=0; x<8; x++){
6037                     double s= 0.25*(1<<BASIS_SHIFT);
6038                     int index= 8*i + j;
6039                     int perm_index= perm[index];
6040                     if(i==0) s*= sqrt(0.5);
6041                     if(j==0) s*= sqrt(0.5);
6042                     basis[perm_index][8*x + y]= lrintf(s * cos((M_PI/8.0)*i*(x+0.5)) * cos((M_PI/8.0)*j*(y+0.5)));
6043                 }
6044             }
6045         }
6046     }
6047 }
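
/* build_basis() tabulates the 64 two-dimensional DCT-II basis functions,
 * scaled by 1<<BASIS_SHIFT and indexed in IDCT permutation order, so that
 * add_8x8basis()/try_8x8basis() can add or remove the spatial-domain
 * contribution of a single coefficient without a full IDCT.  One basis sample,
 * before scaling, is simply:
 */
#if 0
static double dct_basis_sample(int u, int v, int x, int y){
    double s= 0.25;
    if(u==0) s*= sqrt(0.5);
    if(v==0) s*= sqrt(0.5);
    return s * cos((M_PI/8.0)*u*(x+0.5)) * cos((M_PI/8.0)*v*(y+0.5));
}
#endif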
6048
6049 static int dct_quantize_refine(MpegEncContext *s, //FIXME breaks denoise?
6050                         DCTELEM *block, int16_t *weight, DCTELEM *orig,
6051                         int n, int qscale){
6052     int16_t rem[64];
6053     DECLARE_ALIGNED_16(DCTELEM, d1[64]);
6054     const int *qmat;
6055     const uint8_t *scantable= s->intra_scantable.scantable;
6056     const uint8_t *perm_scantable= s->intra_scantable.permutated;
6057 //    unsigned int threshold1, threshold2;
6058 //    int bias=0;
6059     int run_tab[65];
6060     int prev_run=0;
6061     int prev_level=0;
6062     int qmul, qadd, start_i, last_non_zero, i, dc;
6063     uint8_t * length;
6064     uint8_t * last_length;
6065     int lambda;
6066     int rle_index, run, q, sum;
6067 #ifdef REFINE_STATS
6068 static int count=0;
6069 static int after_last=0;
6070 static int to_zero=0;
6071 static int from_zero=0;
6072 static int raise=0;
6073 static int lower=0;
6074 static int messed_sign=0;
6075 #endif
6076
6077     if(basis[0][0] == 0)
6078         build_basis(s->dsp.idct_permutation);
6079
6080     qmul= qscale*2;
6081     qadd= (qscale-1)|1;
6082     if (s->mb_intra) {
6083         if (!s->h263_aic) {
6084             if (n < 4)
6085                 q = s->y_dc_scale;
6086             else
6087                 q = s->c_dc_scale;
6088         } else{
6089             /* For AIC we skip quant/dequant of INTRADC */
6090             q = 1;
6091             qadd=0;
6092         }
6093         q <<= RECON_SHIFT-3;
6094         /* note: block[0] is assumed to be positive */
6095         dc= block[0]*q;
6096 //        block[0] = (block[0] + (q >> 1)) / q;
6097         start_i = 1;
6098         qmat = s->q_intra_matrix[qscale];
6099 //        if(s->mpeg_quant || s->out_format == FMT_MPEG1)
6100 //            bias= 1<<(QMAT_SHIFT-1);
6101         length     = s->intra_ac_vlc_length;
6102         last_length= s->intra_ac_vlc_last_length;
6103     } else {
6104         dc= 0;
6105         start_i = 0;
6106         qmat = s->q_inter_matrix[qscale];
6107         length     = s->inter_ac_vlc_length;
6108         last_length= s->inter_ac_vlc_last_length;
6109     }
6110     last_non_zero = s->block_last_index[n];
6111
6112 #ifdef REFINE_STATS
6113 {START_TIMER
6114 #endif
6115     dc += (1<<(RECON_SHIFT-1));
6116     for(i=0; i<64; i++){
6117         rem[i]= dc - (orig[i]<<RECON_SHIFT); //FIXME use orig directly instead of copying to rem[]
6118     }
6119 #ifdef REFINE_STATS
6120 STOP_TIMER("memset rem[]")}
6121 #endif
6122     sum=0;
6123     for(i=0; i<64; i++){
6124         int one= 36;
6125         int qns=4;
6126         int w;
6127
6128         w= ABS(weight[i]) + qns*one;
6129         w= 15 + (48*qns*one + w/2)/w; // 16 .. 63
6130
6131         weight[i] = w;
6132 //        w=weight[i] = (63*qns + (w/2)) / w;
6133
6134         assert(w>0);
6135         assert(w<(1<<6));
6136         sum += w*w;
6137     }
6138     lambda= sum*(uint64_t)s->lambda2 >> (FF_LAMBDA_SHIFT - 6 + 6 + 6 + 6);
6139 #ifdef REFINE_STATS
6140 {START_TIMER
6141 #endif
6142     run=0;
6143     rle_index=0;
6144     for(i=start_i; i<=last_non_zero; i++){
6145         int j= perm_scantable[i];
6146         const int level= block[j];
6147         int coeff;
6148
6149         if(level){
6150             if(level<0) coeff= qmul*level - qadd;
6151             else        coeff= qmul*level + qadd;
6152             run_tab[rle_index++]=run;
6153             run=0;
6154
6155             s->dsp.add_8x8basis(rem, basis[j], coeff);
6156         }else{
6157             run++;
6158         }
6159     }
6160 #ifdef REFINE_STATS
6161 if(last_non_zero>0){
6162 STOP_TIMER("init rem[]")
6163 }
6164 }
6165
6166 {START_TIMER
6167 #endif
6168     for(;;){
6169         int best_score=s->dsp.try_8x8basis(rem, weight, basis[0], 0);
6170         int best_coeff=0;
6171         int best_change=0;
6172         int run2, best_unquant_change=0, analyze_gradient;
6173 #ifdef REFINE_STATS
6174 {START_TIMER
6175 #endif
6176         analyze_gradient = last_non_zero > 2 || s->avctx->quantizer_noise_shaping >= 3;
6177
6178         if(analyze_gradient){
6179 #ifdef REFINE_STATS
6180 {START_TIMER
6181 #endif
6182             for(i=0; i<64; i++){
6183                 int w= weight[i];
6184
6185                 d1[i] = (rem[i]*w*w + (1<<(RECON_SHIFT+12-1)))>>(RECON_SHIFT+12);
6186             }
6187 #ifdef REFINE_STATS
6188 STOP_TIMER("rem*w*w")}
6189 {START_TIMER
6190 #endif
6191             s->dsp.fdct(d1);
6192 #ifdef REFINE_STATS
6193 STOP_TIMER("dct")}
6194 #endif
6195         }
6196
6197         if(start_i){
6198             const int level= block[0];
6199             int change, old_coeff;
6200
6201             assert(s->mb_intra);
6202
6203             old_coeff= q*level;
6204
6205             for(change=-1; change<=1; change+=2){
6206                 int new_level= level + change;
6207                 int score, new_coeff;
6208
6209                 new_coeff= q*new_level;
6210                 if(new_coeff >= 2048 || new_coeff < 0)
6211                     continue;
6212
6213                 score= s->dsp.try_8x8basis(rem, weight, basis[0], new_coeff - old_coeff);
6214                 if(score<best_score){
6215                     best_score= score;
6216                     best_coeff= 0;
6217                     best_change= change;
6218                     best_unquant_change= new_coeff - old_coeff;
6219                 }
6220             }
6221         }
6222
6223         run=0;
6224         rle_index=0;
6225         run2= run_tab[rle_index++];
6226         prev_level=0;
6227         prev_run=0;
6228
6229         for(i=start_i; i<64; i++){
6230             int j= perm_scantable[i];
6231             const int level= block[j];
6232             int change, old_coeff;
6233
6234             if(s->avctx->quantizer_noise_shaping < 3 && i > last_non_zero + 1)
6235                 break;
6236
6237             if(level){
6238                 if(level<0) old_coeff= qmul*level - qadd;
6239                 else        old_coeff= qmul*level + qadd;
6240                 run2= run_tab[rle_index++]; //FIXME ! maybe after last
6241             }else{
6242                 old_coeff=0;
6243                 run2--;
6244                 assert(run2>=0 || i >= last_non_zero );
6245             }
6246
6247             for(change=-1; change<=1; change+=2){
6248                 int new_level= level + change;
6249                 int score, new_coeff, unquant_change;
6250
6251                 score=0;
6252                 if(s->avctx->quantizer_noise_shaping < 2 && ABS(new_level) > ABS(level))
6253                    continue;
6254
6255                 if(new_level){
6256                     if(new_level<0) new_coeff= qmul*new_level - qadd;
6257                     else            new_coeff= qmul*new_level + qadd;
6258                     if(new_coeff >= 2048 || new_coeff <= -2048)
6259                         continue;
6260                     //FIXME check for overflow
6261
6262                     if(level){
6263                         if(level < 63 && level > -63){
6264                             if(i < last_non_zero)
6265                                 score +=   length[UNI_AC_ENC_INDEX(run, new_level+64)]
6266                                          - length[UNI_AC_ENC_INDEX(run, level+64)];
6267                             else
6268                                 score +=   last_length[UNI_AC_ENC_INDEX(run, new_level+64)]
6269                                          - last_length[UNI_AC_ENC_INDEX(run, level+64)];
6270                         }
6271                     }else{
6272                         assert(ABS(new_level)==1);
6273
6274                         if(analyze_gradient){
6275                             int g= d1[ scantable[i] ];
6276                             if(g && (g^new_level) >= 0)
6277                                 continue;
6278                         }
6279
6280                         if(i < last_non_zero){
6281                             int next_i= i + run2 + 1;
6282                             int next_level= block[ perm_scantable[next_i] ] + 64;
6283
6284                             if(next_level&(~127))
6285                                 next_level= 0;
6286
6287                             if(next_i < last_non_zero)
6288                                 score +=   length[UNI_AC_ENC_INDEX(run, 65)]
6289                                          + length[UNI_AC_ENC_INDEX(run2, next_level)]
6290                                          - length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
6291                             else
6292                                 score +=  length[UNI_AC_ENC_INDEX(run, 65)]
6293                                         + last_length[UNI_AC_ENC_INDEX(run2, next_level)]
6294                                         - last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
6295                         }else{
6296                             score += last_length[UNI_AC_ENC_INDEX(run, 65)];
6297                             if(prev_level){
6298                                 score +=  length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
6299                                         - last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
6300                             }
6301                         }
6302                     }
6303                 }else{
6304                     new_coeff=0;
6305                     assert(ABS(level)==1);
6306
6307                     if(i < last_non_zero){
6308                         int next_i= i + run2 + 1;
6309                         int next_level= block[ perm_scantable[next_i] ] + 64;
6310
6311                         if(next_level&(~127))
6312                             next_level= 0;
6313
6314                         if(next_i < last_non_zero)
6315                             score +=   length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
6316                                      - length[UNI_AC_ENC_INDEX(run2, next_level)]
6317                                      - length[UNI_AC_ENC_INDEX(run, 65)];
6318                         else
6319                             score +=   last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
6320                                      - last_length[UNI_AC_ENC_INDEX(run2, next_level)]
6321                                      - length[UNI_AC_ENC_INDEX(run, 65)];
6322                     }else{
6323                         score += -last_length[UNI_AC_ENC_INDEX(run, 65)];
6324                         if(prev_level){
6325                             score +=  last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
6326                                     - length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
6327                         }
6328                     }
6329                 }
6330
6331                 score *= lambda;
6332
6333                 unquant_change= new_coeff - old_coeff;
6334                 assert((score < 100*lambda && score > -100*lambda) || lambda==0);
6335
6336                 score+= s->dsp.try_8x8basis(rem, weight, basis[j], unquant_change);
6337                 if(score<best_score){
6338                     best_score= score;
6339                     best_coeff= i;
6340                     best_change= change;
6341                     best_unquant_change= unquant_change;
6342                 }
6343             }
6344             if(level){
6345                 prev_level= level + 64;
6346                 if(prev_level&(~127))
6347                     prev_level= 0;
6348                 prev_run= run;
6349                 run=0;
6350             }else{
6351                 run++;
6352             }
6353         }
6354 #ifdef REFINE_STATS
6355 STOP_TIMER("iterative step")}
6356 #endif
6357
6358         if(best_change){
6359             int j= perm_scantable[ best_coeff ];
6360
6361             block[j] += best_change;
6362
6363             if(best_coeff > last_non_zero){
6364                 last_non_zero= best_coeff;
6365                 assert(block[j]);
6366 #ifdef REFINE_STATS
6367 after_last++;
6368 #endif
6369             }else{
6370 #ifdef REFINE_STATS
6371 if(block[j]){
6372     if(block[j] - best_change){
6373         if(ABS(block[j]) > ABS(block[j] - best_change)){
6374             raise++;
6375         }else{
6376             lower++;
6377         }
6378     }else{
6379         from_zero++;
6380     }
6381 }else{
6382     to_zero++;
6383 }
6384 #endif
6385                 for(; last_non_zero>=start_i; last_non_zero--){
6386                     if(block[perm_scantable[last_non_zero]])
6387                         break;
6388                 }
6389             }
6390 #ifdef REFINE_STATS
6391 count++;
6392 if(256*256*256*64 % count == 0){
6393     printf("after_last:%d to_zero:%d from_zero:%d raise:%d lower:%d sign:%d xyp:%d/%d/%d\n", after_last, to_zero, from_zero, raise, lower, messed_sign, s->mb_x, s->mb_y, s->picture_number);
6394 }
6395 #endif
6396             run=0;
6397             rle_index=0;
6398             for(i=start_i; i<=last_non_zero; i++){
6399                 int j= perm_scantable[i];
6400                 const int level= block[j];
6401
6402                  if(level){
6403                      run_tab[rle_index++]=run;
6404                      run=0;
6405                  }else{
6406                      run++;
6407                  }
6408             }
6409
6410             s->dsp.add_8x8basis(rem, basis[j], best_unquant_change);
6411         }else{
6412             break;
6413         }
6414     }
6415 #ifdef REFINE_STATS
6416 if(last_non_zero>0){
6417 STOP_TIMER("iterative search")
6418 }
6419 }
6420 #endif
6421
6422     return last_non_zero;
6423 }
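
/* dct_quantize_refine() above is a greedy coordinate descent: each pass
 * evaluates every +-1 change of every coefficient (rate from the VLC length
 * tables, distortion via try_8x8basis() on the weighted residual rem[]) and
 * applies the single best change, until no change improves the score.  The
 * skeleton of the outer loop, reusing the local names from above:
 */
#if 0
for(;;){
    /* scan all +-1 candidates, filling best_change/best_coeff/best_unquant_change */
    if(!best_change)
        break;                                /* local optimum reached */
    block[ perm_scantable[best_coeff] ] += best_change;
    s->dsp.add_8x8basis(rem, basis[ perm_scantable[best_coeff] ], best_unquant_change);
}
#endif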
6424
6425 static int dct_quantize_c(MpegEncContext *s,
6426                         DCTELEM *block, int n,
6427                         int qscale, int *overflow)
6428 {
6429     int i, j, level, last_non_zero, q, start_i;
6430     const int *qmat;
6431     const uint8_t *scantable= s->intra_scantable.scantable;
6432     int bias;
6433     int max=0;
6434     unsigned int threshold1, threshold2;
6435
6436     s->dsp.fdct (block);
6437
6438     if(s->dct_error_sum)
6439         s->denoise_dct(s, block);
6440
6441     if (s->mb_intra) {
6442         if (!s->h263_aic) {
6443             if (n < 4)
6444                 q = s->y_dc_scale;
6445             else
6446                 q = s->c_dc_scale;
6447             q = q << 3;
6448         } else
6449             /* For AIC we skip quant/dequant of INTRADC */
6450             q = 1 << 3;
6451
6452         /* note: block[0] is assumed to be positive */
6453         block[0] = (block[0] + (q >> 1)) / q;
6454         start_i = 1;
6455         last_non_zero = 0;
6456         qmat = s->q_intra_matrix[qscale];
6457         bias= s->intra_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
6458     } else {
6459         start_i = 0;
6460         last_non_zero = -1;
6461         qmat = s->q_inter_matrix[qscale];
6462         bias= s->inter_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
6463     }
6464     threshold1= (1<<QMAT_SHIFT) - bias - 1;
6465     threshold2= (threshold1<<1);
6466     for(i=63;i>=start_i;i--) {
6467         j = scantable[i];
6468         level = block[j] * qmat[j];
6469
6470         if(((unsigned)(level+threshold1))>threshold2){
6471             last_non_zero = i;
6472             break;
6473         }else{
6474             block[j]=0;
6475         }
6476     }
6477     for(i=start_i; i<=last_non_zero; i++) {
6478         j = scantable[i];
6479         level = block[j] * qmat[j];
6480
6481 //        if(   bias+level >= (1<<QMAT_SHIFT)
6482 //           || bias-level >= (1<<QMAT_SHIFT)){
6483         if(((unsigned)(level+threshold1))>threshold2){
6484             if(level>0){
6485                 level= (bias + level)>>QMAT_SHIFT;
6486                 block[j]= level;
6487             }else{
6488                 level= (bias - level)>>QMAT_SHIFT;
6489                 block[j]= -level;
6490             }
6491             max |=level;
6492         }else{
6493             block[j]=0;
6494         }
6495     }
6496     *overflow= s->max_qcoeff < max; //overflow might have happened
6497
6498     /* we need this permutation so that we correct the IDCT; we only permute the nonzero elements */
6499     if (s->dsp.idct_permutation_type != FF_NO_IDCT_PERM)
6500         ff_block_permute(block, s->dsp.idct_permutation, scantable, last_non_zero);
6501
6502     return last_non_zero;
6503 }
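
/* The ((unsigned)(level+threshold1)) > threshold2 test used above (with
 * threshold2 == 2*threshold1) is a branch-free way of asking whether
 * |level| > threshold1, i.e. whether the biased shift will produce a nonzero
 * quantized value at all:
 */
#if 0
static int exceeds_deadzone(int level, unsigned threshold1){
    return (unsigned)(level + threshold1) > 2*threshold1; /* same as |level| > threshold1 */
}
#endif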
6504
6505 #endif //CONFIG_ENCODERS
6506
6507 static void dct_unquantize_mpeg1_intra_c(MpegEncContext *s,
6508                                    DCTELEM *block, int n, int qscale)
6509 {
6510     int i, level, nCoeffs;
6511     const uint16_t *quant_matrix;
6512
6513     nCoeffs= s->block_last_index[n];
6514
6515     if (n < 4)
6516         block[0] = block[0] * s->y_dc_scale;
6517     else
6518         block[0] = block[0] * s->c_dc_scale;
6519     /* XXX: only mpeg1 */
6520     quant_matrix = s->intra_matrix;
6521     for(i=1;i<=nCoeffs;i++) {
6522         int j= s->intra_scantable.permutated[i];
6523         level = block[j];
6524         if (level) {
6525             if (level < 0) {
6526                 level = -level;
6527                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
6528                 level = (level - 1) | 1;
6529                 level = -level;
6530             } else {
6531                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
6532                 level = (level - 1) | 1;
6533             }
6534             block[j] = level;
6535         }
6536     }
6537 }
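
/* MPEG-1 intra dequantization above reconstructs an AC coefficient as
 * (level*qscale*quant_matrix[j])>>3 and then forces the result to be odd
 * ("oddification"), which limits IDCT mismatch between encoder and decoder.
 * For one positive coefficient:
 */
#if 0
static int mpeg1_dequant_intra_one(int level, int qscale, int matrix_val){
    int v= (level * qscale * matrix_val) >> 3;
    return (v - 1) | 1;   /* force the reconstruction to be odd */
}
#endif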
6538
6539 static void dct_unquantize_mpeg1_inter_c(MpegEncContext *s,
6540                                    DCTELEM *block, int n, int qscale)
6541 {
6542     int i, level, nCoeffs;
6543     const uint16_t *quant_matrix;
6544
6545     nCoeffs= s->block_last_index[n];
6546
6547     quant_matrix = s->inter_matrix;
6548     for(i=0; i<=nCoeffs; i++) {
6549         int j= s->intra_scantable.permutated[i];
6550         level = block[j];
6551         if (level) {
6552             if (level < 0) {
6553                 level = -level;
6554                 level = (((level << 1) + 1) * qscale *
6555                          ((int) (quant_matrix[j]))) >> 4;
6556                 level = (level - 1) | 1;
6557                 level = -level;
6558             } else {
6559                 level = (((level << 1) + 1) * qscale *
6560                          ((int) (quant_matrix[j]))) >> 4;
6561                 level = (level - 1) | 1;
6562             }
6563             block[j] = level;
6564         }
6565     }
6566 }
6567
6568 static void dct_unquantize_mpeg2_intra_c(MpegEncContext *s,
6569                                    DCTELEM *block, int n, int qscale)
6570 {
6571     int i, level, nCoeffs;
6572     const uint16_t *quant_matrix;
6573
6574     if(s->alternate_scan) nCoeffs= 63;
6575     else nCoeffs= s->block_last_index[n];
6576
6577     if (n < 4)
6578         block[0] = block[0] * s->y_dc_scale;
6579     else
6580         block[0] = block[0] * s->c_dc_scale;
6581     quant_matrix = s->intra_matrix;
6582     for(i=1;i<=nCoeffs;i++) {
6583         int j= s->intra_scantable.permutated[i];
6584         level = block[j];
6585         if (level) {
6586             if (level < 0) {
6587                 level = -level;
6588                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
6589                 level = -level;
6590             } else {
6591                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
6592             }
6593             block[j] = level;
6594         }
6595     }
6596 }
6597
6598 static void dct_unquantize_mpeg2_intra_bitexact(MpegEncContext *s,
6599                                    DCTELEM *block, int n, int qscale)
6600 {
6601     int i, level, nCoeffs;
6602     const uint16_t *quant_matrix;
6603     int sum=-1;
6604
6605     if(s->alternate_scan) nCoeffs= 63;
6606     else nCoeffs= s->block_last_index[n];
6607
6608     if (n < 4)
6609         block[0] = block[0] * s->y_dc_scale;
6610     else
6611         block[0] = block[0] * s->c_dc_scale;
6612     quant_matrix = s->intra_matrix;
6613     for(i=1;i<=nCoeffs;i++) {
6614         int j= s->intra_scantable.permutated[i];
6615         level = block[j];
6616         if (level) {
6617             if (level < 0) {
6618                 level = -level;
6619                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
6620                 level = -level;
6621             } else {
6622                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
6623             }
6624             block[j] = level;
6625             sum+=level;
6626         }
6627     }
6628     block[63]^=sum&1;
6629 }
6630
6631 static void dct_unquantize_mpeg2_inter_c(MpegEncContext *s,
6632                                    DCTELEM *block, int n, int qscale)
6633 {
6634     int i, level, nCoeffs;
6635     const uint16_t *quant_matrix;
6636     int sum=-1;
6637
6638     if(s->alternate_scan) nCoeffs= 63;
6639     else nCoeffs= s->block_last_index[n];
6640
6641     quant_matrix = s->inter_matrix;
6642     for(i=0; i<=nCoeffs; i++) {
6643         int j= s->intra_scantable.permutated[i];
6644         level = block[j];
6645         if (level) {
6646             if (level < 0) {
6647                 level = -level;
6648                 level = (((level << 1) + 1) * qscale *
6649                          ((int) (quant_matrix[j]))) >> 4;
6650                 level = -level;
6651             } else {
6652                 level = (((level << 1) + 1) * qscale *
6653                          ((int) (quant_matrix[j]))) >> 4;
6654             }
6655             block[j] = level;
6656             sum+=level;
6657         }
6658     }
6659     block[63]^=sum&1;
6660 }
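
/* MPEG-2 drops the per-coefficient oddification and instead applies a single
 * mismatch-control step, as in dct_unquantize_mpeg2_inter_c() and the bitexact
 * intra variant above: after dequantization the sum of all 64 coefficients
 * must be odd, which is enforced by flipping the LSB of coefficient 63
 * whenever the sum comes out even:
 */
#if 0
static void mpeg2_mismatch_control(DCTELEM block[64], int coeff_sum){
    if(!(coeff_sum & 1))
        block[63] ^= 1;   /* toggle the parity of the total sum */
}
#endif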
6661
6662 static void dct_unquantize_h263_intra_c(MpegEncContext *s,
6663                                   DCTELEM *block, int n, int qscale)
6664 {
6665     int i, level, qmul, qadd;
6666     int nCoeffs;
6667
6668     assert(s->block_last_index[n]>=0);
6669
6670     qmul = qscale << 1;
6671
6672     if (!s->h263_aic) {
6673         if (n < 4)
6674             block[0] = block[0] * s->y_dc_scale;
6675         else
6676             block[0] = block[0] * s->c_dc_scale;
6677         qadd = (qscale - 1) | 1;
6678     }else{
6679         qadd = 0;
6680     }
6681     if(s->ac_pred)
6682         nCoeffs=63;
6683     else
6684         nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
6685
6686     for(i=1; i<=nCoeffs; i++) {
6687         level = block[i];
6688         if (level) {
6689             if (level < 0) {
6690                 level = level * qmul - qadd;
6691             } else {
6692                 level = level * qmul + qadd;
6693             }
6694             block[i] = level;
6695         }
6696     }
6697 }
6698
6699 static void dct_unquantize_h263_inter_c(MpegEncContext *s,
6700                                   DCTELEM *block, int n, int qscale)
6701 {
6702     int i, level, qmul, qadd;
6703     int nCoeffs;
6704
6705     assert(s->block_last_index[n]>=0);
6706
6707     qadd = (qscale - 1) | 1;
6708     qmul = qscale << 1;
6709
6710     nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
6711
6712     for(i=0; i<=nCoeffs; i++) {
6713         level = block[i];
6714         if (level) {
6715             if (level < 0) {
6716                 level = level * qmul - qadd;
6717             } else {
6718                 level = level * qmul + qadd;
6719             }
6720             block[i] = level;
6721         }
6722     }
6723 }
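
/* dct_unquantize_h263_inter_c() above uses the uniform H.263 reconstruction
 * rec = 2*qscale*level + sign(level)*((qscale-1)|1), i.e. every nonzero level
 * is expanded by a fixed odd offset away from zero.  For one coefficient:
 */
#if 0
static int h263_dequant_one(int level, int qscale){
    int qmul= qscale << 1;
    int qadd= (qscale - 1) | 1;
    if(level == 0) return 0;
    return level > 0 ? level*qmul + qadd : level*qmul - qadd;
}
#endif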
6724
6725 #ifdef CONFIG_ENCODERS
6726 AVCodec h263_encoder = {
6727     "h263",
6728     CODEC_TYPE_VIDEO,
6729     CODEC_ID_H263,
6730     sizeof(MpegEncContext),
6731     MPV_encode_init,
6732     MPV_encode_picture,
6733     MPV_encode_end,
6734     .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
6735 };
6736
6737 AVCodec h263p_encoder = {
6738     "h263p",
6739     CODEC_TYPE_VIDEO,
6740     CODEC_ID_H263P,
6741     sizeof(MpegEncContext),
6742     MPV_encode_init,
6743     MPV_encode_picture,
6744     MPV_encode_end,
6745     .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
6746 };
6747
6748 AVCodec flv_encoder = {
6749     "flv",
6750     CODEC_TYPE_VIDEO,
6751     CODEC_ID_FLV1,
6752     sizeof(MpegEncContext),
6753     MPV_encode_init,
6754     MPV_encode_picture,
6755     MPV_encode_end,
6756     .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
6757 };
6758
6759 AVCodec rv10_encoder = {
6760     "rv10",
6761     CODEC_TYPE_VIDEO,
6762     CODEC_ID_RV10,
6763     sizeof(MpegEncContext),
6764     MPV_encode_init,
6765     MPV_encode_picture,
6766     MPV_encode_end,
6767     .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
6768 };
6769
6770 AVCodec rv20_encoder = {
6771     "rv20",
6772     CODEC_TYPE_VIDEO,
6773     CODEC_ID_RV20,
6774     sizeof(MpegEncContext),
6775     MPV_encode_init,
6776     MPV_encode_picture,
6777     MPV_encode_end,
6778     .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
6779 };
6780
6781 AVCodec mpeg4_encoder = {
6782     "mpeg4",
6783     CODEC_TYPE_VIDEO,
6784     CODEC_ID_MPEG4,
6785     sizeof(MpegEncContext),
6786     MPV_encode_init,
6787     MPV_encode_picture,
6788     MPV_encode_end,
6789     .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
6790     .capabilities= CODEC_CAP_DELAY,
6791 };
6792
6793 AVCodec msmpeg4v1_encoder = {
6794     "msmpeg4v1",
6795     CODEC_TYPE_VIDEO,
6796     CODEC_ID_MSMPEG4V1,
6797     sizeof(MpegEncContext),
6798     MPV_encode_init,
6799     MPV_encode_picture,
6800     MPV_encode_end,
6801     .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
6802 };
6803
6804 AVCodec msmpeg4v2_encoder = {
6805     "msmpeg4v2",
6806     CODEC_TYPE_VIDEO,
6807     CODEC_ID_MSMPEG4V2,
6808     sizeof(MpegEncContext),
6809     MPV_encode_init,
6810     MPV_encode_picture,
6811     MPV_encode_end,
6812     .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
6813 };
6814
6815 AVCodec msmpeg4v3_encoder = {
6816     "msmpeg4",
6817     CODEC_TYPE_VIDEO,
6818     CODEC_ID_MSMPEG4V3,
6819     sizeof(MpegEncContext),
6820     MPV_encode_init,
6821     MPV_encode_picture,
6822     MPV_encode_end,
6823     .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
6824 };
6825
6826 AVCodec wmv1_encoder = {
6827     "wmv1",
6828     CODEC_TYPE_VIDEO,
6829     CODEC_ID_WMV1,
6830     sizeof(MpegEncContext),
6831     MPV_encode_init,
6832     MPV_encode_picture,
6833     MPV_encode_end,
6834     .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1},
6835 };
6836
6837 AVCodec mjpeg_encoder = {
6838     "mjpeg",
6839     CODEC_TYPE_VIDEO,
6840     CODEC_ID_MJPEG,
6841     sizeof(MpegEncContext),
6842     MPV_encode_init,
6843     MPV_encode_picture,
6844     MPV_encode_end,
6845     .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUVJ420P, -1},
6846 };
6847
6848 #endif //CONFIG_ENCODERS