]> git.sesse.net Git - ffmpeg/blob - libavcodec/huffyuv.c
change brute force search to min-heap. 3.6x faster generate_len_table, 8% faster...
[ffmpeg] / libavcodec / huffyuv.c
1 /*
2  * huffyuv codec for libavcodec
3  *
4  * Copyright (c) 2002-2003 Michael Niedermayer <michaelni@gmx.at>
5  *
6  * This file is part of FFmpeg.
7  *
8  * FFmpeg is free software; you can redistribute it and/or
9  * modify it under the terms of the GNU Lesser General Public
10  * License as published by the Free Software Foundation; either
11  * version 2.1 of the License, or (at your option) any later version.
12  *
13  * FFmpeg is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16  * Lesser General Public License for more details.
17  *
18  * You should have received a copy of the GNU Lesser General Public
19  * License along with FFmpeg; if not, write to the Free Software
20  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21  *
22  * see http://www.pcisys.net/~melanson/codecs/huffyuv.txt for a description of
23  * the algorithm used
24  */
25
26 /**
27  * @file huffyuv.c
28  * huffyuv codec for libavcodec.
29  */
30
31 #include "avcodec.h"
32 #include "bitstream.h"
33 #include "dsputil.h"
34
/* Bit-width argument handed to init_vlc() and get_vlc2() for every table
 * used in this file. */
#define VLC_BITS 11

/* Byte positions of the blue, green and red samples inside a 4-byte pixel;
 * the positions are mirrored when WORDS_BIGENDIAN is defined. */
#if defined(WORDS_BIGENDIAN)
#    define B 3
#    define G 2
#    define R 1
#else
#    define B 0
#    define G 1
#    define R 2
#endif
46
/**
 * Prediction method. The numeric value is what gets stored in (and read
 * back from) the low six bits of the first extradata byte.
 */
typedef enum Predictor{
    LEFT   = 0,
    PLANE  = 1,
    MEDIAN = 2,
} Predictor;
52
53 typedef struct HYuvContext{
54     AVCodecContext *avctx;
55     Predictor predictor;
56     GetBitContext gb;
57     PutBitContext pb;
58     int interlaced;
59     int decorrelate;
60     int bitstream_bpp;
61     int version;
62     int yuy2;                               //use yuy2 instead of 422P
63     int bgr32;                              //use bgr32 instead of bgr24
64     int width, height;
65     int flags;
66     int context;
67     int picture_number;
68     int last_slice_end;
69     uint8_t *temp[3];
70     uint64_t stats[3][256];
71     uint8_t len[3][256];
72     uint32_t bits[3][256];
73     VLC vlc[3];
74     AVFrame picture;
75     uint8_t *bitstream_buffer;
76     unsigned int bitstream_buffer_size;
77     DSPContext dsp;
78 }HYuvContext;
79
/* Packed code-length description for table 0 of version 0/1 streams; it is
 * parsed by read_len_table() in read_old_huffman_tables(). */
static const unsigned char classic_shift_luma[] = {
     34, 36, 35, 69,135,232,  9, 16, 10, 24,
     11, 23, 12, 16, 13, 10, 14,  8, 15,  8,
     16,  8, 17, 20, 16, 10,207,206,205,236,
     11,  8, 10, 21,  9, 23,  8,  8,199, 70,
     69, 68,  0
};
85
/* Packed code-length description for table 1 of version 0/1 streams; it is
 * parsed by read_len_table() in read_old_huffman_tables(). */
static const unsigned char classic_shift_chroma[] = {
     66, 36, 37, 38, 39, 40, 41, 75, 76, 77,
    110,239,144, 81, 82, 83, 84, 85,118,183,
     56, 57, 88, 89, 56, 89,154, 57, 58, 57,
     26,141, 57, 56, 58, 57, 58, 57,184,119,
    214,245,116, 83, 82, 49, 80, 79, 78, 77,
     44, 75, 41, 40, 39, 38, 37, 36, 34,  0
};
91
/* Code words for table 0 of version 0/1 streams, indexed by symbol; copied
 * into HYuvContext.bits[0] by read_old_huffman_tables(). */
static const unsigned char classic_add_luma[256] = {
    /* 0x00 */   3,  9,  5, 12, 10, 35, 32, 29, 27, 50, 48, 45, 44, 41, 39, 37,
    /* 0x10 */  73, 70, 68, 65, 64, 61, 58, 56, 53, 50, 49, 46, 44, 41, 38, 36,
    /* 0x20 */  68, 65, 63, 61, 58, 55, 53, 51, 48, 46, 45, 43, 41, 39, 38, 36,
    /* 0x30 */  35, 33, 32, 30, 29, 27, 26, 25, 48, 47, 46, 44, 43, 41, 40, 39,
    /* 0x40 */  37, 36, 35, 34, 32, 31, 30, 28, 27, 26, 24, 23, 22, 20, 19, 37,
    /* 0x50 */  35, 34, 33, 31, 30, 29, 27, 26, 24, 23, 21, 20, 18, 17, 15, 29,
    /* 0x60 */  27, 26, 24, 22, 21, 19, 17, 16, 14, 26, 25, 23, 21, 19, 18, 16,
    /* 0x70 */  15, 27, 25, 23, 21, 19, 17, 16, 14, 26, 25, 23, 21, 18, 17, 14,
    /* 0x80 */  12, 17, 19, 13,  4,  9,  2, 11,  1,  7,  8,  0, 16,  3, 14,  6,
    /* 0x90 */  12, 10,  5, 15, 18, 11, 10, 13, 15, 16, 19, 20, 22, 24, 27, 15,
    /* 0xA0 */  18, 20, 22, 24, 26, 14, 17, 20, 22, 24, 27, 15, 18, 20, 23, 25,
    /* 0xB0 */  28, 16, 19, 22, 25, 28, 32, 36, 21, 25, 29, 33, 38, 42, 45, 49,
    /* 0xC0 */  28, 31, 34, 37, 40, 42, 44, 47, 49, 50, 52, 54, 56, 57, 59, 60,
    /* 0xD0 */  62, 64, 66, 67, 69, 35, 37, 39, 40, 42, 43, 45, 47, 48, 51, 52,
    /* 0xE0 */  54, 55, 57, 59, 60, 62, 63, 66, 67, 69, 71, 72, 38, 40, 42, 43,
    /* 0xF0 */  46, 47, 49, 51, 26, 28, 30, 31, 33, 34, 18, 19, 11, 13,  7,  8,
};
110
/* Code words for table 1 of version 0/1 streams, indexed by symbol; copied
 * into HYuvContext.bits[1] by read_old_huffman_tables(). */
static const unsigned char classic_add_chroma[256] = {
    /* 0x00 */   3,  1,  2,  2,  2,  2,  3,  3,  7,  5,  7,  5,  8,  6, 11,  9,
    /* 0x10 */   7, 13, 11, 10,  9,  8,  7,  5,  9,  7,  6,  4,  7,  5,  8,  7,
    /* 0x20 */  11,  8, 13, 11, 19, 15, 22, 23, 20, 33, 32, 28, 27, 29, 51, 77,
    /* 0x30 */  43, 45, 76, 81, 46, 82, 75, 55, 56,144, 58, 80, 60, 74,147, 63,
    /* 0x40 */ 143, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
    /* 0x50 */  80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 27, 30, 21, 22,
    /* 0x60 */  17, 14,  5,  6,100, 54, 47, 50, 51, 53,106,107,108,109,110,111,
    /* 0x70 */ 112,113,114,115,  4,117,118, 92, 94,121,122,  3,124,103,  2,  1,
    /* 0x80 */   0,129,130,131,120,119,126,125,136,137,138,139,140,141,142,134,
    /* 0x90 */ 135,132,133,104, 64,101, 62, 57,102, 95, 93, 59, 61, 28, 97, 96,
    /* 0xA0 */  52, 49, 48, 29, 32, 25, 24, 46, 23, 98, 45, 44, 43, 20, 42, 41,
    /* 0xB0 */  19, 18, 99, 40, 15, 39, 38, 16, 13, 12, 11, 37, 10,  9,  8, 36,
    /* 0xC0 */   7,128,127,105,123,116, 35, 34, 33,145, 31, 79, 42,146, 78, 26,
    /* 0xD0 */  83, 48, 49, 50, 44, 47, 26, 31, 30, 18, 17, 19, 21, 24, 25, 13,
    /* 0xE0 */  14, 16, 17, 18, 20, 21, 12, 14, 15,  9, 10,  6,  9,  6,  5,  8,
    /* 0xF0 */   6, 12,  8, 10,  7,  9,  6,  4,  6,  2,  2,  3,  3,  3,  3,  2,
};
129
/**
 * Turn a row of residuals back into samples by running summation.
 * Each output byte is the low 8 bits of the accumulator after the matching
 * residual has been added to it.
 * @param dst receives w bytes
 * @param src w residual bytes
 * @param w   number of bytes to process; nothing is done when w <= 0
 * @param acc accumulator value to start from
 * @return the accumulator after the last byte (not reduced to 8 bits)
 */
static inline int add_left_prediction(uint8_t *dst, uint8_t *src, int w, int acc){
    int pos = 0;

    while(pos < w){
        acc += src[pos];
        dst[pos] = acc;
        pos++;
    }

    return acc;
}
148
/**
 * Reconstruct one row from residuals. The predictor for each byte is
 * mid_pred() of the previous output byte, the byte from src1 at the same
 * position, and (previous + src1 - previous src1 byte) masked to 8 bits.
 * @param dst      receives w bytes
 * @param src1     reference row, w bytes
 * @param diff     residuals, w bytes
 * @param w        number of bytes to process
 * @param left     in: previous output byte; out: last byte written
 * @param left_top in: reference byte preceding src1[0]; out: last src1 byte read
 */
static inline void add_median_prediction(uint8_t *dst, uint8_t *src1, uint8_t *diff, int w, int *left, int *left_top){
    uint8_t cur      = *left;
    uint8_t ref_prev = *left_top;
    int x;

    for(x=0; x<w; x++){
        const uint8_t ref = src1[x];
        const int gradient = (cur + ref - ref_prev)&0xFF;

        cur = mid_pred(cur, ref, gradient) + diff[x];
        ref_prev = ref;
        dst[x] = cur;
    }

    *left     = cur;
    *left_top = ref_prev;
}
165
166 static inline void add_left_prediction_bgr32(uint8_t *dst, uint8_t *src, int w, int *red, int *green, int *blue){
167     int i;
168     int r,g,b;
169     r= *red;
170     g= *green;
171     b= *blue;
172
173     for(i=0; i<w; i++){
174         b+= src[4*i+B];
175         g+= src[4*i+G];
176         r+= src[4*i+R];
177
178         dst[4*i+B]= b;
179         dst[4*i+G]= g;
180         dst[4*i+R]= r;
181     }
182
183     *red= r;
184     *green= g;
185     *blue= b;
186 }
187
188 static inline int sub_left_prediction(HYuvContext *s, uint8_t *dst, uint8_t *src, int w, int left){
189     int i;
190     if(w<32){
191         for(i=0; i<w; i++){
192             const int temp= src[i];
193             dst[i]= temp - left;
194             left= temp;
195         }
196         return left;
197     }else{
198         for(i=0; i<16; i++){
199             const int temp= src[i];
200             dst[i]= temp - left;
201             left= temp;
202         }
203         s->dsp.diff_bytes(dst+16, src+16, src+15, w-16);
204         return src[w-1];
205     }
206 }
207
208 static inline void sub_left_prediction_bgr32(HYuvContext *s, uint8_t *dst, uint8_t *src, int w, int *red, int *green, int *blue){
209     int i;
210     int r,g,b;
211     r= *red;
212     g= *green;
213     b= *blue;
214     for(i=0; i<FFMIN(w,4); i++){
215         const int rt= src[i*4+R];
216         const int gt= src[i*4+G];
217         const int bt= src[i*4+B];
218         dst[i*4+R]= rt - r;
219         dst[i*4+G]= gt - g;
220         dst[i*4+B]= bt - b;
221         r = rt;
222         g = gt;
223         b = bt;
224     }
225     s->dsp.diff_bytes(dst+16, src+16, src+12, w*4-16);
226     *red=   src[(w-1)*4+R];
227     *green= src[(w-1)*4+G];
228     *blue=  src[(w-1)*4+B];
229 }
230
231 static void read_len_table(uint8_t *dst, GetBitContext *gb){
232     int i, val, repeat;
233
234     for(i=0; i<256;){
235         repeat= get_bits(gb, 3);
236         val   = get_bits(gb, 5);
237         if(repeat==0)
238             repeat= get_bits(gb, 8);
239 //printf("%d %d\n", val, repeat);
240         while (repeat--)
241             dst[i++] = val;
242     }
243 }
244
245 static int generate_bits_table(uint32_t *dst, uint8_t *len_table){
246     int len, index;
247     uint32_t bits=0;
248
249     for(len=32; len>0; len--){
250         for(index=0; index<256; index++){
251             if(len_table[index]==len)
252                 dst[index]= bits++;
253         }
254         if(bits & 1){
255             av_log(NULL, AV_LOG_ERROR, "Error generating huffman table\n");
256             return -1;
257         }
258         bits >>= 1;
259     }
260     return 0;
261 }
262
263 #ifdef CONFIG_ENCODERS
/** One heap entry: a weight and the tree node it stands for. */
typedef struct {
    uint64_t val;   ///< weight of the node
    int name;       ///< node index (symbols are 0..size-1, merged nodes follow)
} heap_elem_t;

/**
 * Restore the min-heap property below root by moving h[root] down until
 * neither child has a smaller val.
 * @param h    heap array
 * @param root index to start from
 * @param size number of entries in h
 */
static void heap_sift(heap_elem_t *h, int root, int size)
{
    while(root*2+1 < size) {
        int child = root*2+1;
        /* pick the smaller of the two children */
        if(child < size-1 && h[child].val > h[child+1].val)
            child++;
        if(h[root].val > h[child].val) {
            heap_elem_t tmp = h[root];
            h[root]  = h[child];
            h[child] = tmp;
            root = child;
        } else
            break;
    }
}

/**
 * Compute a code length for every symbol from its frequency by repeatedly
 * merging the two lightest nodes of a min-heap.
 * Weights are (stats << 8) + offset. Whenever a resulting length does not
 * fit in the 5 bits used to store it (store_table() asserts val < 32 and
 * read_len_table() reads 5 bits), offset is doubled and the whole
 * construction is repeated.
 * @param dst   receives size code lengths, each in 1..31 for size >= 2
 * @param stats size symbol frequencies
 * @param size  number of symbols
 */
static void generate_len_table(uint8_t *dst, uint64_t *stats, int size){
    heap_elem_t h[size];
    int up[2*size];
    int len[2*size];
    int offset, i, next;

    for(offset=1; ; offset<<=1){
        for(i=0; i<size; i++){
            h[i].name = i;
            h[i].val = (stats[i] << 8) + offset;
        }
        for(i=size/2-1; i>=0; i--)
            heap_sift(h, i, size);

        for(next=size; next<size*2-1; next++){
            // merge the two smallest entries, and put it back in the heap
            uint64_t min1v = h[0].val;
            up[h[0].name] = next;
            h[0].val = INT64_MAX;   // sentinel: sinks to the bottom, i.e. removes the entry
            heap_sift(h, 0, size);
            up[h[0].name] = next;
            h[0].name = next;
            h[0].val += min1v;
            heap_sift(h, 0, size);
        }

        /* depth of every node = depth of its parent + 1, root is 2*size-2 */
        len[2*size-2] = 0;
        for(i=2*size-3; i>=size; i--)
            len[i] = len[up[i]] + 1;
        for(i=0; i<size; i++) {
            dst[i] = len[up[i]] + 1;
            if(dst[i] >= 32) break;   // a length of 32 cannot be stored in 5 bits
        }
        if(i==size) break;
    }
}
319 #endif /* CONFIG_ENCODERS */
320
321 static int read_huffman_tables(HYuvContext *s, uint8_t *src, int length){
322     GetBitContext gb;
323     int i;
324
325     init_get_bits(&gb, src, length*8);
326
327     for(i=0; i<3; i++){
328         read_len_table(s->len[i], &gb);
329
330         if(generate_bits_table(s->bits[i], s->len[i])<0){
331             return -1;
332         }
333 #if 0
334 for(j=0; j<256; j++){
335 printf("%6X, %2d,  %3d\n", s->bits[i][j], s->len[i][j], j);
336 }
337 #endif
338         free_vlc(&s->vlc[i]);
339         init_vlc(&s->vlc[i], VLC_BITS, 256, s->len[i], 1, 1, s->bits[i], 4, 4, 0);
340     }
341
342     return (get_bits_count(&gb)+7)/8;
343 }
344
345 static int read_old_huffman_tables(HYuvContext *s){
346 #if 1
347     GetBitContext gb;
348     int i;
349
350     init_get_bits(&gb, classic_shift_luma, sizeof(classic_shift_luma)*8);
351     read_len_table(s->len[0], &gb);
352     init_get_bits(&gb, classic_shift_chroma, sizeof(classic_shift_chroma)*8);
353     read_len_table(s->len[1], &gb);
354
355     for(i=0; i<256; i++) s->bits[0][i] = classic_add_luma  [i];
356     for(i=0; i<256; i++) s->bits[1][i] = classic_add_chroma[i];
357
358     if(s->bitstream_bpp >= 24){
359         memcpy(s->bits[1], s->bits[0], 256*sizeof(uint32_t));
360         memcpy(s->len[1] , s->len [0], 256*sizeof(uint8_t));
361     }
362     memcpy(s->bits[2], s->bits[1], 256*sizeof(uint32_t));
363     memcpy(s->len[2] , s->len [1], 256*sizeof(uint8_t));
364
365     for(i=0; i<3; i++){
366         free_vlc(&s->vlc[i]);
367         init_vlc(&s->vlc[i], VLC_BITS, 256, s->len[i], 1, 1, s->bits[i], 4, 4, 0);
368     }
369
370     return 0;
371 #else
372     av_log(s->avctx, AV_LOG_DEBUG, "v1 huffyuv is not supported \n");
373     return -1;
374 #endif
375 }
376
377 static void alloc_temp(HYuvContext *s){
378     int i;
379
380     if(s->bitstream_bpp<24){
381         for(i=0; i<3; i++){
382             s->temp[i]= av_malloc(s->width + 16);
383         }
384     }else{
385         for(i=0; i<2; i++){
386             s->temp[i]= av_malloc(4*s->width + 16);
387         }
388     }
389 }
390
391 static int common_init(AVCodecContext *avctx){
392     HYuvContext *s = avctx->priv_data;
393
394     s->avctx= avctx;
395     s->flags= avctx->flags;
396
397     dsputil_init(&s->dsp, avctx);
398
399     s->width= avctx->width;
400     s->height= avctx->height;
401     assert(s->width>0 && s->height>0);
402
403     return 0;
404 }
405
406 #ifdef CONFIG_DECODERS
407 static int decode_init(AVCodecContext *avctx)
408 {
409     HYuvContext *s = avctx->priv_data;
410
411     common_init(avctx);
412     memset(s->vlc, 0, 3*sizeof(VLC));
413
414     avctx->coded_frame= &s->picture;
415     s->interlaced= s->height > 288;
416
417 s->bgr32=1;
418 //if(avctx->extradata)
419 //  printf("extradata:%X, extradata_size:%d\n", *(uint32_t*)avctx->extradata, avctx->extradata_size);
420     if(avctx->extradata_size){
421         if((avctx->bits_per_sample&7) && avctx->bits_per_sample != 12)
422             s->version=1; // do such files exist at all?
423         else
424             s->version=2;
425     }else
426         s->version=0;
427
428     if(s->version==2){
429         int method, interlace;
430
431         method= ((uint8_t*)avctx->extradata)[0];
432         s->decorrelate= method&64 ? 1 : 0;
433         s->predictor= method&63;
434         s->bitstream_bpp= ((uint8_t*)avctx->extradata)[1];
435         if(s->bitstream_bpp==0)
436             s->bitstream_bpp= avctx->bits_per_sample&~7;
437         interlace= (((uint8_t*)avctx->extradata)[2] & 0x30) >> 4;
438         s->interlaced= (interlace==1) ? 1 : (interlace==2) ? 0 : s->interlaced;
439         s->context= ((uint8_t*)avctx->extradata)[2] & 0x40 ? 1 : 0;
440
441         if(read_huffman_tables(s, ((uint8_t*)avctx->extradata)+4, avctx->extradata_size) < 0)
442             return -1;
443     }else{
444         switch(avctx->bits_per_sample&7){
445         case 1:
446             s->predictor= LEFT;
447             s->decorrelate= 0;
448             break;
449         case 2:
450             s->predictor= LEFT;
451             s->decorrelate= 1;
452             break;
453         case 3:
454             s->predictor= PLANE;
455             s->decorrelate= avctx->bits_per_sample >= 24;
456             break;
457         case 4:
458             s->predictor= MEDIAN;
459             s->decorrelate= 0;
460             break;
461         default:
462             s->predictor= LEFT; //OLD
463             s->decorrelate= 0;
464             break;
465         }
466         s->bitstream_bpp= avctx->bits_per_sample & ~7;
467         s->context= 0;
468
469         if(read_old_huffman_tables(s) < 0)
470             return -1;
471     }
472
473     switch(s->bitstream_bpp){
474     case 12:
475         avctx->pix_fmt = PIX_FMT_YUV420P;
476         break;
477     case 16:
478         if(s->yuy2){
479             avctx->pix_fmt = PIX_FMT_YUYV422;
480         }else{
481             avctx->pix_fmt = PIX_FMT_YUV422P;
482         }
483         break;
484     case 24:
485     case 32:
486         if(s->bgr32){
487             avctx->pix_fmt = PIX_FMT_RGB32;
488         }else{
489             avctx->pix_fmt = PIX_FMT_BGR24;
490         }
491         break;
492     default:
493         assert(0);
494     }
495
496     alloc_temp(s);
497
498 //    av_log(NULL, AV_LOG_DEBUG, "pred:%d bpp:%d hbpp:%d il:%d\n", s->predictor, s->bitstream_bpp, avctx->bits_per_sample, s->interlaced);
499
500     return 0;
501 }
502 #endif
503
504 #ifdef CONFIG_ENCODERS
505 static int store_table(HYuvContext *s, uint8_t *len, uint8_t *buf){
506     int i;
507     int index= 0;
508
509     for(i=0; i<256;){
510         int val= len[i];
511         int repeat=0;
512
513         for(; i<256 && len[i]==val && repeat<255; i++)
514             repeat++;
515
516         assert(val < 32 && val >0 && repeat<256 && repeat>0);
517         if(repeat>7){
518             buf[index++]= val;
519             buf[index++]= repeat;
520         }else{
521             buf[index++]= val | (repeat<<5);
522         }
523     }
524
525     return index;
526 }
527
528 static int encode_init(AVCodecContext *avctx)
529 {
530     HYuvContext *s = avctx->priv_data;
531     int i, j;
532
533     common_init(avctx);
534
535     avctx->extradata= av_mallocz(1024*30); // 256*3+4 == 772
536     avctx->stats_out= av_mallocz(1024*30); // 21*256*3(%llu ) + 3(\n) + 1(0) = 16132
537     s->version=2;
538
539     avctx->coded_frame= &s->picture;
540
541     switch(avctx->pix_fmt){
542     case PIX_FMT_YUV420P:
543         s->bitstream_bpp= 12;
544         break;
545     case PIX_FMT_YUV422P:
546         s->bitstream_bpp= 16;
547         break;
548     case PIX_FMT_RGB32:
549         s->bitstream_bpp= 24;
550         break;
551     default:
552         av_log(avctx, AV_LOG_ERROR, "format not supported\n");
553         return -1;
554     }
555     avctx->bits_per_sample= s->bitstream_bpp;
556     s->decorrelate= s->bitstream_bpp >= 24;
557     s->predictor= avctx->prediction_method;
558     s->interlaced= avctx->flags&CODEC_FLAG_INTERLACED_ME ? 1 : 0;
559     if(avctx->context_model==1){
560         s->context= avctx->context_model;
561         if(s->flags & (CODEC_FLAG_PASS1|CODEC_FLAG_PASS2)){
562             av_log(avctx, AV_LOG_ERROR, "context=1 is not compatible with 2 pass huffyuv encoding\n");
563             return -1;
564         }
565     }else s->context= 0;
566
567     if(avctx->codec->id==CODEC_ID_HUFFYUV){
568         if(avctx->pix_fmt==PIX_FMT_YUV420P){
569             av_log(avctx, AV_LOG_ERROR, "Error: YV12 is not supported by huffyuv; use vcodec=ffvhuff or format=422p\n");
570             return -1;
571         }
572         if(avctx->context_model){
573             av_log(avctx, AV_LOG_ERROR, "Error: per-frame huffman tables are not supported by huffyuv; use vcodec=ffvhuff\n");
574             return -1;
575         }
576         if(s->interlaced != ( s->height > 288 ))
577             av_log(avctx, AV_LOG_INFO, "using huffyuv 2.2.0 or newer interlacing flag\n");
578     }
579
580     if(s->bitstream_bpp>=24 && s->predictor==MEDIAN){
581         av_log(avctx, AV_LOG_ERROR, "Error: RGB is incompatible with median predictor\n");
582         return -1;
583     }
584
585     ((uint8_t*)avctx->extradata)[0]= s->predictor | (s->decorrelate << 6);
586     ((uint8_t*)avctx->extradata)[1]= s->bitstream_bpp;
587     ((uint8_t*)avctx->extradata)[2]= s->interlaced ? 0x10 : 0x20;
588     if(s->context)
589         ((uint8_t*)avctx->extradata)[2]|= 0x40;
590     ((uint8_t*)avctx->extradata)[3]= 0;
591     s->avctx->extradata_size= 4;
592
593     if(avctx->stats_in){
594         char *p= avctx->stats_in;
595
596         for(i=0; i<3; i++)
597             for(j=0; j<256; j++)
598                 s->stats[i][j]= 1;
599
600         for(;;){
601             for(i=0; i<3; i++){
602                 char *next;
603
604                 for(j=0; j<256; j++){
605                     s->stats[i][j]+= strtol(p, &next, 0);
606                     if(next==p) return -1;
607                     p=next;
608                 }
609             }
610             if(p[0]==0 || p[1]==0 || p[2]==0) break;
611         }
612     }else{
613         for(i=0; i<3; i++)
614             for(j=0; j<256; j++){
615                 int d= FFMIN(j, 256-j);
616
617                 s->stats[i][j]= 100000000/(d+1);
618             }
619     }
620
621     for(i=0; i<3; i++){
622         generate_len_table(s->len[i], s->stats[i], 256);
623
624         if(generate_bits_table(s->bits[i], s->len[i])<0){
625             return -1;
626         }
627
628         s->avctx->extradata_size+=
629         store_table(s, s->len[i], &((uint8_t*)s->avctx->extradata)[s->avctx->extradata_size]);
630     }
631
632     if(s->context){
633         for(i=0; i<3; i++){
634             int pels = s->width*s->height / (i?40:10);
635             for(j=0; j<256; j++){
636                 int d= FFMIN(j, 256-j);
637                 s->stats[i][j]= pels/(d+1);
638             }
639         }
640     }else{
641         for(i=0; i<3; i++)
642             for(j=0; j<256; j++)
643                 s->stats[i][j]= 0;
644     }
645
646 //    printf("pred:%d bpp:%d hbpp:%d il:%d\n", s->predictor, s->bitstream_bpp, avctx->bits_per_sample, s->interlaced);
647
648     alloc_temp(s);
649
650     s->picture_number=0;
651
652     return 0;
653 }
654 #endif /* CONFIG_ENCODERS */
655
656 static void decode_422_bitstream(HYuvContext *s, int count){
657     int i;
658
659     count/=2;
660
661     for(i=0; i<count; i++){
662         s->temp[0][2*i  ]= get_vlc2(&s->gb, s->vlc[0].table, VLC_BITS, 3);
663         s->temp[1][  i  ]= get_vlc2(&s->gb, s->vlc[1].table, VLC_BITS, 3);
664         s->temp[0][2*i+1]= get_vlc2(&s->gb, s->vlc[0].table, VLC_BITS, 3);
665         s->temp[2][  i  ]= get_vlc2(&s->gb, s->vlc[2].table, VLC_BITS, 3);
666     }
667 }
668
669 static void decode_gray_bitstream(HYuvContext *s, int count){
670     int i;
671
672     count/=2;
673
674     for(i=0; i<count; i++){
675         s->temp[0][2*i  ]= get_vlc2(&s->gb, s->vlc[0].table, VLC_BITS, 3);
676         s->temp[0][2*i+1]= get_vlc2(&s->gb, s->vlc[0].table, VLC_BITS, 3);
677     }
678 }
679
680 #ifdef CONFIG_ENCODERS
681 static int encode_422_bitstream(HYuvContext *s, int count){
682     int i;
683
684     if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < 2*4*count){
685         av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
686         return -1;
687     }
688
689 #define LOAD4\
690             int y0 = s->temp[0][2*i];\
691             int y1 = s->temp[0][2*i+1];\
692             int u0 = s->temp[1][i];\
693             int v0 = s->temp[2][i];
694
695     count/=2;
696     if(s->flags&CODEC_FLAG_PASS1){
697         for(i=0; i<count; i++){
698             LOAD4;
699             s->stats[0][y0]++;
700             s->stats[1][u0]++;
701             s->stats[0][y1]++;
702             s->stats[2][v0]++;
703         }
704     }
705     if(s->avctx->flags2&CODEC_FLAG2_NO_OUTPUT)
706         return 0;
707     if(s->context){
708         for(i=0; i<count; i++){
709             LOAD4;
710             s->stats[0][y0]++;
711             put_bits(&s->pb, s->len[0][y0], s->bits[0][y0]);
712             s->stats[1][u0]++;
713             put_bits(&s->pb, s->len[1][u0], s->bits[1][u0]);
714             s->stats[0][y1]++;
715             put_bits(&s->pb, s->len[0][y1], s->bits[0][y1]);
716             s->stats[2][v0]++;
717             put_bits(&s->pb, s->len[2][v0], s->bits[2][v0]);
718         }
719     }else{
720         for(i=0; i<count; i++){
721             LOAD4;
722             put_bits(&s->pb, s->len[0][y0], s->bits[0][y0]);
723             put_bits(&s->pb, s->len[1][u0], s->bits[1][u0]);
724             put_bits(&s->pb, s->len[0][y1], s->bits[0][y1]);
725             put_bits(&s->pb, s->len[2][v0], s->bits[2][v0]);
726         }
727     }
728     return 0;
729 }
730
731 static int encode_gray_bitstream(HYuvContext *s, int count){
732     int i;
733
734     if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < 4*count){
735         av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
736         return -1;
737     }
738
739 #define LOAD2\
740             int y0 = s->temp[0][2*i];\
741             int y1 = s->temp[0][2*i+1];
742 #define STAT2\
743             s->stats[0][y0]++;\
744             s->stats[0][y1]++;
745 #define WRITE2\
746             put_bits(&s->pb, s->len[0][y0], s->bits[0][y0]);\
747             put_bits(&s->pb, s->len[0][y1], s->bits[0][y1]);
748
749     count/=2;
750     if(s->flags&CODEC_FLAG_PASS1){
751         for(i=0; i<count; i++){
752             LOAD2;
753             STAT2;
754         }
755     }
756     if(s->avctx->flags2&CODEC_FLAG2_NO_OUTPUT)
757         return 0;
758
759     if(s->context){
760         for(i=0; i<count; i++){
761             LOAD2;
762             STAT2;
763             WRITE2;
764         }
765     }else{
766         for(i=0; i<count; i++){
767             LOAD2;
768             WRITE2;
769         }
770     }
771     return 0;
772 }
773 #endif /* CONFIG_ENCODERS */
774
775 static void decode_bgr_bitstream(HYuvContext *s, int count){
776     int i;
777
778     if(s->decorrelate){
779         if(s->bitstream_bpp==24){
780             for(i=0; i<count; i++){
781                 s->temp[0][4*i+G]= get_vlc2(&s->gb, s->vlc[1].table, VLC_BITS, 3);
782                 s->temp[0][4*i+B]= get_vlc2(&s->gb, s->vlc[0].table, VLC_BITS, 3) + s->temp[0][4*i+G];
783                 s->temp[0][4*i+R]= get_vlc2(&s->gb, s->vlc[2].table, VLC_BITS, 3) + s->temp[0][4*i+G];
784             }
785         }else{
786             for(i=0; i<count; i++){
787                 s->temp[0][4*i+G]= get_vlc2(&s->gb, s->vlc[1].table, VLC_BITS, 3);
788                 s->temp[0][4*i+B]= get_vlc2(&s->gb, s->vlc[0].table, VLC_BITS, 3) + s->temp[0][4*i+G];
789                 s->temp[0][4*i+R]= get_vlc2(&s->gb, s->vlc[2].table, VLC_BITS, 3) + s->temp[0][4*i+G];
790                                    get_vlc2(&s->gb, s->vlc[2].table, VLC_BITS, 3); //?!
791             }
792         }
793     }else{
794         if(s->bitstream_bpp==24){
795             for(i=0; i<count; i++){
796                 s->temp[0][4*i+B]= get_vlc2(&s->gb, s->vlc[0].table, VLC_BITS, 3);
797                 s->temp[0][4*i+G]= get_vlc2(&s->gb, s->vlc[1].table, VLC_BITS, 3);
798                 s->temp[0][4*i+R]= get_vlc2(&s->gb, s->vlc[2].table, VLC_BITS, 3);
799             }
800         }else{
801             for(i=0; i<count; i++){
802                 s->temp[0][4*i+B]= get_vlc2(&s->gb, s->vlc[0].table, VLC_BITS, 3);
803                 s->temp[0][4*i+G]= get_vlc2(&s->gb, s->vlc[1].table, VLC_BITS, 3);
804                 s->temp[0][4*i+R]= get_vlc2(&s->gb, s->vlc[2].table, VLC_BITS, 3);
805                                    get_vlc2(&s->gb, s->vlc[2].table, VLC_BITS, 3); //?!
806             }
807         }
808     }
809 }
810
811 static int encode_bgr_bitstream(HYuvContext *s, int count){
812     int i;
813
814     if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < 3*4*count){
815         av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
816         return -1;
817     }
818
819 #define LOAD3\
820             int g= s->temp[0][4*i+G];\
821             int b= (s->temp[0][4*i+B] - g) & 0xff;\
822             int r= (s->temp[0][4*i+R] - g) & 0xff;
823 #define STAT3\
824             s->stats[0][b]++;\
825             s->stats[1][g]++;\
826             s->stats[2][r]++;
827 #define WRITE3\
828             put_bits(&s->pb, s->len[1][g], s->bits[1][g]);\
829             put_bits(&s->pb, s->len[0][b], s->bits[0][b]);\
830             put_bits(&s->pb, s->len[2][r], s->bits[2][r]);
831
832     if((s->flags&CODEC_FLAG_PASS1) && (s->avctx->flags2&CODEC_FLAG2_NO_OUTPUT)){
833         for(i=0; i<count; i++){
834             LOAD3;
835             STAT3;
836         }
837     }else if(s->context || (s->flags&CODEC_FLAG_PASS1)){
838         for(i=0; i<count; i++){
839             LOAD3;
840             STAT3;
841             WRITE3;
842         }
843     }else{
844         for(i=0; i<count; i++){
845             LOAD3;
846             WRITE3;
847         }
848     }
849     return 0;
850 }
851
852 #ifdef CONFIG_DECODERS
853 static void draw_slice(HYuvContext *s, int y){
854     int h, cy;
855     int offset[4];
856
857     if(s->avctx->draw_horiz_band==NULL)
858         return;
859
860     h= y - s->last_slice_end;
861     y -= h;
862
863     if(s->bitstream_bpp==12){
864         cy= y>>1;
865     }else{
866         cy= y;
867     }
868
869     offset[0] = s->picture.linesize[0]*y;
870     offset[1] = s->picture.linesize[1]*cy;
871     offset[2] = s->picture.linesize[2]*cy;
872     offset[3] = 0;
873     emms_c();
874
875     s->avctx->draw_horiz_band(s->avctx, &s->picture, offset, y, 3, h);
876
877     s->last_slice_end= y + h;
878 }
879
880 static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, uint8_t *buf, int buf_size){
881     HYuvContext *s = avctx->priv_data;
882     const int width= s->width;
883     const int width2= s->width>>1;
884     const int height= s->height;
885     int fake_ystride, fake_ustride, fake_vstride;
886     AVFrame * const p= &s->picture;
887     int table_size= 0;
888
889     AVFrame *picture = data;
890
891     s->bitstream_buffer= av_fast_realloc(s->bitstream_buffer, &s->bitstream_buffer_size, buf_size + FF_INPUT_BUFFER_PADDING_SIZE);
892
893     s->dsp.bswap_buf((uint32_t*)s->bitstream_buffer, (uint32_t*)buf, buf_size/4);
894
895     if(p->data[0])
896         avctx->release_buffer(avctx, p);
897
898     p->reference= 0;
899     if(avctx->get_buffer(avctx, p) < 0){
900         av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
901         return -1;
902     }
903
904     if(s->context){
905         table_size = read_huffman_tables(s, s->bitstream_buffer, buf_size);
906         if(table_size < 0)
907             return -1;
908     }
909
910     if((unsigned)(buf_size-table_size) >= INT_MAX/8)
911         return -1;
912
913     init_get_bits(&s->gb, s->bitstream_buffer+table_size, (buf_size-table_size)*8);
914
915     fake_ystride= s->interlaced ? p->linesize[0]*2  : p->linesize[0];
916     fake_ustride= s->interlaced ? p->linesize[1]*2  : p->linesize[1];
917     fake_vstride= s->interlaced ? p->linesize[2]*2  : p->linesize[2];
918
919     s->last_slice_end= 0;
920
921     if(s->bitstream_bpp<24){
922         int y, cy;
923         int lefty, leftu, leftv;
924         int lefttopy, lefttopu, lefttopv;
925
926         if(s->yuy2){
927             p->data[0][3]= get_bits(&s->gb, 8);
928             p->data[0][2]= get_bits(&s->gb, 8);
929             p->data[0][1]= get_bits(&s->gb, 8);
930             p->data[0][0]= get_bits(&s->gb, 8);
931
932             av_log(avctx, AV_LOG_ERROR, "YUY2 output is not implemented yet\n");
933             return -1;
934         }else{
935
936             leftv= p->data[2][0]= get_bits(&s->gb, 8);
937             lefty= p->data[0][1]= get_bits(&s->gb, 8);
938             leftu= p->data[1][0]= get_bits(&s->gb, 8);
939                    p->data[0][0]= get_bits(&s->gb, 8);
940
941             switch(s->predictor){
942             case LEFT:
943             case PLANE:
944                 decode_422_bitstream(s, width-2);
945                 lefty= add_left_prediction(p->data[0] + 2, s->temp[0], width-2, lefty);
946                 if(!(s->flags&CODEC_FLAG_GRAY)){
947                     leftu= add_left_prediction(p->data[1] + 1, s->temp[1], width2-1, leftu);
948                     leftv= add_left_prediction(p->data[2] + 1, s->temp[2], width2-1, leftv);
949                 }
950
951                 for(cy=y=1; y<s->height; y++,cy++){
952                     uint8_t *ydst, *udst, *vdst;
953
954                     if(s->bitstream_bpp==12){
955                         decode_gray_bitstream(s, width);
956
957                         ydst= p->data[0] + p->linesize[0]*y;
958
959                         lefty= add_left_prediction(ydst, s->temp[0], width, lefty);
960                         if(s->predictor == PLANE){
961                             if(y>s->interlaced)
962                                 s->dsp.add_bytes(ydst, ydst - fake_ystride, width);
963                         }
964                         y++;
965                         if(y>=s->height) break;
966                     }
967
968                     draw_slice(s, y);
969
970                     ydst= p->data[0] + p->linesize[0]*y;
971                     udst= p->data[1] + p->linesize[1]*cy;
972                     vdst= p->data[2] + p->linesize[2]*cy;
973
974                     decode_422_bitstream(s, width);
975                     lefty= add_left_prediction(ydst, s->temp[0], width, lefty);
976                     if(!(s->flags&CODEC_FLAG_GRAY)){
977                         leftu= add_left_prediction(udst, s->temp[1], width2, leftu);
978                         leftv= add_left_prediction(vdst, s->temp[2], width2, leftv);
979                     }
980                     if(s->predictor == PLANE){
981                         if(cy>s->interlaced){
982                             s->dsp.add_bytes(ydst, ydst - fake_ystride, width);
983                             if(!(s->flags&CODEC_FLAG_GRAY)){
984                                 s->dsp.add_bytes(udst, udst - fake_ustride, width2);
985                                 s->dsp.add_bytes(vdst, vdst - fake_vstride, width2);
986                             }
987                         }
988                     }
989                 }
990                 draw_slice(s, height);
991
992                 break;
993             case MEDIAN:
994                 /* first line except first 2 pixels is left predicted */
995                 decode_422_bitstream(s, width-2);
996                 lefty= add_left_prediction(p->data[0] + 2, s->temp[0], width-2, lefty);
997                 if(!(s->flags&CODEC_FLAG_GRAY)){
998                     leftu= add_left_prediction(p->data[1] + 1, s->temp[1], width2-1, leftu);
999                     leftv= add_left_prediction(p->data[2] + 1, s->temp[2], width2-1, leftv);
1000                 }
1001
1002                 cy=y=1;
1003
1004                 /* second line is left predicted for interlaced case */
1005                 if(s->interlaced){
1006                     decode_422_bitstream(s, width);
1007                     lefty= add_left_prediction(p->data[0] + p->linesize[0], s->temp[0], width, lefty);
1008                     if(!(s->flags&CODEC_FLAG_GRAY)){
1009                         leftu= add_left_prediction(p->data[1] + p->linesize[2], s->temp[1], width2, leftu);
1010                         leftv= add_left_prediction(p->data[2] + p->linesize[1], s->temp[2], width2, leftv);
1011                     }
1012                     y++; cy++;
1013                 }
1014
1015                 /* next 4 pixels are left predicted too */
1016                 decode_422_bitstream(s, 4);
1017                 lefty= add_left_prediction(p->data[0] + fake_ystride, s->temp[0], 4, lefty);
1018                 if(!(s->flags&CODEC_FLAG_GRAY)){
1019                     leftu= add_left_prediction(p->data[1] + fake_ustride, s->temp[1], 2, leftu);
1020                     leftv= add_left_prediction(p->data[2] + fake_vstride, s->temp[2], 2, leftv);
1021                 }
1022
1023                 /* next line except the first 4 pixels is median predicted */
1024                 lefttopy= p->data[0][3];
1025                 decode_422_bitstream(s, width-4);
1026                 add_median_prediction(p->data[0] + fake_ystride+4, p->data[0]+4, s->temp[0], width-4, &lefty, &lefttopy);
1027                 if(!(s->flags&CODEC_FLAG_GRAY)){
1028                     lefttopu= p->data[1][1];
1029                     lefttopv= p->data[2][1];
1030                     add_median_prediction(p->data[1] + fake_ustride+2, p->data[1]+2, s->temp[1], width2-2, &leftu, &lefttopu);
1031                     add_median_prediction(p->data[2] + fake_vstride+2, p->data[2]+2, s->temp[2], width2-2, &leftv, &lefttopv);
1032                 }
1033                 y++; cy++;
1034
1035                 for(; y<height; y++,cy++){
1036                     uint8_t *ydst, *udst, *vdst;
1037
1038                     if(s->bitstream_bpp==12){
1039                         while(2*cy > y){
1040                             decode_gray_bitstream(s, width);
1041                             ydst= p->data[0] + p->linesize[0]*y;
1042                             add_median_prediction(ydst, ydst - fake_ystride, s->temp[0], width, &lefty, &lefttopy);
1043                             y++;
1044                         }
1045                         if(y>=height) break;
1046                     }
1047                     draw_slice(s, y);
1048
1049                     decode_422_bitstream(s, width);
1050
1051                     ydst= p->data[0] + p->linesize[0]*y;
1052                     udst= p->data[1] + p->linesize[1]*cy;
1053                     vdst= p->data[2] + p->linesize[2]*cy;
1054
1055                     add_median_prediction(ydst, ydst - fake_ystride, s->temp[0], width, &lefty, &lefttopy);
1056                     if(!(s->flags&CODEC_FLAG_GRAY)){
1057                         add_median_prediction(udst, udst - fake_ustride, s->temp[1], width2, &leftu, &lefttopu);
1058                         add_median_prediction(vdst, vdst - fake_vstride, s->temp[2], width2, &leftv, &lefttopv);
1059                     }
1060                 }
1061
1062                 draw_slice(s, height);
1063                 break;
1064             }
1065         }
1066     }else{
1067         int y;
1068         int leftr, leftg, leftb;
1069         const int last_line= (height-1)*p->linesize[0];
1070
1071         if(s->bitstream_bpp==32){
1072             skip_bits(&s->gb, 8);
1073             leftr= p->data[0][last_line+R]= get_bits(&s->gb, 8);
1074             leftg= p->data[0][last_line+G]= get_bits(&s->gb, 8);
1075             leftb= p->data[0][last_line+B]= get_bits(&s->gb, 8);
1076         }else{
1077             leftr= p->data[0][last_line+R]= get_bits(&s->gb, 8);
1078             leftg= p->data[0][last_line+G]= get_bits(&s->gb, 8);
1079             leftb= p->data[0][last_line+B]= get_bits(&s->gb, 8);
1080             skip_bits(&s->gb, 8);
1081         }
1082
1083         if(s->bgr32){
1084             switch(s->predictor){
1085             case LEFT:
1086             case PLANE:
1087                 decode_bgr_bitstream(s, width-1);
1088                 add_left_prediction_bgr32(p->data[0] + last_line+4, s->temp[0], width-1, &leftr, &leftg, &leftb);
1089
1090                 for(y=s->height-2; y>=0; y--){ //yes its stored upside down
1091                     decode_bgr_bitstream(s, width);
1092
1093                     add_left_prediction_bgr32(p->data[0] + p->linesize[0]*y, s->temp[0], width, &leftr, &leftg, &leftb);
1094                     if(s->predictor == PLANE){
1095                         if((y&s->interlaced)==0 && y<s->height-1-s->interlaced){
1096                             s->dsp.add_bytes(p->data[0] + p->linesize[0]*y,
1097                                              p->data[0] + p->linesize[0]*y + fake_ystride, fake_ystride);
1098                         }
1099                     }
1100                 }
1101                 draw_slice(s, height); // just 1 large slice as this is not possible in reverse order
1102                 break;
1103             default:
1104                 av_log(avctx, AV_LOG_ERROR, "prediction type not supported!\n");
1105             }
1106         }else{
1107
1108             av_log(avctx, AV_LOG_ERROR, "BGR24 output is not implemented yet\n");
1109             return -1;
1110         }
1111     }
1112     emms_c();
1113
1114     *picture= *p;
1115     *data_size = sizeof(AVFrame);
1116
1117     return (get_bits_count(&s->gb)+31)/32*4 + table_size;
1118 }
1119 #endif
1120
1121 static int common_end(HYuvContext *s){
1122     int i;
1123
1124     for(i=0; i<3; i++){
1125         av_freep(&s->temp[i]);
1126     }
1127     return 0;
1128 }
1129
1130 #ifdef CONFIG_DECODERS
1131 static int decode_end(AVCodecContext *avctx)
1132 {
1133     HYuvContext *s = avctx->priv_data;
1134     int i;
1135
1136     common_end(s);
1137     av_freep(&s->bitstream_buffer);
1138
1139     for(i=0; i<3; i++){
1140         free_vlc(&s->vlc[i]);
1141     }
1142
1143     return 0;
1144 }
1145 #endif
1146
1147 #ifdef CONFIG_ENCODERS
1148 static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size, void *data){
1149     HYuvContext *s = avctx->priv_data;
1150     AVFrame *pict = data;
1151     const int width= s->width;
1152     const int width2= s->width>>1;
1153     const int height= s->height;
1154     const int fake_ystride= s->interlaced ? pict->linesize[0]*2  : pict->linesize[0];
1155     const int fake_ustride= s->interlaced ? pict->linesize[1]*2  : pict->linesize[1];
1156     const int fake_vstride= s->interlaced ? pict->linesize[2]*2  : pict->linesize[2];
1157     AVFrame * const p= &s->picture;
1158     int i, j, size=0;
1159
1160     *p = *pict;
1161     p->pict_type= FF_I_TYPE;
1162     p->key_frame= 1;
1163
1164     if(s->context){
1165         for(i=0; i<3; i++){
1166             generate_len_table(s->len[i], s->stats[i], 256);
1167             if(generate_bits_table(s->bits[i], s->len[i])<0)
1168                 return -1;
1169             size+= store_table(s, s->len[i], &buf[size]);
1170         }
1171
1172         for(i=0; i<3; i++)
1173             for(j=0; j<256; j++)
1174                 s->stats[i][j] >>= 1;
1175     }
1176
1177     init_put_bits(&s->pb, buf+size, buf_size-size);
1178
1179     if(avctx->pix_fmt == PIX_FMT_YUV422P || avctx->pix_fmt == PIX_FMT_YUV420P){
1180         int lefty, leftu, leftv, y, cy;
1181
1182         put_bits(&s->pb, 8, leftv= p->data[2][0]);
1183         put_bits(&s->pb, 8, lefty= p->data[0][1]);
1184         put_bits(&s->pb, 8, leftu= p->data[1][0]);
1185         put_bits(&s->pb, 8,        p->data[0][0]);
1186
1187         lefty= sub_left_prediction(s, s->temp[0], p->data[0]+2, width-2 , lefty);
1188         leftu= sub_left_prediction(s, s->temp[1], p->data[1]+1, width2-1, leftu);
1189         leftv= sub_left_prediction(s, s->temp[2], p->data[2]+1, width2-1, leftv);
1190
1191         encode_422_bitstream(s, width-2);
1192
1193         if(s->predictor==MEDIAN){
1194             int lefttopy, lefttopu, lefttopv;
1195             cy=y=1;
1196             if(s->interlaced){
1197                 lefty= sub_left_prediction(s, s->temp[0], p->data[0]+p->linesize[0], width , lefty);
1198                 leftu= sub_left_prediction(s, s->temp[1], p->data[1]+p->linesize[1], width2, leftu);
1199                 leftv= sub_left_prediction(s, s->temp[2], p->data[2]+p->linesize[2], width2, leftv);
1200
1201                 encode_422_bitstream(s, width);
1202                 y++; cy++;
1203             }
1204
1205             lefty= sub_left_prediction(s, s->temp[0], p->data[0]+fake_ystride, 4, lefty);
1206             leftu= sub_left_prediction(s, s->temp[1], p->data[1]+fake_ustride, 2, leftu);
1207             leftv= sub_left_prediction(s, s->temp[2], p->data[2]+fake_vstride, 2, leftv);
1208
1209             encode_422_bitstream(s, 4);
1210
1211             lefttopy= p->data[0][3];
1212             lefttopu= p->data[1][1];
1213             lefttopv= p->data[2][1];
1214             s->dsp.sub_hfyu_median_prediction(s->temp[0], p->data[0]+4, p->data[0] + fake_ystride+4, width-4 , &lefty, &lefttopy);
1215             s->dsp.sub_hfyu_median_prediction(s->temp[1], p->data[1]+2, p->data[1] + fake_ustride+2, width2-2, &leftu, &lefttopu);
1216             s->dsp.sub_hfyu_median_prediction(s->temp[2], p->data[2]+2, p->data[2] + fake_vstride+2, width2-2, &leftv, &lefttopv);
1217             encode_422_bitstream(s, width-4);
1218             y++; cy++;
1219
1220             for(; y<height; y++,cy++){
1221                 uint8_t *ydst, *udst, *vdst;
1222
1223                 if(s->bitstream_bpp==12){
1224                     while(2*cy > y){
1225                         ydst= p->data[0] + p->linesize[0]*y;
1226                         s->dsp.sub_hfyu_median_prediction(s->temp[0], ydst - fake_ystride, ydst, width , &lefty, &lefttopy);
1227                         encode_gray_bitstream(s, width);
1228                         y++;
1229                     }
1230                     if(y>=height) break;
1231                 }
1232                 ydst= p->data[0] + p->linesize[0]*y;
1233                 udst= p->data[1] + p->linesize[1]*cy;
1234                 vdst= p->data[2] + p->linesize[2]*cy;
1235
1236                 s->dsp.sub_hfyu_median_prediction(s->temp[0], ydst - fake_ystride, ydst, width , &lefty, &lefttopy);
1237                 s->dsp.sub_hfyu_median_prediction(s->temp[1], udst - fake_ustride, udst, width2, &leftu, &lefttopu);
1238                 s->dsp.sub_hfyu_median_prediction(s->temp[2], vdst - fake_vstride, vdst, width2, &leftv, &lefttopv);
1239
1240                 encode_422_bitstream(s, width);
1241             }
1242         }else{
1243             for(cy=y=1; y<height; y++,cy++){
1244                 uint8_t *ydst, *udst, *vdst;
1245
1246                 /* encode a luma only line & y++ */
1247                 if(s->bitstream_bpp==12){
1248                     ydst= p->data[0] + p->linesize[0]*y;
1249
1250                     if(s->predictor == PLANE && s->interlaced < y){
1251                         s->dsp.diff_bytes(s->temp[1], ydst, ydst - fake_ystride, width);
1252
1253                         lefty= sub_left_prediction(s, s->temp[0], s->temp[1], width , lefty);
1254                     }else{
1255                         lefty= sub_left_prediction(s, s->temp[0], ydst, width , lefty);
1256                     }
1257                     encode_gray_bitstream(s, width);
1258                     y++;
1259                     if(y>=height) break;
1260                 }
1261
1262                 ydst= p->data[0] + p->linesize[0]*y;
1263                 udst= p->data[1] + p->linesize[1]*cy;
1264                 vdst= p->data[2] + p->linesize[2]*cy;
1265
1266                 if(s->predictor == PLANE && s->interlaced < cy){
1267                     s->dsp.diff_bytes(s->temp[1], ydst, ydst - fake_ystride, width);
1268                     s->dsp.diff_bytes(s->temp[2], udst, udst - fake_ustride, width2);
1269                     s->dsp.diff_bytes(s->temp[2] + width2, vdst, vdst - fake_vstride, width2);
1270
1271                     lefty= sub_left_prediction(s, s->temp[0], s->temp[1], width , lefty);
1272                     leftu= sub_left_prediction(s, s->temp[1], s->temp[2], width2, leftu);
1273                     leftv= sub_left_prediction(s, s->temp[2], s->temp[2] + width2, width2, leftv);
1274                 }else{
1275                     lefty= sub_left_prediction(s, s->temp[0], ydst, width , lefty);
1276                     leftu= sub_left_prediction(s, s->temp[1], udst, width2, leftu);
1277                     leftv= sub_left_prediction(s, s->temp[2], vdst, width2, leftv);
1278                 }
1279
1280                 encode_422_bitstream(s, width);
1281             }
1282         }
1283     }else if(avctx->pix_fmt == PIX_FMT_RGB32){
1284         uint8_t *data = p->data[0] + (height-1)*p->linesize[0];
1285         const int stride = -p->linesize[0];
1286         const int fake_stride = -fake_ystride;
1287         int y;
1288         int leftr, leftg, leftb;
1289
1290         put_bits(&s->pb, 8, leftr= data[R]);
1291         put_bits(&s->pb, 8, leftg= data[G]);
1292         put_bits(&s->pb, 8, leftb= data[B]);
1293         put_bits(&s->pb, 8, 0);
1294
1295         sub_left_prediction_bgr32(s, s->temp[0], data+4, width-1, &leftr, &leftg, &leftb);
1296         encode_bgr_bitstream(s, width-1);
1297
1298         for(y=1; y<s->height; y++){
1299             uint8_t *dst = data + y*stride;
1300             if(s->predictor == PLANE && s->interlaced < y){
1301                 s->dsp.diff_bytes(s->temp[1], dst, dst - fake_stride, width*4);
1302                 sub_left_prediction_bgr32(s, s->temp[0], s->temp[1], width, &leftr, &leftg, &leftb);
1303             }else{
1304                 sub_left_prediction_bgr32(s, s->temp[0], dst, width, &leftr, &leftg, &leftb);
1305             }
1306             encode_bgr_bitstream(s, width);
1307         }
1308     }else{
1309         av_log(avctx, AV_LOG_ERROR, "Format not supported!\n");
1310     }
1311     emms_c();
1312
1313     size+= (put_bits_count(&s->pb)+31)/8;
1314     size/= 4;
1315
1316     if((s->flags&CODEC_FLAG_PASS1) && (s->picture_number&31)==0){
1317         int j;
1318         char *p= avctx->stats_out;
1319         char *end= p + 1024*30;
1320         for(i=0; i<3; i++){
1321             for(j=0; j<256; j++){
1322                 snprintf(p, end-p, "%"PRIu64" ", s->stats[i][j]);
1323                 p+= strlen(p);
1324                 s->stats[i][j]= 0;
1325             }
1326             snprintf(p, end-p, "\n");
1327             p++;
1328         }
1329     } else
1330         avctx->stats_out[0] = '\0';
1331     if(!(s->avctx->flags2 & CODEC_FLAG2_NO_OUTPUT)){
1332         flush_put_bits(&s->pb);
1333         s->dsp.bswap_buf((uint32_t*)buf, (uint32_t*)buf, size);
1334     }
1335
1336     s->picture_number++;
1337
1338     return size*4;
1339 }
1340
1341 static int encode_end(AVCodecContext *avctx)
1342 {
1343     HYuvContext *s = avctx->priv_data;
1344
1345     common_end(s);
1346
1347     av_freep(&avctx->extradata);
1348     av_freep(&avctx->stats_out);
1349
1350     return 0;
1351 }
1352 #endif /* CONFIG_ENCODERS */
1353
1354 #ifdef CONFIG_DECODERS
1355 AVCodec huffyuv_decoder = {
1356     "huffyuv",
1357     CODEC_TYPE_VIDEO,
1358     CODEC_ID_HUFFYUV,
1359     sizeof(HYuvContext),
1360     decode_init,
1361     NULL,
1362     decode_end,
1363     decode_frame,
1364     CODEC_CAP_DR1 | CODEC_CAP_DRAW_HORIZ_BAND,
1365     NULL
1366 };
1367
1368 AVCodec ffvhuff_decoder = {
1369     "ffvhuff",
1370     CODEC_TYPE_VIDEO,
1371     CODEC_ID_FFVHUFF,
1372     sizeof(HYuvContext),
1373     decode_init,
1374     NULL,
1375     decode_end,
1376     decode_frame,
1377     CODEC_CAP_DR1 | CODEC_CAP_DRAW_HORIZ_BAND,
1378     NULL
1379 };
1380 #endif
1381
1382 #ifdef CONFIG_ENCODERS
1383
1384 AVCodec huffyuv_encoder = {
1385     "huffyuv",
1386     CODEC_TYPE_VIDEO,
1387     CODEC_ID_HUFFYUV,
1388     sizeof(HYuvContext),
1389     encode_init,
1390     encode_frame,
1391     encode_end,
1392     .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV422P, PIX_FMT_RGB32, -1},
1393 };
1394
1395 AVCodec ffvhuff_encoder = {
1396     "ffvhuff",
1397     CODEC_TYPE_VIDEO,
1398     CODEC_ID_FFVHUFF,
1399     sizeof(HYuvContext),
1400     encode_init,
1401     encode_frame,
1402     encode_end,
1403     .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, PIX_FMT_YUV422P, PIX_FMT_RGB32, -1},
1404 };
1405
1406 #endif //CONFIG_ENCODERS