git.sesse.net Git - ffmpeg/blob - libavcodec/4xm.c

   1 /*
   2  * 4XM codec
   3  * Copyright (c) 2003 Michael Niedermayer
   4  *
   5  * This file is part of Libav.
   6  *
   7  * Libav is free software; you can redistribute it and/or
   8  * modify it under the terms of the GNU Lesser General Public
   9  * License as published by the Free Software Foundation; either
  10  * version 2.1 of the License, or (at your option) any later version.
  11  *
  12  * Libav is distributed in the hope that it will be useful,
  13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  15  * Lesser General Public License for more details.
  16  *
  17  * You should have received a copy of the GNU Lesser General Public
  18  * License along with Libav; if not, write to the Free Software
  19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20  */
  21
  22 /**
  23  * @file
  24  * 4XM codec.
  25  */
  26
  27 #include "libavutil/intreadwrite.h"
  28 #include "avcodec.h"
  29 #include "dsputil.h"
  30 #include "get_bits.h"
  31 #include "bytestream.h"
  32
  33 //#undef NDEBUG
  34 //#include <assert.h>
  35
  36 #define BLOCK_TYPE_VLC_BITS 5
  37 #define ACDC_VLC_BITS 9
  38
  39 #define CFRAME_BUFFER_COUNT 100
  40
  41 static const uint8_t block_type_tab[2][4][8][2]={
  42  {
  43   {   //{8,4,2}x{8,4,2}
  44     { 0,1}, { 2,2}, { 6,3}, {14,4}, {30,5}, {31,5}, { 0,0}
  45   },{ //{8,4}x1
  46     { 0,1}, { 0,0}, { 2,2}, { 6,3}, {14,4}, {15,4}, { 0,0}
  47   },{ //1x{8,4}
  48     { 0,1}, { 2,2}, { 0,0}, { 6,3}, {14,4}, {15,4}, { 0,0}
  49   },{ //1x2, 2x1
  50     { 0,1}, { 0,0}, { 0,0}, { 2,2}, { 6,3}, {14,4}, {15,4}
  51   }
  52  },{
  53   {  //{8,4,2}x{8,4,2}
  54     { 1,2}, { 4,3}, { 5,3}, {0,2}, {6,3}, {7,3}, {0,0}
  55   },{//{8,4}x1
  56     { 1,2}, { 0,0}, { 2,2}, {0,2}, {6,3}, {7,3}, {0,0}
  57   },{//1x{8,4}
  58     { 1,2}, { 2,2}, { 0,0}, {0,2}, {6,3}, {7,3}, {0,0}
  59   },{//1x2, 2x1
  60     { 1,2}, { 0,0}, { 0,0}, {0,2}, {2,2}, {6,3}, {7,3}
  61   }
  62  }
  63 };
  64
  65 static const uint8_t size2index[4][4]={
  66   {-1, 3, 1, 1},
  67   { 3, 0, 0, 0},
  68   { 2, 0, 0, 0},
  69   { 2, 0, 0, 0},
  70 };
  71
  72 static const int8_t mv[256][2]={
  73 {  0,  0},{  0, -1},{ -1,  0},{  1,  0},{  0,  1},{ -1, -1},{  1, -1},{ -1,  1},
  74 {  1,  1},{  0, -2},{ -2,  0},{  2,  0},{  0,  2},{ -1, -2},{  1, -2},{ -2, -1},
  75 {  2, -1},{ -2,  1},{  2,  1},{ -1,  2},{  1,  2},{ -2, -2},{  2, -2},{ -2,  2},
  76 {  2,  2},{  0, -3},{ -3,  0},{  3,  0},{  0,  3},{ -1, -3},{  1, -3},{ -3, -1},
  77 {  3, -1},{ -3,  1},{  3,  1},{ -1,  3},{  1,  3},{ -2, -3},{  2, -3},{ -3, -2},
  78 {  3, -2},{ -3,  2},{  3,  2},{ -2,  3},{  2,  3},{  0, -4},{ -4,  0},{  4,  0},
  79 {  0,  4},{ -1, -4},{  1, -4},{ -4, -1},{  4, -1},{  4,  1},{ -1,  4},{  1,  4},
  80 { -3, -3},{ -3,  3},{  3,  3},{ -2, -4},{ -4, -2},{  4, -2},{ -4,  2},{ -2,  4},
  81 {  2,  4},{ -3, -4},{  3, -4},{  4, -3},{ -5,  0},{ -4,  3},{ -3,  4},{  3,  4},
  82 { -1, -5},{ -5, -1},{ -5,  1},{ -1,  5},{ -2, -5},{  2, -5},{  5, -2},{  5,  2},
  83 { -4, -4},{ -4,  4},{ -3, -5},{ -5, -3},{ -5,  3},{  3,  5},{ -6,  0},{  0,  6},
  84 { -6, -1},{ -6,  1},{  1,  6},{  2, -6},{ -6,  2},{  2,  6},{ -5, -4},{  5,  4},
  85 {  4,  5},{ -6, -3},{  6,  3},{ -7,  0},{ -1, -7},{  5, -5},{ -7,  1},{ -1,  7},
  86 {  4, -6},{  6,  4},{ -2, -7},{ -7,  2},{ -3, -7},{  7, -3},{  3,  7},{  6, -5},
  87 {  0, -8},{ -1, -8},{ -7, -4},{ -8,  1},{  4,  7},{  2, -8},{ -2,  8},{  6,  6},
  88 { -8,  3},{  5, -7},{ -5,  7},{  8, -4},{  0, -9},{ -9, -1},{  1,  9},{  7, -6},
  89 { -7,  6},{ -5, -8},{ -5,  8},{ -9,  3},{  9, -4},{  7, -7},{  8, -6},{  6,  8},
  90 { 10,  1},{-10,  2},{  9, -5},{ 10, -3},{ -8, -7},{-10, -4},{  6, -9},{-11,  0},
  91 { 11,  1},{-11, -2},{ -2, 11},{  7, -9},{ -7,  9},{ 10,  6},{ -4, 11},{  8, -9},
  92 {  8,  9},{  5, 11},{  7,-10},{ 12, -3},{ 11,  6},{ -9, -9},{  8, 10},{  5, 12},
  93 {-11,  7},{ 13,  2},{  6,-12},{ 10,  9},{-11,  8},{ -7, 12},{  0, 14},{ 14, -2},
  94 { -9, 11},{ -6, 13},{-14, -4},{ -5,-14},{  5, 14},{-15, -1},{-14, -6},{  3,-15},
  95 { 11,-11},{ -7, 14},{ -5, 15},{  8,-14},{ 15,  6},{  3, 16},{  7,-15},{-16,  5},
  96 {  0, 17},{-16, -6},{-10, 14},{-16,  7},{ 12, 13},{-16,  8},{-17,  6},{-18,  3},
  97 { -7, 17},{ 15, 11},{ 16, 10},{  2,-19},{  3,-19},{-11,-16},{-18,  8},{-19, -6},
  98 {  2,-20},{-17,-11},{-10,-18},{  8, 19},{-21, -1},{-20,  7},{ -4, 21},{ 21,  5},
  99 { 15, 16},{  2,-22},{-10,-20},{-22,  5},{ 20,-11},{ -7,-22},{-12, 20},{ 23, -5},
 100 { 13,-20},{ 24, -2},{-15, 19},{-11, 22},{ 16, 19},{ 23,-10},{-18,-18},{ -9,-24},
 101 { 24,-10},{ -3, 26},{-23, 13},{-18,-20},{ 17, 21},{ -4, 27},{ 27,  6},{  1,-28},
 102 {-11, 26},{-17,-23},{  7, 28},{ 11,-27},{ 29,  5},{-23,-19},{-28,-11},{-21, 22},
 103 {-30,  7},{-17, 26},{-27, 16},{ 13, 29},{ 19,-26},{ 10,-31},{-14,-30},{ 20,-27},
 104 {-29, 18},{-16,-31},{-28,-22},{ 21,-30},{-25, 28},{ 26,-29},{ 25,-32},{-32,-32}
 105 };
 106
 107 // this is simply the scaled down elementwise product of the standard jpeg quantizer table and the AAN premul table
 108 static const uint8_t dequant_table[64]={
 109  16, 15, 13, 19, 24, 31, 28, 17,
 110  17, 23, 25, 31, 36, 63, 45, 21,
 111  18, 24, 27, 37, 52, 59, 49, 20,
 112  16, 28, 34, 40, 60, 80, 51, 20,
 113  18, 31, 48, 66, 68, 86, 56, 21,
 114  19, 38, 56, 59, 64, 64, 48, 20,
 115  27, 48, 55, 55, 56, 51, 35, 15,
 116  20, 35, 34, 32, 31, 22, 15,  8,
 117 };
 118
 119 static VLC block_type_vlc[2][4];
 120
 121
 122 typedef struct CFrameBuffer{
 123     unsigned int allocated_size;
 124     unsigned int size;
 125     int id;
 126     uint8_t *data;
 127 }CFrameBuffer;
 128
 129 typedef struct FourXContext{
 130     AVCodecContext *avctx;
 131     DSPContext dsp;
 132     AVFrame current_picture, last_picture;
 133     GetBitContext pre_gb;          ///< ac/dc prefix
 134     GetBitContext gb;
 135     const uint8_t *bytestream;
 136     const uint16_t *wordstream;
 137     int mv[256];
 138     VLC pre_vlc;
 139     int last_dc;
 140     DECLARE_ALIGNED(16, DCTELEM, block)[6][64];
 141     void *bitstream_buffer;
 142     unsigned int bitstream_buffer_size;
 143     int version;
 144     CFrameBuffer cfrm[CFRAME_BUFFER_COUNT];
 145 } FourXContext;
 146
 147
 148 #define FIX_1_082392200  70936
 149 #define FIX_1_414213562  92682
 150 #define FIX_1_847759065 121095
 151 #define FIX_2_613125930 171254
 152
 153 #define MULTIPLY(var,const)  (((var)*(const)) >> 16)
 154
 155 static void idct(DCTELEM block[64]){
 156     int tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
 157     int tmp10, tmp11, tmp12, tmp13;
 158     int z5, z10, z11, z12, z13;
 159     int i;
 160     int temp[64];
 161
 162     for(i=0; i<8; i++){
 163         tmp10 = block[8*0 + i] + block[8*4 + i];
 164         tmp11 = block[8*0 + i] - block[8*4 + i];
 165
 166         tmp13 =          block[8*2 + i] + block[8*6 + i];
 167         tmp12 = MULTIPLY(block[8*2 + i] - block[8*6 + i], FIX_1_414213562) - tmp13;
 168
 169         tmp0 = tmp10 + tmp13;
 170         tmp3 = tmp10 - tmp13;
 171         tmp1 = tmp11 + tmp12;
 172         tmp2 = tmp11 - tmp12;
 173
 174         z13 = block[8*5 + i] + block[8*3 + i];
 175         z10 = block[8*5 + i] - block[8*3 + i];
 176         z11 = block[8*1 + i] + block[8*7 + i];
 177         z12 = block[8*1 + i] - block[8*7 + i];
 178
 179         tmp7  =          z11 + z13;
 180         tmp11 = MULTIPLY(z11 - z13, FIX_1_414213562);
 181
 182         z5    = MULTIPLY(z10 + z12, FIX_1_847759065);
 183         tmp10 = MULTIPLY(z12, FIX_1_082392200) - z5;
 184         tmp12 = MULTIPLY(z10, - FIX_2_613125930) + z5;
 185
 186         tmp6 = tmp12 - tmp7;
 187         tmp5 = tmp11 - tmp6;
 188         tmp4 = tmp10 + tmp5;
 189
 190         temp[8*0 + i] = tmp0 + tmp7;
 191         temp[8*7 + i] = tmp0 - tmp7;
 192         temp[8*1 + i] = tmp1 + tmp6;
 193         temp[8*6 + i] = tmp1 - tmp6;
 194         temp[8*2 + i] = tmp2 + tmp5;
 195         temp[8*5 + i] = tmp2 - tmp5;
 196         temp[8*4 + i] = tmp3 + tmp4;
 197         temp[8*3 + i] = tmp3 - tmp4;
 198     }
 199
 200     for(i=0; i<8*8; i+=8){
 201         tmp10 = temp[0 + i] + temp[4 + i];
 202         tmp11 = temp[0 + i] - temp[4 + i];
 203
 204         tmp13 = temp[2 + i] + temp[6 + i];
 205         tmp12 = MULTIPLY(temp[2 + i] - temp[6 + i], FIX_1_414213562) - tmp13;
 206
 207         tmp0 = tmp10 + tmp13;
 208         tmp3 = tmp10 - tmp13;
 209         tmp1 = tmp11 + tmp12;
 210         tmp2 = tmp11 - tmp12;
 211
 212         z13 = temp[5 + i] + temp[3 + i];
 213         z10 = temp[5 + i] - temp[3 + i];
 214         z11 = temp[1 + i] + temp[7 + i];
 215         z12 = temp[1 + i] - temp[7 + i];
 216
 217         tmp7 = z11 + z13;
 218         tmp11 = MULTIPLY(z11 - z13, FIX_1_414213562);
 219
 220         z5 = MULTIPLY(z10 + z12, FIX_1_847759065);
 221         tmp10 = MULTIPLY(z12, FIX_1_082392200) - z5;
 222         tmp12 = MULTIPLY(z10, - FIX_2_613125930) + z5;
 223
 224         tmp6 = tmp12 - tmp7;
 225         tmp5 = tmp11 - tmp6;
 226         tmp4 = tmp10 + tmp5;
 227
 228         block[0 + i] = (tmp0 + tmp7)>>6;
 229         block[7 + i] = (tmp0 - tmp7)>>6;
 230         block[1 + i] = (tmp1 + tmp6)>>6;
 231         block[6 + i] = (tmp1 - tmp6)>>6;
 232         block[2 + i] = (tmp2 + tmp5)>>6;
 233         block[5 + i] = (tmp2 - tmp5)>>6;
 234         block[4 + i] = (tmp3 + tmp4)>>6;
 235         block[3 + i] = (tmp3 - tmp4)>>6;
 236     }
 237 }
 238
 239 static av_cold void init_vlcs(FourXContext *f){
 240     static VLC_TYPE table[8][32][2];
 241     int i;
 242
 243     for(i=0; i<8; i++){
 244         block_type_vlc[0][i].table= table[i];
 245         block_type_vlc[0][i].table_allocated= 32;
 246         init_vlc(&block_type_vlc[0][i], BLOCK_TYPE_VLC_BITS, 7,
 247                  &block_type_tab[0][i][0][1], 2, 1,
 248                  &block_type_tab[0][i][0][0], 2, 1, INIT_VLC_USE_NEW_STATIC);
 249     }
 250 }
 251
 252 static void init_mv(FourXContext *f){
 253     int i;
 254
 255     for(i=0; i<256; i++){
 256         if(f->version>1)
 257             f->mv[i] = mv[i][0]   + mv[i][1]  *f->current_picture.linesize[0]/2;
 258         else
 259             f->mv[i] = (i&15) - 8 + ((i>>4)-8)*f->current_picture.linesize[0]/2;
 260     }
 261 }
 262
 263 #if HAVE_BIGENDIAN
 264 #define LE_CENTRIC_MUL(dst, src, scale, dc) \
 265     { \
 266         unsigned tmpval = AV_RN32(src);                 \
 267         tmpval = (tmpval <<  16) | (tmpval >>  16);     \
 268         tmpval = tmpval * (scale) + (dc);               \
 269         tmpval = (tmpval <<  16) | (tmpval >>  16);     \
 270         AV_WN32A(dst, tmpval);                          \
 271     }
 272 #else
 273 #define LE_CENTRIC_MUL(dst, src, scale, dc) \
 274     { \
 275         unsigned tmpval = AV_RN32(src) * (scale) + (dc); \
 276         AV_WN32A(dst, tmpval);                           \
 277     }
 278 #endif
 279
 280 static inline void mcdc(uint16_t *dst, uint16_t *src, int log2w, int h, int stride, int scale, unsigned dc){
 281    int i;
 282    dc*= 0x10001;
 283
 284    switch(log2w){
 285    case 0:
 286         for(i=0; i<h; i++){
 287             dst[0] = scale*src[0] + dc;
 288             if(scale) src += stride;
 289             dst += stride;
 290         }
 291         break;
 292     case 1:
 293         for(i=0; i<h; i++){
 294             LE_CENTRIC_MUL(dst, src, scale, dc);
 295             if(scale) src += stride;
 296             dst += stride;
 297         }
 298         break;
 299     case 2:
 300         for(i=0; i<h; i++){
 301             LE_CENTRIC_MUL(dst,     src,     scale, dc);
 302             LE_CENTRIC_MUL(dst + 2, src + 2, scale, dc);
 303             if(scale) src += stride;
 304             dst += stride;
 305         }
 306         break;
 307     case 3:
 308         for(i=0; i<h; i++){
 309             LE_CENTRIC_MUL(dst,     src,     scale, dc);
 310             LE_CENTRIC_MUL(dst + 2, src + 2, scale, dc);
 311             LE_CENTRIC_MUL(dst + 4, src + 4, scale, dc);
 312             LE_CENTRIC_MUL(dst + 6, src + 6, scale, dc);
 313             if(scale) src += stride;
 314             dst += stride;
 315         }
 316         break;
 317     default: assert(0);
 318     }
 319 }
 320
 321 static void decode_p_block(FourXContext *f, uint16_t *dst, uint16_t *src, int log2w, int log2h, int stride){
 322     const int index= size2index[log2h][log2w];
 323     const int h= 1<<log2h;
 324     int code= get_vlc2(&f->gb, block_type_vlc[1-(f->version>1)][index].table, BLOCK_TYPE_VLC_BITS, 1);
 325     uint16_t *start= (uint16_t*)f->last_picture.data[0];
 326     uint16_t *end= start + stride*(f->avctx->height-h+1) - (1<<log2w);
 327
 328     assert(code>=0 && code<=6);
 329
 330     if(code == 0){
 331         src += f->mv[ *f->bytestream++ ];
 332         if(start > src || src > end){
 333             av_log(f->avctx, AV_LOG_ERROR, "mv out of pic\n");
 334             return;
 335         }
 336         mcdc(dst, src, log2w, h, stride, 1, 0);
 337     }else if(code == 1){
 338         log2h--;
 339         decode_p_block(f, dst                  , src                  , log2w, log2h, stride);
 340         decode_p_block(f, dst + (stride<<log2h), src + (stride<<log2h), log2w, log2h, stride);
 341     }else if(code == 2){
 342         log2w--;
 343         decode_p_block(f, dst             , src             , log2w, log2h, stride);
 344         decode_p_block(f, dst + (1<<log2w), src + (1<<log2w), log2w, log2h, stride);
 345     }else if(code == 3 && f->version<2){
 346         mcdc(dst, src, log2w, h, stride, 1, 0);
 347     }else if(code == 4){
 348         src += f->mv[ *f->bytestream++ ];
 349         if(start > src || src > end){
 350             av_log(f->avctx, AV_LOG_ERROR, "mv out of pic\n");
 351             return;
 352         }
 353         mcdc(dst, src, log2w, h, stride, 1, av_le2ne16(*f->wordstream++));
 354     }else if(code == 5){
 355         mcdc(dst, src, log2w, h, stride, 0, av_le2ne16(*f->wordstream++));
 356     }else if(code == 6){
 357         if(log2w){
 358             dst[0] = av_le2ne16(*f->wordstream++);
 359             dst[1] = av_le2ne16(*f->wordstream++);
 360         }else{
 361             dst[0     ] = av_le2ne16(*f->wordstream++);
 362             dst[stride] = av_le2ne16(*f->wordstream++);
 363         }
 364     }
 365 }
 366
 367 static int decode_p_frame(FourXContext *f, const uint8_t *buf, int length){
 368     int x, y;
 369     const int width= f->avctx->width;
 370     const int height= f->avctx->height;
 371     uint16_t *src= (uint16_t*)f->last_picture.data[0];
 372     uint16_t *dst= (uint16_t*)f->current_picture.data[0];
 373     const int stride= f->current_picture.linesize[0]>>1;
 374     unsigned int bitstream_size, bytestream_size, wordstream_size, extra;
 375
 376     if(f->version>1){
 377         extra=20;
 378         bitstream_size= AV_RL32(buf+8);
 379         wordstream_size= AV_RL32(buf+12);
 380         bytestream_size= AV_RL32(buf+16);
 381     }else{
 382         extra=0;
 383         bitstream_size = AV_RL16(buf-4);
 384         wordstream_size= AV_RL16(buf-2);
 385         bytestream_size= FFMAX(length - bitstream_size - wordstream_size, 0);
 386     }
 387
 388     if(bitstream_size+ bytestream_size+ wordstream_size + extra != length
 389        || bitstream_size  > (1<<26)
 390        || bytestream_size > (1<<26)
 391        || wordstream_size > (1<<26)
 392        ){
 393         av_log(f->avctx, AV_LOG_ERROR, "lengths %d %d %d %d\n", bitstream_size, bytestream_size, wordstream_size,
 394         bitstream_size+ bytestream_size+ wordstream_size - length);
 395         return -1;
 396     }
 397
 398     av_fast_malloc(&f->bitstream_buffer, &f->bitstream_buffer_size, bitstream_size + FF_INPUT_BUFFER_PADDING_SIZE);
 399     if (!f->bitstream_buffer)
 400         return AVERROR(ENOMEM);
 401     f->dsp.bswap_buf(f->bitstream_buffer, (const uint32_t*)(buf + extra), bitstream_size/4);
 402     memset((uint8_t*)f->bitstream_buffer + bitstream_size, 0, FF_INPUT_BUFFER_PADDING_SIZE);
 403     init_get_bits(&f->gb, f->bitstream_buffer, 8*bitstream_size);
 404
 405     f->wordstream= (const uint16_t*)(buf + extra + bitstream_size);
 406     f->bytestream= buf + extra + bitstream_size + wordstream_size;
 407
 408     init_mv(f);
 409
 410     for(y=0; y<height; y+=8){
 411         for(x=0; x<width; x+=8){
 412             decode_p_block(f, dst + x, src + x, 3, 3, stride);
 413         }
 414         src += 8*stride;
 415         dst += 8*stride;
 416     }
 417
 418     if(   bitstream_size != (get_bits_count(&f->gb)+31)/32*4
 419        || (((const char*)f->wordstream - (const char*)buf + 2)&~2) != extra + bitstream_size + wordstream_size
 420        || (((const char*)f->bytestream - (const char*)buf + 3)&~3) != extra + bitstream_size + wordstream_size + bytestream_size)
 421         av_log(f->avctx, AV_LOG_ERROR, " %d %td %td bytes left\n",
 422             bitstream_size - (get_bits_count(&f->gb)+31)/32*4,
 423             -(((const char*)f->bytestream - (const char*)buf + 3)&~3) + (extra + bitstream_size + wordstream_size + bytestream_size),
 424             -(((const char*)f->wordstream - (const char*)buf + 2)&~2) + (extra + bitstream_size + wordstream_size)
 425         );
 426
 427     return 0;
 428 }
 429
 430 /**
 431  * decode block and dequantize.
 432  * Note this is almost identical to MJPEG.
 433  */
 434 static int decode_i_block(FourXContext *f, DCTELEM *block){
 435     int code, i, j, level, val;
 436
 437     /* DC coef */
 438     val = get_vlc2(&f->pre_gb, f->pre_vlc.table, ACDC_VLC_BITS, 3);
 439     if (val>>4){
 440         av_log(f->avctx, AV_LOG_ERROR, "error dc run != 0\n");
 441     }
 442
 443     if(val)
 444         val = get_xbits(&f->gb, val);
 445
 446     val = val * dequant_table[0] + f->last_dc;
 447     f->last_dc =
 448     block[0] = val;
 449     /* AC coefs */
 450     i = 1;
 451     for(;;) {
 452         code = get_vlc2(&f->pre_gb, f->pre_vlc.table, ACDC_VLC_BITS, 3);
 453
 454         /* EOB */
 455         if (code == 0)
 456             break;
 457         if (code == 0xf0) {
 458             i += 16;
 459         } else {
 460             level = get_xbits(&f->gb, code & 0xf);
 461             i += code >> 4;
 462             if (i >= 64) {
 463                 av_log(f->avctx, AV_LOG_ERROR, "run %d oveflow\n", i);
 464                 return 0;
 465             }
 466
 467             j= ff_zigzag_direct[i];
 468             block[j] = level * dequant_table[j];
 469             i++;
 470             if (i >= 64)
 471                 break;
 472         }
 473     }
 474
 475     return 0;
 476 }
 477
 478 static inline void idct_put(FourXContext *f, int x, int y){
 479     DCTELEM (*block)[64]= f->block;
 480     int stride= f->current_picture.linesize[0]>>1;
 481     int i;
 482     uint16_t *dst = ((uint16_t*)f->current_picture.data[0]) + y * stride + x;
 483
 484     for(i=0; i<4; i++){
 485         block[i][0] += 0x80*8*8;
 486         idct(block[i]);
 487     }
 488
 489     if(!(f->avctx->flags&CODEC_FLAG_GRAY)){
 490         for(i=4; i<6; i++) idct(block[i]);
 491     }
 492
 493 /* Note transform is:
 494 y= ( 1b + 4g + 2r)/14
 495 cb=( 3b - 2g - 1r)/14
 496 cr=(-1b - 4g + 5r)/14
 497 */
 498     for(y=0; y<8; y++){
 499         for(x=0; x<8; x++){
 500             DCTELEM *temp= block[(x>>2) + 2*(y>>2)] + 2*(x&3) + 2*8*(y&3); //FIXME optimize
 501             int cb= block[4][x + 8*y];
 502             int cr= block[5][x + 8*y];
 503             int cg= (cb + cr)>>1;
 504             int y;
 505
 506             cb+=cb;
 507
 508             y = temp[0];
 509             dst[0       ]= ((y+cb)>>3) + (((y-cg)&0xFC)<<3) + (((y+cr)&0xF8)<<8);
 510             y = temp[1];
 511             dst[1       ]= ((y+cb)>>3) + (((y-cg)&0xFC)<<3) + (((y+cr)&0xF8)<<8);
 512             y = temp[8];
 513             dst[  stride]= ((y+cb)>>3) + (((y-cg)&0xFC)<<3) + (((y+cr)&0xF8)<<8);
 514             y = temp[9];
 515             dst[1+stride]= ((y+cb)>>3) + (((y-cg)&0xFC)<<3) + (((y+cr)&0xF8)<<8);
 516             dst += 2;
 517         }
 518         dst += 2*stride - 2*8;
 519     }
 520 }
 521
 522 static int decode_i_mb(FourXContext *f){
 523     int i;
 524
 525     f->dsp.clear_blocks(f->block[0]);
 526
 527     for(i=0; i<6; i++){
 528         if(decode_i_block(f, f->block[i]) < 0)
 529             return -1;
 530     }
 531
 532     return 0;
 533 }
 534
 535 static const uint8_t *read_huffman_tables(FourXContext *f, const uint8_t * const buf){
 536     int frequency[512];
 537     uint8_t flag[512];
 538     int up[512];
 539     uint8_t len_tab[257];
 540     int bits_tab[257];
 541     int start, end;
 542     const uint8_t *ptr= buf;
 543     int j;
 544
 545     memset(frequency, 0, sizeof(frequency));
 546     memset(up, -1, sizeof(up));
 547
 548     start= *ptr++;
 549     end= *ptr++;
 550     for(;;){
 551         int i;
 552
 553         for(i=start; i<=end; i++){
 554             frequency[i]= *ptr++;
 555         }
 556         start= *ptr++;
 557         if(start==0) break;
 558
 559         end= *ptr++;
 560     }
 561     frequency[256]=1;
 562
 563     while((ptr - buf)&3) ptr++; // 4byte align
 564
 565     for(j=257; j<512; j++){
 566         int min_freq[2]= {256*256, 256*256};
 567         int smallest[2]= {0, 0};
 568         int i;
 569         for(i=0; i<j; i++){
 570             if(frequency[i] == 0) continue;
 571             if(frequency[i] < min_freq[1]){
 572                 if(frequency[i] < min_freq[0]){
 573                     min_freq[1]= min_freq[0]; smallest[1]= smallest[0];
 574                     min_freq[0]= frequency[i];smallest[0]= i;
 575                 }else{
 576                     min_freq[1]= frequency[i];smallest[1]= i;
 577                 }
 578             }
 579         }
 580         if(min_freq[1] == 256*256) break;
 581
 582         frequency[j]= min_freq[0] + min_freq[1];
 583         flag[ smallest[0] ]= 0;
 584         flag[ smallest[1] ]= 1;
 585         up[ smallest[0] ]=
 586         up[ smallest[1] ]= j;
 587         frequency[ smallest[0] ]= frequency[ smallest[1] ]= 0;
 588     }
 589
 590     for(j=0; j<257; j++){
 591         int node;
 592         int len=0;
 593         int bits=0;
 594
 595         for(node= j; up[node] != -1; node= up[node]){
 596             bits += flag[node]<<len;
 597             len++;
 598             if(len > 31) av_log(f->avctx, AV_LOG_ERROR, "vlc length overflow\n"); //can this happen at all ?
 599         }
 600
 601         bits_tab[j]= bits;
 602         len_tab[j]= len;
 603     }
 604
 605     if (init_vlc(&f->pre_vlc, ACDC_VLC_BITS, 257,
 606                  len_tab , 1, 1,
 607                  bits_tab, 4, 4, 0))
 608         return NULL;
 609
 610     return ptr;
 611 }
 612
 613 static int mix(int c0, int c1){
 614     int blue = 2*(c0&0x001F) + (c1&0x001F);
 615     int green= (2*(c0&0x03E0) + (c1&0x03E0))>>5;
 616     int red  = 2*(c0>>10) + (c1>>10);
 617     return red/3*1024 + green/3*32 + blue/3;
 618 }
 619
 620 static int decode_i2_frame(FourXContext *f, const uint8_t *buf, int length){
 621     int x, y, x2, y2;
 622     const int width= f->avctx->width;
 623     const int height= f->avctx->height;
 624     uint16_t *dst= (uint16_t*)f->current_picture.data[0];
 625     const int stride= f->current_picture.linesize[0]>>1;
 626
 627     for(y=0; y<height; y+=16){
 628         for(x=0; x<width; x+=16){
 629             unsigned int color[4], bits;
 630             memset(color, 0, sizeof(color));
 631 //warning following is purely guessed ...
 632             color[0]= bytestream_get_le16(&buf);
 633             color[1]= bytestream_get_le16(&buf);
 634
 635             if(color[0]&0x8000) av_log(NULL, AV_LOG_ERROR, "unk bit 1\n");
 636             if(color[1]&0x8000) av_log(NULL, AV_LOG_ERROR, "unk bit 2\n");
 637
 638             color[2]= mix(color[0], color[1]);
 639             color[3]= mix(color[1], color[0]);
 640
 641             bits= bytestream_get_le32(&buf);
 642             for(y2=0; y2<16; y2++){
 643                 for(x2=0; x2<16; x2++){
 644                     int index= 2*(x2>>2) + 8*(y2>>2);
 645                     dst[y2*stride+x2]= color[(bits>>index)&3];
 646                 }
 647             }
 648             dst+=16;
 649         }
 650         dst += 16*stride - width;
 651     }
 652
 653     return 0;
 654 }
 655
 656 static int decode_i_frame(FourXContext *f, const uint8_t *buf, int length){
 657     int x, y;
 658     const int width= f->avctx->width;
 659     const int height= f->avctx->height;
 660     uint16_t *dst= (uint16_t*)f->current_picture.data[0];
 661     const int stride= f->current_picture.linesize[0]>>1;
 662     const unsigned int bitstream_size= AV_RL32(buf);
 663     const int token_count av_unused = AV_RL32(buf + bitstream_size + 8);
 664     unsigned int prestream_size= 4*AV_RL32(buf + bitstream_size + 4);
 665     const uint8_t *prestream= buf + bitstream_size + 12;
 666
 667     if(prestream_size + bitstream_size + 12 != length
 668        || bitstream_size > (1<<26)
 669        || prestream_size > (1<<26)){
 670         av_log(f->avctx, AV_LOG_ERROR, "size mismatch %d %d %d\n", prestream_size, bitstream_size, length);
 671         return -1;
 672     }
 673
 674     prestream= read_huffman_tables(f, prestream);
 675
 676     init_get_bits(&f->gb, buf + 4, 8*bitstream_size);
 677
 678     prestream_size= length + buf - prestream;
 679
 680     av_fast_malloc(&f->bitstream_buffer, &f->bitstream_buffer_size, prestream_size + FF_INPUT_BUFFER_PADDING_SIZE);
 681     if (!f->bitstream_buffer)
 682         return AVERROR(ENOMEM);
 683     f->dsp.bswap_buf(f->bitstream_buffer, (const uint32_t*)prestream, prestream_size/4);
 684     memset((uint8_t*)f->bitstream_buffer + prestream_size, 0, FF_INPUT_BUFFER_PADDING_SIZE);
 685     init_get_bits(&f->pre_gb, f->bitstream_buffer, 8*prestream_size);
 686
 687     f->last_dc= 0*128*8*8;
 688
 689     for(y=0; y<height; y+=16){
 690         for(x=0; x<width; x+=16){
 691             if(decode_i_mb(f) < 0)
 692                 return -1;
 693
 694             idct_put(f, x, y);
 695         }
 696         dst += 16*stride;
 697     }
 698
 699     if(get_vlc2(&f->pre_gb, f->pre_vlc.table, ACDC_VLC_BITS, 3) != 256)
 700         av_log(f->avctx, AV_LOG_ERROR, "end mismatch\n");
 701
 702     return 0;
 703 }
 704
 705 static int decode_frame(AVCodecContext *avctx,
 706                         void *data, int *data_size,
 707                         AVPacket *avpkt)
 708 {
 709     const uint8_t *buf = avpkt->data;
 710     int buf_size = avpkt->size;
 711     FourXContext * const f = avctx->priv_data;
 712     AVFrame *picture = data;
 713     AVFrame *p, temp;
 714     int i, frame_4cc, frame_size;
 715
 716     frame_4cc= AV_RL32(buf);
 717     if(buf_size != AV_RL32(buf+4)+8 || buf_size < 20){
 718         av_log(f->avctx, AV_LOG_ERROR, "size mismatch %d %d\n", buf_size, AV_RL32(buf+4));
 719     }
 720
 721     if(frame_4cc == AV_RL32("cfrm")){
 722         int free_index=-1;
 723         const int data_size= buf_size - 20;
 724         const int id= AV_RL32(buf+12);
 725         const int whole_size= AV_RL32(buf+16);
 726         CFrameBuffer *cfrm;
 727
 728         for(i=0; i<CFRAME_BUFFER_COUNT; i++){
 729             if(f->cfrm[i].id && f->cfrm[i].id < avctx->frame_number)
 730                 av_log(f->avctx, AV_LOG_ERROR, "lost c frame %d\n", f->cfrm[i].id);
 731         }
 732
 733         for(i=0; i<CFRAME_BUFFER_COUNT; i++){
 734             if(f->cfrm[i].id   == id) break;
 735             if(f->cfrm[i].size == 0 ) free_index= i;
 736         }
 737
 738         if(i>=CFRAME_BUFFER_COUNT){
 739             i= free_index;
 740             f->cfrm[i].id= id;
 741         }
 742         cfrm= &f->cfrm[i];
 743
 744         cfrm->data= av_fast_realloc(cfrm->data, &cfrm->allocated_size, cfrm->size + data_size + FF_INPUT_BUFFER_PADDING_SIZE);
 745         if(!cfrm->data){ //explicit check needed as memcpy below might not catch a NULL
 746             av_log(f->avctx, AV_LOG_ERROR, "realloc falure");
 747             return -1;
 748         }
 749
 750         memcpy(cfrm->data + cfrm->size, buf+20, data_size);
 751         cfrm->size += data_size;
 752
 753         if(cfrm->size >= whole_size){
 754             buf= cfrm->data;
 755             frame_size= cfrm->size;
 756
 757             if(id != avctx->frame_number){
 758                 av_log(f->avctx, AV_LOG_ERROR, "cframe id mismatch %d %d\n", id, avctx->frame_number);
 759             }
 760
 761             cfrm->size= cfrm->id= 0;
 762             frame_4cc= AV_RL32("pfrm");
 763         }else
 764             return buf_size;
 765     }else{
 766         buf= buf + 12;
 767         frame_size= buf_size - 12;
 768     }
 769
 770     temp= f->current_picture;
 771     f->current_picture= f->last_picture;
 772     f->last_picture= temp;
 773
 774     p= &f->current_picture;
 775     avctx->coded_frame= p;
 776
 777     avctx->flags |= CODEC_FLAG_EMU_EDGE; // alternatively we would have to use our own buffer management
 778
 779     if(p->data[0])
 780         avctx->release_buffer(avctx, p);
 781
 782     p->reference= 1;
 783     if(avctx->get_buffer(avctx, p) < 0){
 784         av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
 785         return -1;
 786     }
 787
 788     if(frame_4cc == AV_RL32("ifr2")){
 789         p->pict_type= AV_PICTURE_TYPE_I;
 790         if(decode_i2_frame(f, buf-4, frame_size) < 0)
 791             return -1;
 792     }else if(frame_4cc == AV_RL32("ifrm")){
 793         p->pict_type= AV_PICTURE_TYPE_I;
 794         if(decode_i_frame(f, buf, frame_size) < 0)
 795             return -1;
 796     }else if(frame_4cc == AV_RL32("pfrm") || frame_4cc == AV_RL32("pfr2")){
 797         if(!f->last_picture.data[0]){
 798             f->last_picture.reference= 1;
 799             if(avctx->get_buffer(avctx, &f->last_picture) < 0){
 800                 av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
 801                 return -1;
 802             }
 803         }
 804
 805         p->pict_type= AV_PICTURE_TYPE_P;
 806         if(decode_p_frame(f, buf, frame_size) < 0)
 807             return -1;
 808     }else if(frame_4cc == AV_RL32("snd_")){
 809         av_log(avctx, AV_LOG_ERROR, "ignoring snd_ chunk length:%d\n", buf_size);
 810     }else{
 811         av_log(avctx, AV_LOG_ERROR, "ignoring unknown chunk length:%d\n", buf_size);
 812     }
 813
 814     p->key_frame= p->pict_type == AV_PICTURE_TYPE_I;
 815
 816     *picture= *p;
 817     *data_size = sizeof(AVPicture);
 818
 819     emms_c();
 820
 821     return buf_size;
 822 }
 823
 824
 825 static av_cold void common_init(AVCodecContext *avctx){
 826     FourXContext * const f = avctx->priv_data;
 827
 828     dsputil_init(&f->dsp, avctx);
 829
 830     f->avctx= avctx;
 831 }
 832
 833 static av_cold int decode_init(AVCodecContext *avctx){
 834     FourXContext * const f = avctx->priv_data;
 835
 836     if(avctx->extradata_size != 4 || !avctx->extradata) {
 837         av_log(avctx, AV_LOG_ERROR, "extradata wrong or missing\n");
 838         return 1;
 839     }
 840
 841     f->version= AV_RL32(avctx->extradata)>>16;
 842     common_init(avctx);
 843     init_vlcs(f);
 844
 845     if(f->version>2) avctx->pix_fmt= PIX_FMT_RGB565;
 846     else             avctx->pix_fmt= PIX_FMT_BGR555;
 847
 848     return 0;
 849 }
 850
 851
 852 static av_cold int decode_end(AVCodecContext *avctx){
 853     FourXContext * const f = avctx->priv_data;
 854     int i;
 855
 856     av_freep(&f->bitstream_buffer);
 857     f->bitstream_buffer_size=0;
 858     for(i=0; i<CFRAME_BUFFER_COUNT; i++){
 859         av_freep(&f->cfrm[i].data);
 860         f->cfrm[i].allocated_size= 0;
 861     }
 862     free_vlc(&f->pre_vlc);
 863     if(f->current_picture.data[0])
 864         avctx->release_buffer(avctx, &f->current_picture);
 865     if(f->last_picture.data[0])
 866         avctx->release_buffer(avctx, &f->last_picture);
 867
 868     return 0;
 869 }
 870
 871 AVCodec ff_fourxm_decoder = {
 872     .name           = "4xm",
 873     .type           = AVMEDIA_TYPE_VIDEO,
 874     .id             = CODEC_ID_4XM,
 875     .priv_data_size = sizeof(FourXContext),
 876     .init           = decode_init,
 877     .close          = decode_end,
 878     .decode         = decode_frame,
 879     .capabilities   = CODEC_CAP_DR1,
 880     .long_name = NULL_IF_CONFIG_SMALL("4X Movie"),
 881 };
 882