git.sesse.net Git - ffmpeg/blob - libavcodec/4xm.c

   1 /*
   2  * 4XM codec
   3  * Copyright (c) 2003 Michael Niedermayer
   4  *
   5  * This file is part of Libav.
   6  *
   7  * Libav is free software; you can redistribute it and/or
   8  * modify it under the terms of the GNU Lesser General Public
   9  * License as published by the Free Software Foundation; either
  10  * version 2.1 of the License, or (at your option) any later version.
  11  *
  12  * Libav is distributed in the hope that it will be useful,
  13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  15  * Lesser General Public License for more details.
  16  *
  17  * You should have received a copy of the GNU Lesser General Public
  18  * License along with Libav; if not, write to the Free Software
  19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20  */
  21
  22 /**
  23  * @file
  24  * 4XM codec.
  25  */
  26
  27 #include "libavutil/frame.h"
  28 #include "libavutil/intreadwrite.h"
  29 #include "avcodec.h"
  30 #include "bytestream.h"
  31 #include "dsputil.h"
  32 #include "get_bits.h"
  33 #include "internal.h"
  34
  35 #define BLOCK_TYPE_VLC_BITS 5
  36 #define ACDC_VLC_BITS 9
  37
  38 #define CFRAME_BUFFER_COUNT 100
  39
  40 static const uint8_t block_type_tab[2][4][8][2] = {
  41     {
  42         {    // { 8, 4, 2 } x { 8, 4, 2}
  43             { 0, 1 }, { 2, 2 }, { 6, 3 }, { 14, 4 }, { 30, 5 }, { 31, 5 }, { 0, 0 }
  44         }, { // { 8, 4 } x 1
  45             { 0, 1 }, { 0, 0 }, { 2, 2 }, { 6, 3 }, { 14, 4 }, { 15, 4 }, { 0, 0 }
  46         }, { // 1 x { 8, 4 }
  47             { 0, 1 }, { 2, 2 }, { 0, 0 }, { 6, 3 }, { 14, 4 }, { 15, 4 }, { 0, 0 }
  48         }, { // 1 x 2, 2 x 1
  49             { 0, 1 }, { 0, 0 }, { 0, 0 }, { 2, 2 }, { 6, 3 }, { 14, 4 }, { 15, 4 }
  50         }
  51     }, {
  52         {   // { 8, 4, 2 } x { 8, 4, 2}
  53             { 1, 2 }, { 4, 3 }, { 5, 3 }, { 0, 2 }, { 6, 3 }, { 7, 3 }, { 0, 0 }
  54         }, {// { 8, 4 } x 1
  55             { 1, 2 }, { 0, 0 }, { 2, 2 }, { 0, 2 }, { 6, 3 }, { 7, 3 }, { 0, 0 }
  56         }, {// 1 x { 8, 4 }
  57             { 1, 2 }, { 2, 2 }, { 0, 0 }, { 0, 2 }, { 6, 3 }, { 7, 3 }, { 0, 0 }
  58         }, {// 1 x 2, 2 x 1
  59             { 1, 2 }, { 0, 0 }, { 0, 0 }, { 0, 2 }, { 2, 2 }, { 6, 3 }, { 7, 3 }
  60       }
  61     }
  62 };
  63
  64 static const uint8_t size2index[4][4] = {
  65     { -1, 3, 1, 1 },
  66     {  3, 0, 0, 0 },
  67     {  2, 0, 0, 0 },
  68     {  2, 0, 0, 0 },
  69 };
  70
  71 static const int8_t mv[256][2] = {
  72     {   0,   0 }, {   0,  -1 }, {  -1,   0 }, {   1,   0 }, {   0,   1 }, {  -1,  -1 }, {   1,  -1 }, {  -1,   1 },
  73     {   1,   1 }, {   0,  -2 }, {  -2,   0 }, {   2,   0 }, {   0,   2 }, {  -1,  -2 }, {   1,  -2 }, {  -2,  -1 },
  74     {   2,  -1 }, {  -2,   1 }, {   2,   1 }, {  -1,   2 }, {   1,   2 }, {  -2,  -2 }, {   2,  -2 }, {  -2,   2 },
  75     {   2,   2 }, {   0,  -3 }, {  -3,   0 }, {   3,   0 }, {   0,   3 }, {  -1,  -3 }, {   1,  -3 }, {  -3,  -1 },
  76     {   3,  -1 }, {  -3,   1 }, {   3,   1 }, {  -1,   3 }, {   1,   3 }, {  -2,  -3 }, {   2,  -3 }, {  -3,  -2 },
  77     {   3,  -2 }, {  -3,   2 }, {   3,   2 }, {  -2,   3 }, {   2,   3 }, {   0,  -4 }, {  -4,   0 }, {   4,   0 },
  78     {   0,   4 }, {  -1,  -4 }, {   1,  -4 }, {  -4,  -1 }, {   4,  -1 }, {   4,   1 }, {  -1,   4 }, {   1,   4 },
  79     {  -3,  -3 }, {  -3,   3 }, {   3,   3 }, {  -2,  -4 }, {  -4,  -2 }, {   4,  -2 }, {  -4,   2 }, {  -2,   4 },
  80     {   2,   4 }, {  -3,  -4 }, {   3,  -4 }, {   4,  -3 }, {  -5,   0 }, {  -4,   3 }, {  -3,   4 }, {   3,   4 },
  81     {  -1,  -5 }, {  -5,  -1 }, {  -5,   1 }, {  -1,   5 }, {  -2,  -5 }, {   2,  -5 }, {   5,  -2 }, {   5,   2 },
  82     {  -4,  -4 }, {  -4,   4 }, {  -3,  -5 }, {  -5,  -3 }, {  -5,   3 }, {   3,   5 }, {  -6,   0 }, {   0,   6 },
  83     {  -6,  -1 }, {  -6,   1 }, {   1,   6 }, {   2,  -6 }, {  -6,   2 }, {   2,   6 }, {  -5,  -4 }, {   5,   4 },
  84     {   4,   5 }, {  -6,  -3 }, {   6,   3 }, {  -7,   0 }, {  -1,  -7 }, {   5,  -5 }, {  -7,   1 }, {  -1,   7 },
  85     {   4,  -6 }, {   6,   4 }, {  -2,  -7 }, {  -7,   2 }, {  -3,  -7 }, {   7,  -3 }, {   3,   7 }, {   6,  -5 },
  86     {   0,  -8 }, {  -1,  -8 }, {  -7,  -4 }, {  -8,   1 }, {   4,   7 }, {   2,  -8 }, {  -2,   8 }, {   6,   6 },
  87     {  -8,   3 }, {   5,  -7 }, {  -5,   7 }, {   8,  -4 }, {   0,  -9 }, {  -9,  -1 }, {   1,   9 }, {   7,  -6 },
  88     {  -7,   6 }, {  -5,  -8 }, {  -5,   8 }, {  -9,   3 }, {   9,  -4 }, {   7,  -7 }, {   8,  -6 }, {   6,   8 },
  89     {  10,   1 }, { -10,   2 }, {   9,  -5 }, {  10,  -3 }, {  -8,  -7 }, { -10,  -4 }, {   6,  -9 }, { -11,   0 },
  90     {  11,   1 }, { -11,  -2 }, {  -2,  11 }, {   7,  -9 }, {  -7,   9 }, {  10,   6 }, {  -4,  11 }, {   8,  -9 },
  91     {   8,   9 }, {   5,  11 }, {   7, -10 }, {  12,  -3 }, {  11,   6 }, {  -9,  -9 }, {   8,  10 }, {   5,  12 },
  92     { -11,   7 }, {  13,   2 }, {   6, -12 }, {  10,   9 }, { -11,   8 }, {  -7,  12 }, {   0,  14 }, {  14,  -2 },
  93     {  -9,  11 }, {  -6,  13 }, { -14,  -4 }, {  -5, -14 }, {   5,  14 }, { -15,  -1 }, { -14,  -6 }, {   3, -15 },
  94     {  11, -11 }, {  -7,  14 }, {  -5,  15 }, {   8, -14 }, {  15,   6 }, {   3,  16 }, {   7, -15 }, { -16,   5 },
  95     {   0,  17 }, { -16,  -6 }, { -10,  14 }, { -16,   7 }, {  12,  13 }, { -16,   8 }, { -17,   6 }, { -18,   3 },
  96     {  -7,  17 }, {  15,  11 }, {  16,  10 }, {   2, -19 }, {   3, -19 }, { -11, -16 }, { -18,   8 }, { -19,  -6 },
  97     {   2, -20 }, { -17, -11 }, { -10, -18 }, {   8,  19 }, { -21,  -1 }, { -20,   7 }, {  -4,  21 }, {  21,   5 },
  98     {  15,  16 }, {   2, -22 }, { -10, -20 }, { -22,   5 }, {  20, -11 }, {  -7, -22 }, { -12,  20 }, {  23,  -5 },
  99     {  13, -20 }, {  24,  -2 }, { -15,  19 }, { -11,  22 }, {  16,  19 }, {  23, -10 }, { -18, -18 }, {  -9, -24 },
 100     {  24, -10 }, {  -3,  26 }, { -23,  13 }, { -18, -20 }, {  17,  21 }, {  -4,  27 }, {  27,   6 }, {   1, -28 },
 101     { -11,  26 }, { -17, -23 }, {   7,  28 }, {  11, -27 }, {  29,   5 }, { -23, -19 }, { -28, -11 }, { -21,  22 },
 102     { -30,   7 }, { -17,  26 }, { -27,  16 }, {  13,  29 }, {  19, -26 }, {  10, -31 }, { -14, -30 }, {  20, -27 },
 103     { -29,  18 }, { -16, -31 }, { -28, -22 }, {  21, -30 }, { -25,  28 }, {  26, -29 }, {  25, -32 }, { -32, -32 }
 104 };
 105
 106 /* This is simply the scaled down elementwise product of the standard JPEG
 107  * quantizer table and the AAN premul table. */
 108 static const uint8_t dequant_table[64] = {
 109     16, 15, 13, 19, 24, 31, 28, 17,
 110     17, 23, 25, 31, 36, 63, 45, 21,
 111     18, 24, 27, 37, 52, 59, 49, 20,
 112     16, 28, 34, 40, 60, 80, 51, 20,
 113     18, 31, 48, 66, 68, 86, 56, 21,
 114     19, 38, 56, 59, 64, 64, 48, 20,
 115     27, 48, 55, 55, 56, 51, 35, 15,
 116     20, 35, 34, 32, 31, 22, 15,  8,
 117 };
 118
     /**
      * Block type VLCs for P-frames, built by init_vlcs() from block_type_tab.
      * decode_p_block() indexes them as [1 - (version > 1)][size2index value].
      */
 119 static VLC block_type_vlc[2][4];
 120
 121
     /**
      * Accumulation buffer for a frame that arrives split over several
      * "cfrm" packets (see the cfrm branch of decode_frame()).
      */
 122 typedef struct CFrameBuffer {
 123     unsigned int allocated_size;   ///< capacity of data, maintained by av_fast_realloc()
 124     unsigned int size;             ///< bytes collected so far; 0 marks the slot as free
 125     int id;                        ///< frame id read from the cfrm header; reset to 0 once the frame is complete
 126     uint8_t *data;                 ///< collected payload bytes
 127 } CFrameBuffer;
 128
     /**
      * Private decoder state (avctx->priv_data).
      */
 129 typedef struct FourXContext {
 130     AVCodecContext *avctx;         ///< owning codec context (dimensions, flags, logging)
 131     DSPContext dsp;                ///< provides bswap_buf() and clear_blocks()
 132     AVFrame *last_picture;         ///< reference picture read by P-frame decoding
 133     GetBitContext pre_gb;          ///< ac/dc prefix
 134     GetBitContext gb;              ///< P-frames: block type codes; I-frames: coefficient value bits
 135     GetByteContext g;              ///< P-frame byte stream (motion vector indices)
 136     GetByteContext g2;             ///< P-frame word stream (16-bit values)
 137     int mv[256];                   ///< motion vector index -> offset in 16-bit pixels, see init_mv()
 138     VLC pre_vlc;                   ///< per-frame Huffman table built by read_huffman_tables()
 139     int last_dc;                   ///< DC predictor, reset at the start of every I-frame
 140     DECLARE_ALIGNED(16, int16_t, block)[6][64]; ///< one macroblock: blocks 0-3 luma, 4 and 5 chroma (see idct_put())
 141     void *bitstream_buffer;        ///< scratch buffer holding the byte-swapped bitstream
 142     unsigned int bitstream_buffer_size; ///< allocated size of bitstream_buffer
 143     int version;                   ///< stream version; values > 1 select the mv[] table and the 20-byte P-frame header
 144     CFrameBuffer cfrm[CFRAME_BUFFER_COUNT]; ///< reassembly slots for split ("cfrm") frames
 145 } FourXContext;
 146
 147
 148 #define FIX_1_082392200  70936
 149 #define FIX_1_414213562  92682
 150 #define FIX_1_847759065 121095
 151 #define FIX_2_613125930 171254
 152
 153 #define MULTIPLY(var, const) (((var) * (const)) >> 16)
 154
 155 static void idct(int16_t block[64])
 156 {
 157     int tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
 158     int tmp10, tmp11, tmp12, tmp13;
 159     int z5, z10, z11, z12, z13;
 160     int i;
 161     int temp[64];
 162
 163     for (i = 0; i < 8; i++) {
 164         tmp10 = block[8 * 0 + i] + block[8 * 4 + i];
 165         tmp11 = block[8 * 0 + i] - block[8 * 4 + i];
 166
 167         tmp13 = block[8 * 2 + i] + block[8 * 6 + i];
 168         tmp12 = MULTIPLY(block[8 * 2 + i] - block[8 * 6 + i], FIX_1_414213562) - tmp13;
 169
 170         tmp0 = tmp10 + tmp13;
 171         tmp3 = tmp10 - tmp13;
 172         tmp1 = tmp11 + tmp12;
 173         tmp2 = tmp11 - tmp12;
 174
 175         z13 = block[8 * 5 + i] + block[8 * 3 + i];
 176         z10 = block[8 * 5 + i] - block[8 * 3 + i];
 177         z11 = block[8 * 1 + i] + block[8 * 7 + i];
 178         z12 = block[8 * 1 + i] - block[8 * 7 + i];
 179
 180         tmp7  =          z11 + z13;
 181         tmp11 = MULTIPLY(z11 - z13, FIX_1_414213562);
 182
 183         z5    = MULTIPLY(z10 + z12, FIX_1_847759065);
 184         tmp10 = MULTIPLY(z12,  FIX_1_082392200) - z5;
 185         tmp12 = MULTIPLY(z10, -FIX_2_613125930) + z5;
 186
 187         tmp6 = tmp12 - tmp7;
 188         tmp5 = tmp11 - tmp6;
 189         tmp4 = tmp10 + tmp5;
 190
 191         temp[8 * 0 + i] = tmp0 + tmp7;
 192         temp[8 * 7 + i] = tmp0 - tmp7;
 193         temp[8 * 1 + i] = tmp1 + tmp6;
 194         temp[8 * 6 + i] = tmp1 - tmp6;
 195         temp[8 * 2 + i] = tmp2 + tmp5;
 196         temp[8 * 5 + i] = tmp2 - tmp5;
 197         temp[8 * 4 + i] = tmp3 + tmp4;
 198         temp[8 * 3 + i] = tmp3 - tmp4;
 199     }
 200
 201     for (i = 0; i < 8 * 8; i += 8) {
 202         tmp10 = temp[0 + i] + temp[4 + i];
 203         tmp11 = temp[0 + i] - temp[4 + i];
 204
 205         tmp13 = temp[2 + i] + temp[6 + i];
 206         tmp12 = MULTIPLY(temp[2 + i] - temp[6 + i], FIX_1_414213562) - tmp13;
 207
 208         tmp0 = tmp10 + tmp13;
 209         tmp3 = tmp10 - tmp13;
 210         tmp1 = tmp11 + tmp12;
 211         tmp2 = tmp11 - tmp12;
 212
 213         z13 = temp[5 + i] + temp[3 + i];
 214         z10 = temp[5 + i] - temp[3 + i];
 215         z11 = temp[1 + i] + temp[7 + i];
 216         z12 = temp[1 + i] - temp[7 + i];
 217
 218         tmp7  = z11 + z13;
 219         tmp11 = MULTIPLY(z11 - z13, FIX_1_414213562);
 220
 221         z5    = MULTIPLY(z10 + z12, FIX_1_847759065);
 222         tmp10 = MULTIPLY(z12,  FIX_1_082392200) - z5;
 223         tmp12 = MULTIPLY(z10, -FIX_2_613125930) + z5;
 224
 225         tmp6 = tmp12 - tmp7;
 226         tmp5 = tmp11 - tmp6;
 227         tmp4 = tmp10 + tmp5;
 228
 229         block[0 + i] = (tmp0 + tmp7) >> 6;
 230         block[7 + i] = (tmp0 - tmp7) >> 6;
 231         block[1 + i] = (tmp1 + tmp6) >> 6;
 232         block[6 + i] = (tmp1 - tmp6) >> 6;
 233         block[2 + i] = (tmp2 + tmp5) >> 6;
 234         block[5 + i] = (tmp2 - tmp5) >> 6;
 235         block[4 + i] = (tmp3 + tmp4) >> 6;
 236         block[3 + i] = (tmp3 - tmp4) >> 6;
 237     }
 238 }
 239
 240 static av_cold void init_vlcs(FourXContext *f)
 241 {
 242     static VLC_TYPE table[2][4][32][2];
 243     int i, j;
 244
 245     for (i = 0; i < 2; i++) {
 246         for (j = 0; j < 4; j++) {
 247             block_type_vlc[i][j].table           = table[i][j];
 248             block_type_vlc[i][j].table_allocated = 32;
 249             init_vlc(&block_type_vlc[i][j], BLOCK_TYPE_VLC_BITS, 7,
 250                      &block_type_tab[i][j][0][1], 2, 1,
 251                      &block_type_tab[i][j][0][0], 2, 1,
 252                      INIT_VLC_USE_NEW_STATIC);
 253         }
 254     }
 255 }
 256
 257 static void init_mv(FourXContext *f, int linesize)
 258 {
 259     int i;
 260
 261     for (i = 0; i < 256; i++) {
 262         if (f->version > 1)
 263             f->mv[i] = mv[i][0] + mv[i][1] * linesize / 2;
 264         else
 265             f->mv[i] = (i & 15) - 8 + ((i >> 4) - 8) * linesize / 2;
 266     }
 267 }
 268
     /*
      * LE_CENTRIC_MUL(dst, src, scale, dc): read one 32-bit word from src,
      * compute word * scale + dc and store the result to dst.  mcdc() uses it
      * on uint16_t pointers, so every invocation covers two adjacent 16-bit
      * values.  On big-endian hosts the two 16-bit halves are swapped before
      * the arithmetic and swapped back afterwards.
      */
 269 #if HAVE_BIGENDIAN
 270 #define LE_CENTRIC_MUL(dst, src, scale, dc)             \
 271     {                                                   \
 272         unsigned tmpval = AV_RN32(src);                 \
 273         tmpval = (tmpval << 16) | (tmpval >> 16);       \
 274         tmpval = tmpval * (scale) + (dc);               \
 275         tmpval = (tmpval << 16) | (tmpval >> 16);       \
 276         AV_WN32A(dst, tmpval);                          \
 277     }
 278 #else
 279 #define LE_CENTRIC_MUL(dst, src, scale, dc)              \
 280     {                                                    \
 281         unsigned tmpval = AV_RN32(src) * (scale) + (dc); \
 282         AV_WN32A(dst, tmpval);                           \
 283     }
 284 #endif
 285
 286 static inline void mcdc(uint16_t *dst, uint16_t *src, int log2w,
 287                         int h, int stride, int scale, unsigned dc)
 288 {
 289     int i;
 290     dc *= 0x10001;
 291
 292     switch (log2w) {
 293     case 0:
 294         for (i = 0; i < h; i++) {
 295             dst[0] = scale * src[0] + dc;
 296             if (scale)
 297                 src += stride;
 298             dst += stride;
 299         }
 300         break;
 301     case 1:
 302         for (i = 0; i < h; i++) {
 303             LE_CENTRIC_MUL(dst, src, scale, dc);
 304             if (scale)
 305                 src += stride;
 306             dst += stride;
 307         }
 308         break;
 309     case 2:
 310         for (i = 0; i < h; i++) {
 311             LE_CENTRIC_MUL(dst, src, scale, dc);
 312             LE_CENTRIC_MUL(dst + 2, src + 2, scale, dc);
 313             if (scale)
 314                 src += stride;
 315             dst += stride;
 316         }
 317         break;
 318     case 3:
 319         for (i = 0; i < h; i++) {
 320             LE_CENTRIC_MUL(dst,     src,     scale, dc);
 321             LE_CENTRIC_MUL(dst + 2, src + 2, scale, dc);
 322             LE_CENTRIC_MUL(dst + 4, src + 4, scale, dc);
 323             LE_CENTRIC_MUL(dst + 6, src + 6, scale, dc);
 324             if (scale)
 325                 src += stride;
 326             dst += stride;
 327         }
 328         break;
 329     default:
 330         break;
 331     }
 332 }
 333
 334 static int decode_p_block(FourXContext *f, uint16_t *dst, uint16_t *src,
 335                           int log2w, int log2h, int stride)
 336 {
 337     const int index = size2index[log2h][log2w];
 338     const int h     = 1 << log2h;
 339     int code        = get_vlc2(&f->gb,
 340                                block_type_vlc[1 - (f->version > 1)][index].table,
 341                                BLOCK_TYPE_VLC_BITS, 1);
 342     uint16_t *start = (uint16_t *)f->last_picture->data[0];
 343     uint16_t *end   = start + stride * (f->avctx->height - h + 1) - (1 << log2w);
 344     int ret;
 345
 346     if (code < 0 || code > 6 || log2w < 0)
 347         return AVERROR_INVALIDDATA;
 348
 349     if (code == 0) {
 350         src += f->mv[bytestream2_get_byte(&f->g)];
 351         if (start > src || src > end) {
 352             av_log(f->avctx, AV_LOG_ERROR, "mv out of pic\n");
 353             return AVERROR_INVALIDDATA;
 354         }
 355         mcdc(dst, src, log2w, h, stride, 1, 0);
 356     } else if (code == 1) {
 357         log2h--;
 358         if ((ret = decode_p_block(f, dst, src, log2w, log2h, stride)) < 0)
 359             return ret;
 360         if ((ret = decode_p_block(f, dst + (stride << log2h),
 361                                   src + (stride << log2h),
 362                                   log2w, log2h, stride)) < 0)
 363             return ret;
 364     } else if (code == 2) {
 365         log2w--;
 366         if ((ret = decode_p_block(f, dst , src, log2w, log2h, stride)) < 0)
 367             return ret;
 368         if ((ret = decode_p_block(f, dst + (1 << log2w),
 369                                   src + (1 << log2w),
 370                                   log2w, log2h, stride)) < 0)
 371             return ret;
 372     } else if (code == 3 && f->version < 2) {
 373         mcdc(dst, src, log2w, h, stride, 1, 0);
 374     } else if (code == 4) {
 375         src += f->mv[bytestream2_get_byte(&f->g)];
 376         if (start > src || src > end) {
 377             av_log(f->avctx, AV_LOG_ERROR, "mv out of pic\n");
 378             return AVERROR_INVALIDDATA;
 379         }
 380         mcdc(dst, src, log2w, h, stride, 1, bytestream2_get_le16(&f->g2));
 381     } else if (code == 5) {
 382         mcdc(dst, src, log2w, h, stride, 0, bytestream2_get_le16(&f->g2));
 383     } else if (code == 6) {
 384         if (log2w) {
 385             dst[0]      = bytestream2_get_le16(&f->g2);
 386             dst[1]      = bytestream2_get_le16(&f->g2);
 387         } else {
 388             dst[0]      = bytestream2_get_le16(&f->g2);
 389             dst[stride] = bytestream2_get_le16(&f->g2);
 390         }
 391     }
 392     return 0;
 393 }
 394
 395 static int decode_p_frame(FourXContext *f, AVFrame *frame,
 396                           const uint8_t *buf, int length)
 397 {
 398     int x, y;
 399     const int width  = f->avctx->width;
 400     const int height = f->avctx->height;
 401     uint16_t *dst    = (uint16_t *)frame->data[0];
 402     const int stride =             frame->linesize[0] >> 1;
 403     uint16_t *src;
 404     unsigned int bitstream_size, bytestream_size, wordstream_size, extra,
 405                  bytestream_offset, wordstream_offset;
 406     int ret;
 407
 408     if (!f->last_picture->data[0]) {
 409         if ((ret = ff_get_buffer(f->avctx, f->last_picture,
 410                                  AV_GET_BUFFER_FLAG_REF)) < 0) {
 411             av_log(f->avctx, AV_LOG_ERROR, "get_buffer() failed\n");
 412             return ret;
 413         }
 414         memset(f->last_picture->data[0], 0,
 415                f->avctx->height * FFABS(f->last_picture->linesize[0]));
 416     }
 417
 418     src = (uint16_t *)f->last_picture->data[0];
 419
 420     if (f->version > 1) {
 421         if (length < 20)
 422             return AVERROR_INVALIDDATA;
 423         extra           = 20;
 424         bitstream_size  = AV_RL32(buf + 8);
 425         wordstream_size = AV_RL32(buf + 12);
 426         bytestream_size = AV_RL32(buf + 16);
 427     } else {
 428         extra           = 0;
 429         bitstream_size  = AV_RL16(buf - 4);
 430         wordstream_size = AV_RL16(buf - 2);
 431         bytestream_size = FFMAX(length - bitstream_size - wordstream_size, 0);
 432     }
 433
 434     if (bitstream_size + bytestream_size + wordstream_size + extra != length
 435         || bitstream_size  > (1 << 26)
 436         || bytestream_size > (1 << 26)
 437         || wordstream_size > (1 << 26)) {
 438         av_log(f->avctx, AV_LOG_ERROR, "lengths %d %d %d %d\n",
 439                bitstream_size, bytestream_size, wordstream_size,
 440                bitstream_size + bytestream_size + wordstream_size - length);
 441         return AVERROR_INVALIDDATA;
 442     }
 443
 444     av_fast_malloc(&f->bitstream_buffer, &f->bitstream_buffer_size,
 445                    bitstream_size + FF_INPUT_BUFFER_PADDING_SIZE);
 446     if (!f->bitstream_buffer)
 447         return AVERROR(ENOMEM);
 448     f->dsp.bswap_buf(f->bitstream_buffer, (const uint32_t*)(buf + extra),
 449                      bitstream_size / 4);
 450     memset((uint8_t*)f->bitstream_buffer + bitstream_size,
 451            0, FF_INPUT_BUFFER_PADDING_SIZE);
 452     init_get_bits(&f->gb, f->bitstream_buffer, 8 * bitstream_size);
 453
 454     wordstream_offset = extra + bitstream_size;
 455     bytestream_offset = extra + bitstream_size + wordstream_size;
 456     bytestream2_init(&f->g2, buf + wordstream_offset,
 457                      length - wordstream_offset);
 458     bytestream2_init(&f->g, buf + bytestream_offset,
 459                      length - bytestream_offset);
 460
 461     init_mv(f, frame->linesize[0]);
 462
 463     for (y = 0; y < height; y += 8) {
 464         for (x = 0; x < width; x += 8)
 465             if ((ret = decode_p_block(f, dst + x, src + x, 3, 3, stride)) < 0)
 466                 return ret;
 467         src += 8 * stride;
 468         dst += 8 * stride;
 469     }
 470
 471     return 0;
 472 }
 473
 474 /**
 475  * decode block and dequantize.
 476  * Note this is almost identical to MJPEG.
 477  */
 478 static int decode_i_block(FourXContext *f, int16_t *block)
 479 {
 480     int code, i, j, level, val;
 481
 482     /* DC coef */
 483     val = get_vlc2(&f->pre_gb, f->pre_vlc.table, ACDC_VLC_BITS, 3);
 484     if (val >> 4)
 485         av_log(f->avctx, AV_LOG_ERROR, "error dc run != 0\n");
 486
 487     if (val)
 488         val = get_xbits(&f->gb, val);
 489
 490     val        = val * dequant_table[0] + f->last_dc;
 491     f->last_dc = block[0] = val;
 492     /* AC coefs */
 493     i = 1;
 494     for (;;) {
 495         code = get_vlc2(&f->pre_gb, f->pre_vlc.table, ACDC_VLC_BITS, 3);
 496
 497         /* EOB */
 498         if (code == 0)
 499             break;
 500         if (code == 0xf0) {
 501             i += 16;
 502         } else {
 503             level = get_xbits(&f->gb, code & 0xf);
 504             i    += code >> 4;
 505             if (i >= 64) {
 506                 av_log(f->avctx, AV_LOG_ERROR, "run %d oveflow\n", i);
 507                 return 0;
 508             }
 509
 510             j = ff_zigzag_direct[i];
 511             block[j] = level * dequant_table[j];
 512             i++;
 513             if (i >= 64)
 514                 break;
 515         }
 516     }
 517
 518     return 0;
 519 }
 520
 521 static inline void idct_put(FourXContext *f, AVFrame *frame, int x, int y)
 522 {
 523     int16_t (*block)[64] = f->block;
 524     int stride           = frame->linesize[0] >> 1;
 525     int i;
 526     uint16_t *dst = ((uint16_t*)frame->data[0]) + y * stride + x;
 527
 528     for (i = 0; i < 4; i++) {
 529         block[i][0] += 0x80 * 8 * 8;
 530         idct(block[i]);
 531     }
 532
 533     if (!(f->avctx->flags & CODEC_FLAG_GRAY)) {
 534         for (i = 4; i < 6; i++)
 535             idct(block[i]);
 536     }
 537
 538     /* Note transform is:
 539      * y  = ( 1b + 4g + 2r) / 14
 540      * cb = ( 3b - 2g - 1r) / 14
 541      * cr = (-1b - 4g + 5r) / 14 */
 542     for (y = 0; y < 8; y++) {
 543         for (x = 0; x < 8; x++) {
 544             int16_t *temp = block[(x >> 2) + 2 * (y >> 2)] +
 545                             2 * (x & 3) + 2 * 8 * (y & 3); // FIXME optimize
 546             int cb = block[4][x + 8 * y];
 547             int cr = block[5][x + 8 * y];
 548             int cg = (cb + cr) >> 1;
 549             int y;
 550
 551             cb += cb;
 552
 553             y               = temp[0];
 554             dst[0]          = ((y + cb) >> 3) + (((y - cg) & 0xFC) << 3) + (((y + cr) & 0xF8) << 8);
 555             y               = temp[1];
 556             dst[1]          = ((y + cb) >> 3) + (((y - cg) & 0xFC) << 3) + (((y + cr) & 0xF8) << 8);
 557             y               = temp[8];
 558             dst[stride]     = ((y + cb) >> 3) + (((y - cg) & 0xFC) << 3) + (((y + cr) & 0xF8) << 8);
 559             y               = temp[9];
 560             dst[1 + stride] = ((y + cb) >> 3) + (((y - cg) & 0xFC) << 3) + (((y + cr) & 0xF8) << 8);
 561             dst            += 2;
 562         }
 563         dst += 2 * stride - 2 * 8;
 564     }
 565 }
 566
 567 static int decode_i_mb(FourXContext *f)
 568 {
 569     int ret;
 570     int i;
 571
 572     f->dsp.clear_blocks(f->block[0]);
 573
 574     for (i = 0; i < 6; i++)
 575         if ((ret = decode_i_block(f, f->block[i])) < 0)
 576             return ret;
 577
 578     return 0;
 579 }
 580
 581 static const uint8_t *read_huffman_tables(FourXContext *f,
 582                                           const uint8_t * const buf,
 583                                           int len)
 584 {
 585     int frequency[512] = { 0 };
 586     uint8_t flag[512];
 587     int up[512];
 588     uint8_t len_tab[257];
 589     int bits_tab[257];
 590     int start, end;
 591     const uint8_t *ptr = buf;
 592     int j;
 593
 594     memset(up, -1, sizeof(up));
 595
 596     start = *ptr++;
 597     end   = *ptr++;
 598     for (;;) {
 599         int i;
 600
 601         len -= end - start + 1;
 602
 603         if (end < start || len < 0)
 604             return NULL;
 605
 606         for (i = start; i <= end; i++)
 607             frequency[i] = *ptr++;
 608         start = *ptr++;
 609         if (start == 0)
 610             break;
 611
 612         if (--len < 0)
 613             return NULL;
 614
 615         end = *ptr++;
 616     }
 617     frequency[256] = 1;
 618
 619     while ((ptr - buf) & 3)
 620         ptr++; // 4byte align
 621
 622     for (j = 257; j < 512; j++) {
 623         int min_freq[2] = { 256 * 256, 256 * 256 };
 624         int smallest[2] = { 0, 0 };
 625         int i;
 626         for (i = 0; i < j; i++) {
 627             if (frequency[i] == 0)
 628                 continue;
 629             if (frequency[i] < min_freq[1]) {
 630                 if (frequency[i] < min_freq[0]) {
 631                     min_freq[1] = min_freq[0];
 632                     smallest[1] = smallest[0];
 633                     min_freq[0] = frequency[i];
 634                     smallest[0] = i;
 635                 } else {
 636                     min_freq[1] = frequency[i];
 637                     smallest[1] = i;
 638                 }
 639             }
 640         }
 641         if (min_freq[1] == 256 * 256)
 642             break;
 643
 644         frequency[j]           = min_freq[0] + min_freq[1];
 645         flag[smallest[0]]      = 0;
 646         flag[smallest[1]]      = 1;
 647         up[smallest[0]]        =
 648         up[smallest[1]]        = j;
 649         frequency[smallest[0]] = frequency[smallest[1]] = 0;
 650     }
 651
 652     for (j = 0; j < 257; j++) {
 653         int node, len = 0, bits = 0;
 654
 655         for (node = j; up[node] != -1; node = up[node]) {
 656             bits += flag[node] << len;
 657             len++;
 658             if (len > 31)
 659                 // can this happen at all ?
 660                 av_log(f->avctx, AV_LOG_ERROR,
 661                        "vlc length overflow\n");
 662         }
 663
 664         bits_tab[j] = bits;
 665         len_tab[j]  = len;
 666     }
 667
 668     if (init_vlc(&f->pre_vlc, ACDC_VLC_BITS, 257, len_tab, 1, 1,
 669                  bits_tab, 4, 4, 0))
 670         return NULL;
 671
 672     return ptr;
 673 }
 674
 675 static int mix(int c0, int c1)
 676 {
 677     int blue  =  2 * (c0 & 0x001F) + (c1 & 0x001F);
 678     int green = (2 * (c0 & 0x03E0) + (c1 & 0x03E0)) >> 5;
 679     int red   =  2 * (c0 >> 10)    + (c1 >> 10);
 680     return red / 3 * 1024 + green / 3 * 32 + blue / 3;
 681 }
 682
 683 static int decode_i2_frame(FourXContext *f, AVFrame *frame, const uint8_t *buf, int length)
 684 {
 685     int x, y, x2, y2;
 686     const int width  = f->avctx->width;
 687     const int height = f->avctx->height;
 688     const int mbs    = (FFALIGN(width, 16) >> 4) * (FFALIGN(height, 16) >> 4);
 689     uint16_t *dst    = (uint16_t*)frame->data[0];
 690     const int stride =            frame->linesize[0]>>1;
 691     GetByteContext g3;
 692
 693     if (length < mbs * 8) {
 694         av_log(f->avctx, AV_LOG_ERROR, "packet size too small\n");
 695         return AVERROR_INVALIDDATA;
 696     }
 697     bytestream2_init(&g3, buf, length);
 698
 699     for (y = 0; y < height; y += 16) {
 700         for (x = 0; x < width; x += 16) {
 701             unsigned int color[4] = { 0 }, bits;
 702             // warning following is purely guessed ...
 703             color[0] = bytestream2_get_le16u(&g3);
 704             color[1] = bytestream2_get_le16u(&g3);
 705
 706             if (color[0] & 0x8000)
 707                 av_log(f->avctx, AV_LOG_ERROR, "unk bit 1\n");
 708             if (color[1] & 0x8000)
 709                 av_log(f->avctx, AV_LOG_ERROR, "unk bit 2\n");
 710
 711             color[2] = mix(color[0], color[1]);
 712             color[3] = mix(color[1], color[0]);
 713
 714             bits = bytestream2_get_le32u(&g3);
 715             for (y2 = 0; y2 < 16; y2++) {
 716                 for (x2 = 0; x2 < 16; x2++) {
 717                     int index = 2 * (x2 >> 2) + 8 * (y2 >> 2);
 718                     dst[y2 * stride + x2] = color[(bits >> index) & 3];
 719                 }
 720             }
 721             dst += 16;
 722         }
 723         dst += 16 * stride - x;
 724     }
 725
 726     return 0;
 727 }
 728
 729 static int decode_i_frame(FourXContext *f, AVFrame *frame, const uint8_t *buf, int length)
 730 {
 731     int x, y, ret;
 732     const int width  = f->avctx->width;
 733     const int height = f->avctx->height;
 734     const unsigned int bitstream_size = AV_RL32(buf);
 735     int token_count av_unused;
 736     unsigned int prestream_size;
 737     const uint8_t *prestream;
 738
 739     if (length < bitstream_size + 12) {
 740         av_log(f->avctx, AV_LOG_ERROR, "packet size too small\n");
 741         return AVERROR_INVALIDDATA;
 742     }
 743
 744     token_count    =     AV_RL32(buf + bitstream_size + 8);
 745     prestream_size = 4 * AV_RL32(buf + bitstream_size + 4);
 746     prestream      =             buf + bitstream_size + 12;
 747
 748     if (prestream_size + bitstream_size + 12 != length
 749         || bitstream_size > (1 << 26)
 750         || prestream_size > (1 << 26)) {
 751         av_log(f->avctx, AV_LOG_ERROR, "size mismatch %d %d %d\n",
 752                prestream_size, bitstream_size, length);
 753         return AVERROR_INVALIDDATA;
 754     }
 755
 756     prestream = read_huffman_tables(f, prestream, prestream_size);
 757     if (!prestream) {
 758         av_log(f->avctx, AV_LOG_ERROR, "Error reading Huffman tables.\n");
 759         return AVERROR_INVALIDDATA;
 760     }
 761
 762     init_get_bits(&f->gb, buf + 4, 8 * bitstream_size);
 763
 764     prestream_size = length + buf - prestream;
 765
 766     av_fast_malloc(&f->bitstream_buffer, &f->bitstream_buffer_size,
 767                    prestream_size + FF_INPUT_BUFFER_PADDING_SIZE);
 768     if (!f->bitstream_buffer)
 769         return AVERROR(ENOMEM);
 770     f->dsp.bswap_buf(f->bitstream_buffer, (const uint32_t*)prestream,
 771                      prestream_size / 4);
 772     memset((uint8_t*)f->bitstream_buffer + prestream_size,
 773            0, FF_INPUT_BUFFER_PADDING_SIZE);
 774     init_get_bits(&f->pre_gb, f->bitstream_buffer, 8 * prestream_size);
 775
 776     f->last_dc = 0 * 128 * 8 * 8;
 777
 778     for (y = 0; y < height; y += 16) {
 779         for (x = 0; x < width; x += 16) {
 780             if ((ret = decode_i_mb(f)) < 0)
 781                 return ret;
 782
 783             idct_put(f, frame, x, y);
 784         }
 785     }
 786
 787     if (get_vlc2(&f->pre_gb, f->pre_vlc.table, ACDC_VLC_BITS, 3) != 256)
 788         av_log(f->avctx, AV_LOG_ERROR, "end mismatch\n");
 789
 790     return 0;
 791 }
 792
 793 static int decode_frame(AVCodecContext *avctx, void *data,
 794                         int *got_frame, AVPacket *avpkt)
 795 {
 796     const uint8_t *buf    = avpkt->data;
 797     int buf_size          = avpkt->size;
 798     FourXContext *const f = avctx->priv_data;
 799     AVFrame *picture      = data;
 800     int i, frame_4cc, frame_size, ret;
 801
 802     if (buf_size < 20)
 803         return AVERROR_INVALIDDATA;
 804
 805     if (buf_size < AV_RL32(buf + 4) + 8) {
 806         av_log(f->avctx, AV_LOG_ERROR, "size mismatch %d %d\n",
 807                buf_size, AV_RL32(buf + 4));
 808         return AVERROR_INVALIDDATA;
 809     }
 810
 811     frame_4cc = AV_RL32(buf);
 812
 813     if (frame_4cc == AV_RL32("cfrm")) {
 814         int free_index       = -1;
 815         int id, whole_size;
 816         const int data_size  = buf_size - 20;
 817         CFrameBuffer *cfrm;
 818
 819         if (data_size < 0)
 820             return AVERROR_INVALIDDATA;
 821
 822         id         = AV_RL32(buf + 12);
 823         whole_size = AV_RL32(buf + 16);
 824
 825         for (i = 0; i < CFRAME_BUFFER_COUNT; i++)
 826             if (f->cfrm[i].id && f->cfrm[i].id < avctx->frame_number)
 827                 av_log(f->avctx, AV_LOG_ERROR, "lost c frame %d\n",
 828                        f->cfrm[i].id);
 829
 830         for (i = 0; i < CFRAME_BUFFER_COUNT; i++) {
 831             if (f->cfrm[i].id == id)
 832                 break;
 833             if (f->cfrm[i].size == 0)
 834                 free_index = i;
 835         }
 836
 837         if (i >= CFRAME_BUFFER_COUNT) {
 838             i             = free_index;
 839             f->cfrm[i].id = id;
 840         }
 841         cfrm = &f->cfrm[i];
 842
 843         cfrm->data = av_fast_realloc(cfrm->data, &cfrm->allocated_size,
 844                                      cfrm->size + data_size + FF_INPUT_BUFFER_PADDING_SIZE);
 845         // explicit check needed as memcpy below might not catch a NULL
 846         if (!cfrm->data) {
 847             av_log(f->avctx, AV_LOG_ERROR, "realloc failure");
 848             return AVERROR(ENOMEM);
 849         }
 850
 851         memcpy(cfrm->data + cfrm->size, buf + 20, data_size);
 852         cfrm->size += data_size;
 853
 854         if (cfrm->size >= whole_size) {
 855             buf        = cfrm->data;
 856             frame_size = cfrm->size;
 857
 858             if (id != avctx->frame_number)
 859                 av_log(f->avctx, AV_LOG_ERROR, "cframe id mismatch %d %d\n",
 860                        id, avctx->frame_number);
 861
 862             if (f->version <= 1)
 863                 return AVERROR_INVALIDDATA;
 864
 865             cfrm->size = cfrm->id = 0;
 866             frame_4cc  = AV_RL32("pfrm");
 867         } else
 868             return buf_size;
 869     } else {
 870         buf        = buf      + 12;
 871         frame_size = buf_size - 12;
 872     }
 873
 874     // alternatively we would have to use our own buffer management
 875     avctx->flags |= CODEC_FLAG_EMU_EDGE;
 876
 877     if ((ret = ff_get_buffer(avctx, picture, AV_GET_BUFFER_FLAG_REF)) < 0) {
 878         av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
 879         return ret;
 880     }
 881
 882     if (frame_4cc == AV_RL32("ifr2")) {
 883         picture->pict_type = AV_PICTURE_TYPE_I;
 884         if ((ret = decode_i2_frame(f, picture, buf - 4, frame_size + 4)) < 0)
 885             return ret;
 886     } else if (frame_4cc == AV_RL32("ifrm")) {
 887         picture->pict_type = AV_PICTURE_TYPE_I;
 888         if ((ret = decode_i_frame(f, picture, buf, frame_size)) < 0)
 889             return ret;
 890     } else if (frame_4cc == AV_RL32("pfrm") || frame_4cc == AV_RL32("pfr2")) {
 891         picture->pict_type = AV_PICTURE_TYPE_P;
 892         if ((ret = decode_p_frame(f, picture, buf, frame_size)) < 0)
 893             return ret;
 894     } else if (frame_4cc == AV_RL32("snd_")) {
 895         av_log(avctx, AV_LOG_ERROR, "ignoring snd_ chunk length:%d\n",
 896                buf_size);
 897     } else {
 898         av_log(avctx, AV_LOG_ERROR, "ignoring unknown chunk length:%d\n",
 899                buf_size);
 900     }
 901
 902     picture->key_frame = picture->pict_type == AV_PICTURE_TYPE_I;
 903
 904     av_frame_unref(f->last_picture);
 905     if ((ret = av_frame_ref(f->last_picture, picture)) < 0)
 906         return ret;
 907     *got_frame = 1;
 908
 909     emms_c();
 910
 911     return buf_size;
 912 }
 913
 914 static av_cold int decode_init(AVCodecContext *avctx)
 915 {
 916     FourXContext * const f = avctx->priv_data;
 917
 918     if (avctx->extradata_size != 4 || !avctx->extradata) {
 919         av_log(avctx, AV_LOG_ERROR, "extradata wrong or missing\n");
 920         return 1;
 921     }
 922
 923     f->version = AV_RL32(avctx->extradata) >> 16;
 924     ff_dsputil_init(&f->dsp, avctx);
 925     f->avctx = avctx;
 926     init_vlcs(f);
 927
 928     if (f->version > 2)
 929         avctx->pix_fmt = AV_PIX_FMT_RGB565;
 930     else
 931         avctx->pix_fmt = AV_PIX_FMT_BGR555;
 932
 933     f->last_picture = av_frame_alloc();
 934     if (!f->last_picture)
 935         return AVERROR(ENOMEM);
 936
 937     return 0;
 938 }
 939
 940
 941 static av_cold int decode_end(AVCodecContext *avctx)
 942 {
 943     FourXContext * const f = avctx->priv_data;
 944     int i;
 945
 946     av_freep(&f->bitstream_buffer);
 947     f->bitstream_buffer_size = 0;
 948     for (i = 0; i < CFRAME_BUFFER_COUNT; i++) {
 949         av_freep(&f->cfrm[i].data);
 950         f->cfrm[i].allocated_size = 0;
 951     }
 952     ff_free_vlc(&f->pre_vlc);
 953     av_frame_free(&f->last_picture);
 954
 955     return 0;
 956 }
 957
 958 AVCodec ff_fourxm_decoder = {
 959     .name           = "4xm",
 960     .type           = AVMEDIA_TYPE_VIDEO,
 961     .id             = AV_CODEC_ID_4XM,
 962     .priv_data_size = sizeof(FourXContext),
 963     .init           = decode_init,
 964     .close          = decode_end,
 965     .decode         = decode_frame,
 966     .capabilities   = CODEC_CAP_DR1,
 967     .long_name      = NULL_IF_CONFIG_SMALL("4X Movie"),
 968 };