git.sesse.net Git - ffmpeg/blob - libavcodec/4xm.c

   1 /*
   2  * 4XM codec
   3  * Copyright (c) 2003 Michael Niedermayer
   4  *
   5  * This file is part of FFmpeg.
   6  *
   7  * FFmpeg is free software; you can redistribute it and/or
   8  * modify it under the terms of the GNU Lesser General Public
   9  * License as published by the Free Software Foundation; either
  10  * version 2.1 of the License, or (at your option) any later version.
  11  *
  12  * FFmpeg is distributed in the hope that it will be useful,
  13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  15  * Lesser General Public License for more details.
  16  *
  17  * You should have received a copy of the GNU Lesser General Public
  18  * License along with FFmpeg; if not, write to the Free Software
  19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20  */
  21
  22 /**
  23  * @file
  24  * 4XM codec.
  25  */
  26
  27 #include "libavutil/intreadwrite.h"
  28 #include "avcodec.h"
  29 #include "bytestream.h"
  30 #include "dsputil.h"
  31 #include "get_bits.h"
  32
  33 //#undef NDEBUG
  34 //#include <assert.h>
  35
/* Table index width (in bits) passed to init_vlc()/get_vlc2() for the
 * block type VLCs. */
#define BLOCK_TYPE_VLC_BITS 5
/* Table index width (in bits) passed to init_vlc()/get_vlc2() for the
 * AC/DC prefix VLC. */
#define ACDC_VLC_BITS 9

/* Number of CFrameBuffer slots held in FourXContext.cfrm. */
#define CFRAME_BUFFER_COUNT 100
  40
/*
 * { code, length } pairs used to build the block type VLCs.
 * Indexed as [table set][shape class][block type][0 = code, 1 = length].
 * decode_p_block() selects table set 0 when version > 1 and set 1 otherwise;
 * the shape class comes from size2index[][].
 * Only 7 of the 8 slots of each row are initialized explicitly (the last one
 * is implicitly zero) and init_vlcs() passes 7 entries per row to init_vlc().
 * NOTE(review): a length of 0 presumably marks a block type that has no code
 * for that shape class - confirm against init_vlc().
 */
static const uint8_t block_type_tab[2][4][8][2] = {
    {
        {    // { 8, 4, 2 } x { 8, 4, 2}
            { 0, 1 }, { 2, 2 }, { 6, 3 }, { 14, 4 }, { 30, 5 }, { 31, 5 }, { 0, 0 }
        }, { // { 8, 4 } x 1
            { 0, 1 }, { 0, 0 }, { 2, 2 }, { 6, 3 }, { 14, 4 }, { 15, 4 }, { 0, 0 }
        }, { // 1 x { 8, 4 }
            { 0, 1 }, { 2, 2 }, { 0, 0 }, { 6, 3 }, { 14, 4 }, { 15, 4 }, { 0, 0 }
        }, { // 1 x 2, 2 x 1
            { 0, 1 }, { 0, 0 }, { 0, 0 }, { 2, 2 }, { 6, 3 }, { 14, 4 }, { 15, 4 }
        }
    }, {
        {   // { 8, 4, 2 } x { 8, 4, 2}
            { 1, 2 }, { 4, 3 }, { 5, 3 }, { 0, 2 }, { 6, 3 }, { 7, 3 }, { 0, 0 }
        }, {// { 8, 4 } x 1
            { 1, 2 }, { 0, 0 }, { 2, 2 }, { 0, 2 }, { 6, 3 }, { 7, 3 }, { 0, 0 }
        }, {// 1 x { 8, 4 }
            { 1, 2 }, { 2, 2 }, { 0, 0 }, { 0, 2 }, { 6, 3 }, { 7, 3 }, { 0, 0 }
        }, {// 1 x 2, 2 x 1
            { 1, 2 }, { 0, 0 }, { 0, 0 }, { 0, 2 }, { 2, 2 }, { 6, 3 }, { 7, 3 }
      }
    }
};
  64
/*
 * Maps a block size, given as [log2h][log2w], to the shape class used as the
 * second index of block_type_tab / block_type_vlc (see decode_p_block()).
 * Note that the -1 at [0][0] is stored in a uint8_t and therefore reads back
 * as 255.
 * NOTE(review): that entry looks like an "invalid" marker for 1x1 blocks -
 * confirm that it can never be reached.
 */
static const uint8_t size2index[4][4] = {
    { -1, 3, 1, 1 },
    {  3, 0, 0, 0 },
    {  2, 0, 0, 0 },
    {  2, 0, 0, 0 },
};
  71
/*
 * Motion vector table: entry i is a pair of displacements.
 * init_mv() turns each pair into a single pixel offset,
 * mv[i][0] + mv[i][1] * (linesize / 2), when the stream version is > 1;
 * i.e. element 0 is the horizontal and element 1 the vertical component.
 */
static const int8_t mv[256][2] = {
    {   0,   0 }, {   0,  -1 }, {  -1,   0 }, {   1,   0 }, {   0,   1 }, {  -1,  -1 }, {   1,  -1 }, {  -1,   1 },
    {   1,   1 }, {   0,  -2 }, {  -2,   0 }, {   2,   0 }, {   0,   2 }, {  -1,  -2 }, {   1,  -2 }, {  -2,  -1 },
    {   2,  -1 }, {  -2,   1 }, {   2,   1 }, {  -1,   2 }, {   1,   2 }, {  -2,  -2 }, {   2,  -2 }, {  -2,   2 },
    {   2,   2 }, {   0,  -3 }, {  -3,   0 }, {   3,   0 }, {   0,   3 }, {  -1,  -3 }, {   1,  -3 }, {  -3,  -1 },
    {   3,  -1 }, {  -3,   1 }, {   3,   1 }, {  -1,   3 }, {   1,   3 }, {  -2,  -3 }, {   2,  -3 }, {  -3,  -2 },
    {   3,  -2 }, {  -3,   2 }, {   3,   2 }, {  -2,   3 }, {   2,   3 }, {   0,  -4 }, {  -4,   0 }, {   4,   0 },
    {   0,   4 }, {  -1,  -4 }, {   1,  -4 }, {  -4,  -1 }, {   4,  -1 }, {   4,   1 }, {  -1,   4 }, {   1,   4 },
    {  -3,  -3 }, {  -3,   3 }, {   3,   3 }, {  -2,  -4 }, {  -4,  -2 }, {   4,  -2 }, {  -4,   2 }, {  -2,   4 },
    {   2,   4 }, {  -3,  -4 }, {   3,  -4 }, {   4,  -3 }, {  -5,   0 }, {  -4,   3 }, {  -3,   4 }, {   3,   4 },
    {  -1,  -5 }, {  -5,  -1 }, {  -5,   1 }, {  -1,   5 }, {  -2,  -5 }, {   2,  -5 }, {   5,  -2 }, {   5,   2 },
    {  -4,  -4 }, {  -4,   4 }, {  -3,  -5 }, {  -5,  -3 }, {  -5,   3 }, {   3,   5 }, {  -6,   0 }, {   0,   6 },
    {  -6,  -1 }, {  -6,   1 }, {   1,   6 }, {   2,  -6 }, {  -6,   2 }, {   2,   6 }, {  -5,  -4 }, {   5,   4 },
    {   4,   5 }, {  -6,  -3 }, {   6,   3 }, {  -7,   0 }, {  -1,  -7 }, {   5,  -5 }, {  -7,   1 }, {  -1,   7 },
    {   4,  -6 }, {   6,   4 }, {  -2,  -7 }, {  -7,   2 }, {  -3,  -7 }, {   7,  -3 }, {   3,   7 }, {   6,  -5 },
    {   0,  -8 }, {  -1,  -8 }, {  -7,  -4 }, {  -8,   1 }, {   4,   7 }, {   2,  -8 }, {  -2,   8 }, {   6,   6 },
    {  -8,   3 }, {   5,  -7 }, {  -5,   7 }, {   8,  -4 }, {   0,  -9 }, {  -9,  -1 }, {   1,   9 }, {   7,  -6 },
    {  -7,   6 }, {  -5,  -8 }, {  -5,   8 }, {  -9,   3 }, {   9,  -4 }, {   7,  -7 }, {   8,  -6 }, {   6,   8 },
    {  10,   1 }, { -10,   2 }, {   9,  -5 }, {  10,  -3 }, {  -8,  -7 }, { -10,  -4 }, {   6,  -9 }, { -11,   0 },
    {  11,   1 }, { -11,  -2 }, {  -2,  11 }, {   7,  -9 }, {  -7,   9 }, {  10,   6 }, {  -4,  11 }, {   8,  -9 },
    {   8,   9 }, {   5,  11 }, {   7, -10 }, {  12,  -3 }, {  11,   6 }, {  -9,  -9 }, {   8,  10 }, {   5,  12 },
    { -11,   7 }, {  13,   2 }, {   6, -12 }, {  10,   9 }, { -11,   8 }, {  -7,  12 }, {   0,  14 }, {  14,  -2 },
    {  -9,  11 }, {  -6,  13 }, { -14,  -4 }, {  -5, -14 }, {   5,  14 }, { -15,  -1 }, { -14,  -6 }, {   3, -15 },
    {  11, -11 }, {  -7,  14 }, {  -5,  15 }, {   8, -14 }, {  15,   6 }, {   3,  16 }, {   7, -15 }, { -16,   5 },
    {   0,  17 }, { -16,  -6 }, { -10,  14 }, { -16,   7 }, {  12,  13 }, { -16,   8 }, { -17,   6 }, { -18,   3 },
    {  -7,  17 }, {  15,  11 }, {  16,  10 }, {   2, -19 }, {   3, -19 }, { -11, -16 }, { -18,   8 }, { -19,  -6 },
    {   2, -20 }, { -17, -11 }, { -10, -18 }, {   8,  19 }, { -21,  -1 }, { -20,   7 }, {  -4,  21 }, {  21,   5 },
    {  15,  16 }, {   2, -22 }, { -10, -20 }, { -22,   5 }, {  20, -11 }, {  -7, -22 }, { -12,  20 }, {  23,  -5 },
    {  13, -20 }, {  24,  -2 }, { -15,  19 }, { -11,  22 }, {  16,  19 }, {  23, -10 }, { -18, -18 }, {  -9, -24 },
    {  24, -10 }, {  -3,  26 }, { -23,  13 }, { -18, -20 }, {  17,  21 }, {  -4,  27 }, {  27,   6 }, {   1, -28 },
    { -11,  26 }, { -17, -23 }, {   7,  28 }, {  11, -27 }, {  29,   5 }, { -23, -19 }, { -28, -11 }, { -21,  22 },
    { -30,   7 }, { -17,  26 }, { -27,  16 }, {  13,  29 }, {  19, -26 }, {  10, -31 }, { -14, -30 }, {  20, -27 },
    { -29,  18 }, { -16, -31 }, { -28, -22 }, {  21, -30 }, { -25,  28 }, {  26, -29 }, {  25, -32 }, { -32, -32 }
};
 106
/* This is simply the scaled down elementwise product of the standard JPEG
 * quantizer table and the AAN premul table.
 * decode_i_block() indexes it with the de-zigzagged coefficient position
 * (j = ff_zigzag_direct[i]) and uses entry 0 for the DC coefficient. */
static const uint8_t dequant_table[64] = {
    16, 15, 13, 19, 24, 31, 28, 17,
    17, 23, 25, 31, 36, 63, 45, 21,
    18, 24, 27, 37, 52, 59, 49, 20,
    16, 28, 34, 40, 60, 80, 51, 20,
    18, 31, 48, 66, 68, 86, 56, 21,
    19, 38, 56, 59, 64, 64, 48, 20,
    27, 48, 55, 55, 56, 51, 35, 15,
    20, 35, 34, 32, 31, 22, 15,  8,
};
 119
/* Block type VLCs, indexed [table set][shape class]; built by init_vlcs()
 * from block_type_tab and read by decode_p_block(). */
static VLC block_type_vlc[2][4];
 121
 122
/**
 * Reassembly buffer for one frame that arrives split over several "cfrm"
 * packets (see decode_frame()).
 */
typedef struct CFrameBuffer {
    unsigned int allocated_size; ///< capacity of data, maintained by av_fast_realloc()
    unsigned int size;           ///< bytes collected so far; 0 marks a free slot
    int id;                      ///< frame id taken from the packet; 0 means unused
    uint8_t *data;               ///< collected payload bytes
} CFrameBuffer;
 129
/**
 * Private decoder state of the 4XM video decoder.
 */
typedef struct FourXContext {
    AVCodecContext *avctx;         ///< owning codec context (dimensions, flags, logging)
    DSPContext dsp;                ///< provides bswap_buf() and clear_blocks()
    AVFrame current_picture, last_picture; ///< frame being decoded / reference for P-frames
    GetBitContext pre_gb;          ///< ac/dc prefix
    GetBitContext gb;              ///< main bitstream reader
    GetByteContext g;              ///< P-frame bytestream (motion vector indices)
    GetByteContext g2;             ///< P-frame wordstream (16-bit values)
    int mv[256];                   ///< pixel offsets per motion vector index, set by init_mv()
    VLC pre_vlc;                   ///< AC/DC prefix VLC built by read_huffman_tables()
    int last_dc;                   ///< running DC value carried between blocks
    DECLARE_ALIGNED(16, DCTELEM, block)[6][64]; ///< coefficient blocks of one macroblock
    void *bitstream_buffer;        ///< byte-swapped copy of the current bitstream
    unsigned int bitstream_buffer_size; ///< allocated size of bitstream_buffer
    int version;                   ///< stream version; selects tables and header layout
    CFrameBuffer cfrm[CFRAME_BUFFER_COUNT]; ///< reassembly slots for "cfrm" packets
} FourXContext;
 147
 148
 149 #define FIX_1_082392200  70936
 150 #define FIX_1_414213562  92682
 151 #define FIX_1_847759065 121095
 152 #define FIX_2_613125930 171254
 153
 154 #define MULTIPLY(var, const) (((var) * (const)) >> 16)
 155
 156 static void idct(DCTELEM block[64])
 157 {
 158     int tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
 159     int tmp10, tmp11, tmp12, tmp13;
 160     int z5, z10, z11, z12, z13;
 161     int i;
 162     int temp[64];
 163
 164     for (i = 0; i < 8; i++) {
 165         tmp10 = block[8 * 0 + i] + block[8 * 4 + i];
 166         tmp11 = block[8 * 0 + i] - block[8 * 4 + i];
 167
 168         tmp13 = block[8 * 2 + i] + block[8 * 6 + i];
 169         tmp12 = MULTIPLY(block[8 * 2 + i] - block[8 * 6 + i], FIX_1_414213562) - tmp13;
 170
 171         tmp0 = tmp10 + tmp13;
 172         tmp3 = tmp10 - tmp13;
 173         tmp1 = tmp11 + tmp12;
 174         tmp2 = tmp11 - tmp12;
 175
 176         z13 = block[8 * 5 + i] + block[8 * 3 + i];
 177         z10 = block[8 * 5 + i] - block[8 * 3 + i];
 178         z11 = block[8 * 1 + i] + block[8 * 7 + i];
 179         z12 = block[8 * 1 + i] - block[8 * 7 + i];
 180
 181         tmp7  =          z11 + z13;
 182         tmp11 = MULTIPLY(z11 - z13, FIX_1_414213562);
 183
 184         z5    = MULTIPLY(z10 + z12, FIX_1_847759065);
 185         tmp10 = MULTIPLY(z12,  FIX_1_082392200) - z5;
 186         tmp12 = MULTIPLY(z10, -FIX_2_613125930) + z5;
 187
 188         tmp6 = tmp12 - tmp7;
 189         tmp5 = tmp11 - tmp6;
 190         tmp4 = tmp10 + tmp5;
 191
 192         temp[8 * 0 + i] = tmp0 + tmp7;
 193         temp[8 * 7 + i] = tmp0 - tmp7;
 194         temp[8 * 1 + i] = tmp1 + tmp6;
 195         temp[8 * 6 + i] = tmp1 - tmp6;
 196         temp[8 * 2 + i] = tmp2 + tmp5;
 197         temp[8 * 5 + i] = tmp2 - tmp5;
 198         temp[8 * 4 + i] = tmp3 + tmp4;
 199         temp[8 * 3 + i] = tmp3 - tmp4;
 200     }
 201
 202     for (i = 0; i < 8 * 8; i += 8) {
 203         tmp10 = temp[0 + i] + temp[4 + i];
 204         tmp11 = temp[0 + i] - temp[4 + i];
 205
 206         tmp13 = temp[2 + i] + temp[6 + i];
 207         tmp12 = MULTIPLY(temp[2 + i] - temp[6 + i], FIX_1_414213562) - tmp13;
 208
 209         tmp0 = tmp10 + tmp13;
 210         tmp3 = tmp10 - tmp13;
 211         tmp1 = tmp11 + tmp12;
 212         tmp2 = tmp11 - tmp12;
 213
 214         z13 = temp[5 + i] + temp[3 + i];
 215         z10 = temp[5 + i] - temp[3 + i];
 216         z11 = temp[1 + i] + temp[7 + i];
 217         z12 = temp[1 + i] - temp[7 + i];
 218
 219         tmp7  = z11 + z13;
 220         tmp11 = MULTIPLY(z11 - z13, FIX_1_414213562);
 221
 222         z5    = MULTIPLY(z10 + z12, FIX_1_847759065);
 223         tmp10 = MULTIPLY(z12,  FIX_1_082392200) - z5;
 224         tmp12 = MULTIPLY(z10, -FIX_2_613125930) + z5;
 225
 226         tmp6 = tmp12 - tmp7;
 227         tmp5 = tmp11 - tmp6;
 228         tmp4 = tmp10 + tmp5;
 229
 230         block[0 + i] = (tmp0 + tmp7) >> 6;
 231         block[7 + i] = (tmp0 - tmp7) >> 6;
 232         block[1 + i] = (tmp1 + tmp6) >> 6;
 233         block[6 + i] = (tmp1 - tmp6) >> 6;
 234         block[2 + i] = (tmp2 + tmp5) >> 6;
 235         block[5 + i] = (tmp2 - tmp5) >> 6;
 236         block[4 + i] = (tmp3 + tmp4) >> 6;
 237         block[3 + i] = (tmp3 - tmp4) >> 6;
 238     }
 239 }
 240
 241 static av_cold void init_vlcs(FourXContext *f)
 242 {
 243     static VLC_TYPE table[8][32][2];
 244     int i;
 245
 246     for (i = 0; i < 8; i++) {
 247         block_type_vlc[0][i].table           = table[i];
 248         block_type_vlc[0][i].table_allocated = 32;
 249         init_vlc(&block_type_vlc[0][i], BLOCK_TYPE_VLC_BITS, 7,
 250                  &block_type_tab[0][i][0][1], 2, 1,
 251                  &block_type_tab[0][i][0][0], 2, 1, INIT_VLC_USE_NEW_STATIC);
 252     }
 253 }
 254
 255 static void init_mv(FourXContext *f)
 256 {
 257     int i;
 258
 259     for (i = 0; i < 256; i++) {
 260         if (f->version > 1)
 261             f->mv[i] = mv[i][0] + mv[i][1] * f->current_picture.linesize[0] / 2;
 262         else
 263             f->mv[i] = (i & 15) - 8 + ((i >> 4) - 8) * f->current_picture.linesize[0] / 2;
 264     }
 265 }
 266
/*
 * LE_CENTRIC_MUL(dst, src, scale, dc): read 32 bits from src, compute
 * value * scale + dc as one unsigned 32-bit operation and store the result
 * to dst. mcdc() uses it to process two adjacent 16-bit pixels at a time,
 * with dc already replicated into both halves.
 * On big-endian hosts the two 16-bit halves are swapped before and after the
 * arithmetic.
 * NOTE(review): presumably so that the result matches what a little-endian
 * host computes - confirm.
 */
#if HAVE_BIGENDIAN
#define LE_CENTRIC_MUL(dst, src, scale, dc)             \
    {                                                   \
        unsigned tmpval = AV_RN32(src);                 \
        tmpval = (tmpval << 16) | (tmpval >> 16);       \
        tmpval = tmpval * (scale) + (dc);               \
        tmpval = (tmpval << 16) | (tmpval >> 16);       \
        AV_WN32A(dst, tmpval);                          \
    }
#else
#define LE_CENTRIC_MUL(dst, src, scale, dc)              \
    {                                                    \
        unsigned tmpval = AV_RN32(src) * (scale) + (dc); \
        AV_WN32A(dst, tmpval);                           \
    }
#endif
 283
 284 static inline void mcdc(uint16_t *dst, const uint16_t *src, int log2w,
 285                         int h, int stride, int scale, unsigned dc)
 286 {
 287     int i;
 288     dc *= 0x10001;
 289
 290     switch (log2w) {
 291     case 0:
 292         for (i = 0; i < h; i++) {
 293             dst[0] = scale * src[0] + dc;
 294             if (scale)
 295                 src += stride;
 296             dst += stride;
 297         }
 298         break;
 299     case 1:
 300         for (i = 0; i < h; i++) {
 301             LE_CENTRIC_MUL(dst, src, scale, dc);
 302             if (scale)
 303                 src += stride;
 304             dst += stride;
 305         }
 306         break;
 307     case 2:
 308         for (i = 0; i < h; i++) {
 309             LE_CENTRIC_MUL(dst, src, scale, dc);
 310             LE_CENTRIC_MUL(dst + 2, src + 2, scale, dc);
 311             if (scale)
 312                 src += stride;
 313             dst += stride;
 314         }
 315         break;
 316     case 3:
 317         for (i = 0; i < h; i++) {
 318             LE_CENTRIC_MUL(dst,     src,     scale, dc);
 319             LE_CENTRIC_MUL(dst + 2, src + 2, scale, dc);
 320             LE_CENTRIC_MUL(dst + 4, src + 4, scale, dc);
 321             LE_CENTRIC_MUL(dst + 6, src + 6, scale, dc);
 322             if (scale)
 323                 src += stride;
 324             dst += stride;
 325         }
 326         break;
 327     default:
 328         assert(0);
 329     }
 330 }
 331
 332 static void decode_p_block(FourXContext *f, uint16_t *dst, uint16_t *src,
 333                            int log2w, int log2h, int stride)
 334 {
 335     const int index = size2index[log2h][log2w];
 336     const int h     = 1 << log2h;
 337     int code        = get_vlc2(&f->gb,
 338                                block_type_vlc[1 - (f->version > 1)][index].table,
 339                                BLOCK_TYPE_VLC_BITS, 1);
 340     uint16_t *start = (uint16_t *)f->last_picture.data[0];
 341     uint16_t *end   = start + stride * (f->avctx->height - h + 1) - (1 << log2w);
 342
 343     assert(code >= 0 && code <= 6);
 344
 345     if (code == 0) {
 346         if (f->g.buffer_end - f->g.buffer < 1) {
 347             av_log(f->avctx, AV_LOG_ERROR, "bytestream overread\n");
 348             return;
 349         }
 350         src += f->mv[bytestream2_get_byte(&f->g)];
 351         if (start > src || src > end) {
 352             av_log(f->avctx, AV_LOG_ERROR, "mv out of pic\n");
 353             return;
 354         }
 355         mcdc(dst, src, log2w, h, stride, 1, 0);
 356     } else if (code == 1) {
 357         log2h--;
 358         decode_p_block(f, dst, src, log2w, log2h, stride);
 359         decode_p_block(f, dst + (stride << log2h),
 360                           src + (stride << log2h), log2w, log2h, stride);
 361     } else if (code == 2) {
 362         log2w--;
 363         decode_p_block(f, dst , src, log2w, log2h, stride);
 364         decode_p_block(f, dst + (1 << log2w),
 365                           src + (1 << log2w), log2w, log2h, stride);
 366     } else if (code == 3 && f->version < 2) {
 367         mcdc(dst, src, log2w, h, stride, 1, 0);
 368     } else if (code == 4) {
 369         if (f->g.buffer_end - f->g.buffer < 1) {
 370             av_log(f->avctx, AV_LOG_ERROR, "bytestream overread\n");
 371             return;
 372         }
 373         src += f->mv[bytestream2_get_byte(&f->g)];
 374         if (start > src || src > end) {
 375             av_log(f->avctx, AV_LOG_ERROR, "mv out of pic\n");
 376             return;
 377         }
 378         if (f->g2.buffer_end - f->g2.buffer < 1){
 379             av_log(f->avctx, AV_LOG_ERROR, "wordstream overread\n");
 380             return;
 381         }
 382         mcdc(dst, src, log2w, h, stride, 1, bytestream2_get_le16(&f->g2));
 383     } else if (code == 5) {
 384         if (f->g2.buffer_end - f->g2.buffer < 1) {
 385             av_log(f->avctx, AV_LOG_ERROR, "wordstream overread\n");
 386             return;
 387         }
 388         mcdc(dst, src, log2w, h, stride, 0, bytestream2_get_le16(&f->g2));
 389     } else if (code == 6) {
 390         if (f->g2.buffer_end - f->g2.buffer < 2) {
 391             av_log(f->avctx, AV_LOG_ERROR, "wordstream overread\n");
 392             return;
 393         }
 394         if (log2w) {
 395             dst[0]      = bytestream2_get_le16(&f->g2);
 396             dst[1]      = bytestream2_get_le16(&f->g2);
 397         } else {
 398             dst[0]      = bytestream2_get_le16(&f->g2);
 399             dst[stride] = bytestream2_get_le16(&f->g2);
 400         }
 401     }
 402 }
 403
 404 static int decode_p_frame(FourXContext *f, const uint8_t *buf, int length)
 405 {
 406     int x, y;
 407     const int width  = f->avctx->width;
 408     const int height = f->avctx->height;
 409     uint16_t *src    = (uint16_t *)f->last_picture.data[0];
 410     uint16_t *dst    = (uint16_t *)f->current_picture.data[0];
 411     const int stride =             f->current_picture.linesize[0] >> 1;
 412     unsigned int bitstream_size, bytestream_size, wordstream_size, extra,
 413                  bytestream_offset, wordstream_offset;
 414
 415     if (f->version > 1) {
 416         extra           = 20;
 417         if (length < extra)
 418             return -1;
 419         bitstream_size  = AV_RL32(buf + 8);
 420         wordstream_size = AV_RL32(buf + 12);
 421         bytestream_size = AV_RL32(buf + 16);
 422     } else {
 423         extra           = 0;
 424         bitstream_size  = AV_RL16(buf - 4);
 425         wordstream_size = AV_RL16(buf - 2);
 426         bytestream_size = FFMAX(length - bitstream_size - wordstream_size, 0);
 427     }
 428
 429     if (bitstream_size > length ||
 430         bytestream_size > length - bitstream_size ||
 431         wordstream_size > length - bytestream_size - bitstream_size ||
 432         extra > length - bytestream_size - bitstream_size - wordstream_size) {
 433         av_log(f->avctx, AV_LOG_ERROR, "lengths %d %d %d %d\n", bitstream_size, bytestream_size, wordstream_size,
 434         bitstream_size+ bytestream_size+ wordstream_size - length);
 435         return -1;
 436     }
 437
 438     av_fast_malloc(&f->bitstream_buffer, &f->bitstream_buffer_size,
 439                    bitstream_size + FF_INPUT_BUFFER_PADDING_SIZE);
 440     if (!f->bitstream_buffer)
 441         return AVERROR(ENOMEM);
 442     f->dsp.bswap_buf(f->bitstream_buffer, (const uint32_t*)(buf + extra),
 443                      bitstream_size / 4);
 444     memset((uint8_t*)f->bitstream_buffer + bitstream_size,
 445            0, FF_INPUT_BUFFER_PADDING_SIZE);
 446     init_get_bits(&f->gb, f->bitstream_buffer, 8 * bitstream_size);
 447
 448     wordstream_offset = extra + bitstream_size;
 449     bytestream_offset = extra + bitstream_size + wordstream_size;
 450     bytestream2_init(&f->g2, buf + wordstream_offset,
 451                      length - wordstream_offset);
 452     bytestream2_init(&f->g, buf + bytestream_offset,
 453                      length - bytestream_offset);
 454
 455     init_mv(f);
 456
 457     for (y = 0; y < height; y += 8) {
 458         for (x = 0; x < width; x += 8)
 459             decode_p_block(f, dst + x, src + x, 3, 3, stride);
 460         src += 8 * stride;
 461         dst += 8 * stride;
 462     }
 463
 464     return 0;
 465 }
 466
 467 /**
 468  * decode block and dequantize.
 469  * Note this is almost identical to MJPEG.
 470  */
 471 static int decode_i_block(FourXContext *f, DCTELEM *block)
 472 {
 473     int code, i, j, level, val;
 474
 475     if (get_bits_left(&f->gb) < 2){
 476         av_log(f->avctx, AV_LOG_ERROR, "%d bits left before decode_i_block()\n", get_bits_left(&f->gb));
 477         return -1;
 478     }
 479
 480     /* DC coef */
 481     val = get_vlc2(&f->pre_gb, f->pre_vlc.table, ACDC_VLC_BITS, 3);
 482     if (val >> 4)
 483         av_log(f->avctx, AV_LOG_ERROR, "error dc run != 0\n");
 484
 485     if (val)
 486         val = get_xbits(&f->gb, val);
 487
 488     val        = val * dequant_table[0] + f->last_dc;
 489     f->last_dc = block[0] = val;
 490     /* AC coefs */
 491     i = 1;
 492     for (;;) {
 493         code = get_vlc2(&f->pre_gb, f->pre_vlc.table, ACDC_VLC_BITS, 3);
 494
 495         /* EOB */
 496         if (code == 0)
 497             break;
 498         if (code == 0xf0) {
 499             i += 16;
 500         } else {
 501             level = get_xbits(&f->gb, code & 0xf);
 502             i    += code >> 4;
 503             if (i >= 64) {
 504                 av_log(f->avctx, AV_LOG_ERROR, "run %d oveflow\n", i);
 505                 return 0;
 506             }
 507
 508             j = ff_zigzag_direct[i];
 509             block[j] = level * dequant_table[j];
 510             i++;
 511             if (i >= 64)
 512                 break;
 513         }
 514     }
 515
 516     return 0;
 517 }
 518
 519 static inline void idct_put(FourXContext *f, int x, int y)
 520 {
 521     DCTELEM (*block)[64] = f->block;
 522     int stride           = f->current_picture.linesize[0] >> 1;
 523     int i;
 524     uint16_t *dst = ((uint16_t*)f->current_picture.data[0]) + y * stride + x;
 525
 526     for (i = 0; i < 4; i++) {
 527         block[i][0] += 0x80 * 8 * 8;
 528         idct(block[i]);
 529     }
 530
 531     if (!(f->avctx->flags & CODEC_FLAG_GRAY)) {
 532         for (i = 4; i < 6; i++)
 533             idct(block[i]);
 534     }
 535
 536     /* Note transform is:
 537      * y  = ( 1b + 4g + 2r) / 14
 538      * cb = ( 3b - 2g - 1r) / 14
 539      * cr = (-1b - 4g + 5r) / 14 */
 540     for (y = 0; y < 8; y++) {
 541         for (x = 0; x < 8; x++) {
 542             DCTELEM *temp = block[(x >> 2) + 2 * (y >> 2)] +
 543                             2 * (x & 3) + 2 * 8 * (y & 3); // FIXME optimize
 544             int cb = block[4][x + 8 * y];
 545             int cr = block[5][x + 8 * y];
 546             int cg = (cb + cr) >> 1;
 547             int y;
 548
 549             cb += cb;
 550
 551             y               = temp[0];
 552             dst[0]          = ((y + cb) >> 3) + (((y - cg) & 0xFC) << 3) + (((y + cr) & 0xF8) << 8);
 553             y               = temp[1];
 554             dst[1]          = ((y + cb) >> 3) + (((y - cg) & 0xFC) << 3) + (((y + cr) & 0xF8) << 8);
 555             y               = temp[8];
 556             dst[stride]     = ((y + cb) >> 3) + (((y - cg) & 0xFC) << 3) + (((y + cr) & 0xF8) << 8);
 557             y               = temp[9];
 558             dst[1 + stride] = ((y + cb) >> 3) + (((y - cg) & 0xFC) << 3) + (((y + cr) & 0xF8) << 8);
 559             dst            += 2;
 560         }
 561         dst += 2 * stride - 2 * 8;
 562     }
 563 }
 564
 565 static int decode_i_mb(FourXContext *f)
 566 {
 567     int i;
 568
 569     f->dsp.clear_blocks(f->block[0]);
 570
 571     for (i = 0; i < 6; i++)
 572         if (decode_i_block(f, f->block[i]) < 0)
 573             return -1;
 574
 575     return 0;
 576 }
 577
 578 static const uint8_t *read_huffman_tables(FourXContext *f,
 579                                           const uint8_t * const buf, int buf_size)
 580 {
 581     int frequency[512] = { 0 };
 582     uint8_t flag[512];
 583     int up[512];
 584     uint8_t len_tab[257];
 585     int bits_tab[257];
 586     int start, end;
 587     const uint8_t *ptr = buf;
 588     const uint8_t *ptr_end = buf + buf_size;
 589     int j;
 590
 591     memset(up, -1, sizeof(up));
 592
 593     start = *ptr++;
 594     end   = *ptr++;
 595     for (;;) {
 596         int i;
 597
 598         if (start <= end && ptr_end - ptr < end - start + 1 + 1)
 599             return NULL;
 600         for (i = start; i <= end; i++)
 601             frequency[i] = *ptr++;
 602         start = *ptr++;
 603         if (start == 0)
 604             break;
 605
 606         end = *ptr++;
 607     }
 608     frequency[256] = 1;
 609
 610     while ((ptr - buf) & 3)
 611         ptr++; // 4byte align
 612
 613     for (j = 257; j < 512; j++) {
 614         int min_freq[2] = { 256 * 256, 256 * 256 };
 615         int smallest[2] = { 0, 0 };
 616         int i;
 617         for (i = 0; i < j; i++) {
 618             if (frequency[i] == 0)
 619                 continue;
 620             if (frequency[i] < min_freq[1]) {
 621                 if (frequency[i] < min_freq[0]) {
 622                     min_freq[1] = min_freq[0];
 623                     smallest[1] = smallest[0];
 624                     min_freq[0] = frequency[i];
 625                     smallest[0] = i;
 626                 } else {
 627                     min_freq[1] = frequency[i];
 628                     smallest[1] = i;
 629                 }
 630             }
 631         }
 632         if (min_freq[1] == 256 * 256)
 633             break;
 634
 635         frequency[j]           = min_freq[0] + min_freq[1];
 636         flag[smallest[0]]      = 0;
 637         flag[smallest[1]]      = 1;
 638         up[smallest[0]]        =
 639         up[smallest[1]]        = j;
 640         frequency[smallest[0]] = frequency[smallest[1]] = 0;
 641     }
 642
 643     for (j = 0; j < 257; j++) {
 644         int node, len = 0, bits = 0;
 645
 646         for (node = j; up[node] != -1; node = up[node]) {
 647             bits += flag[node] << len;
 648             len++;
 649             if (len > 31)
 650                 // can this happen at all ?
 651                 av_log(f->avctx, AV_LOG_ERROR,
 652                        "vlc length overflow\n");
 653         }
 654
 655         bits_tab[j] = bits;
 656         len_tab[j]  = len;
 657     }
 658
 659     if (init_vlc(&f->pre_vlc, ACDC_VLC_BITS, 257, len_tab, 1, 1,
 660                  bits_tab, 4, 4, 0))
 661         return NULL;
 662
 663     return ptr;
 664 }
 665
 666 static int mix(int c0, int c1)
 667 {
 668     int blue  =  2 * (c0 & 0x001F) + (c1 & 0x001F);
 669     int green = (2 * (c0 & 0x03E0) + (c1 & 0x03E0)) >> 5;
 670     int red   =  2 * (c0 >> 10)    + (c1 >> 10);
 671     return red / 3 * 1024 + green / 3 * 32 + blue / 3;
 672 }
 673
 674 static int decode_i2_frame(FourXContext *f, const uint8_t *buf, int length)
 675 {
 676     int x, y, x2, y2;
 677     const int width  = f->avctx->width;
 678     const int height = f->avctx->height;
 679     const int mbs    = (FFALIGN(width, 16) >> 4) * (FFALIGN(height, 16) >> 4);
 680     uint16_t *dst    = (uint16_t*)f->current_picture.data[0];
 681     const int stride =            f->current_picture.linesize[0]>>1;
 682     const uint8_t *buf_end = buf + length;
 683     GetByteContext g3;
 684
 685     if (length < mbs * 8) {
 686         av_log(f->avctx, AV_LOG_ERROR, "packet size too small\n");
 687         return AVERROR_INVALIDDATA;
 688     }
 689     bytestream2_init(&g3, buf, length);
 690
 691     for (y = 0; y < height; y += 16) {
 692         for (x = 0; x < width; x += 16) {
 693             unsigned int color[4] = { 0 }, bits;
 694             if (buf_end - buf < 8)
 695                 return -1;
 696             // warning following is purely guessed ...
 697             color[0] = bytestream2_get_le16u(&g3);
 698             color[1] = bytestream2_get_le16u(&g3);
 699
 700             if (color[0] & 0x8000)
 701                 av_log(NULL, AV_LOG_ERROR, "unk bit 1\n");
 702             if (color[1] & 0x8000)
 703                 av_log(NULL, AV_LOG_ERROR, "unk bit 2\n");
 704
 705             color[2] = mix(color[0], color[1]);
 706             color[3] = mix(color[1], color[0]);
 707
 708             bits = bytestream2_get_le32u(&g3);
 709             for (y2 = 0; y2 < 16; y2++) {
 710                 for (x2 = 0; x2 < 16; x2++) {
 711                     int index = 2 * (x2 >> 2) + 8 * (y2 >> 2);
 712                     dst[y2 * stride + x2] = color[(bits >> index) & 3];
 713                 }
 714             }
 715             dst += 16;
 716         }
 717         dst += 16 * stride - x;
 718     }
 719
 720     return 0;
 721 }
 722
 723 static int decode_i_frame(FourXContext *f, const uint8_t *buf, int length)
 724 {
 725     int x, y;
 726     const int width  = f->avctx->width;
 727     const int height = f->avctx->height;
 728     const unsigned int bitstream_size = AV_RL32(buf);
 729     unsigned int prestream_size;
 730     const uint8_t *prestream;
 731
 732     if (bitstream_size > (1<<26) || length < bitstream_size + 12) {
 733         av_log(f->avctx, AV_LOG_ERROR, "packet size too small\n");
 734         return AVERROR_INVALIDDATA;
 735     }
 736
 737     prestream_size = 4 * AV_RL32(buf + bitstream_size + 4);
 738     prestream      =             buf + bitstream_size + 12;
 739
 740     if (prestream_size + bitstream_size + 12 != length
 741         || bitstream_size > (1 << 26)
 742         || prestream_size > (1 << 26)) {
 743         av_log(f->avctx, AV_LOG_ERROR, "size mismatch %d %d %d\n",
 744                prestream_size, bitstream_size, length);
 745         return -1;
 746     }
 747
 748     prestream = read_huffman_tables(f, prestream, buf + length - prestream);
 749     if (!prestream)
 750         return -1;
 751
 752     init_get_bits(&f->gb, buf + 4, 8 * bitstream_size);
 753
 754     prestream_size = length + buf - prestream;
 755
 756     av_fast_malloc(&f->bitstream_buffer, &f->bitstream_buffer_size,
 757                    prestream_size + FF_INPUT_BUFFER_PADDING_SIZE);
 758     if (!f->bitstream_buffer)
 759         return AVERROR(ENOMEM);
 760     f->dsp.bswap_buf(f->bitstream_buffer, (const uint32_t*)prestream,
 761                      prestream_size / 4);
 762     memset((uint8_t*)f->bitstream_buffer + prestream_size,
 763            0, FF_INPUT_BUFFER_PADDING_SIZE);
 764     init_get_bits(&f->pre_gb, f->bitstream_buffer, 8 * prestream_size);
 765
 766     f->last_dc = 0 * 128 * 8 * 8;
 767
 768     for (y = 0; y < height; y += 16) {
 769         for (x = 0; x < width; x += 16) {
 770             if (decode_i_mb(f) < 0)
 771                 return -1;
 772
 773             idct_put(f, x, y);
 774         }
 775     }
 776
 777     if (get_vlc2(&f->pre_gb, f->pre_vlc.table, ACDC_VLC_BITS, 3) != 256)
 778         av_log(f->avctx, AV_LOG_ERROR, "end mismatch\n");
 779
 780     return 0;
 781 }
 782
 783 static int decode_frame(AVCodecContext *avctx, void *data,
 784                         int *data_size, AVPacket *avpkt)
 785 {
 786     const uint8_t *buf    = avpkt->data;
 787     int buf_size          = avpkt->size;
 788     FourXContext *const f = avctx->priv_data;
 789     AVFrame *picture      = data;
 790     AVFrame *p, temp;
 791     int i, frame_4cc, frame_size;
 792
 793     if (buf_size < 12)
 794         return AVERROR_INVALIDDATA;
 795     frame_4cc = AV_RL32(buf);
 796     if (buf_size != AV_RL32(buf + 4) + 8 || buf_size < 20)
 797         av_log(f->avctx, AV_LOG_ERROR, "size mismatch %d %d\n",
 798                buf_size, AV_RL32(buf + 4));
 799
 800     if (frame_4cc == AV_RL32("cfrm")) {
 801         int free_index       = -1;
 802         const int data_size  = buf_size - 20;
 803         const int id         = AV_RL32(buf + 12);
 804         const int whole_size = AV_RL32(buf + 16);
 805         CFrameBuffer *cfrm;
 806
 807         if (data_size < 0 || whole_size < 0) {
 808             av_log(f->avctx, AV_LOG_ERROR, "sizes invalid\n");
 809             return AVERROR_INVALIDDATA;
 810         }
 811
 812         for (i = 0; i < CFRAME_BUFFER_COUNT; i++)
 813             if (f->cfrm[i].id && f->cfrm[i].id < avctx->frame_number)
 814                 av_log(f->avctx, AV_LOG_ERROR, "lost c frame %d\n",
 815                        f->cfrm[i].id);
 816
 817         for (i = 0; i < CFRAME_BUFFER_COUNT; i++) {
 818             if (f->cfrm[i].id == id)
 819                 break;
 820             if (f->cfrm[i].size == 0)
 821                 free_index = i;
 822         }
 823
 824         if (i >= CFRAME_BUFFER_COUNT) {
 825             i             = free_index;
 826             f->cfrm[i].id = id;
 827         }
 828         cfrm = &f->cfrm[i];
 829
 830         if (data_size > UINT_MAX -  cfrm->size - FF_INPUT_BUFFER_PADDING_SIZE)
 831             return AVERROR_INVALIDDATA;
 832
 833         cfrm->data = av_fast_realloc(cfrm->data, &cfrm->allocated_size,
 834                                      cfrm->size + data_size + FF_INPUT_BUFFER_PADDING_SIZE);
 835         // explicit check needed as memcpy below might not catch a NULL
 836         if (!cfrm->data) {
 837             av_log(f->avctx, AV_LOG_ERROR, "realloc falure");
 838             return -1;
 839         }
 840
 841         memcpy(cfrm->data + cfrm->size, buf + 20, data_size);
 842         cfrm->size += data_size;
 843
 844         if (cfrm->size >= whole_size) {
 845             buf        = cfrm->data;
 846             frame_size = cfrm->size;
 847
 848             if (id != avctx->frame_number)
 849                 av_log(f->avctx, AV_LOG_ERROR, "cframe id mismatch %d %d\n",
 850                        id, avctx->frame_number);
 851
 852             cfrm->size = cfrm->id = 0;
 853             frame_4cc  = AV_RL32("pfrm");
 854         } else
 855             return buf_size;
 856     } else {
 857         buf        = buf      + 12;
 858         frame_size = buf_size - 12;
 859     }
 860
 861     temp               = f->current_picture;
 862     f->current_picture = f->last_picture;
 863     f->last_picture    = temp;
 864
 865     p                  = &f->current_picture;
 866     avctx->coded_frame = p;
 867
 868     // alternatively we would have to use our own buffer management
 869     avctx->flags |= CODEC_FLAG_EMU_EDGE;
 870
 871     p->reference= 3;
 872     if (avctx->reget_buffer(avctx, p) < 0) {
 873         av_log(avctx, AV_LOG_ERROR, "reget_buffer() failed\n");
 874         return -1;
 875     }
 876
 877     if (frame_4cc == AV_RL32("ifr2")) {
 878         p->pict_type= AV_PICTURE_TYPE_I;
 879         if (decode_i2_frame(f, buf - 4, frame_size + 4) < 0) {
 880             av_log(f->avctx, AV_LOG_ERROR, "decode i2 frame failed\n");
 881             return -1;
 882         }
 883     } else if (frame_4cc == AV_RL32("ifrm")) {
 884         p->pict_type= AV_PICTURE_TYPE_I;
 885         if (decode_i_frame(f, buf, frame_size) < 0) {
 886             av_log(f->avctx, AV_LOG_ERROR, "decode i frame failed\n");
 887             return -1;
 888         }
 889     } else if (frame_4cc == AV_RL32("pfrm") || frame_4cc == AV_RL32("pfr2")) {
 890         if (!f->last_picture.data[0]) {
 891             f->last_picture.reference = 3;
 892             if (avctx->get_buffer(avctx, &f->last_picture) < 0) {
 893                 av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
 894                 return -1;
 895             }
 896         }
 897
 898         p->pict_type = AV_PICTURE_TYPE_P;
 899         if (decode_p_frame(f, buf, frame_size) < 0) {
 900             av_log(f->avctx, AV_LOG_ERROR, "decode p frame failed\n");
 901             return -1;
 902         }
 903     } else if (frame_4cc == AV_RL32("snd_")) {
 904         av_log(avctx, AV_LOG_ERROR, "ignoring snd_ chunk length:%d\n",
 905                buf_size);
 906     } else {
 907         av_log(avctx, AV_LOG_ERROR, "ignoring unknown chunk length:%d\n",
 908                buf_size);
 909     }
 910
 911     p->key_frame = p->pict_type == AV_PICTURE_TYPE_I;
 912
 913     *picture   = *p;
 914     *data_size = sizeof(AVPicture);
 915
 916     emms_c();
 917
 918     return buf_size;
 919 }
 920
 921
 922 static av_cold void common_init(AVCodecContext *avctx)
 923 {
 924     FourXContext * const f = avctx->priv_data;
 925
 926     ff_dsputil_init(&f->dsp, avctx);
 927
 928     f->avctx = avctx;
 929 }
 930
 931 static av_cold int decode_init(AVCodecContext *avctx)
 932 {
 933     FourXContext * const f = avctx->priv_data;
 934
 935     if (avctx->extradata_size != 4 || !avctx->extradata) {
 936         av_log(avctx, AV_LOG_ERROR, "extradata wrong or missing\n");
 937         return 1;
 938     }
 939     if((avctx->width % 16) || (avctx->height % 16)) {
 940         av_log(avctx, AV_LOG_ERROR, "unsupported width/height\n");
 941         return AVERROR_INVALIDDATA;
 942     }
 943
 944     avcodec_get_frame_defaults(&f->current_picture);
 945     avcodec_get_frame_defaults(&f->last_picture);
 946     f->version = AV_RL32(avctx->extradata) >> 16;
 947     common_init(avctx);
 948     init_vlcs(f);
 949
 950     if (f->version > 2)
 951         avctx->pix_fmt = PIX_FMT_RGB565;
 952     else
 953         avctx->pix_fmt = PIX_FMT_BGR555;
 954
 955     return 0;
 956 }
 957
 958
 959 static av_cold int decode_end(AVCodecContext *avctx)
 960 {
 961     FourXContext * const f = avctx->priv_data;
 962     int i;
 963
 964     av_freep(&f->bitstream_buffer);
 965     f->bitstream_buffer_size = 0;
 966     for (i = 0; i < CFRAME_BUFFER_COUNT; i++) {
 967         av_freep(&f->cfrm[i].data);
 968         f->cfrm[i].allocated_size = 0;
 969     }
 970     ff_free_vlc(&f->pre_vlc);
 971     if (f->current_picture.data[0])
 972         avctx->release_buffer(avctx, &f->current_picture);
 973     if (f->last_picture.data[0])
 974         avctx->release_buffer(avctx, &f->last_picture);
 975
 976     return 0;
 977 }
 978
 979 AVCodec ff_fourxm_decoder = {
 980     .name           = "4xm",
 981     .type           = AVMEDIA_TYPE_VIDEO,
 982     .id             = CODEC_ID_4XM,
 983     .priv_data_size = sizeof(FourXContext),
 984     .init           = decode_init,
 985     .close          = decode_end,
 986     .decode         = decode_frame,
 987     .capabilities   = CODEC_CAP_DR1,
 988     .long_name      = NULL_IF_CONFIG_SMALL("4X Movie"),
 989 };