git.sesse.net Git - ffmpeg/blob - libavcodec/4xm.c

   1 /*
   2  * 4XM codec
   3  * Copyright (c) 2003 Michael Niedermayer
   4  *
   5  * This file is part of Libav.
   6  *
   7  * Libav is free software; you can redistribute it and/or
   8  * modify it under the terms of the GNU Lesser General Public
   9  * License as published by the Free Software Foundation; either
  10  * version 2.1 of the License, or (at your option) any later version.
  11  *
  12  * Libav is distributed in the hope that it will be useful,
  13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  15  * Lesser General Public License for more details.
  16  *
  17  * You should have received a copy of the GNU Lesser General Public
  18  * License along with Libav; if not, write to the Free Software
  19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20  */
  21
  22 /**
  23  * @file
  24  * 4XM codec.
  25  */
  26
  27 #include <inttypes.h>
  28
  29 #include "libavutil/frame.h"
  30 #include "libavutil/imgutils.h"
  31 #include "libavutil/intreadwrite.h"
  32 #include "avcodec.h"
  33 #include "blockdsp.h"
  34 #include "bswapdsp.h"
  35 #include "bytestream.h"
  36 #include "get_bits.h"
  37 #include "internal.h"
  38
  39 #define BLOCK_TYPE_VLC_BITS 5
  40 #define ACDC_VLC_BITS 9
  41
  42 #define CFRAME_BUFFER_COUNT 100
  43
  /*
   * Definitions of the P-frame block-type VLCs, stored as { code, length }
   * pairs: init_vlcs() passes element [0] of each pair as the code and
   * element [1] as the bit length, using the first 7 pairs of every row.
   * The outer index is chosen in decode_p_block() as 1 - (version > 1), the
   * middle index is the shape class looked up in size2index.
   * NOTE(review): pairs with length 0 presumably mark block types that are
   * not available for that shape -- confirm against init_vlc().
   */
  44 static const uint8_t block_type_tab[2][4][8][2] = {
  45     {
  46         {    // { 8, 4, 2 } x { 8, 4, 2}
  47             { 0, 1 }, { 2, 2 }, { 6, 3 }, { 14, 4 }, { 30, 5 }, { 31, 5 }, { 0, 0 }
  48         }, { // { 8, 4 } x 1
  49             { 0, 1 }, { 0, 0 }, { 2, 2 }, { 6, 3 }, { 14, 4 }, { 15, 4 }, { 0, 0 }
  50         }, { // 1 x { 8, 4 }
  51             { 0, 1 }, { 2, 2 }, { 0, 0 }, { 6, 3 }, { 14, 4 }, { 15, 4 }, { 0, 0 }
  52         }, { // 1 x 2, 2 x 1
  53             { 0, 1 }, { 0, 0 }, { 0, 0 }, { 2, 2 }, { 6, 3 }, { 14, 4 }, { 15, 4 }
  54         }
  55     }, {
  56         {   // { 8, 4, 2 } x { 8, 4, 2}
  57             { 1, 2 }, { 4, 3 }, { 5, 3 }, { 0, 2 }, { 6, 3 }, { 7, 3 }, { 0, 0 }
  58         }, {// { 8, 4 } x 1
  59             { 1, 2 }, { 0, 0 }, { 2, 2 }, { 0, 2 }, { 6, 3 }, { 7, 3 }, { 0, 0 }
  60         }, {// 1 x { 8, 4 }
  61             { 1, 2 }, { 2, 2 }, { 0, 0 }, { 0, 2 }, { 6, 3 }, { 7, 3 }, { 0, 0 }
  62         }, {// 1 x 2, 2 x 1
  63             { 1, 2 }, { 0, 0 }, { 0, 0 }, { 0, 2 }, { 2, 2 }, { 6, 3 }, { 7, 3 }
  64       }
  65     }
  66 };
  67
  /*
   * Maps a block size to the shape class used as the middle index of
   * block_type_tab / second index of block_type_vlc. decode_p_block() reads
   * it as size2index[log2h][log2w].
   * The [0][0] slot is written as -1 but, the element type being uint8_t,
   * is stored as 255; it is not a valid class index.
   */
  68 static const uint8_t size2index[4][4] = {
  69     { -1, 3, 1, 1 },
  70     {  3, 0, 0, 0 },
  71     {  2, 0, 0, 0 },
  72     {  2, 0, 0, 0 },
  73 };
  74
  /*
   * Motion vector table used by init_mv() when version > 1.
   * For entry i, element [0] is added to the offset as-is and element [1]
   * is multiplied by linesize / 2, i.e. [0] is the horizontal and [1] the
   * vertical displacement. The table is indexed by the byte read from the
   * bytestream in decode_p_block() (via the precomputed FourXContext.mv).
   */
  75 static const int8_t mv[256][2] = {
  76     {   0,   0 }, {   0,  -1 }, {  -1,   0 }, {   1,   0 }, {   0,   1 }, {  -1,  -1 }, {   1,  -1 }, {  -1,   1 },
  77     {   1,   1 }, {   0,  -2 }, {  -2,   0 }, {   2,   0 }, {   0,   2 }, {  -1,  -2 }, {   1,  -2 }, {  -2,  -1 },
  78     {   2,  -1 }, {  -2,   1 }, {   2,   1 }, {  -1,   2 }, {   1,   2 }, {  -2,  -2 }, {   2,  -2 }, {  -2,   2 },
  79     {   2,   2 }, {   0,  -3 }, {  -3,   0 }, {   3,   0 }, {   0,   3 }, {  -1,  -3 }, {   1,  -3 }, {  -3,  -1 },
  80     {   3,  -1 }, {  -3,   1 }, {   3,   1 }, {  -1,   3 }, {   1,   3 }, {  -2,  -3 }, {   2,  -3 }, {  -3,  -2 },
  81     {   3,  -2 }, {  -3,   2 }, {   3,   2 }, {  -2,   3 }, {   2,   3 }, {   0,  -4 }, {  -4,   0 }, {   4,   0 },
  82     {   0,   4 }, {  -1,  -4 }, {   1,  -4 }, {  -4,  -1 }, {   4,  -1 }, {   4,   1 }, {  -1,   4 }, {   1,   4 },
  83     {  -3,  -3 }, {  -3,   3 }, {   3,   3 }, {  -2,  -4 }, {  -4,  -2 }, {   4,  -2 }, {  -4,   2 }, {  -2,   4 },
  84     {   2,   4 }, {  -3,  -4 }, {   3,  -4 }, {   4,  -3 }, {  -5,   0 }, {  -4,   3 }, {  -3,   4 }, {   3,   4 },
  85     {  -1,  -5 }, {  -5,  -1 }, {  -5,   1 }, {  -1,   5 }, {  -2,  -5 }, {   2,  -5 }, {   5,  -2 }, {   5,   2 },
  86     {  -4,  -4 }, {  -4,   4 }, {  -3,  -5 }, {  -5,  -3 }, {  -5,   3 }, {   3,   5 }, {  -6,   0 }, {   0,   6 },
  87     {  -6,  -1 }, {  -6,   1 }, {   1,   6 }, {   2,  -6 }, {  -6,   2 }, {   2,   6 }, {  -5,  -4 }, {   5,   4 },
  88     {   4,   5 }, {  -6,  -3 }, {   6,   3 }, {  -7,   0 }, {  -1,  -7 }, {   5,  -5 }, {  -7,   1 }, {  -1,   7 },
  89     {   4,  -6 }, {   6,   4 }, {  -2,  -7 }, {  -7,   2 }, {  -3,  -7 }, {   7,  -3 }, {   3,   7 }, {   6,  -5 },
  90     {   0,  -8 }, {  -1,  -8 }, {  -7,  -4 }, {  -8,   1 }, {   4,   7 }, {   2,  -8 }, {  -2,   8 }, {   6,   6 },
  91     {  -8,   3 }, {   5,  -7 }, {  -5,   7 }, {   8,  -4 }, {   0,  -9 }, {  -9,  -1 }, {   1,   9 }, {   7,  -6 },
  92     {  -7,   6 }, {  -5,  -8 }, {  -5,   8 }, {  -9,   3 }, {   9,  -4 }, {   7,  -7 }, {   8,  -6 }, {   6,   8 },
  93     {  10,   1 }, { -10,   2 }, {   9,  -5 }, {  10,  -3 }, {  -8,  -7 }, { -10,  -4 }, {   6,  -9 }, { -11,   0 },
  94     {  11,   1 }, { -11,  -2 }, {  -2,  11 }, {   7,  -9 }, {  -7,   9 }, {  10,   6 }, {  -4,  11 }, {   8,  -9 },
  95     {   8,   9 }, {   5,  11 }, {   7, -10 }, {  12,  -3 }, {  11,   6 }, {  -9,  -9 }, {   8,  10 }, {   5,  12 },
  96     { -11,   7 }, {  13,   2 }, {   6, -12 }, {  10,   9 }, { -11,   8 }, {  -7,  12 }, {   0,  14 }, {  14,  -2 },
  97     {  -9,  11 }, {  -6,  13 }, { -14,  -4 }, {  -5, -14 }, {   5,  14 }, { -15,  -1 }, { -14,  -6 }, {   3, -15 },
  98     {  11, -11 }, {  -7,  14 }, {  -5,  15 }, {   8, -14 }, {  15,   6 }, {   3,  16 }, {   7, -15 }, { -16,   5 },
  99     {   0,  17 }, { -16,  -6 }, { -10,  14 }, { -16,   7 }, {  12,  13 }, { -16,   8 }, { -17,   6 }, { -18,   3 },
 100     {  -7,  17 }, {  15,  11 }, {  16,  10 }, {   2, -19 }, {   3, -19 }, { -11, -16 }, { -18,   8 }, { -19,  -6 },
 101     {   2, -20 }, { -17, -11 }, { -10, -18 }, {   8,  19 }, { -21,  -1 }, { -20,   7 }, {  -4,  21 }, {  21,   5 },
 102     {  15,  16 }, {   2, -22 }, { -10, -20 }, { -22,   5 }, {  20, -11 }, {  -7, -22 }, { -12,  20 }, {  23,  -5 },
 103     {  13, -20 }, {  24,  -2 }, { -15,  19 }, { -11,  22 }, {  16,  19 }, {  23, -10 }, { -18, -18 }, {  -9, -24 },
 104     {  24, -10 }, {  -3,  26 }, { -23,  13 }, { -18, -20 }, {  17,  21 }, {  -4,  27 }, {  27,   6 }, {   1, -28 },
 105     { -11,  26 }, { -17, -23 }, {   7,  28 }, {  11, -27 }, {  29,   5 }, { -23, -19 }, { -28, -11 }, { -21,  22 },
 106     { -30,   7 }, { -17,  26 }, { -27,  16 }, {  13,  29 }, {  19, -26 }, {  10, -31 }, { -14, -30 }, {  20, -27 },
 107     { -29,  18 }, { -16, -31 }, { -28, -22 }, {  21, -30 }, { -25,  28 }, {  26, -29 }, {  25, -32 }, { -32, -32 }
 108 };
 109
 110 /* This is simply the scaled down elementwise product of the standard JPEG
 111  * quantizer table and the AAN premul table. */
 /* decode_i_block() multiplies the DC value by entry 0 and every AC level by
  * entry j, where j = ff_zigzag_direct[i] is the coefficient's position
  * after zigzag reordering, so the table is laid out in that (de-zigzagged)
  * order, 8 entries per row. */
 112 static const uint8_t dequant_table[64] = {
 113     16, 15, 13, 19, 24, 31, 28, 17,
 114     17, 23, 25, 31, 36, 63, 45, 21,
 115     18, 24, 27, 37, 52, 59, 49, 20,
 116     16, 28, 34, 40, 60, 80, 51, 20,
 117     18, 31, 48, 66, 68, 86, 56, 21,
 118     19, 38, 56, 59, 64, 64, 48, 20,
 119     27, 48, 55, 55, 56, 51, 35, 15,
 120     20, 35, 34, 32, 31, 22, 15,  8,
 121 };
 122
 /* Block-type VLCs for P-frames, built once by init_vlcs() from
  * block_type_tab. decode_p_block() selects one with
  * [1 - (version > 1)][size2index[log2h][log2w]]. */
 123 static VLC block_type_vlc[2][4];
 124
 125
 /* Accumulator for one "cfrm" frame that arrives split across several
  * packets; decode_frame() appends each packet's payload to it. */
 126 typedef struct CFrameBuffer {
 127     unsigned int allocated_size;   ///< capacity of data, maintained by av_fast_realloc()
 128     unsigned int size;             ///< number of payload bytes collected so far (0 = slot free)
 129     int id;                        ///< frame id taken from the packet header (0 = unused)
 130     uint8_t *data;                 ///< collected payload bytes
 131 } CFrameBuffer;
 132
 /* Private decoder state shared by all frame types. */
 133 typedef struct FourXContext {
 134     AVCodecContext *avctx;         ///< owning codec context (dimensions, flags, logging)
 135     BlockDSPContext bdsp;          ///< provides clear_blocks() for the coefficient blocks
 136     BswapDSPContext bbdsp;         ///< provides bswap_buf() for the 32-bit swapped bitstreams
 137     uint16_t *frame_buffer;        ///< 16-bit pixels of the frame being decoded
 138     uint16_t *last_frame_buffer;   ///< previous frame, reference for P-block copies
 139     GetBitContext pre_gb;          ///< ac/dc prefix
 140     GetBitContext gb;              ///< main bitstream (block types / coefficient bits)
 141     GetByteContext g;              ///< P-frame bytestream: motion vector indices
 142     GetByteContext g2;             ///< P-frame wordstream: 16-bit little-endian values
 143     int mv[256];                   ///< per-index pixel offsets filled in by init_mv()
 144     VLC pre_vlc;                   ///< Huffman table built by read_huffman_tables()
 145     int last_dc;                   ///< DC predictor carried between blocks of an I-frame
 146     DECLARE_ALIGNED(16, int16_t, block)[6][64]; ///< coefficients of one macroblock (4 luma + 2 chroma)
 147     void *bitstream_buffer;        ///< scratch buffer holding the byte-swapped bitstream
 148     unsigned int bitstream_buffer_size; ///< allocated size of bitstream_buffer
 149     int version;                   ///< container/codec version; selects tables and header layout
 150     CFrameBuffer cfrm[CFRAME_BUFFER_COUNT]; ///< reassembly slots for split "cfrm" frames
 151 } FourXContext;
 152
 153
 /* Rotation constants of the inverse DCT in 16.16 fixed point
  * (the name gives the real value, the number is that value * 65536). */
 #define FIX_1_082392200  70936
 #define FIX_1_414213562  92682
 #define FIX_1_847759065 121095
 #define FIX_2_613125930 171254

 /*
  * 16.16 fixed-point multiply.
  * The product is formed in unsigned arithmetic so that coefficients from a
  * damaged or hostile stream wrap around instead of triggering signed
  * overflow, which is undefined behavior. For every product that fits in an
  * int the result is identical to a plain signed multiply.
  */
 #define MULTIPLY(var, const) (((int)((var) * (unsigned)(const))) >> 16)

 /**
  * In-place 8x8 inverse DCT.
  *
  * The first pass transforms the columns of block into a temporary int
  * buffer, the second pass transforms the rows and writes the result back
  * to block, scaled down by 64 (>> 6).
  *
  * @param block 64 coefficients in row-major order; replaced by the output
  */
 static void idct(int16_t block[64])
 {
     int tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
     int tmp10, tmp11, tmp12, tmp13;
     int z5, z10, z11, z12, z13;
     int i;
     int temp[64];

     /* pass 1: columns, block -> temp */
     for (i = 0; i < 8; i++) {
         /* even part */
         tmp10 = block[8 * 0 + i] + block[8 * 4 + i];
         tmp11 = block[8 * 0 + i] - block[8 * 4 + i];

         tmp13 = block[8 * 2 + i] + block[8 * 6 + i];
         tmp12 = MULTIPLY(block[8 * 2 + i] - block[8 * 6 + i], FIX_1_414213562) - tmp13;

         tmp0 = tmp10 + tmp13;
         tmp3 = tmp10 - tmp13;
         tmp1 = tmp11 + tmp12;
         tmp2 = tmp11 - tmp12;

         /* odd part (tmp10..tmp12 are reused from here on) */
         z13 = block[8 * 5 + i] + block[8 * 3 + i];
         z10 = block[8 * 5 + i] - block[8 * 3 + i];
         z11 = block[8 * 1 + i] + block[8 * 7 + i];
         z12 = block[8 * 1 + i] - block[8 * 7 + i];

         tmp7  =          z11 + z13;
         tmp11 = MULTIPLY(z11 - z13, FIX_1_414213562);

         z5    = MULTIPLY(z10 + z12, FIX_1_847759065);
         tmp10 = MULTIPLY(z12,  FIX_1_082392200) - z5;
         tmp12 = MULTIPLY(z10, -FIX_2_613125930) + z5;

         tmp6 = tmp12 - tmp7;
         tmp5 = tmp11 - tmp6;
         tmp4 = tmp10 + tmp5;

         temp[8 * 0 + i] = tmp0 + tmp7;
         temp[8 * 7 + i] = tmp0 - tmp7;
         temp[8 * 1 + i] = tmp1 + tmp6;
         temp[8 * 6 + i] = tmp1 - tmp6;
         temp[8 * 2 + i] = tmp2 + tmp5;
         temp[8 * 5 + i] = tmp2 - tmp5;
         temp[8 * 4 + i] = tmp3 + tmp4;
         temp[8 * 3 + i] = tmp3 - tmp4;
     }

     /* pass 2: rows, temp -> block, with the final >> 6 scaling */
     for (i = 0; i < 8 * 8; i += 8) {
         /* even part */
         tmp10 = temp[0 + i] + temp[4 + i];
         tmp11 = temp[0 + i] - temp[4 + i];

         tmp13 = temp[2 + i] + temp[6 + i];
         tmp12 = MULTIPLY(temp[2 + i] - temp[6 + i], FIX_1_414213562) - tmp13;

         tmp0 = tmp10 + tmp13;
         tmp3 = tmp10 - tmp13;
         tmp1 = tmp11 + tmp12;
         tmp2 = tmp11 - tmp12;

         /* odd part */
         z13 = temp[5 + i] + temp[3 + i];
         z10 = temp[5 + i] - temp[3 + i];
         z11 = temp[1 + i] + temp[7 + i];
         z12 = temp[1 + i] - temp[7 + i];

         tmp7  = z11 + z13;
         tmp11 = MULTIPLY(z11 - z13, FIX_1_414213562);

         z5    = MULTIPLY(z10 + z12, FIX_1_847759065);
         tmp10 = MULTIPLY(z12,  FIX_1_082392200) - z5;
         tmp12 = MULTIPLY(z10, -FIX_2_613125930) + z5;

         tmp6 = tmp12 - tmp7;
         tmp5 = tmp11 - tmp6;
         tmp4 = tmp10 + tmp5;

         block[0 + i] = (tmp0 + tmp7) >> 6;
         block[7 + i] = (tmp0 - tmp7) >> 6;
         block[1 + i] = (tmp1 + tmp6) >> 6;
         block[6 + i] = (tmp1 - tmp6) >> 6;
         block[2 + i] = (tmp2 + tmp5) >> 6;
         block[5 + i] = (tmp2 - tmp5) >> 6;
         block[4 + i] = (tmp3 + tmp4) >> 6;
         block[3 + i] = (tmp3 - tmp4) >> 6;
     }
 }
 245
 246 static av_cold void init_vlcs(FourXContext *f)
 247 {
 248     static VLC_TYPE table[2][4][32][2];
 249     int i, j;
 250
 251     for (i = 0; i < 2; i++) {
 252         for (j = 0; j < 4; j++) {
 253             block_type_vlc[i][j].table           = table[i][j];
 254             block_type_vlc[i][j].table_allocated = 32;
 255             init_vlc(&block_type_vlc[i][j], BLOCK_TYPE_VLC_BITS, 7,
 256                      &block_type_tab[i][j][0][1], 2, 1,
 257                      &block_type_tab[i][j][0][0], 2, 1,
 258                      INIT_VLC_USE_NEW_STATIC);
 259         }
 260     }
 261 }
 262
 263 static void init_mv(FourXContext *f, int linesize)
 264 {
 265     int i;
 266
 267     for (i = 0; i < 256; i++) {
 268         if (f->version > 1)
 269             f->mv[i] = mv[i][0] + mv[i][1] * linesize / 2;
 270         else
 271             f->mv[i] = (i & 15) - 8 + ((i >> 4) - 8) * linesize / 2;
 272     }
 273 }
 274
 /*
  * LE_CENTRIC_MUL(dst, src, scale, dc): read 32 bits (two 16-bit pixels)
  * from src, compute value * scale + dc and store the result to dst.
  * The arithmetic is done on the value as a little-endian host would see it,
  * so on big-endian hosts the two 16-bit halves are swapped before and
  * after the computation.
  */
 #if HAVE_BIGENDIAN
 #define LE_CENTRIC_MUL(dst, src, scale, dc)                             \
     do {                                                                \
         unsigned le_pair_ = AV_RN32(src);                               \
         le_pair_ = (le_pair_ << 16) | (le_pair_ >> 16);                 \
         le_pair_ = le_pair_ * (scale) + (dc);                           \
         le_pair_ = (le_pair_ << 16) | (le_pair_ >> 16);                 \
         AV_WN32A(dst, le_pair_);                                        \
     } while (0)
 #else
 #define LE_CENTRIC_MUL(dst, src, scale, dc)                             \
     do {                                                                \
         unsigned le_pair_ = AV_RN32(src) * (scale) + (dc);              \
         AV_WN32A(dst, le_pair_);                                        \
     } while (0)
 #endif
 291
 /**
  * Motion compensation with optional DC offset for one block.
  *
  * Every output pixel is scale * reference + dc. Blocks wider than one
  * pixel are processed two pixels (32 bits) at a time, for which dc is
  * replicated into both 16-bit halves.
  *
  * @param dst    top-left destination pixel
  * @param src    top-left reference pixel
  * @param log2w  log2 of the block width (0..3; other values do nothing)
  * @param h      block height in rows
  * @param stride distance between rows, in pixels
  * @param scale  multiplier for the reference; with 0 the reference pointer
  *               is not advanced from row to row
  * @param dc     value added to every pixel
  */
 static inline void mcdc(uint16_t *dst, uint16_t *src, int log2w,
                         int h, int stride, int scale, unsigned dc)
 {
     const int src_step = scale ? stride : 0;
     int row, k, pairs;

     dc *= 0x10001;

     if (log2w < 0 || log2w > 3)
         return;

     if (log2w == 0) {
         /* single-pixel column */
         for (row = 0; row < h; row++) {
             dst[0] = scale * src[0] + dc;
             src   += src_step;
             dst   += stride;
         }
         return;
     }

     /* 2, 4 or 8 pixels wide: 1, 2 or 4 pixel pairs per row */
     pairs = 1 << (log2w - 1);
     for (row = 0; row < h; row++) {
         for (k = 0; k < pairs; k++) {
             LE_CENTRIC_MUL(dst + 2 * k, src + 2 * k, scale, dc);
         }
         src += src_step;
         dst += stride;
     }
 }
 339
 340 static int decode_p_block(FourXContext *f, uint16_t *dst, uint16_t *src,
 341                           int log2w, int log2h, int stride)
 342 {
 343     const int index = size2index[log2h][log2w];
 344     const int h     = 1 << log2h;
 345     int code        = get_vlc2(&f->gb,
 346                                block_type_vlc[1 - (f->version > 1)][index].table,
 347                                BLOCK_TYPE_VLC_BITS, 1);
 348     uint16_t *start = f->last_frame_buffer;
 349     uint16_t *end   = start + stride * (f->avctx->height - h + 1) - (1 << log2w);
 350     int ret;
 351     int scale   = 1;
 352     unsigned dc = 0;
 353
 354     if (code < 0 || code > 6 || log2w < 0)
 355         return AVERROR_INVALIDDATA;
 356
 357     if (code == 1) {
 358         log2h--;
 359         if ((ret = decode_p_block(f, dst, src, log2w, log2h, stride)) < 0)
 360             return ret;
 361         return decode_p_block(f, dst + (stride << log2h),
 362                               src + (stride << log2h),
 363                               log2w, log2h, stride);
 364     } else if (code == 2) {
 365         log2w--;
 366         if ((ret = decode_p_block(f, dst , src, log2w, log2h, stride)) < 0)
 367             return ret;
 368         return decode_p_block(f, dst + (1 << log2w),
 369                               src + (1 << log2w),
 370                               log2w, log2h, stride);
 371     } else if (code == 6) {
 372         if (log2w) {
 373             dst[0]      = bytestream2_get_le16(&f->g2);
 374             dst[1]      = bytestream2_get_le16(&f->g2);
 375         } else {
 376             dst[0]      = bytestream2_get_le16(&f->g2);
 377             dst[stride] = bytestream2_get_le16(&f->g2);
 378         }
 379         return 0;
 380     }
 381
 382     if (code == 0) {
 383         src  += f->mv[bytestream2_get_byte(&f->g)];
 384     } else if (code == 3 && f->version >= 2) {
 385         return 0;
 386     } else if (code == 4) {
 387         src  += f->mv[bytestream2_get_byte(&f->g)];
 388         dc    = bytestream2_get_le16(&f->g2);
 389     } else if (code == 5) {
 390         scale = 0;
 391         dc    = bytestream2_get_le16(&f->g2);
 392     }
 393
 394     if (start > src || src > end) {
 395         av_log(f->avctx, AV_LOG_ERROR, "mv out of pic\n");
 396         return AVERROR_INVALIDDATA;
 397     }
 398
 399     mcdc(dst, src, log2w, h, stride, scale, dc);
 400
 401     return 0;
 402 }
 403
 404 static int decode_p_frame(FourXContext *f, const uint8_t *buf, int length)
 405 {
 406     int x, y;
 407     const int width  = f->avctx->width;
 408     const int height = f->avctx->height;
 409     uint16_t *dst    = f->frame_buffer;
 410     uint16_t *src;
 411     unsigned int bitstream_size, bytestream_size, wordstream_size, extra,
 412                  bytestream_offset, wordstream_offset;
 413     int ret;
 414
 415     src = f->last_frame_buffer;
 416
 417     if (f->version > 1) {
 418         if (length < 20)
 419             return AVERROR_INVALIDDATA;
 420         extra           = 20;
 421         bitstream_size  = AV_RL32(buf + 8);
 422         wordstream_size = AV_RL32(buf + 12);
 423         bytestream_size = AV_RL32(buf + 16);
 424     } else {
 425         extra           = 0;
 426         bitstream_size  = AV_RL16(buf - 4);
 427         wordstream_size = AV_RL16(buf - 2);
 428         bytestream_size = FFMAX(length - bitstream_size - wordstream_size, 0);
 429     }
 430
 431     if (bitstream_size + bytestream_size + wordstream_size + extra != length
 432         || bitstream_size  > (1 << 26)
 433         || bytestream_size > (1 << 26)
 434         || wordstream_size > (1 << 26)) {
 435         av_log(f->avctx, AV_LOG_ERROR, "lengths %d %d %d %d\n",
 436                bitstream_size, bytestream_size, wordstream_size,
 437                bitstream_size + bytestream_size + wordstream_size - length);
 438         return AVERROR_INVALIDDATA;
 439     }
 440
 441     av_fast_malloc(&f->bitstream_buffer, &f->bitstream_buffer_size,
 442                    bitstream_size + FF_INPUT_BUFFER_PADDING_SIZE);
 443     if (!f->bitstream_buffer)
 444         return AVERROR(ENOMEM);
 445     f->bbdsp.bswap_buf(f->bitstream_buffer, (const uint32_t *) (buf + extra),
 446                        bitstream_size / 4);
 447     memset((uint8_t*)f->bitstream_buffer + bitstream_size,
 448            0, FF_INPUT_BUFFER_PADDING_SIZE);
 449     init_get_bits(&f->gb, f->bitstream_buffer, 8 * bitstream_size);
 450
 451     wordstream_offset = extra + bitstream_size;
 452     bytestream_offset = extra + bitstream_size + wordstream_size;
 453     bytestream2_init(&f->g2, buf + wordstream_offset,
 454                      length - wordstream_offset);
 455     bytestream2_init(&f->g, buf + bytestream_offset,
 456                      length - bytestream_offset);
 457
 458     init_mv(f, width * 2);
 459
 460     for (y = 0; y < height; y += 8) {
 461         for (x = 0; x < width; x += 8)
 462             if ((ret = decode_p_block(f, dst + x, src + x, 3, 3, width)) < 0)
 463                 return ret;
 464         src += 8 * width;
 465         dst += 8 * width;
 466     }
 467
 468     return 0;
 469 }
 470
 471 /**
 472  * decode block and dequantize.
 473  * Note this is almost identical to MJPEG.
 474  */
 475 static int decode_i_block(FourXContext *f, int16_t *block)
 476 {
 477     int code, i, j, level, val;
 478
 479     /* DC coef */
 480     val = get_vlc2(&f->pre_gb, f->pre_vlc.table, ACDC_VLC_BITS, 3);
 481     if (val >> 4)
 482         av_log(f->avctx, AV_LOG_ERROR, "error dc run != 0\n");
 483
 484     if (val)
 485         val = get_xbits(&f->gb, val);
 486
 487     val        = val * dequant_table[0] + f->last_dc;
 488     f->last_dc = block[0] = val;
 489     /* AC coefs */
 490     i = 1;
 491     for (;;) {
 492         code = get_vlc2(&f->pre_gb, f->pre_vlc.table, ACDC_VLC_BITS, 3);
 493
 494         /* EOB */
 495         if (code == 0)
 496             break;
 497         if (code == 0xf0) {
 498             i += 16;
 499         } else {
 500             level = get_xbits(&f->gb, code & 0xf);
 501             i    += code >> 4;
 502             if (i >= 64) {
 503                 av_log(f->avctx, AV_LOG_ERROR, "run %d oveflow\n", i);
 504                 return 0;
 505             }
 506
 507             j = ff_zigzag_direct[i];
 508             block[j] = level * dequant_table[j];
 509             i++;
 510             if (i >= 64)
 511                 break;
 512         }
 513     }
 514
 515     return 0;
 516 }
 517
 518 static inline void idct_put(FourXContext *f, int x, int y)
 519 {
 520     int16_t (*block)[64] = f->block;
 521     int stride           = f->avctx->width;
 522     int i;
 523     uint16_t *dst = f->frame_buffer + y * stride + x;
 524
 525     for (i = 0; i < 4; i++) {
 526         block[i][0] += 0x80 * 8 * 8;
 527         idct(block[i]);
 528     }
 529
 530     if (!(f->avctx->flags & CODEC_FLAG_GRAY)) {
 531         for (i = 4; i < 6; i++)
 532             idct(block[i]);
 533     }
 534
 535     /* Note transform is:
 536      * y  = ( 1b + 4g + 2r) / 14
 537      * cb = ( 3b - 2g - 1r) / 14
 538      * cr = (-1b - 4g + 5r) / 14 */
 539     for (y = 0; y < 8; y++) {
 540         for (x = 0; x < 8; x++) {
 541             int16_t *temp = block[(x >> 2) + 2 * (y >> 2)] +
 542                             2 * (x & 3) + 2 * 8 * (y & 3); // FIXME optimize
 543             int cb = block[4][x + 8 * y];
 544             int cr = block[5][x + 8 * y];
 545             int cg = (cb + cr) >> 1;
 546             int y;
 547
 548             cb += cb;
 549
 550             y               = temp[0];
 551             dst[0]          = ((y + cb) >> 3) + (((y - cg) & 0xFC) << 3) + (((y + cr) & 0xF8) << 8);
 552             y               = temp[1];
 553             dst[1]          = ((y + cb) >> 3) + (((y - cg) & 0xFC) << 3) + (((y + cr) & 0xF8) << 8);
 554             y               = temp[8];
 555             dst[stride]     = ((y + cb) >> 3) + (((y - cg) & 0xFC) << 3) + (((y + cr) & 0xF8) << 8);
 556             y               = temp[9];
 557             dst[1 + stride] = ((y + cb) >> 3) + (((y - cg) & 0xFC) << 3) + (((y + cr) & 0xF8) << 8);
 558             dst            += 2;
 559         }
 560         dst += 2 * stride - 2 * 8;
 561     }
 562 }
 563
 564 static int decode_i_mb(FourXContext *f)
 565 {
 566     int ret;
 567     int i;
 568
 569     f->bdsp.clear_blocks(f->block[0]);
 570
 571     for (i = 0; i < 6; i++)
 572         if ((ret = decode_i_block(f, f->block[i])) < 0)
 573             return ret;
 574
 575     return 0;
 576 }
 577
 578 static const uint8_t *read_huffman_tables(FourXContext *f,
 579                                           const uint8_t * const buf,
 580                                           int len)
 581 {
 582     int frequency[512] = { 0 };
 583     uint8_t flag[512];
 584     int up[512];
 585     uint8_t len_tab[257];
 586     int bits_tab[257];
 587     int start, end;
 588     const uint8_t *ptr = buf;
 589     int j;
 590
 591     memset(up, -1, sizeof(up));
 592
 593     start = *ptr++;
 594     end   = *ptr++;
 595     for (;;) {
 596         int i;
 597
 598         len -= end - start + 1;
 599
 600         if (end < start || len < 0)
 601             return NULL;
 602
 603         for (i = start; i <= end; i++)
 604             frequency[i] = *ptr++;
 605         start = *ptr++;
 606         if (start == 0)
 607             break;
 608
 609         if (--len < 0)
 610             return NULL;
 611
 612         end = *ptr++;
 613     }
 614     frequency[256] = 1;
 615
 616     while ((ptr - buf) & 3)
 617         ptr++; // 4byte align
 618
 619     for (j = 257; j < 512; j++) {
 620         int min_freq[2] = { 256 * 256, 256 * 256 };
 621         int smallest[2] = { 0, 0 };
 622         int i;
 623         for (i = 0; i < j; i++) {
 624             if (frequency[i] == 0)
 625                 continue;
 626             if (frequency[i] < min_freq[1]) {
 627                 if (frequency[i] < min_freq[0]) {
 628                     min_freq[1] = min_freq[0];
 629                     smallest[1] = smallest[0];
 630                     min_freq[0] = frequency[i];
 631                     smallest[0] = i;
 632                 } else {
 633                     min_freq[1] = frequency[i];
 634                     smallest[1] = i;
 635                 }
 636             }
 637         }
 638         if (min_freq[1] == 256 * 256)
 639             break;
 640
 641         frequency[j]           = min_freq[0] + min_freq[1];
 642         flag[smallest[0]]      = 0;
 643         flag[smallest[1]]      = 1;
 644         up[smallest[0]]        =
 645         up[smallest[1]]        = j;
 646         frequency[smallest[0]] = frequency[smallest[1]] = 0;
 647     }
 648
 649     for (j = 0; j < 257; j++) {
 650         int node, len = 0, bits = 0;
 651
 652         for (node = j; up[node] != -1; node = up[node]) {
 653             bits += flag[node] << len;
 654             len++;
 655             if (len > 31)
 656                 // can this happen at all ?
 657                 av_log(f->avctx, AV_LOG_ERROR,
 658                        "vlc length overflow\n");
 659         }
 660
 661         bits_tab[j] = bits;
 662         len_tab[j]  = len;
 663     }
 664
 665     if (init_vlc(&f->pre_vlc, ACDC_VLC_BITS, 257, len_tab, 1, 1,
 666                  bits_tab, 4, 4, 0))
 667         return NULL;
 668
 669     return ptr;
 670 }
 671
 /**
  * Blend two 15-bit colours with weights 2:1 in favour of c0.
  *
  * Each of the three fields (bits 0-4, bits 5-9, bits 10 and up) is
  * combined as (2 * c0_field + c1_field) / 3 and the results are packed
  * back into the same layout.
  *
  * @param c0 colour with weight 2
  * @param c1 colour with weight 1
  * @return the blended colour
  */
 static int mix(int c0, int c1)
 {
     const int blue_sum  = 2 * (c0 & 0x001F) + (c1 & 0x001F);
     const int green_sum = (2 * (c0 & 0x03E0) + (c1 & 0x03E0)) >> 5;
     const int red_sum   = 2 * (c0 >> 10) + (c1 >> 10);

     const int blue  = blue_sum  / 3;
     const int green = green_sum / 3;
     const int red   = red_sum   / 3;

     return red * 1024 + green * 32 + blue;
 }
 679
 680 static int decode_i2_frame(FourXContext *f, const uint8_t *buf, int length)
 681 {
 682     int x, y, x2, y2;
 683     const int width  = f->avctx->width;
 684     const int height = f->avctx->height;
 685     const int mbs    = (FFALIGN(width, 16) >> 4) * (FFALIGN(height, 16) >> 4);
 686     uint16_t *dst    = f->frame_buffer;
 687     GetByteContext g3;
 688
 689     if (length < mbs * 8) {
 690         av_log(f->avctx, AV_LOG_ERROR, "packet size too small\n");
 691         return AVERROR_INVALIDDATA;
 692     }
 693     bytestream2_init(&g3, buf, length);
 694
 695     for (y = 0; y < height; y += 16) {
 696         for (x = 0; x < width; x += 16) {
 697             unsigned int color[4] = { 0 }, bits;
 698             // warning following is purely guessed ...
 699             color[0] = bytestream2_get_le16u(&g3);
 700             color[1] = bytestream2_get_le16u(&g3);
 701
 702             if (color[0] & 0x8000)
 703                 av_log(f->avctx, AV_LOG_ERROR, "unk bit 1\n");
 704             if (color[1] & 0x8000)
 705                 av_log(f->avctx, AV_LOG_ERROR, "unk bit 2\n");
 706
 707             color[2] = mix(color[0], color[1]);
 708             color[3] = mix(color[1], color[0]);
 709
 710             bits = bytestream2_get_le32u(&g3);
 711             for (y2 = 0; y2 < 16; y2++) {
 712                 for (x2 = 0; x2 < 16; x2++) {
 713                     int index = 2 * (x2 >> 2) + 8 * (y2 >> 2);
 714                     dst[y2 * width + x2] = color[(bits >> index) & 3];
 715                 }
 716             }
 717             dst += 16;
 718         }
 719         dst += 16 * width - x;
 720     }
 721
 722     return 0;
 723 }
 724
 725 static int decode_i_frame(FourXContext *f, const uint8_t *buf, int length)
 726 {
 727     int x, y, ret;
 728     const int width  = f->avctx->width;
 729     const int height = f->avctx->height;
 730     const unsigned int bitstream_size = AV_RL32(buf);
 731     int token_count av_unused;
 732     unsigned int prestream_size;
 733     const uint8_t *prestream;
 734
 735     if (bitstream_size > (1 << 26))
 736         return AVERROR_INVALIDDATA;
 737
 738     if (length < bitstream_size + 12) {
 739         av_log(f->avctx, AV_LOG_ERROR, "packet size too small\n");
 740         return AVERROR_INVALIDDATA;
 741     }
 742
 743     token_count    =     AV_RL32(buf + bitstream_size + 8);
 744     prestream_size = 4 * AV_RL32(buf + bitstream_size + 4);
 745     prestream      =             buf + bitstream_size + 12;
 746
 747     if (prestream_size + bitstream_size + 12 != length
 748         || prestream_size > (1 << 26)) {
 749         av_log(f->avctx, AV_LOG_ERROR, "size mismatch %d %d %d\n",
 750                prestream_size, bitstream_size, length);
 751         return AVERROR_INVALIDDATA;
 752     }
 753
 754     prestream = read_huffman_tables(f, prestream, prestream_size);
 755     if (!prestream) {
 756         av_log(f->avctx, AV_LOG_ERROR, "Error reading Huffman tables.\n");
 757         return AVERROR_INVALIDDATA;
 758     }
 759
 760     init_get_bits(&f->gb, buf + 4, 8 * bitstream_size);
 761
 762     prestream_size = length + buf - prestream;
 763
 764     av_fast_malloc(&f->bitstream_buffer, &f->bitstream_buffer_size,
 765                    prestream_size + FF_INPUT_BUFFER_PADDING_SIZE);
 766     if (!f->bitstream_buffer)
 767         return AVERROR(ENOMEM);
 768     f->bbdsp.bswap_buf(f->bitstream_buffer, (const uint32_t *) prestream,
 769                        prestream_size / 4);
 770     memset((uint8_t*)f->bitstream_buffer + prestream_size,
 771            0, FF_INPUT_BUFFER_PADDING_SIZE);
 772     init_get_bits(&f->pre_gb, f->bitstream_buffer, 8 * prestream_size);
 773
 774     f->last_dc = 0 * 128 * 8 * 8;
 775
 776     for (y = 0; y < height; y += 16) {
 777         for (x = 0; x < width; x += 16) {
 778             if ((ret = decode_i_mb(f)) < 0)
 779                 return ret;
 780
 781             idct_put(f, x, y);
 782         }
 783     }
 784
 785     if (get_vlc2(&f->pre_gb, f->pre_vlc.table, ACDC_VLC_BITS, 3) != 256)
 786         av_log(f->avctx, AV_LOG_ERROR, "end mismatch\n");
 787
 788     return 0;
 789 }
 790
 791 static int decode_frame(AVCodecContext *avctx, void *data,
 792                         int *got_frame, AVPacket *avpkt)
 793 {
 794     const uint8_t *buf    = avpkt->data;
 795     int buf_size          = avpkt->size;
 796     FourXContext *const f = avctx->priv_data;
 797     AVFrame *picture      = data;
 798     int i, frame_4cc, frame_size, ret;
 799
 800     if (buf_size < 20)
 801         return AVERROR_INVALIDDATA;
 802
 803     if (avctx->width % 16 || avctx->height % 16) {
 804         av_log(avctx, AV_LOG_ERROR,
 805                "Dimensions non-multiple of 16 are invalid.\n");
 806         return AVERROR_INVALIDDATA;
 807     }
 808
 809     if (buf_size < AV_RL32(buf + 4) + 8) {
 810         av_log(f->avctx, AV_LOG_ERROR, "size mismatch %d %"PRIu32"\n",
 811                buf_size, AV_RL32(buf + 4));
 812         return AVERROR_INVALIDDATA;
 813     }
 814
 815     frame_4cc = AV_RL32(buf);
 816
 817     if (frame_4cc == AV_RL32("cfrm")) {
 818         int free_index       = -1;
 819         int id, whole_size;
 820         const int data_size  = buf_size - 20;
 821         CFrameBuffer *cfrm;
 822
 823         if (data_size < 0)
 824             return AVERROR_INVALIDDATA;
 825
 826         id         = AV_RL32(buf + 12);
 827         whole_size = AV_RL32(buf + 16);
 828
 829         for (i = 0; i < CFRAME_BUFFER_COUNT; i++)
 830             if (f->cfrm[i].id && f->cfrm[i].id < avctx->frame_number)
 831                 av_log(f->avctx, AV_LOG_ERROR, "lost c frame %d\n",
 832                        f->cfrm[i].id);
 833
 834         for (i = 0; i < CFRAME_BUFFER_COUNT; i++) {
 835             if (f->cfrm[i].id == id)
 836                 break;
 837             if (f->cfrm[i].size == 0)
 838                 free_index = i;
 839         }
 840
 841         if (i >= CFRAME_BUFFER_COUNT) {
 842             i             = free_index;
 843             f->cfrm[i].id = id;
 844         }
 845         cfrm = &f->cfrm[i];
 846
 847         cfrm->data = av_fast_realloc(cfrm->data, &cfrm->allocated_size,
 848                                      cfrm->size + data_size + FF_INPUT_BUFFER_PADDING_SIZE);
 849         // explicit check needed as memcpy below might not catch a NULL
 850         if (!cfrm->data) {
 851             av_log(f->avctx, AV_LOG_ERROR, "realloc failure");
 852             return AVERROR(ENOMEM);
 853         }
 854
 855         memcpy(cfrm->data + cfrm->size, buf + 20, data_size);
 856         cfrm->size += data_size;
 857
 858         if (cfrm->size >= whole_size) {
 859             buf        = cfrm->data;
 860             frame_size = cfrm->size;
 861
 862             if (id != avctx->frame_number)
 863                 av_log(f->avctx, AV_LOG_ERROR, "cframe id mismatch %d %d\n",
 864                        id, avctx->frame_number);
 865
 866             if (f->version <= 1)
 867                 return AVERROR_INVALIDDATA;
 868
 869             cfrm->size = cfrm->id = 0;
 870             frame_4cc  = AV_RL32("pfrm");
 871         } else
 872             return buf_size;
 873     } else {
 874         buf        = buf      + 12;
 875         frame_size = buf_size - 12;
 876     }
 877
 878
 879     if ((ret = ff_get_buffer(avctx, picture, 0)) < 0) {
 880         av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
 881         return ret;
 882     }
 883
 884     if (frame_4cc == AV_RL32("ifr2")) {
 885         picture->pict_type = AV_PICTURE_TYPE_I;
 886         if ((ret = decode_i2_frame(f, buf - 4, frame_size + 4)) < 0)
 887             return ret;
 888     } else if (frame_4cc == AV_RL32("ifrm")) {
 889         picture->pict_type = AV_PICTURE_TYPE_I;
 890         if ((ret = decode_i_frame(f, buf, frame_size)) < 0)
 891             return ret;
 892     } else if (frame_4cc == AV_RL32("pfrm") || frame_4cc == AV_RL32("pfr2")) {
 893         picture->pict_type = AV_PICTURE_TYPE_P;
 894         if ((ret = decode_p_frame(f, buf, frame_size)) < 0)
 895             return ret;
 896     } else if (frame_4cc == AV_RL32("snd_")) {
 897         av_log(avctx, AV_LOG_ERROR, "ignoring snd_ chunk length:%d\n",
 898                buf_size);
 899     } else {
 900         av_log(avctx, AV_LOG_ERROR, "ignoring unknown chunk length:%d\n",
 901                buf_size);
 902     }
 903
 904     picture->key_frame = picture->pict_type == AV_PICTURE_TYPE_I;
 905
 906     av_image_copy_plane(picture->data[0], picture->linesize[0],
 907                         (const uint8_t*)f->frame_buffer,  avctx->width * 2,
 908                         avctx->width * 2, avctx->height);
 909     FFSWAP(uint16_t *, f->frame_buffer, f->last_frame_buffer);
 910
 911     *got_frame = 1;
 912
 913     emms_c();
 914
 915     return buf_size;
 916 }
 917
 918 static av_cold int decode_end(AVCodecContext *avctx)
 919 {
 920     FourXContext * const f = avctx->priv_data;
 921     int i;
 922
 923     av_freep(&f->frame_buffer);
 924     av_freep(&f->last_frame_buffer);
 925     av_freep(&f->bitstream_buffer);
 926     f->bitstream_buffer_size = 0;
 927     for (i = 0; i < CFRAME_BUFFER_COUNT; i++) {
 928         av_freep(&f->cfrm[i].data);
 929         f->cfrm[i].allocated_size = 0;
 930     }
 931     ff_free_vlc(&f->pre_vlc);
 932
 933     return 0;
 934 }
 935
 936 static av_cold int decode_init(AVCodecContext *avctx)
 937 {
 938     FourXContext * const f = avctx->priv_data;
 939     int ret;
 940
 941     if (avctx->extradata_size != 4 || !avctx->extradata) {
 942         av_log(avctx, AV_LOG_ERROR, "extradata wrong or missing\n");
 943         return AVERROR_INVALIDDATA;
 944     }
 945
 946     ret = av_image_check_size(avctx->width, avctx->height, 0, avctx);
 947     if (ret < 0)
 948         return ret;
 949
 950     f->frame_buffer      = av_mallocz(avctx->width * avctx->height * 2);
 951     f->last_frame_buffer = av_mallocz(avctx->width * avctx->height * 2);
 952     if (!f->frame_buffer || !f->last_frame_buffer) {
 953         decode_end(avctx);
 954         return AVERROR(ENOMEM);
 955     }
 956
 957     f->version = AV_RL32(avctx->extradata) >> 16;
 958     ff_blockdsp_init(&f->bdsp, avctx);
 959     ff_bswapdsp_init(&f->bbdsp);
 960     f->avctx = avctx;
 961     init_vlcs(f);
 962
 963     if (f->version > 2)
 964         avctx->pix_fmt = AV_PIX_FMT_RGB565;
 965     else
 966         avctx->pix_fmt = AV_PIX_FMT_BGR555;
 967
 968     return 0;
 969 }
 970
 971 AVCodec ff_fourxm_decoder = {
 972     .name           = "4xm",
 973     .long_name      = NULL_IF_CONFIG_SMALL("4X Movie"),
 974     .type           = AVMEDIA_TYPE_VIDEO,
 975     .id             = AV_CODEC_ID_4XM,
 976     .priv_data_size = sizeof(FourXContext),
 977     .init           = decode_init,
 978     .close          = decode_end,
 979     .decode         = decode_frame,
 980     .capabilities   = CODEC_CAP_DR1,
 981 };