git.sesse.net Git - ffmpeg/blob - libavcodec/4xm.c

   1 /*
   2  * 4XM codec
   3  * Copyright (c) 2003 Michael Niedermayer
   4  *
   5  * This file is part of Libav.
   6  *
   7  * Libav is free software; you can redistribute it and/or
   8  * modify it under the terms of the GNU Lesser General Public
   9  * License as published by the Free Software Foundation; either
  10  * version 2.1 of the License, or (at your option) any later version.
  11  *
  12  * Libav is distributed in the hope that it will be useful,
  13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  15  * Lesser General Public License for more details.
  16  *
  17  * You should have received a copy of the GNU Lesser General Public
  18  * License along with Libav; if not, write to the Free Software
  19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20  */
  21
  22 /**
  23  * @file
  24  * 4XM codec.
  25  */
  26
  27 #include "libavutil/frame.h"
  28 #include "libavutil/intreadwrite.h"
  29 #include "avcodec.h"
  30 #include "bytestream.h"
  31 #include "dsputil.h"
  32 #include "get_bits.h"
  33 #include "internal.h"
  34
  35 //#undef NDEBUG
  36 //#include <assert.h>
  37
  38 #define BLOCK_TYPE_VLC_BITS 5
  39 #define ACDC_VLC_BITS 9
  40
  41 #define CFRAME_BUFFER_COUNT 100
  42
  /* Block-type VLC descriptions, indexed [table set][size index][block code].
   * Each entry is a { code bits, code length } pair: init_vlcs() passes
   * element [1] as the length table and element [0] as the code table.
   * A length of 0 marks a block code that does not exist for that size.
   * decode_p_block() selects the table set with 1 - (version > 1) and the
   * size index through size2index[log2h][log2w]. */
  43 static const uint8_t block_type_tab[2][4][8][2] = {
  44     {
  45         {    // { 8, 4, 2 } x { 8, 4, 2}
  46             { 0, 1 }, { 2, 2 }, { 6, 3 }, { 14, 4 }, { 30, 5 }, { 31, 5 }, { 0, 0 }
  47         }, { // { 8, 4 } x 1
  48             { 0, 1 }, { 0, 0 }, { 2, 2 }, { 6, 3 }, { 14, 4 }, { 15, 4 }, { 0, 0 }
  49         }, { // 1 x { 8, 4 }
  50             { 0, 1 }, { 2, 2 }, { 0, 0 }, { 6, 3 }, { 14, 4 }, { 15, 4 }, { 0, 0 }
  51         }, { // 1 x 2, 2 x 1
  52             { 0, 1 }, { 0, 0 }, { 0, 0 }, { 2, 2 }, { 6, 3 }, { 14, 4 }, { 15, 4 }
  53         }
  54     }, {
  55         {   // { 8, 4, 2 } x { 8, 4, 2}
  56             { 1, 2 }, { 4, 3 }, { 5, 3 }, { 0, 2 }, { 6, 3 }, { 7, 3 }, { 0, 0 }
  57         }, {// { 8, 4 } x 1
  58             { 1, 2 }, { 0, 0 }, { 2, 2 }, { 0, 2 }, { 6, 3 }, { 7, 3 }, { 0, 0 }
  59         }, {// 1 x { 8, 4 }
  60             { 1, 2 }, { 2, 2 }, { 0, 0 }, { 0, 2 }, { 6, 3 }, { 7, 3 }, { 0, 0 }
  61         }, {// 1 x 2, 2 x 1
  62             { 1, 2 }, { 0, 0 }, { 0, 0 }, { 0, 2 }, { 2, 2 }, { 6, 3 }, { 7, 3 }
  63       }
  64     }
  65 };
  66
  /* Maps a block size, as [log2 height][log2 width], to the size index used
   * to pick a block-type VLC table (second dimension of block_type_tab).
   * The 1x1 entry is a "no such block" sentinel; the element type is signed
   * so that the -1 is actually stored as -1 instead of wrapping to 255. */
  static const int8_t size2index[4][4] = {
      { -1, 3, 1, 1 },
      {  3, 0, 0, 0 },
      {  2, 0, 0, 0 },
      {  2, 0, 0, 0 },
  };
  73
  /* Motion vector table used when version > 1: init_mv() turns entry i into
   * the sample offset mv[i][0] + mv[i][1] * linesize / 2, i.e. element [0]
   * is added directly and element [1] is scaled by half the line size
   * (presumably horizontal and vertical displacement in 16-bit pixels). */
  74 static const int8_t mv[256][2] = {
  75     {   0,   0 }, {   0,  -1 }, {  -1,   0 }, {   1,   0 }, {   0,   1 }, {  -1,  -1 }, {   1,  -1 }, {  -1,   1 },
  76     {   1,   1 }, {   0,  -2 }, {  -2,   0 }, {   2,   0 }, {   0,   2 }, {  -1,  -2 }, {   1,  -2 }, {  -2,  -1 },
  77     {   2,  -1 }, {  -2,   1 }, {   2,   1 }, {  -1,   2 }, {   1,   2 }, {  -2,  -2 }, {   2,  -2 }, {  -2,   2 },
  78     {   2,   2 }, {   0,  -3 }, {  -3,   0 }, {   3,   0 }, {   0,   3 }, {  -1,  -3 }, {   1,  -3 }, {  -3,  -1 },
  79     {   3,  -1 }, {  -3,   1 }, {   3,   1 }, {  -1,   3 }, {   1,   3 }, {  -2,  -3 }, {   2,  -3 }, {  -3,  -2 },
  80     {   3,  -2 }, {  -3,   2 }, {   3,   2 }, {  -2,   3 }, {   2,   3 }, {   0,  -4 }, {  -4,   0 }, {   4,   0 },
  81     {   0,   4 }, {  -1,  -4 }, {   1,  -4 }, {  -4,  -1 }, {   4,  -1 }, {   4,   1 }, {  -1,   4 }, {   1,   4 },
  82     {  -3,  -3 }, {  -3,   3 }, {   3,   3 }, {  -2,  -4 }, {  -4,  -2 }, {   4,  -2 }, {  -4,   2 }, {  -2,   4 },
  83     {   2,   4 }, {  -3,  -4 }, {   3,  -4 }, {   4,  -3 }, {  -5,   0 }, {  -4,   3 }, {  -3,   4 }, {   3,   4 },
  84     {  -1,  -5 }, {  -5,  -1 }, {  -5,   1 }, {  -1,   5 }, {  -2,  -5 }, {   2,  -5 }, {   5,  -2 }, {   5,   2 },
  85     {  -4,  -4 }, {  -4,   4 }, {  -3,  -5 }, {  -5,  -3 }, {  -5,   3 }, {   3,   5 }, {  -6,   0 }, {   0,   6 },
  86     {  -6,  -1 }, {  -6,   1 }, {   1,   6 }, {   2,  -6 }, {  -6,   2 }, {   2,   6 }, {  -5,  -4 }, {   5,   4 },
  87     {   4,   5 }, {  -6,  -3 }, {   6,   3 }, {  -7,   0 }, {  -1,  -7 }, {   5,  -5 }, {  -7,   1 }, {  -1,   7 },
  88     {   4,  -6 }, {   6,   4 }, {  -2,  -7 }, {  -7,   2 }, {  -3,  -7 }, {   7,  -3 }, {   3,   7 }, {   6,  -5 },
  89     {   0,  -8 }, {  -1,  -8 }, {  -7,  -4 }, {  -8,   1 }, {   4,   7 }, {   2,  -8 }, {  -2,   8 }, {   6,   6 },
  90     {  -8,   3 }, {   5,  -7 }, {  -5,   7 }, {   8,  -4 }, {   0,  -9 }, {  -9,  -1 }, {   1,   9 }, {   7,  -6 },
  91     {  -7,   6 }, {  -5,  -8 }, {  -5,   8 }, {  -9,   3 }, {   9,  -4 }, {   7,  -7 }, {   8,  -6 }, {   6,   8 },
  92     {  10,   1 }, { -10,   2 }, {   9,  -5 }, {  10,  -3 }, {  -8,  -7 }, { -10,  -4 }, {   6,  -9 }, { -11,   0 },
  93     {  11,   1 }, { -11,  -2 }, {  -2,  11 }, {   7,  -9 }, {  -7,   9 }, {  10,   6 }, {  -4,  11 }, {   8,  -9 },
  94     {   8,   9 }, {   5,  11 }, {   7, -10 }, {  12,  -3 }, {  11,   6 }, {  -9,  -9 }, {   8,  10 }, {   5,  12 },
  95     { -11,   7 }, {  13,   2 }, {   6, -12 }, {  10,   9 }, { -11,   8 }, {  -7,  12 }, {   0,  14 }, {  14,  -2 },
  96     {  -9,  11 }, {  -6,  13 }, { -14,  -4 }, {  -5, -14 }, {   5,  14 }, { -15,  -1 }, { -14,  -6 }, {   3, -15 },
  97     {  11, -11 }, {  -7,  14 }, {  -5,  15 }, {   8, -14 }, {  15,   6 }, {   3,  16 }, {   7, -15 }, { -16,   5 },
  98     {   0,  17 }, { -16,  -6 }, { -10,  14 }, { -16,   7 }, {  12,  13 }, { -16,   8 }, { -17,   6 }, { -18,   3 },
  99     {  -7,  17 }, {  15,  11 }, {  16,  10 }, {   2, -19 }, {   3, -19 }, { -11, -16 }, { -18,   8 }, { -19,  -6 },
 100     {   2, -20 }, { -17, -11 }, { -10, -18 }, {   8,  19 }, { -21,  -1 }, { -20,   7 }, {  -4,  21 }, {  21,   5 },
 101     {  15,  16 }, {   2, -22 }, { -10, -20 }, { -22,   5 }, {  20, -11 }, {  -7, -22 }, { -12,  20 }, {  23,  -5 },
 102     {  13, -20 }, {  24,  -2 }, { -15,  19 }, { -11,  22 }, {  16,  19 }, {  23, -10 }, { -18, -18 }, {  -9, -24 },
 103     {  24, -10 }, {  -3,  26 }, { -23,  13 }, { -18, -20 }, {  17,  21 }, {  -4,  27 }, {  27,   6 }, {   1, -28 },
 104     { -11,  26 }, { -17, -23 }, {   7,  28 }, {  11, -27 }, {  29,   5 }, { -23, -19 }, { -28, -11 }, { -21,  22 },
 105     { -30,   7 }, { -17,  26 }, { -27,  16 }, {  13,  29 }, {  19, -26 }, {  10, -31 }, { -14, -30 }, {  20, -27 },
 106     { -29,  18 }, { -16, -31 }, { -28, -22 }, {  21, -30 }, { -25,  28 }, {  26, -29 }, {  25, -32 }, { -32, -32 }
 107 };
 108
 /* Dequantization factors: decode_i_block() multiplies every decoded level
  * by dequant_table[j], where j is the coefficient position obtained from
  * ff_zigzag_direct[] (entry 0 scales the DC difference). */
 109 /* This is simply the scaled down elementwise product of the standard JPEG
 110  * quantizer table and the AAN premul table. */
 111 static const uint8_t dequant_table[64] = {
 112     16, 15, 13, 19, 24, 31, 28, 17,
 113     17, 23, 25, 31, 36, 63, 45, 21,
 114     18, 24, 27, 37, 52, 59, 49, 20,
 115     16, 28, 34, 40, 60, 80, 51, 20,
 116     18, 31, 48, 66, 68, 86, 56, 21,
 117     19, 38, 56, 59, 64, 64, 48, 20,
 118     27, 48, 55, 55, 56, 51, 35, 15,
 119     20, 35, 34, 32, 31, 22, 15,  8,
 120 };
 121
 /* Block-type VLCs, filled in by init_vlcs() from block_type_tab and read by
  * decode_p_block() as block_type_vlc[1 - (version > 1)][size index]. */
 122 static VLC block_type_vlc[2][4];
 123
 124
 /* Reassembly buffer for a frame that arrives split over several "cfrm"
  * chunks (see decode_frame()). */
 125 typedef struct CFrameBuffer {
 126     unsigned int allocated_size; ///< capacity of data, maintained by av_fast_realloc()
 127     unsigned int size;           ///< number of payload bytes collected so far
 128     int id;                      ///< chunk id this buffer belongs to; 0 is treated as "unused"
 129     uint8_t *data;               ///< collected payload
 130 } CFrameBuffer;
 131
 /* Private decoder state. */
 132 typedef struct FourXContext {
 133     AVCodecContext *avctx;         ///< owning codec context (dimensions, flags, logging)
 134     DSPContext dsp;                ///< provides bswap_buf() and clear_blocks()
 135     AVFrame *last_picture;         ///< previous frame, reference for P-frames
 136     GetBitContext pre_gb;          ///< ac/dc prefix
 137     GetBitContext gb;              ///< block-type codes (P-frames) / coefficient bits (I-frames)
 138     GetByteContext g;              ///< byte stream: motion vector indices
 139     GetByteContext g2;             ///< word stream: 16-bit values (offsets / raw pixels)
 140     int mv[256];                   ///< sample offsets per motion vector index, see init_mv()
 141     VLC pre_vlc;                   ///< per-frame Huffman table built by read_huffman_tables()
 142     int last_dc;                   ///< DC predictor for decode_i_block()
 143     DECLARE_ALIGNED(16, int16_t, block)[6][64]; ///< one macroblock: blocks 0-3 and 4-5 as used by idct_put()
 144     void *bitstream_buffer;        ///< scratch buffer holding byte-swapped bitstream data
 145     unsigned int bitstream_buffer_size; ///< capacity of bitstream_buffer, maintained by av_fast_malloc()
 146     int version;                   ///< format version; > 1 selects the second table set and mv[]
 147     CFrameBuffer cfrm[CFRAME_BUFFER_COUNT]; ///< buffers for frames split into "cfrm" chunks
 148 } FourXContext;
 149
 150
 /* Rotation constants of the inverse DCT in 16.16 fixed point
  * (value * 65536, e.g. 92682 / 65536 = 1.414213562). */
 #define FIX_1_082392200  70936
 #define FIX_1_414213562  92682
 #define FIX_1_847759065 121095
 #define FIX_2_613125930 171254

 /* 16.16 fixed-point multiply. The product is formed in unsigned arithmetic
  * so that large coefficients wrap instead of triggering signed-overflow
  * undefined behaviour; the result is converted back to int before the
  * (arithmetic) right shift, which keeps the values of the plain signed
  * multiply on two's complement targets. */
 #define MULTIPLY(var, k) ((int)((var) * (unsigned)(k)) >> 16)

 /**
  * One 8-point inverse transform.
  *
  * @param in  the 8 input coefficients
  * @param out receives the 8 unscaled output samples; must not alias in
  */
 static inline void idct_1d(const int in[8], int out[8])
 {
     /* even part */
     const int e10 = in[0] + in[4];
     const int e11 = in[0] - in[4];
     const int e13 = in[2] + in[6];
     const int e12 = MULTIPLY(in[2] - in[6], FIX_1_414213562) - e13;

     const int tmp0 = e10 + e13;
     const int tmp3 = e10 - e13;
     const int tmp1 = e11 + e12;
     const int tmp2 = e11 - e12;

     /* odd part */
     const int z13 = in[5] + in[3];
     const int z10 = in[5] - in[3];
     const int z11 = in[1] + in[7];
     const int z12 = in[1] - in[7];

     const int tmp7 = z11 + z13;
     const int o11  = MULTIPLY(z11 - z13, FIX_1_414213562);

     const int z5   = MULTIPLY(z10 + z12, FIX_1_847759065);
     const int o10  = MULTIPLY(z12,  FIX_1_082392200) - z5;
     const int o12  = MULTIPLY(z10, -FIX_2_613125930) + z5;

     const int tmp6 = o12 - tmp7;
     const int tmp5 = o11 - tmp6;
     const int tmp4 = o10 + tmp5;

     out[0] = tmp0 + tmp7;
     out[7] = tmp0 - tmp7;
     out[1] = tmp1 + tmp6;
     out[6] = tmp1 - tmp6;
     out[2] = tmp2 + tmp5;
     out[5] = tmp2 - tmp5;
     out[4] = tmp3 + tmp4;
     out[3] = tmp3 - tmp4;
 }

 /**
  * In-place 8x8 inverse DCT.
  *
  * The columns are transformed first into a full-precision scratch array,
  * then the rows; the final samples are scaled down by 6 bits.
  *
  * @param block 64 coefficients in row-major order, replaced by the samples
  */
 static void idct(int16_t block[64])
 {
     int temp[64];
     int in[8], out[8];
     int i, k;

     /* pass 1: columns */
     for (i = 0; i < 8; i++) {
         for (k = 0; k < 8; k++)
             in[k] = block[8 * k + i];
         idct_1d(in, out);
         for (k = 0; k < 8; k++)
             temp[8 * k + i] = out[k];
     }

     /* pass 2: rows, with the final >> 6 normalization */
     for (i = 0; i < 8 * 8; i += 8) {
         idct_1d(temp + i, out);
         for (k = 0; k < 8; k++)
             block[i + k] = out[k] >> 6;
     }
 }
 242
 243 static av_cold void init_vlcs(FourXContext *f)
 244 {
 245     static VLC_TYPE table[2][4][32][2];
 246     int i, j;
 247
 248     for (i = 0; i < 2; i++) {
 249         for (j = 0; j < 4; j++) {
 250             block_type_vlc[i][j].table           = table[i][j];
 251             block_type_vlc[i][j].table_allocated = 32;
 252             init_vlc(&block_type_vlc[i][j], BLOCK_TYPE_VLC_BITS, 7,
 253                      &block_type_tab[i][j][0][1], 2, 1,
 254                      &block_type_tab[i][j][0][0], 2, 1,
 255                      INIT_VLC_USE_NEW_STATIC);
 256         }
 257     }
 258 }
 259
 260 static void init_mv(FourXContext *f, int linesize)
 261 {
 262     int i;
 263
 264     for (i = 0; i < 256; i++) {
 265         if (f->version > 1)
 266             f->mv[i] = mv[i][0] + mv[i][1] * linesize / 2;
 267         else
 268             f->mv[i] = (i & 15) - 8 + ((i >> 4) - 8) * linesize / 2;
 269     }
 270 }
 271
 /* Process two adjacent 16-bit pixels at once: read 32 bits from src,
  * compute value * scale + dc and store the result to dst. On big-endian
  * hosts the two halves are swapped before and after the arithmetic so
  * that the computation is carried out as on a little-endian host. */
 #if HAVE_BIGENDIAN
 #define LE_CENTRIC_MUL(dst, src, scale, dc)                         \
     do {                                                            \
         unsigned le_mul_px = AV_RN32(src);                          \
         le_mul_px = (le_mul_px << 16) | (le_mul_px >> 16);          \
         le_mul_px = le_mul_px * (scale) + (dc);                     \
         le_mul_px = (le_mul_px << 16) | (le_mul_px >> 16);          \
         AV_WN32A(dst, le_mul_px);                                   \
     } while (0)
 #else
 #define LE_CENTRIC_MUL(dst, src, scale, dc)                         \
     do {                                                            \
         unsigned le_mul_px = AV_RN32(src) * (scale) + (dc);         \
         AV_WN32A(dst, le_mul_px);                                   \
     } while (0)
 #endif
 288
 /**
  * Write a block as dst = scale * src + dc.
  *
  * @param dst    top-left destination pixel
  * @param src    top-left source pixel; it is only advanced from row to row
  *               when scale is non-zero
  * @param log2w  log2 of the block width, 0..3
  * @param h      block height in rows
  * @param stride distance between rows, in pixels
  * @param scale  multiplier applied to the source pixels
  * @param dc     16-bit value added to every pixel
  */
 static inline void mcdc(uint16_t *dst, uint16_t *src, int log2w,
                         int h, int stride, int scale, unsigned dc)
 {
     int row, pair, pairs;

     /* replicate dc into both halves of a 32-bit pixel pair */
     dc *= 0x10001;

     if (log2w < 0 || log2w > 3) {
         assert(0);
         return;
     }

     if (log2w == 0) {
         /* one pixel per row */
         for (row = 0; row < h; row++) {
             dst[0] = scale * src[0] + dc;
             if (scale)
                 src += stride;
             dst += stride;
         }
         return;
     }

     /* widths 2, 4 and 8 are handled as 1, 2 and 4 pixel pairs per row */
     pairs = 1 << (log2w - 1);
     for (row = 0; row < h; row++) {
         for (pair = 0; pair < pairs; pair++) {
             LE_CENTRIC_MUL(dst + 2 * pair, src + 2 * pair, scale, dc);
         }
         if (scale)
             src += stride;
         dst += stride;
     }
 }
 336
 337 static void decode_p_block(FourXContext *f, uint16_t *dst, uint16_t *src,
 338                            int log2w, int log2h, int stride)
 339 {
 340     const int index = size2index[log2h][log2w];
 341     const int h     = 1 << log2h;
 342     int code        = get_vlc2(&f->gb,
 343                                block_type_vlc[1 - (f->version > 1)][index].table,
 344                                BLOCK_TYPE_VLC_BITS, 1);
 345     uint16_t *start = (uint16_t *)f->last_picture->data[0];
 346     uint16_t *end   = start + stride * (f->avctx->height - h + 1) - (1 << log2w);
 347
 348     assert(code >= 0 && code <= 6);
 349
 350     if (code == 0) {
 351         src += f->mv[bytestream2_get_byte(&f->g)];
 352         if (start > src || src > end) {
 353             av_log(f->avctx, AV_LOG_ERROR, "mv out of pic\n");
 354             return;
 355         }
 356         mcdc(dst, src, log2w, h, stride, 1, 0);
 357     } else if (code == 1) {
 358         log2h--;
 359         decode_p_block(f, dst, src, log2w, log2h, stride);
 360         decode_p_block(f, dst + (stride << log2h),
 361                           src + (stride << log2h), log2w, log2h, stride);
 362     } else if (code == 2) {
 363         log2w--;
 364         decode_p_block(f, dst , src, log2w, log2h, stride);
 365         decode_p_block(f, dst + (1 << log2w),
 366                           src + (1 << log2w), log2w, log2h, stride);
 367     } else if (code == 3 && f->version < 2) {
 368         mcdc(dst, src, log2w, h, stride, 1, 0);
 369     } else if (code == 4) {
 370         src += f->mv[bytestream2_get_byte(&f->g)];
 371         if (start > src || src > end) {
 372             av_log(f->avctx, AV_LOG_ERROR, "mv out of pic\n");
 373             return;
 374         }
 375         mcdc(dst, src, log2w, h, stride, 1, bytestream2_get_le16(&f->g2));
 376     } else if (code == 5) {
 377         mcdc(dst, src, log2w, h, stride, 0, bytestream2_get_le16(&f->g2));
 378     } else if (code == 6) {
 379         if (log2w) {
 380             dst[0]      = bytestream2_get_le16(&f->g2);
 381             dst[1]      = bytestream2_get_le16(&f->g2);
 382         } else {
 383             dst[0]      = bytestream2_get_le16(&f->g2);
 384             dst[stride] = bytestream2_get_le16(&f->g2);
 385         }
 386     }
 387 }
 388
 389 static int decode_p_frame(FourXContext *f, AVFrame *frame,
 390                           const uint8_t *buf, int length)
 391 {
 392     int x, y;
 393     const int width  = f->avctx->width;
 394     const int height = f->avctx->height;
 395     uint16_t *src    = (uint16_t *)f->last_picture->data[0];
 396     uint16_t *dst    = (uint16_t *)frame->data[0];
 397     const int stride =             frame->linesize[0] >> 1;
 398     unsigned int bitstream_size, bytestream_size, wordstream_size, extra,
 399                  bytestream_offset, wordstream_offset;
 400
 401     if (f->version > 1) {
 402         extra           = 20;
 403         bitstream_size  = AV_RL32(buf + 8);
 404         wordstream_size = AV_RL32(buf + 12);
 405         bytestream_size = AV_RL32(buf + 16);
 406     } else {
 407         extra           = 0;
 408         bitstream_size  = AV_RL16(buf - 4);
 409         wordstream_size = AV_RL16(buf - 2);
 410         bytestream_size = FFMAX(length - bitstream_size - wordstream_size, 0);
 411     }
 412
 413     if (bitstream_size + bytestream_size + wordstream_size + extra != length
 414         || bitstream_size  > (1 << 26)
 415         || bytestream_size > (1 << 26)
 416         || wordstream_size > (1 << 26)) {
 417         av_log(f->avctx, AV_LOG_ERROR, "lengths %d %d %d %d\n",
 418                bitstream_size, bytestream_size, wordstream_size,
 419                bitstream_size + bytestream_size + wordstream_size - length);
 420         return AVERROR_INVALIDDATA;
 421     }
 422
 423     av_fast_malloc(&f->bitstream_buffer, &f->bitstream_buffer_size,
 424                    bitstream_size + FF_INPUT_BUFFER_PADDING_SIZE);
 425     if (!f->bitstream_buffer)
 426         return AVERROR(ENOMEM);
 427     f->dsp.bswap_buf(f->bitstream_buffer, (const uint32_t*)(buf + extra),
 428                      bitstream_size / 4);
 429     memset((uint8_t*)f->bitstream_buffer + bitstream_size,
 430            0, FF_INPUT_BUFFER_PADDING_SIZE);
 431     init_get_bits(&f->gb, f->bitstream_buffer, 8 * bitstream_size);
 432
 433     wordstream_offset = extra + bitstream_size;
 434     bytestream_offset = extra + bitstream_size + wordstream_size;
 435     bytestream2_init(&f->g2, buf + wordstream_offset,
 436                      length - wordstream_offset);
 437     bytestream2_init(&f->g, buf + bytestream_offset,
 438                      length - bytestream_offset);
 439
 440     init_mv(f, frame->linesize[0]);
 441
 442     for (y = 0; y < height; y += 8) {
 443         for (x = 0; x < width; x += 8)
 444             decode_p_block(f, dst + x, src + x, 3, 3, stride);
 445         src += 8 * stride;
 446         dst += 8 * stride;
 447     }
 448
 449     return 0;
 450 }
 451
 452 /**
 453  * decode block and dequantize.
 454  * Note this is almost identical to MJPEG.
 455  */
 456 static int decode_i_block(FourXContext *f, int16_t *block)
 457 {
 458     int code, i, j, level, val;
 459
 460     /* DC coef */
 461     val = get_vlc2(&f->pre_gb, f->pre_vlc.table, ACDC_VLC_BITS, 3);
 462     if (val >> 4)
 463         av_log(f->avctx, AV_LOG_ERROR, "error dc run != 0\n");
 464
 465     if (val)
 466         val = get_xbits(&f->gb, val);
 467
 468     val        = val * dequant_table[0] + f->last_dc;
 469     f->last_dc = block[0] = val;
 470     /* AC coefs */
 471     i = 1;
 472     for (;;) {
 473         code = get_vlc2(&f->pre_gb, f->pre_vlc.table, ACDC_VLC_BITS, 3);
 474
 475         /* EOB */
 476         if (code == 0)
 477             break;
 478         if (code == 0xf0) {
 479             i += 16;
 480         } else {
 481             level = get_xbits(&f->gb, code & 0xf);
 482             i    += code >> 4;
 483             if (i >= 64) {
 484                 av_log(f->avctx, AV_LOG_ERROR, "run %d oveflow\n", i);
 485                 return 0;
 486             }
 487
 488             j = ff_zigzag_direct[i];
 489             block[j] = level * dequant_table[j];
 490             i++;
 491             if (i >= 64)
 492                 break;
 493         }
 494     }
 495
 496     return 0;
 497 }
 498
 499 static inline void idct_put(FourXContext *f, AVFrame *frame, int x, int y)
 500 {
 501     int16_t (*block)[64] = f->block;
 502     int stride           = frame->linesize[0] >> 1;
 503     int i;
 504     uint16_t *dst = ((uint16_t*)frame->data[0]) + y * stride + x;
 505
 506     for (i = 0; i < 4; i++) {
 507         block[i][0] += 0x80 * 8 * 8;
 508         idct(block[i]);
 509     }
 510
 511     if (!(f->avctx->flags & CODEC_FLAG_GRAY)) {
 512         for (i = 4; i < 6; i++)
 513             idct(block[i]);
 514     }
 515
 516     /* Note transform is:
 517      * y  = ( 1b + 4g + 2r) / 14
 518      * cb = ( 3b - 2g - 1r) / 14
 519      * cr = (-1b - 4g + 5r) / 14 */
 520     for (y = 0; y < 8; y++) {
 521         for (x = 0; x < 8; x++) {
 522             int16_t *temp = block[(x >> 2) + 2 * (y >> 2)] +
 523                             2 * (x & 3) + 2 * 8 * (y & 3); // FIXME optimize
 524             int cb = block[4][x + 8 * y];
 525             int cr = block[5][x + 8 * y];
 526             int cg = (cb + cr) >> 1;
 527             int y;
 528
 529             cb += cb;
 530
 531             y               = temp[0];
 532             dst[0]          = ((y + cb) >> 3) + (((y - cg) & 0xFC) << 3) + (((y + cr) & 0xF8) << 8);
 533             y               = temp[1];
 534             dst[1]          = ((y + cb) >> 3) + (((y - cg) & 0xFC) << 3) + (((y + cr) & 0xF8) << 8);
 535             y               = temp[8];
 536             dst[stride]     = ((y + cb) >> 3) + (((y - cg) & 0xFC) << 3) + (((y + cr) & 0xF8) << 8);
 537             y               = temp[9];
 538             dst[1 + stride] = ((y + cb) >> 3) + (((y - cg) & 0xFC) << 3) + (((y + cr) & 0xF8) << 8);
 539             dst            += 2;
 540         }
 541         dst += 2 * stride - 2 * 8;
 542     }
 543 }
 544
 545 static int decode_i_mb(FourXContext *f)
 546 {
 547     int ret;
 548     int i;
 549
 550     f->dsp.clear_blocks(f->block[0]);
 551
 552     for (i = 0; i < 6; i++)
 553         if ((ret = decode_i_block(f, f->block[i])) < 0)
 554             return ret;
 555
 556     return 0;
 557 }
 558
 559 static const uint8_t *read_huffman_tables(FourXContext *f,
 560                                           const uint8_t * const buf)
 561 {
 562     int frequency[512] = { 0 };
 563     uint8_t flag[512];
 564     int up[512];
 565     uint8_t len_tab[257];
 566     int bits_tab[257];
 567     int start, end;
 568     const uint8_t *ptr = buf;
 569     int j;
 570
 571     memset(up, -1, sizeof(up));
 572
 573     start = *ptr++;
 574     end   = *ptr++;
 575     for (;;) {
 576         int i;
 577
 578         for (i = start; i <= end; i++)
 579             frequency[i] = *ptr++;
 580         start = *ptr++;
 581         if (start == 0)
 582             break;
 583
 584         end = *ptr++;
 585     }
 586     frequency[256] = 1;
 587
 588     while ((ptr - buf) & 3)
 589         ptr++; // 4byte align
 590
 591     for (j = 257; j < 512; j++) {
 592         int min_freq[2] = { 256 * 256, 256 * 256 };
 593         int smallest[2] = { 0, 0 };
 594         int i;
 595         for (i = 0; i < j; i++) {
 596             if (frequency[i] == 0)
 597                 continue;
 598             if (frequency[i] < min_freq[1]) {
 599                 if (frequency[i] < min_freq[0]) {
 600                     min_freq[1] = min_freq[0];
 601                     smallest[1] = smallest[0];
 602                     min_freq[0] = frequency[i];
 603                     smallest[0] = i;
 604                 } else {
 605                     min_freq[1] = frequency[i];
 606                     smallest[1] = i;
 607                 }
 608             }
 609         }
 610         if (min_freq[1] == 256 * 256)
 611             break;
 612
 613         frequency[j]           = min_freq[0] + min_freq[1];
 614         flag[smallest[0]]      = 0;
 615         flag[smallest[1]]      = 1;
 616         up[smallest[0]]        =
 617         up[smallest[1]]        = j;
 618         frequency[smallest[0]] = frequency[smallest[1]] = 0;
 619     }
 620
 621     for (j = 0; j < 257; j++) {
 622         int node, len = 0, bits = 0;
 623
 624         for (node = j; up[node] != -1; node = up[node]) {
 625             bits += flag[node] << len;
 626             len++;
 627             if (len > 31)
 628                 // can this happen at all ?
 629                 av_log(f->avctx, AV_LOG_ERROR,
 630                        "vlc length overflow\n");
 631         }
 632
 633         bits_tab[j] = bits;
 634         len_tab[j]  = len;
 635     }
 636
 637     if (init_vlc(&f->pre_vlc, ACDC_VLC_BITS, 257, len_tab, 1, 1,
 638                  bits_tab, 4, 4, 0))
 639         return NULL;
 640
 641     return ptr;
 642 }
 643
 /**
  * Blend two 15-bit (5:5:5) colors, weighting c0 twice as much as c1.
  *
  * Every channel is computed as (2 * c0 + c1) / 3 with integer division.
  *
  * @param c0 color with weight 2
  * @param c1 color with weight 1
  * @return the blended color, packed like the inputs
  */
 static int mix(int c0, int c1)
 {
     const int b = ((c0 & 0x001F) * 2 + (c1 & 0x001F)) / 3;
     const int g = (((c0 & 0x03E0) * 2 + (c1 & 0x03E0)) >> 5) / 3;
     const int r = ((c0 >> 10) * 2 + (c1 >> 10)) / 3;

     return r * 1024 + g * 32 + b;
 }
 651
 652 static int decode_i2_frame(FourXContext *f, AVFrame *frame, const uint8_t *buf, int length)
 653 {
 654     int x, y, x2, y2;
 655     const int width  = f->avctx->width;
 656     const int height = f->avctx->height;
 657     const int mbs    = (FFALIGN(width, 16) >> 4) * (FFALIGN(height, 16) >> 4);
 658     uint16_t *dst    = (uint16_t*)frame->data[0];
 659     const int stride =            frame->linesize[0]>>1;
 660     GetByteContext g3;
 661
 662     if (length < mbs * 8) {
 663         av_log(f->avctx, AV_LOG_ERROR, "packet size too small\n");
 664         return AVERROR_INVALIDDATA;
 665     }
 666     bytestream2_init(&g3, buf, length);
 667
 668     for (y = 0; y < height; y += 16) {
 669         for (x = 0; x < width; x += 16) {
 670             unsigned int color[4] = { 0 }, bits;
 671             // warning following is purely guessed ...
 672             color[0] = bytestream2_get_le16u(&g3);
 673             color[1] = bytestream2_get_le16u(&g3);
 674
 675             if (color[0] & 0x8000)
 676                 av_log(NULL, AV_LOG_ERROR, "unk bit 1\n");
 677             if (color[1] & 0x8000)
 678                 av_log(NULL, AV_LOG_ERROR, "unk bit 2\n");
 679
 680             color[2] = mix(color[0], color[1]);
 681             color[3] = mix(color[1], color[0]);
 682
 683             bits = bytestream2_get_le32u(&g3);
 684             for (y2 = 0; y2 < 16; y2++) {
 685                 for (x2 = 0; x2 < 16; x2++) {
 686                     int index = 2 * (x2 >> 2) + 8 * (y2 >> 2);
 687                     dst[y2 * stride + x2] = color[(bits >> index) & 3];
 688                 }
 689             }
 690             dst += 16;
 691         }
 692         dst += 16 * stride - x;
 693     }
 694
 695     return 0;
 696 }
 697
 698 static int decode_i_frame(FourXContext *f, AVFrame *frame, const uint8_t *buf, int length)
 699 {
 700     int x, y, ret;
 701     const int width  = f->avctx->width;
 702     const int height = f->avctx->height;
 703     const unsigned int bitstream_size = AV_RL32(buf);
 704     int token_count av_unused;
 705     unsigned int prestream_size;
 706     const uint8_t *prestream;
 707
 708     if (length < bitstream_size + 12) {
 709         av_log(f->avctx, AV_LOG_ERROR, "packet size too small\n");
 710         return AVERROR_INVALIDDATA;
 711     }
 712
 713     token_count    =     AV_RL32(buf + bitstream_size + 8);
 714     prestream_size = 4 * AV_RL32(buf + bitstream_size + 4);
 715     prestream      =             buf + bitstream_size + 12;
 716
 717     if (prestream_size + bitstream_size + 12 != length
 718         || bitstream_size > (1 << 26)
 719         || prestream_size > (1 << 26)) {
 720         av_log(f->avctx, AV_LOG_ERROR, "size mismatch %d %d %d\n",
 721                prestream_size, bitstream_size, length);
 722         return AVERROR_INVALIDDATA;
 723     }
 724
 725     prestream = read_huffman_tables(f, prestream);
 726     if (!prestream) {
 727         av_log(f->avctx, AV_LOG_ERROR, "Error reading Huffman tables.\n");
 728         return AVERROR_INVALIDDATA;
 729     }
 730
 731     init_get_bits(&f->gb, buf + 4, 8 * bitstream_size);
 732
 733     prestream_size = length + buf - prestream;
 734
 735     av_fast_malloc(&f->bitstream_buffer, &f->bitstream_buffer_size,
 736                    prestream_size + FF_INPUT_BUFFER_PADDING_SIZE);
 737     if (!f->bitstream_buffer)
 738         return AVERROR(ENOMEM);
 739     f->dsp.bswap_buf(f->bitstream_buffer, (const uint32_t*)prestream,
 740                      prestream_size / 4);
 741     memset((uint8_t*)f->bitstream_buffer + prestream_size,
 742            0, FF_INPUT_BUFFER_PADDING_SIZE);
 743     init_get_bits(&f->pre_gb, f->bitstream_buffer, 8 * prestream_size);
 744
 745     f->last_dc = 0 * 128 * 8 * 8;
 746
 747     for (y = 0; y < height; y += 16) {
 748         for (x = 0; x < width; x += 16) {
 749             if ((ret = decode_i_mb(f)) < 0)
 750                 return ret;
 751
 752             idct_put(f, frame, x, y);
 753         }
 754     }
 755
 756     if (get_vlc2(&f->pre_gb, f->pre_vlc.table, ACDC_VLC_BITS, 3) != 256)
 757         av_log(f->avctx, AV_LOG_ERROR, "end mismatch\n");
 758
 759     return 0;
 760 }
 761
 762 static int decode_frame(AVCodecContext *avctx, void *data,
 763                         int *got_frame, AVPacket *avpkt)
 764 {
 765     const uint8_t *buf    = avpkt->data;
 766     int buf_size          = avpkt->size;
 767     FourXContext *const f = avctx->priv_data;
 768     AVFrame *picture      = data;
 769     int i, frame_4cc, frame_size, ret;
 770
 771     frame_4cc = AV_RL32(buf);
 772     if (buf_size != AV_RL32(buf + 4) + 8 || buf_size < 20)
 773         av_log(f->avctx, AV_LOG_ERROR, "size mismatch %d %d\n",
 774                buf_size, AV_RL32(buf + 4));
 775
 776     if (frame_4cc == AV_RL32("cfrm")) {
 777         int free_index       = -1;
 778         const int data_size  = buf_size - 20;
 779         const int id         = AV_RL32(buf + 12);
 780         const int whole_size = AV_RL32(buf + 16);
 781         CFrameBuffer *cfrm;
 782
 783         for (i = 0; i < CFRAME_BUFFER_COUNT; i++)
 784             if (f->cfrm[i].id && f->cfrm[i].id < avctx->frame_number)
 785                 av_log(f->avctx, AV_LOG_ERROR, "lost c frame %d\n",
 786                        f->cfrm[i].id);
 787
 788         for (i = 0; i < CFRAME_BUFFER_COUNT; i++) {
 789             if (f->cfrm[i].id == id)
 790                 break;
 791             if (f->cfrm[i].size == 0)
 792                 free_index = i;
 793         }
 794
 795         if (i >= CFRAME_BUFFER_COUNT) {
 796             i             = free_index;
 797             f->cfrm[i].id = id;
 798         }
 799         cfrm = &f->cfrm[i];
 800
 801         cfrm->data = av_fast_realloc(cfrm->data, &cfrm->allocated_size,
 802                                      cfrm->size + data_size + FF_INPUT_BUFFER_PADDING_SIZE);
 803         // explicit check needed as memcpy below might not catch a NULL
 804         if (!cfrm->data) {
 805             av_log(f->avctx, AV_LOG_ERROR, "realloc failure");
 806             return AVERROR(ENOMEM);
 807         }
 808
 809         memcpy(cfrm->data + cfrm->size, buf + 20, data_size);
 810         cfrm->size += data_size;
 811
 812         if (cfrm->size >= whole_size) {
 813             buf        = cfrm->data;
 814             frame_size = cfrm->size;
 815
 816             if (id != avctx->frame_number)
 817                 av_log(f->avctx, AV_LOG_ERROR, "cframe id mismatch %d %d\n",
 818                        id, avctx->frame_number);
 819
 820             cfrm->size = cfrm->id = 0;
 821             frame_4cc  = AV_RL32("pfrm");
 822         } else
 823             return buf_size;
 824     } else {
 825         buf        = buf      + 12;
 826         frame_size = buf_size - 12;
 827     }
 828
 829     // alternatively we would have to use our own buffer management
 830     avctx->flags |= CODEC_FLAG_EMU_EDGE;
 831
 832     if ((ret = ff_get_buffer(avctx, picture, AV_GET_BUFFER_FLAG_REF)) < 0) {
 833         av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
 834         return ret;
 835     }
 836
 837     if (frame_4cc == AV_RL32("ifr2")) {
 838         picture->pict_type = AV_PICTURE_TYPE_I;
 839         if ((ret = decode_i2_frame(f, picture, buf - 4, frame_size + 4)) < 0)
 840             return ret;
 841     } else if (frame_4cc == AV_RL32("ifrm")) {
 842         picture->pict_type = AV_PICTURE_TYPE_I;
 843         if ((ret = decode_i_frame(f, picture, buf, frame_size)) < 0)
 844             return ret;
 845     } else if (frame_4cc == AV_RL32("pfrm") || frame_4cc == AV_RL32("pfr2")) {
 846         if (!f->last_picture->data[0]) {
 847             if ((ret = ff_get_buffer(avctx, f->last_picture,
 848                                      AV_GET_BUFFER_FLAG_REF)) < 0) {
 849                 av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
 850                 return ret;
 851             }
 852             memset(f->last_picture->data[0], 0, avctx->height * FFABS(f->last_picture->linesize[0]));
 853         }
 854
 855         picture->pict_type = AV_PICTURE_TYPE_P;
 856         if ((ret = decode_p_frame(f, picture, buf, frame_size)) < 0)
 857             return ret;
 858     } else if (frame_4cc == AV_RL32("snd_")) {
 859         av_log(avctx, AV_LOG_ERROR, "ignoring snd_ chunk length:%d\n",
 860                buf_size);
 861     } else {
 862         av_log(avctx, AV_LOG_ERROR, "ignoring unknown chunk length:%d\n",
 863                buf_size);
 864     }
 865
 866     picture->key_frame = picture->pict_type == AV_PICTURE_TYPE_I;
 867
 868     av_frame_unref(f->last_picture);
 869     if ((ret = av_frame_ref(f->last_picture, picture)) < 0)
 870         return ret;
 871     *got_frame = 1;
 872
 873     emms_c();
 874
 875     return buf_size;
 876 }
 877
 878 static av_cold int decode_init(AVCodecContext *avctx)
 879 {
 880     FourXContext * const f = avctx->priv_data;
 881
 882     if (avctx->extradata_size != 4 || !avctx->extradata) {
 883         av_log(avctx, AV_LOG_ERROR, "extradata wrong or missing\n");
 884         return 1;
 885     }
 886
 887     f->version = AV_RL32(avctx->extradata) >> 16;
 888     ff_dsputil_init(&f->dsp, avctx);
 889     f->avctx = avctx;
 890     init_vlcs(f);
 891
 892     if (f->version > 2)
 893         avctx->pix_fmt = AV_PIX_FMT_RGB565;
 894     else
 895         avctx->pix_fmt = AV_PIX_FMT_BGR555;
 896
 897     f->last_picture = av_frame_alloc();
 898     if (!f->last_picture)
 899         return AVERROR(ENOMEM);
 900
 901     return 0;
 902 }
 903
 904
 905 static av_cold int decode_end(AVCodecContext *avctx)
 906 {
 907     FourXContext * const f = avctx->priv_data;
 908     int i;
 909
 910     av_freep(&f->bitstream_buffer);
 911     f->bitstream_buffer_size = 0;
 912     for (i = 0; i < CFRAME_BUFFER_COUNT; i++) {
 913         av_freep(&f->cfrm[i].data);
 914         f->cfrm[i].allocated_size = 0;
 915     }
 916     ff_free_vlc(&f->pre_vlc);
 917     av_frame_free(&f->last_picture);
 918
 919     return 0;
 920 }
 921
 922 AVCodec ff_fourxm_decoder = {
 923     .name           = "4xm",
 924     .type           = AVMEDIA_TYPE_VIDEO,
 925     .id             = AV_CODEC_ID_4XM,
 926     .priv_data_size = sizeof(FourXContext),
 927     .init           = decode_init,
 928     .close          = decode_end,
 929     .decode         = decode_frame,
 930     .capabilities   = CODEC_CAP_DR1,
 931     .long_name      = NULL_IF_CONFIG_SMALL("4X Movie"),
 932 };