git.sesse.net Git - ffmpeg/blob - libavcodec/4xm.c

   1 /*
   2  * 4XM codec
   3  * Copyright (c) 2003 Michael Niedermayer
   4  *
   5  * This file is part of Libav.
   6  *
   7  * Libav is free software; you can redistribute it and/or
   8  * modify it under the terms of the GNU Lesser General Public
   9  * License as published by the Free Software Foundation; either
  10  * version 2.1 of the License, or (at your option) any later version.
  11  *
  12  * Libav is distributed in the hope that it will be useful,
  13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  15  * Lesser General Public License for more details.
  16  *
  17  * You should have received a copy of the GNU Lesser General Public
  18  * License along with Libav; if not, write to the Free Software
  19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20  */
  21
  22 /**
  23  * @file
  24  * 4XM codec.
  25  */
  26
  27 #include "libavutil/intreadwrite.h"
  28 #include "avcodec.h"
  29 #include "bytestream.h"
  30 #include "dsputil.h"
  31 #include "get_bits.h"
  32
  33 //#undef NDEBUG
  34 //#include <assert.h>
  35
  36 #define BLOCK_TYPE_VLC_BITS 5
  37 #define ACDC_VLC_BITS 9
  38
  39 #define CFRAME_BUFFER_COUNT 100
  40
  41 static const uint8_t block_type_tab[2][4][8][2] = {
  42     {
  43         {    // { 8, 4, 2 } x { 8, 4, 2}
  44             { 0, 1 }, { 2, 2 }, { 6, 3 }, { 14, 4 }, { 30, 5 }, { 31, 5 }, { 0, 0 }
  45         }, { // { 8, 4 } x 1
  46             { 0, 1 }, { 0, 0 }, { 2, 2 }, { 6, 3 }, { 14, 4 }, { 15, 4 }, { 0, 0 }
  47         }, { // 1 x { 8, 4 }
  48             { 0, 1 }, { 2, 2 }, { 0, 0 }, { 6, 3 }, { 14, 4 }, { 15, 4 }, { 0, 0 }
  49         }, { // 1 x 2, 2 x 1
  50             { 0, 1 }, { 0, 0 }, { 0, 0 }, { 2, 2 }, { 6, 3 }, { 14, 4 }, { 15, 4 }
  51         }
  52     }, {
  53         {   // { 8, 4, 2 } x { 8, 4, 2}
  54             { 1, 2 }, { 4, 3 }, { 5, 3 }, { 0, 2 }, { 6, 3 }, { 7, 3 }, { 0, 0 }
  55         }, {// { 8, 4 } x 1
  56             { 1, 2 }, { 0, 0 }, { 2, 2 }, { 0, 2 }, { 6, 3 }, { 7, 3 }, { 0, 0 }
  57         }, {// 1 x { 8, 4 }
  58             { 1, 2 }, { 2, 2 }, { 0, 0 }, { 0, 2 }, { 6, 3 }, { 7, 3 }, { 0, 0 }
  59         }, {// 1 x 2, 2 x 1
  60             { 1, 2 }, { 0, 0 }, { 0, 0 }, { 0, 2 }, { 2, 2 }, { 6, 3 }, { 7, 3 }
  61       }
  62     }
  63 };
  64
  65 static const uint8_t size2index[4][4] = {
  66     { -1, 3, 1, 1 },
  67     {  3, 0, 0, 0 },
  68     {  2, 0, 0, 0 },
  69     {  2, 0, 0, 0 },
  70 };
  71
  72 static const int8_t mv[256][2] = {
  73     {   0,   0 }, {   0,  -1 }, {  -1,   0 }, {   1,   0 }, {   0,   1 }, {  -1,  -1 }, {   1,  -1 }, {  -1,   1 },
  74     {   1,   1 }, {   0,  -2 }, {  -2,   0 }, {   2,   0 }, {   0,   2 }, {  -1,  -2 }, {   1,  -2 }, {  -2,  -1 },
  75     {   2,  -1 }, {  -2,   1 }, {   2,   1 }, {  -1,   2 }, {   1,   2 }, {  -2,  -2 }, {   2,  -2 }, {  -2,   2 },
  76     {   2,   2 }, {   0,  -3 }, {  -3,   0 }, {   3,   0 }, {   0,   3 }, {  -1,  -3 }, {   1,  -3 }, {  -3,  -1 },
  77     {   3,  -1 }, {  -3,   1 }, {   3,   1 }, {  -1,   3 }, {   1,   3 }, {  -2,  -3 }, {   2,  -3 }, {  -3,  -2 },
  78     {   3,  -2 }, {  -3,   2 }, {   3,   2 }, {  -2,   3 }, {   2,   3 }, {   0,  -4 }, {  -4,   0 }, {   4,   0 },
  79     {   0,   4 }, {  -1,  -4 }, {   1,  -4 }, {  -4,  -1 }, {   4,  -1 }, {   4,   1 }, {  -1,   4 }, {   1,   4 },
  80     {  -3,  -3 }, {  -3,   3 }, {   3,   3 }, {  -2,  -4 }, {  -4,  -2 }, {   4,  -2 }, {  -4,   2 }, {  -2,   4 },
  81     {   2,   4 }, {  -3,  -4 }, {   3,  -4 }, {   4,  -3 }, {  -5,   0 }, {  -4,   3 }, {  -3,   4 }, {   3,   4 },
  82     {  -1,  -5 }, {  -5,  -1 }, {  -5,   1 }, {  -1,   5 }, {  -2,  -5 }, {   2,  -5 }, {   5,  -2 }, {   5,   2 },
  83     {  -4,  -4 }, {  -4,   4 }, {  -3,  -5 }, {  -5,  -3 }, {  -5,   3 }, {   3,   5 }, {  -6,   0 }, {   0,   6 },
  84     {  -6,  -1 }, {  -6,   1 }, {   1,   6 }, {   2,  -6 }, {  -6,   2 }, {   2,   6 }, {  -5,  -4 }, {   5,   4 },
  85     {   4,   5 }, {  -6,  -3 }, {   6,   3 }, {  -7,   0 }, {  -1,  -7 }, {   5,  -5 }, {  -7,   1 }, {  -1,   7 },
  86     {   4,  -6 }, {   6,   4 }, {  -2,  -7 }, {  -7,   2 }, {  -3,  -7 }, {   7,  -3 }, {   3,   7 }, {   6,  -5 },
  87     {   0,  -8 }, {  -1,  -8 }, {  -7,  -4 }, {  -8,   1 }, {   4,   7 }, {   2,  -8 }, {  -2,   8 }, {   6,   6 },
  88     {  -8,   3 }, {   5,  -7 }, {  -5,   7 }, {   8,  -4 }, {   0,  -9 }, {  -9,  -1 }, {   1,   9 }, {   7,  -6 },
  89     {  -7,   6 }, {  -5,  -8 }, {  -5,   8 }, {  -9,   3 }, {   9,  -4 }, {   7,  -7 }, {   8,  -6 }, {   6,   8 },
  90     {  10,   1 }, { -10,   2 }, {   9,  -5 }, {  10,  -3 }, {  -8,  -7 }, { -10,  -4 }, {   6,  -9 }, { -11,   0 },
  91     {  11,   1 }, { -11,  -2 }, {  -2,  11 }, {   7,  -9 }, {  -7,   9 }, {  10,   6 }, {  -4,  11 }, {   8,  -9 },
  92     {   8,   9 }, {   5,  11 }, {   7, -10 }, {  12,  -3 }, {  11,   6 }, {  -9,  -9 }, {   8,  10 }, {   5,  12 },
  93     { -11,   7 }, {  13,   2 }, {   6, -12 }, {  10,   9 }, { -11,   8 }, {  -7,  12 }, {   0,  14 }, {  14,  -2 },
  94     {  -9,  11 }, {  -6,  13 }, { -14,  -4 }, {  -5, -14 }, {   5,  14 }, { -15,  -1 }, { -14,  -6 }, {   3, -15 },
  95     {  11, -11 }, {  -7,  14 }, {  -5,  15 }, {   8, -14 }, {  15,   6 }, {   3,  16 }, {   7, -15 }, { -16,   5 },
  96     {   0,  17 }, { -16,  -6 }, { -10,  14 }, { -16,   7 }, {  12,  13 }, { -16,   8 }, { -17,   6 }, { -18,   3 },
  97     {  -7,  17 }, {  15,  11 }, {  16,  10 }, {   2, -19 }, {   3, -19 }, { -11, -16 }, { -18,   8 }, { -19,  -6 },
  98     {   2, -20 }, { -17, -11 }, { -10, -18 }, {   8,  19 }, { -21,  -1 }, { -20,   7 }, {  -4,  21 }, {  21,   5 },
  99     {  15,  16 }, {   2, -22 }, { -10, -20 }, { -22,   5 }, {  20, -11 }, {  -7, -22 }, { -12,  20 }, {  23,  -5 },
 100     {  13, -20 }, {  24,  -2 }, { -15,  19 }, { -11,  22 }, {  16,  19 }, {  23, -10 }, { -18, -18 }, {  -9, -24 },
 101     {  24, -10 }, {  -3,  26 }, { -23,  13 }, { -18, -20 }, {  17,  21 }, {  -4,  27 }, {  27,   6 }, {   1, -28 },
 102     { -11,  26 }, { -17, -23 }, {   7,  28 }, {  11, -27 }, {  29,   5 }, { -23, -19 }, { -28, -11 }, { -21,  22 },
 103     { -30,   7 }, { -17,  26 }, { -27,  16 }, {  13,  29 }, {  19, -26 }, {  10, -31 }, { -14, -30 }, {  20, -27 },
 104     { -29,  18 }, { -16, -31 }, { -28, -22 }, {  21, -30 }, { -25,  28 }, {  26, -29 }, {  25, -32 }, { -32, -32 }
 105 };
 106
 107 /* This is simply the scaled down elementwise product of the standard JPEG
 108  * quantizer table and the AAN premul table. */
 109 static const uint8_t dequant_table[64] = {
 110     16, 15, 13, 19, 24, 31, 28, 17,
 111     17, 23, 25, 31, 36, 63, 45, 21,
 112     18, 24, 27, 37, 52, 59, 49, 20,
 113     16, 28, 34, 40, 60, 80, 51, 20,
 114     18, 31, 48, 66, 68, 86, 56, 21,
 115     19, 38, 56, 59, 64, 64, 48, 20,
 116     27, 48, 55, 55, 56, 51, 35, 15,
 117     20, 35, 34, 32, 31, 22, 15,  8,
 118 };
 119
/* Block type VLCs, indexed [table set][size class]; filled in by
 * init_vlcs() from block_type_tab and read by decode_p_block(). */
static VLC block_type_vlc[2][4];
 121
 122
 123 typedef struct CFrameBuffer {
 124     unsigned int allocated_size;
 125     unsigned int size;
 126     int id;
 127     uint8_t *data;
 128 } CFrameBuffer;
 129
 130 typedef struct FourXContext {
 131     AVCodecContext *avctx;
 132     DSPContext dsp;
 133     AVFrame current_picture, last_picture;
 134     GetBitContext pre_gb;          ///< ac/dc prefix
 135     GetBitContext gb;
 136     GetByteContext g;
 137     GetByteContext g2;
 138     int mv[256];
 139     VLC pre_vlc;
 140     int last_dc;
 141     DECLARE_ALIGNED(16, DCTELEM, block)[6][64];
 142     void *bitstream_buffer;
 143     unsigned int bitstream_buffer_size;
 144     int version;
 145     CFrameBuffer cfrm[CFRAME_BUFFER_COUNT];
 146 } FourXContext;
 147
 148
 149 #define FIX_1_082392200  70936
 150 #define FIX_1_414213562  92682
 151 #define FIX_1_847759065 121095
 152 #define FIX_2_613125930 171254
 153
 154 #define MULTIPLY(var, const) (((var) * (const)) >> 16)
 155
 156 static void idct(DCTELEM block[64])
 157 {
 158     int tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
 159     int tmp10, tmp11, tmp12, tmp13;
 160     int z5, z10, z11, z12, z13;
 161     int i;
 162     int temp[64];
 163
 164     for (i = 0; i < 8; i++) {
 165         tmp10 = block[8 * 0 + i] + block[8 * 4 + i];
 166         tmp11 = block[8 * 0 + i] - block[8 * 4 + i];
 167
 168         tmp13 = block[8 * 2 + i] + block[8 * 6 + i];
 169         tmp12 = MULTIPLY(block[8 * 2 + i] - block[8 * 6 + i], FIX_1_414213562) - tmp13;
 170
 171         tmp0 = tmp10 + tmp13;
 172         tmp3 = tmp10 - tmp13;
 173         tmp1 = tmp11 + tmp12;
 174         tmp2 = tmp11 - tmp12;
 175
 176         z13 = block[8 * 5 + i] + block[8 * 3 + i];
 177         z10 = block[8 * 5 + i] - block[8 * 3 + i];
 178         z11 = block[8 * 1 + i] + block[8 * 7 + i];
 179         z12 = block[8 * 1 + i] - block[8 * 7 + i];
 180
 181         tmp7  =          z11 + z13;
 182         tmp11 = MULTIPLY(z11 - z13, FIX_1_414213562);
 183
 184         z5    = MULTIPLY(z10 + z12, FIX_1_847759065);
 185         tmp10 = MULTIPLY(z12,  FIX_1_082392200) - z5;
 186         tmp12 = MULTIPLY(z10, -FIX_2_613125930) + z5;
 187
 188         tmp6 = tmp12 - tmp7;
 189         tmp5 = tmp11 - tmp6;
 190         tmp4 = tmp10 + tmp5;
 191
 192         temp[8 * 0 + i] = tmp0 + tmp7;
 193         temp[8 * 7 + i] = tmp0 - tmp7;
 194         temp[8 * 1 + i] = tmp1 + tmp6;
 195         temp[8 * 6 + i] = tmp1 - tmp6;
 196         temp[8 * 2 + i] = tmp2 + tmp5;
 197         temp[8 * 5 + i] = tmp2 - tmp5;
 198         temp[8 * 4 + i] = tmp3 + tmp4;
 199         temp[8 * 3 + i] = tmp3 - tmp4;
 200     }
 201
 202     for (i = 0; i < 8 * 8; i += 8) {
 203         tmp10 = temp[0 + i] + temp[4 + i];
 204         tmp11 = temp[0 + i] - temp[4 + i];
 205
 206         tmp13 = temp[2 + i] + temp[6 + i];
 207         tmp12 = MULTIPLY(temp[2 + i] - temp[6 + i], FIX_1_414213562) - tmp13;
 208
 209         tmp0 = tmp10 + tmp13;
 210         tmp3 = tmp10 - tmp13;
 211         tmp1 = tmp11 + tmp12;
 212         tmp2 = tmp11 - tmp12;
 213
 214         z13 = temp[5 + i] + temp[3 + i];
 215         z10 = temp[5 + i] - temp[3 + i];
 216         z11 = temp[1 + i] + temp[7 + i];
 217         z12 = temp[1 + i] - temp[7 + i];
 218
 219         tmp7  = z11 + z13;
 220         tmp11 = MULTIPLY(z11 - z13, FIX_1_414213562);
 221
 222         z5    = MULTIPLY(z10 + z12, FIX_1_847759065);
 223         tmp10 = MULTIPLY(z12,  FIX_1_082392200) - z5;
 224         tmp12 = MULTIPLY(z10, -FIX_2_613125930) + z5;
 225
 226         tmp6 = tmp12 - tmp7;
 227         tmp5 = tmp11 - tmp6;
 228         tmp4 = tmp10 + tmp5;
 229
 230         block[0 + i] = (tmp0 + tmp7) >> 6;
 231         block[7 + i] = (tmp0 - tmp7) >> 6;
 232         block[1 + i] = (tmp1 + tmp6) >> 6;
 233         block[6 + i] = (tmp1 - tmp6) >> 6;
 234         block[2 + i] = (tmp2 + tmp5) >> 6;
 235         block[5 + i] = (tmp2 - tmp5) >> 6;
 236         block[4 + i] = (tmp3 + tmp4) >> 6;
 237         block[3 + i] = (tmp3 - tmp4) >> 6;
 238     }
 239 }
 240
 241 static av_cold void init_vlcs(FourXContext *f)
 242 {
 243     static VLC_TYPE table[2][4][32][2];
 244     int i, j;
 245
 246     for (i = 0; i < 2; i++) {
 247         for (j = 0; j < 4; j++) {
 248             block_type_vlc[i][j].table           = table[i][j];
 249             block_type_vlc[i][j].table_allocated = 32;
 250             init_vlc(&block_type_vlc[i][j], BLOCK_TYPE_VLC_BITS, 7,
 251                      &block_type_tab[i][j][0][1], 2, 1,
 252                      &block_type_tab[i][j][0][0], 2, 1,
 253                      INIT_VLC_USE_NEW_STATIC);
 254         }
 255     }
 256 }
 257
 258 static void init_mv(FourXContext *f)
 259 {
 260     int i;
 261
 262     for (i = 0; i < 256; i++) {
 263         if (f->version > 1)
 264             f->mv[i] = mv[i][0] + mv[i][1] * f->current_picture.linesize[0] / 2;
 265         else
 266             f->mv[i] = (i & 15) - 8 + ((i >> 4) - 8) * f->current_picture.linesize[0] / 2;
 267     }
 268 }
 269
 270 #if HAVE_BIGENDIAN
 271 #define LE_CENTRIC_MUL(dst, src, scale, dc)             \
 272     {                                                   \
 273         unsigned tmpval = AV_RN32(src);                 \
 274         tmpval = (tmpval << 16) | (tmpval >> 16);       \
 275         tmpval = tmpval * (scale) + (dc);               \
 276         tmpval = (tmpval << 16) | (tmpval >> 16);       \
 277         AV_WN32A(dst, tmpval);                          \
 278     }
 279 #else
 280 #define LE_CENTRIC_MUL(dst, src, scale, dc)              \
 281     {                                                    \
 282         unsigned tmpval = AV_RN32(src) * (scale) + (dc); \
 283         AV_WN32A(dst, tmpval);                           \
 284     }
 285 #endif
 286
 287 static inline void mcdc(uint16_t *dst, uint16_t *src, int log2w,
 288                         int h, int stride, int scale, unsigned dc)
 289 {
 290     int i;
 291     dc *= 0x10001;
 292
 293     switch (log2w) {
 294     case 0:
 295         for (i = 0; i < h; i++) {
 296             dst[0] = scale * src[0] + dc;
 297             if (scale)
 298                 src += stride;
 299             dst += stride;
 300         }
 301         break;
 302     case 1:
 303         for (i = 0; i < h; i++) {
 304             LE_CENTRIC_MUL(dst, src, scale, dc);
 305             if (scale)
 306                 src += stride;
 307             dst += stride;
 308         }
 309         break;
 310     case 2:
 311         for (i = 0; i < h; i++) {
 312             LE_CENTRIC_MUL(dst, src, scale, dc);
 313             LE_CENTRIC_MUL(dst + 2, src + 2, scale, dc);
 314             if (scale)
 315                 src += stride;
 316             dst += stride;
 317         }
 318         break;
 319     case 3:
 320         for (i = 0; i < h; i++) {
 321             LE_CENTRIC_MUL(dst,     src,     scale, dc);
 322             LE_CENTRIC_MUL(dst + 2, src + 2, scale, dc);
 323             LE_CENTRIC_MUL(dst + 4, src + 4, scale, dc);
 324             LE_CENTRIC_MUL(dst + 6, src + 6, scale, dc);
 325             if (scale)
 326                 src += stride;
 327             dst += stride;
 328         }
 329         break;
 330     default:
 331         assert(0);
 332     }
 333 }
 334
 335 static void decode_p_block(FourXContext *f, uint16_t *dst, uint16_t *src,
 336                            int log2w, int log2h, int stride)
 337 {
 338     const int index = size2index[log2h][log2w];
 339     const int h     = 1 << log2h;
 340     int code        = get_vlc2(&f->gb,
 341                                block_type_vlc[1 - (f->version > 1)][index].table,
 342                                BLOCK_TYPE_VLC_BITS, 1);
 343     uint16_t *start = (uint16_t *)f->last_picture.data[0];
 344     uint16_t *end   = start + stride * (f->avctx->height - h + 1) - (1 << log2w);
 345
 346     assert(code >= 0 && code <= 6);
 347
 348     if (code == 0) {
 349         src += f->mv[bytestream2_get_byte(&f->g)];
 350         if (start > src || src > end) {
 351             av_log(f->avctx, AV_LOG_ERROR, "mv out of pic\n");
 352             return;
 353         }
 354         mcdc(dst, src, log2w, h, stride, 1, 0);
 355     } else if (code == 1) {
 356         log2h--;
 357         decode_p_block(f, dst, src, log2w, log2h, stride);
 358         decode_p_block(f, dst + (stride << log2h),
 359                           src + (stride << log2h), log2w, log2h, stride);
 360     } else if (code == 2) {
 361         log2w--;
 362         decode_p_block(f, dst , src, log2w, log2h, stride);
 363         decode_p_block(f, dst + (1 << log2w),
 364                           src + (1 << log2w), log2w, log2h, stride);
 365     } else if (code == 3 && f->version < 2) {
 366         mcdc(dst, src, log2w, h, stride, 1, 0);
 367     } else if (code == 4) {
 368         src += f->mv[bytestream2_get_byte(&f->g)];
 369         if (start > src || src > end) {
 370             av_log(f->avctx, AV_LOG_ERROR, "mv out of pic\n");
 371             return;
 372         }
 373         mcdc(dst, src, log2w, h, stride, 1, bytestream2_get_le16(&f->g2));
 374     } else if (code == 5) {
 375         mcdc(dst, src, log2w, h, stride, 0, bytestream2_get_le16(&f->g2));
 376     } else if (code == 6) {
 377         if (log2w) {
 378             dst[0]      = bytestream2_get_le16(&f->g2);
 379             dst[1]      = bytestream2_get_le16(&f->g2);
 380         } else {
 381             dst[0]      = bytestream2_get_le16(&f->g2);
 382             dst[stride] = bytestream2_get_le16(&f->g2);
 383         }
 384     }
 385 }
 386
 387 static int decode_p_frame(FourXContext *f, const uint8_t *buf, int length)
 388 {
 389     int x, y;
 390     const int width  = f->avctx->width;
 391     const int height = f->avctx->height;
 392     uint16_t *src    = (uint16_t *)f->last_picture.data[0];
 393     uint16_t *dst    = (uint16_t *)f->current_picture.data[0];
 394     const int stride =             f->current_picture.linesize[0] >> 1;
 395     unsigned int bitstream_size, bytestream_size, wordstream_size, extra,
 396                  bytestream_offset, wordstream_offset;
 397
 398     if (f->version > 1) {
 399         extra           = 20;
 400         bitstream_size  = AV_RL32(buf + 8);
 401         wordstream_size = AV_RL32(buf + 12);
 402         bytestream_size = AV_RL32(buf + 16);
 403     } else {
 404         extra           = 0;
 405         bitstream_size  = AV_RL16(buf - 4);
 406         wordstream_size = AV_RL16(buf - 2);
 407         bytestream_size = FFMAX(length - bitstream_size - wordstream_size, 0);
 408     }
 409
 410     if (bitstream_size + bytestream_size + wordstream_size + extra != length
 411         || bitstream_size  > (1 << 26)
 412         || bytestream_size > (1 << 26)
 413         || wordstream_size > (1 << 26)) {
 414         av_log(f->avctx, AV_LOG_ERROR, "lengths %d %d %d %d\n",
 415                bitstream_size, bytestream_size, wordstream_size,
 416                bitstream_size + bytestream_size + wordstream_size - length);
 417         return -1;
 418     }
 419
 420     av_fast_malloc(&f->bitstream_buffer, &f->bitstream_buffer_size,
 421                    bitstream_size + FF_INPUT_BUFFER_PADDING_SIZE);
 422     if (!f->bitstream_buffer)
 423         return AVERROR(ENOMEM);
 424     f->dsp.bswap_buf(f->bitstream_buffer, (const uint32_t*)(buf + extra),
 425                      bitstream_size / 4);
 426     memset((uint8_t*)f->bitstream_buffer + bitstream_size,
 427            0, FF_INPUT_BUFFER_PADDING_SIZE);
 428     init_get_bits(&f->gb, f->bitstream_buffer, 8 * bitstream_size);
 429
 430     wordstream_offset = extra + bitstream_size;
 431     bytestream_offset = extra + bitstream_size + wordstream_size;
 432     bytestream2_init(&f->g2, buf + wordstream_offset,
 433                      length - wordstream_offset);
 434     bytestream2_init(&f->g, buf + bytestream_offset,
 435                      length - bytestream_offset);
 436
 437     init_mv(f);
 438
 439     for (y = 0; y < height; y += 8) {
 440         for (x = 0; x < width; x += 8)
 441             decode_p_block(f, dst + x, src + x, 3, 3, stride);
 442         src += 8 * stride;
 443         dst += 8 * stride;
 444     }
 445
 446     return 0;
 447 }
 448
 449 /**
 450  * decode block and dequantize.
 451  * Note this is almost identical to MJPEG.
 452  */
 453 static int decode_i_block(FourXContext *f, DCTELEM *block)
 454 {
 455     int code, i, j, level, val;
 456
 457     /* DC coef */
 458     val = get_vlc2(&f->pre_gb, f->pre_vlc.table, ACDC_VLC_BITS, 3);
 459     if (val >> 4)
 460         av_log(f->avctx, AV_LOG_ERROR, "error dc run != 0\n");
 461
 462     if (val)
 463         val = get_xbits(&f->gb, val);
 464
 465     val        = val * dequant_table[0] + f->last_dc;
 466     f->last_dc = block[0] = val;
 467     /* AC coefs */
 468     i = 1;
 469     for (;;) {
 470         code = get_vlc2(&f->pre_gb, f->pre_vlc.table, ACDC_VLC_BITS, 3);
 471
 472         /* EOB */
 473         if (code == 0)
 474             break;
 475         if (code == 0xf0) {
 476             i += 16;
 477         } else {
 478             level = get_xbits(&f->gb, code & 0xf);
 479             i    += code >> 4;
 480             if (i >= 64) {
 481                 av_log(f->avctx, AV_LOG_ERROR, "run %d oveflow\n", i);
 482                 return 0;
 483             }
 484
 485             j = ff_zigzag_direct[i];
 486             block[j] = level * dequant_table[j];
 487             i++;
 488             if (i >= 64)
 489                 break;
 490         }
 491     }
 492
 493     return 0;
 494 }
 495
 496 static inline void idct_put(FourXContext *f, int x, int y)
 497 {
 498     DCTELEM (*block)[64] = f->block;
 499     int stride           = f->current_picture.linesize[0] >> 1;
 500     int i;
 501     uint16_t *dst = ((uint16_t*)f->current_picture.data[0]) + y * stride + x;
 502
 503     for (i = 0; i < 4; i++) {
 504         block[i][0] += 0x80 * 8 * 8;
 505         idct(block[i]);
 506     }
 507
 508     if (!(f->avctx->flags & CODEC_FLAG_GRAY)) {
 509         for (i = 4; i < 6; i++)
 510             idct(block[i]);
 511     }
 512
 513     /* Note transform is:
 514      * y  = ( 1b + 4g + 2r) / 14
 515      * cb = ( 3b - 2g - 1r) / 14
 516      * cr = (-1b - 4g + 5r) / 14 */
 517     for (y = 0; y < 8; y++) {
 518         for (x = 0; x < 8; x++) {
 519             DCTELEM *temp = block[(x >> 2) + 2 * (y >> 2)] +
 520                             2 * (x & 3) + 2 * 8 * (y & 3); // FIXME optimize
 521             int cb = block[4][x + 8 * y];
 522             int cr = block[5][x + 8 * y];
 523             int cg = (cb + cr) >> 1;
 524             int y;
 525
 526             cb += cb;
 527
 528             y               = temp[0];
 529             dst[0]          = ((y + cb) >> 3) + (((y - cg) & 0xFC) << 3) + (((y + cr) & 0xF8) << 8);
 530             y               = temp[1];
 531             dst[1]          = ((y + cb) >> 3) + (((y - cg) & 0xFC) << 3) + (((y + cr) & 0xF8) << 8);
 532             y               = temp[8];
 533             dst[stride]     = ((y + cb) >> 3) + (((y - cg) & 0xFC) << 3) + (((y + cr) & 0xF8) << 8);
 534             y               = temp[9];
 535             dst[1 + stride] = ((y + cb) >> 3) + (((y - cg) & 0xFC) << 3) + (((y + cr) & 0xF8) << 8);
 536             dst            += 2;
 537         }
 538         dst += 2 * stride - 2 * 8;
 539     }
 540 }
 541
 542 static int decode_i_mb(FourXContext *f)
 543 {
 544     int i;
 545
 546     f->dsp.clear_blocks(f->block[0]);
 547
 548     for (i = 0; i < 6; i++)
 549         if (decode_i_block(f, f->block[i]) < 0)
 550             return -1;
 551
 552     return 0;
 553 }
 554
 555 static const uint8_t *read_huffman_tables(FourXContext *f,
 556                                           const uint8_t * const buf)
 557 {
 558     int frequency[512] = { 0 };
 559     uint8_t flag[512];
 560     int up[512];
 561     uint8_t len_tab[257];
 562     int bits_tab[257];
 563     int start, end;
 564     const uint8_t *ptr = buf;
 565     int j;
 566
 567     memset(up, -1, sizeof(up));
 568
 569     start = *ptr++;
 570     end   = *ptr++;
 571     for (;;) {
 572         int i;
 573
 574         for (i = start; i <= end; i++)
 575             frequency[i] = *ptr++;
 576         start = *ptr++;
 577         if (start == 0)
 578             break;
 579
 580         end = *ptr++;
 581     }
 582     frequency[256] = 1;
 583
 584     while ((ptr - buf) & 3)
 585         ptr++; // 4byte align
 586
 587     for (j = 257; j < 512; j++) {
 588         int min_freq[2] = { 256 * 256, 256 * 256 };
 589         int smallest[2] = { 0, 0 };
 590         int i;
 591         for (i = 0; i < j; i++) {
 592             if (frequency[i] == 0)
 593                 continue;
 594             if (frequency[i] < min_freq[1]) {
 595                 if (frequency[i] < min_freq[0]) {
 596                     min_freq[1] = min_freq[0];
 597                     smallest[1] = smallest[0];
 598                     min_freq[0] = frequency[i];
 599                     smallest[0] = i;
 600                 } else {
 601                     min_freq[1] = frequency[i];
 602                     smallest[1] = i;
 603                 }
 604             }
 605         }
 606         if (min_freq[1] == 256 * 256)
 607             break;
 608
 609         frequency[j]           = min_freq[0] + min_freq[1];
 610         flag[smallest[0]]      = 0;
 611         flag[smallest[1]]      = 1;
 612         up[smallest[0]]        =
 613         up[smallest[1]]        = j;
 614         frequency[smallest[0]] = frequency[smallest[1]] = 0;
 615     }
 616
 617     for (j = 0; j < 257; j++) {
 618         int node, len = 0, bits = 0;
 619
 620         for (node = j; up[node] != -1; node = up[node]) {
 621             bits += flag[node] << len;
 622             len++;
 623             if (len > 31)
 624                 // can this happen at all ?
 625                 av_log(f->avctx, AV_LOG_ERROR,
 626                        "vlc length overflow\n");
 627         }
 628
 629         bits_tab[j] = bits;
 630         len_tab[j]  = len;
 631     }
 632
 633     if (init_vlc(&f->pre_vlc, ACDC_VLC_BITS, 257, len_tab, 1, 1,
 634                  bits_tab, 4, 4, 0))
 635         return NULL;
 636
 637     return ptr;
 638 }
 639
 640 static int mix(int c0, int c1)
 641 {
 642     int blue  =  2 * (c0 & 0x001F) + (c1 & 0x001F);
 643     int green = (2 * (c0 & 0x03E0) + (c1 & 0x03E0)) >> 5;
 644     int red   =  2 * (c0 >> 10)    + (c1 >> 10);
 645     return red / 3 * 1024 + green / 3 * 32 + blue / 3;
 646 }
 647
 648 static int decode_i2_frame(FourXContext *f, const uint8_t *buf, int length)
 649 {
 650     int x, y, x2, y2;
 651     const int width  = f->avctx->width;
 652     const int height = f->avctx->height;
 653     const int mbs    = (FFALIGN(width, 16) >> 4) * (FFALIGN(height, 16) >> 4);
 654     uint16_t *dst    = (uint16_t*)f->current_picture.data[0];
 655     const int stride =            f->current_picture.linesize[0]>>1;
 656     GetByteContext g3;
 657
 658     if (length < mbs * 8) {
 659         av_log(f->avctx, AV_LOG_ERROR, "packet size too small\n");
 660         return AVERROR_INVALIDDATA;
 661     }
 662     bytestream2_init(&g3, buf, length);
 663
 664     for (y = 0; y < height; y += 16) {
 665         for (x = 0; x < width; x += 16) {
 666             unsigned int color[4] = { 0 }, bits;
 667             // warning following is purely guessed ...
 668             color[0] = bytestream2_get_le16u(&g3);
 669             color[1] = bytestream2_get_le16u(&g3);
 670
 671             if (color[0] & 0x8000)
 672                 av_log(NULL, AV_LOG_ERROR, "unk bit 1\n");
 673             if (color[1] & 0x8000)
 674                 av_log(NULL, AV_LOG_ERROR, "unk bit 2\n");
 675
 676             color[2] = mix(color[0], color[1]);
 677             color[3] = mix(color[1], color[0]);
 678
 679             bits = bytestream2_get_le32u(&g3);
 680             for (y2 = 0; y2 < 16; y2++) {
 681                 for (x2 = 0; x2 < 16; x2++) {
 682                     int index = 2 * (x2 >> 2) + 8 * (y2 >> 2);
 683                     dst[y2 * stride + x2] = color[(bits >> index) & 3];
 684                 }
 685             }
 686             dst += 16;
 687         }
 688         dst += 16 * stride - x;
 689     }
 690
 691     return 0;
 692 }
 693
 694 static int decode_i_frame(FourXContext *f, const uint8_t *buf, int length)
 695 {
 696     int x, y;
 697     const int width  = f->avctx->width;
 698     const int height = f->avctx->height;
 699     const unsigned int bitstream_size = AV_RL32(buf);
 700     int token_count av_unused;
 701     unsigned int prestream_size;
 702     const uint8_t *prestream;
 703
 704     if (length < bitstream_size + 12) {
 705         av_log(f->avctx, AV_LOG_ERROR, "packet size too small\n");
 706         return AVERROR_INVALIDDATA;
 707     }
 708
 709     token_count    =     AV_RL32(buf + bitstream_size + 8);
 710     prestream_size = 4 * AV_RL32(buf + bitstream_size + 4);
 711     prestream      =             buf + bitstream_size + 12;
 712
 713     if (prestream_size + bitstream_size + 12 != length
 714         || bitstream_size > (1 << 26)
 715         || prestream_size > (1 << 26)) {
 716         av_log(f->avctx, AV_LOG_ERROR, "size mismatch %d %d %d\n",
 717                prestream_size, bitstream_size, length);
 718         return -1;
 719     }
 720
 721     prestream = read_huffman_tables(f, prestream);
 722
 723     init_get_bits(&f->gb, buf + 4, 8 * bitstream_size);
 724
 725     prestream_size = length + buf - prestream;
 726
 727     av_fast_malloc(&f->bitstream_buffer, &f->bitstream_buffer_size,
 728                    prestream_size + FF_INPUT_BUFFER_PADDING_SIZE);
 729     if (!f->bitstream_buffer)
 730         return AVERROR(ENOMEM);
 731     f->dsp.bswap_buf(f->bitstream_buffer, (const uint32_t*)prestream,
 732                      prestream_size / 4);
 733     memset((uint8_t*)f->bitstream_buffer + prestream_size,
 734            0, FF_INPUT_BUFFER_PADDING_SIZE);
 735     init_get_bits(&f->pre_gb, f->bitstream_buffer, 8 * prestream_size);
 736
 737     f->last_dc = 0 * 128 * 8 * 8;
 738
 739     for (y = 0; y < height; y += 16) {
 740         for (x = 0; x < width; x += 16) {
 741             if (decode_i_mb(f) < 0)
 742                 return -1;
 743
 744             idct_put(f, x, y);
 745         }
 746     }
 747
 748     if (get_vlc2(&f->pre_gb, f->pre_vlc.table, ACDC_VLC_BITS, 3) != 256)
 749         av_log(f->avctx, AV_LOG_ERROR, "end mismatch\n");
 750
 751     return 0;
 752 }
 753
 754 static int decode_frame(AVCodecContext *avctx, void *data,
 755                         int *data_size, AVPacket *avpkt)
 756 {
 757     const uint8_t *buf    = avpkt->data;
 758     int buf_size          = avpkt->size;
 759     FourXContext *const f = avctx->priv_data;
 760     AVFrame *picture      = data;
 761     AVFrame *p, temp;
 762     int i, frame_4cc, frame_size;
 763
 764     frame_4cc = AV_RL32(buf);
 765     if (buf_size != AV_RL32(buf + 4) + 8 || buf_size < 20)
 766         av_log(f->avctx, AV_LOG_ERROR, "size mismatch %d %d\n",
 767                buf_size, AV_RL32(buf + 4));
 768
 769     if (frame_4cc == AV_RL32("cfrm")) {
 770         int free_index       = -1;
 771         const int data_size  = buf_size - 20;
 772         const int id         = AV_RL32(buf + 12);
 773         const int whole_size = AV_RL32(buf + 16);
 774         CFrameBuffer *cfrm;
 775
 776         for (i = 0; i < CFRAME_BUFFER_COUNT; i++)
 777             if (f->cfrm[i].id && f->cfrm[i].id < avctx->frame_number)
 778                 av_log(f->avctx, AV_LOG_ERROR, "lost c frame %d\n",
 779                        f->cfrm[i].id);
 780
 781         for (i = 0; i < CFRAME_BUFFER_COUNT; i++) {
 782             if (f->cfrm[i].id == id)
 783                 break;
 784             if (f->cfrm[i].size == 0)
 785                 free_index = i;
 786         }
 787
 788         if (i >= CFRAME_BUFFER_COUNT) {
 789             i             = free_index;
 790             f->cfrm[i].id = id;
 791         }
 792         cfrm = &f->cfrm[i];
 793
 794         cfrm->data = av_fast_realloc(cfrm->data, &cfrm->allocated_size,
 795                                      cfrm->size + data_size + FF_INPUT_BUFFER_PADDING_SIZE);
 796         // explicit check needed as memcpy below might not catch a NULL
 797         if (!cfrm->data) {
 798             av_log(f->avctx, AV_LOG_ERROR, "realloc falure");
 799             return -1;
 800         }
 801
 802         memcpy(cfrm->data + cfrm->size, buf + 20, data_size);
 803         cfrm->size += data_size;
 804
 805         if (cfrm->size >= whole_size) {
 806             buf        = cfrm->data;
 807             frame_size = cfrm->size;
 808
 809             if (id != avctx->frame_number)
 810                 av_log(f->avctx, AV_LOG_ERROR, "cframe id mismatch %d %d\n",
 811                        id, avctx->frame_number);
 812
 813             cfrm->size = cfrm->id = 0;
 814             frame_4cc  = AV_RL32("pfrm");
 815         } else
 816             return buf_size;
 817     } else {
 818         buf        = buf      + 12;
 819         frame_size = buf_size - 12;
 820     }
 821
 822     temp               = f->current_picture;
 823     f->current_picture = f->last_picture;
 824     f->last_picture    = temp;
 825
 826     p                  = &f->current_picture;
 827     avctx->coded_frame = p;
 828
 829     // alternatively we would have to use our own buffer management
 830     avctx->flags |= CODEC_FLAG_EMU_EDGE;
 831
 832     if (p->data[0])
 833         avctx->release_buffer(avctx, p);
 834
 835     p->reference = 1;
 836     if (avctx->get_buffer(avctx, p) < 0) {
 837         av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
 838         return -1;
 839     }
 840
 841     if (frame_4cc == AV_RL32("ifr2")) {
 842         p->pict_type = AV_PICTURE_TYPE_I;
 843         if (decode_i2_frame(f, buf - 4, frame_size + 4) < 0)
 844             return -1;
 845     } else if (frame_4cc == AV_RL32("ifrm")) {
 846         p->pict_type = AV_PICTURE_TYPE_I;
 847         if (decode_i_frame(f, buf, frame_size) < 0)
 848             return -1;
 849     } else if (frame_4cc == AV_RL32("pfrm") || frame_4cc == AV_RL32("pfr2")) {
 850         if (!f->last_picture.data[0]) {
 851             f->last_picture.reference = 1;
 852             if (avctx->get_buffer(avctx, &f->last_picture) < 0) {
 853                 av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
 854                 return -1;
 855             }
 856         }
 857
 858         p->pict_type = AV_PICTURE_TYPE_P;
 859         if (decode_p_frame(f, buf, frame_size) < 0)
 860             return -1;
 861     } else if (frame_4cc == AV_RL32("snd_")) {
 862         av_log(avctx, AV_LOG_ERROR, "ignoring snd_ chunk length:%d\n",
 863                buf_size);
 864     } else {
 865         av_log(avctx, AV_LOG_ERROR, "ignoring unknown chunk length:%d\n",
 866                buf_size);
 867     }
 868
 869     p->key_frame = p->pict_type == AV_PICTURE_TYPE_I;
 870
 871     *picture   = *p;
 872     *data_size = sizeof(AVPicture);
 873
 874     emms_c();
 875
 876     return buf_size;
 877 }
 878
 879
 880 static av_cold void common_init(AVCodecContext *avctx)
 881 {
 882     FourXContext * const f = avctx->priv_data;
 883
 884     ff_dsputil_init(&f->dsp, avctx);
 885
 886     f->avctx = avctx;
 887 }
 888
 889 static av_cold int decode_init(AVCodecContext *avctx)
 890 {
 891     FourXContext * const f = avctx->priv_data;
 892
 893     if (avctx->extradata_size != 4 || !avctx->extradata) {
 894         av_log(avctx, AV_LOG_ERROR, "extradata wrong or missing\n");
 895         return 1;
 896     }
 897
 898     f->version = AV_RL32(avctx->extradata) >> 16;
 899     common_init(avctx);
 900     init_vlcs(f);
 901
 902     if (f->version > 2)
 903         avctx->pix_fmt = AV_PIX_FMT_RGB565;
 904     else
 905         avctx->pix_fmt = AV_PIX_FMT_BGR555;
 906
 907     return 0;
 908 }
 909
 910
 911 static av_cold int decode_end(AVCodecContext *avctx)
 912 {
 913     FourXContext * const f = avctx->priv_data;
 914     int i;
 915
 916     av_freep(&f->bitstream_buffer);
 917     f->bitstream_buffer_size = 0;
 918     for (i = 0; i < CFRAME_BUFFER_COUNT; i++) {
 919         av_freep(&f->cfrm[i].data);
 920         f->cfrm[i].allocated_size = 0;
 921     }
 922     ff_free_vlc(&f->pre_vlc);
 923     if (f->current_picture.data[0])
 924         avctx->release_buffer(avctx, &f->current_picture);
 925     if (f->last_picture.data[0])
 926         avctx->release_buffer(avctx, &f->last_picture);
 927
 928     return 0;
 929 }
 930
 931 AVCodec ff_fourxm_decoder = {
 932     .name           = "4xm",
 933     .type           = AVMEDIA_TYPE_VIDEO,
 934     .id             = AV_CODEC_ID_4XM,
 935     .priv_data_size = sizeof(FourXContext),
 936     .init           = decode_init,
 937     .close          = decode_end,
 938     .decode         = decode_frame,
 939     .capabilities   = CODEC_CAP_DR1,
 940     .long_name      = NULL_IF_CONFIG_SMALL("4X Movie"),
 941 };