git.sesse.net Git - ffmpeg/blob - libavcodec/diracdec.c

   1 /*
   2  * Copyright (C) 2007 Marco Gerards <marco@gnu.org>
   3  * Copyright (C) 2009 David Conrad
   4  * Copyright (C) 2011 Jordi Ortiz
   5  *
   6  * This file is part of FFmpeg.
   7  *
   8  * FFmpeg is free software; you can redistribute it and/or
   9  * modify it under the terms of the GNU Lesser General Public
  10  * License as published by the Free Software Foundation; either
  11  * version 2.1 of the License, or (at your option) any later version.
  12  *
  13  * FFmpeg is distributed in the hope that it will be useful,
  14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  16  * Lesser General Public License for more details.
  17  *
  18  * You should have received a copy of the GNU Lesser General Public
  19  * License along with FFmpeg; if not, write to the Free Software
  20  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  21  */
  22
  23 /**
  24  * @file
  25  * Dirac Decoder
  26  * @author Marco Gerards <marco@gnu.org>, David Conrad, Jordi Ortiz <nenjordi@gmail.com>
  27  */
  28
  29 #include "avcodec.h"
  30 #include "get_bits.h"
  31 #include "bytestream.h"
  32 #include "internal.h"
  33 #include "golomb.h"
  34 #include "dirac_arith.h"
  35 #include "mpeg12data.h"
  36 #include "libavcodec/mpegvideo.h"
  37 #include "mpegvideoencdsp.h"
  38 #include "dirac_dwt.h"
  39 #include "dirac.h"
  40 #include "diracdsp.h"
  41 #include "videodsp.h"
  42
  43 /**
  44  * The spec limits the number of wavelet decompositions to 4 for both
  45  * level 1 (VC-2) and 128 (long-gop default).
  46  * 5 decompositions is the maximum before >16-bit buffers are needed.
  47  * Schroedinger allows this for DD 9,7 and 13,7 wavelets only, limiting
  48  * the others to 4 decompositions (or 3 for the fidelity filter).
  49  *
  50  * We use this instead of MAX_DECOMPOSITIONS to save some memory.
  51  */
  52 #define MAX_DWT_LEVELS 5
  53
  54 /**
  55  * The spec limits this to 3 for frame coding, but in practice can be as high as 6
  56  */
  57 #define MAX_REFERENCE_FRAMES 8
  58 #define MAX_DELAY 5         /* limit for main profile for frame coding (TODO: field coding) */
  59 #define MAX_FRAMES (MAX_REFERENCE_FRAMES + MAX_DELAY + 1)
  60 #define MAX_QUANT 68        /* max quant for VC-2 */
  61 #define MAX_BLOCKSIZE 32    /* maximum xblen/yblen we support */
  62
  63 /**
  64  * DiracBlock->ref flags, if set then the block does MC from the given ref
  65  */
  66 #define DIRAC_REF_MASK_REF1   1
  67 #define DIRAC_REF_MASK_REF2   2
  68 #define DIRAC_REF_MASK_GLOBAL 4
  69
  70 /**
  71  * Value of Picture.reference when Picture is not a reference picture, but
  72  * is held for delayed output.
  73  */
  74 #define DELAYED_PIC_REF 4
  75
  76 #define CALC_PADDING(size, depth)                       \
  77     (((size + (1 << depth) - 1) >> depth) << depth)
  78
  79 #define DIVRNDUP(a, b) (((a) + (b) - 1) / (b))
  80
/**
 * A decoded picture together with its per-plane half-pel data.
 * The second index of hpel[]/hpel_base[] runs over 4 entries per plane;
 * entries 1..3 of hpel_base[] are heap buffers released in
 * free_sequence_buffers().
 */
typedef struct {
    AVFrame *avframe;
    int interpolated[3];    /* 1 if hpel[] is valid */
    uint8_t *hpel[3][4];
    uint8_t *hpel_base[3][4];
    int reference;          /* see DELAYED_PIC_REF for the "held for output only" value */
} DiracFrame;
  88
/**
 * Per-block motion data. The union holds either motion vectors or DC
 * values; NOTE(review): presumably mv[] is indexed [reference][x/y] and
 * dc[] by plane -- confirm against the motion data decoder.
 */
typedef struct {
    union {
        int16_t mv[2][2];
        int16_t dc[3];
    } u; /* anonymous unions aren't in C99 :( */
    uint8_t ref;    /* combination of DIRAC_REF_MASK_* flags */
} DiracBlock;
  96
/**
 * Description of one wavelet subband: its geometry inside the plane's
 * coefficient buffer and the location of its coded data.
 */
typedef struct SubBand {
    int level;
    int orientation;    /* one of enum dirac_subband */
    int stride; /* in bytes */
    int width;
    int height;
    int pshift;         /* nonzero: coefficients are accessed as int32_t, otherwise int16_t */
    int quant;          /* quantiser index; clamped to MAX_QUANT before table lookup */
    uint8_t *ibuf;      /* first coefficient of this band */
    struct SubBand *parent;

    /* for low delay */
    unsigned length;            /* size of coeff_data in bytes; 0 means nothing is decoded */
    const uint8_t *coeff_data;  /* start of this band's coded coefficients */
} SubBand;
 112
/**
 * Per-component state: picture dimensions, the IDWT coefficient buffers
 * and the block geometry used for motion compensation.
 */
typedef struct Plane {
    int width;
    int height;
    ptrdiff_t stride;

    /* dimensions padded up to a multiple of 1 << wavelet_depth */
    int idwt_width;
    int idwt_height;
    int idwt_stride;        /* in bytes */
    uint8_t *idwt_buf;      /* points into idwt_buf_base, past the top padding */
    uint8_t *idwt_buf_base; /* allocation owning the coefficient buffer */
    uint8_t *idwt_tmp;

    /* block length */
    uint8_t xblen;
    uint8_t yblen;
    /* block separation (block n+1 starts after this many pixels in block n) */
    uint8_t xbsep;
    uint8_t ybsep;
    /* amount of overspill on each edge (half of the overlap between blocks) */
    uint8_t xoffset;
    uint8_t yoffset;

    SubBand band[MAX_DWT_LEVELS][4];    /* indexed [level][orientation] */
} Plane;
 137
/**
 * Decoder private context (AVCodecContext.priv_data): sequence parameters,
 * per-picture coding parameters, scratch buffers and the frame pools.
 */
typedef struct DiracContext {
    AVCodecContext *avctx;
    MpegvideoEncDSPContext mpvencdsp;
    VideoDSPContext vdsp;
    DiracDSPContext diracdsp;
    GetBitContext gb;
    dirac_source_params source;
    int seen_sequence_header;
    int frame_number;           /* number of the next frame to display       */
    Plane plane[3];
    int chroma_x_shift;
    int chroma_y_shift;

    int bit_depth;              /* bit depth                                 */
    int pshift;                 /* pixel shift = bit_depth > 8               */

    int zero_res;               /* zero residue flag                         */
    int is_arith;               /* whether coeffs use arith or golomb coding */
    int low_delay;              /* use the low delay syntax                  */
    int globalmc_flag;          /* use global motion compensation            */
    int num_refs;               /* number of reference pictures              */

    /* wavelet decoding */
    unsigned wavelet_depth;     /* depth of the IDWT                         */
    unsigned wavelet_idx;

    /**
     * schroedinger older than 1.0.8 doesn't store
     * quant delta if only one codeblock exists in a band
     */
    unsigned old_delta_quant;
    unsigned codeblock_mode;

    unsigned num_x;              /* number of horizontal slices               */
    unsigned num_y;              /* number of vertical slices                 */

    /* number of codeblocks per band; index 0 is used for the LL band,
     * level + 1 for the other orientations */
    struct {
        unsigned width;
        unsigned height;
    } codeblock[MAX_DWT_LEVELS+1];

    struct {
        AVRational bytes;       /* average bytes per slice                   */
        uint8_t quant[MAX_DWT_LEVELS][4]; /* [DIRAC_STD] E.1 */
    } lowdelay;

    struct {
        int pan_tilt[2];        /* pan/tilt vector                           */
        int zrs[2][2];          /* zoom/rotate/shear matrix                  */
        int perspective[2];     /* perspective vector                        */
        unsigned zrs_exp;
        unsigned perspective_exp;
    } globalmc[2];

    /* motion compensation */
    uint8_t mv_precision;       /* motion vector precision; NOTE(review): the old comment
                                 * repeated REFS_WT_PRECISION, presumably MV_PRECISION
                                 * in [DIRAC_STD] -- confirm */
    int16_t weight[2];          /* [DIRAC_STD] REF1_WT and REF2_WT           */
    unsigned weight_log2denom;  /* [DIRAC_STD] REFS_WT_PRECISION             */

    int blwidth;                /* number of blocks (horizontally)           */
    int blheight;               /* number of blocks (vertically)             */
    int sbwidth;                /* number of superblocks (horizontally)      */
    int sbheight;               /* number of superblocks (vertically)        */

    uint8_t *sbsplit;
    DiracBlock *blmotion;

    uint8_t *edge_emu_buffer[4];
    uint8_t *edge_emu_buffer_base;

    uint16_t *mctmp;            /* buffer holding the MC data multiplied by OBMC weights */
    uint8_t *mcscratch;
    int buffer_stride;          /* stride the MC scratch buffers were sized for, 0 if unallocated */

    DECLARE_ALIGNED(16, uint8_t, obmc_weight)[3][MAX_BLOCKSIZE*MAX_BLOCKSIZE];

    void (*put_pixels_tab[4])(uint8_t *dst, const uint8_t *src[5], int stride, int h);
    void (*avg_pixels_tab[4])(uint8_t *dst, const uint8_t *src[5], int stride, int h);
    void (*add_obmc)(uint16_t *dst, const uint8_t *src, int stride, const uint8_t *obmc_weight, int yblen);
    dirac_weight_func weight_func;
    dirac_biweight_func biweight_func;

    DiracFrame *current_picture;
    DiracFrame *ref_pics[2];

    /* NULL-terminated lists of pointers into all_frames[] */
    DiracFrame *ref_frames[MAX_REFERENCE_FRAMES+1];
    DiracFrame *delay_frames[MAX_DELAY+1];
    DiracFrame all_frames[MAX_FRAMES];
} DiracContext;
 227
/**
 * Dirac Specification ->
 * Parse code values. 9.6.1 Table 9.1
 *
 * Only the non-picture parse codes are enumerated here.
 */
enum dirac_parse_code {
    pc_seq_header         = 0x00,   /* sequence header */
    pc_eos                = 0x10,   /* end of sequence */
    pc_aux_data           = 0x20,   /* auxiliary data  */
    pc_padding            = 0x30,   /* padding data    */
};
 238
/**
 * Subband orientations; the values double as the second index of
 * Plane.band[][] and as loop bounds (orientation < 4).
 */
enum dirac_subband {
    subband_ll = 0,
    subband_hl = 1,
    subband_lh = 2,
    subband_hh = 3,
    subband_nb,         /* number of orientations */
};
 246
/*
 * Default quantisation matrices: 7 tables of 4x4 entries.
 * NOTE(review): presumably indexed [wavelet filter][level][orientation];
 * the user of this table is not visible here -- confirm. Note that only
 * 4 levels are provided while MAX_DWT_LEVELS is 5.
 */
static const uint8_t default_qmat[][4][4] = {
    { { 5,  3,  3,  0}, { 0,  4,  4,  1}, { 0,  5,  5,  2}, { 0,  6,  6,  3} },
    { { 4,  2,  2,  0}, { 0,  4,  4,  2}, { 0,  5,  5,  3}, { 0,  7,  7,  5} },
    { { 5,  3,  3,  0}, { 0,  4,  4,  1}, { 0,  5,  5,  2}, { 0,  6,  6,  3} },
    { { 8,  4,  4,  0}, { 0,  4,  4,  0}, { 0,  4,  4,  0}, { 0,  4,  4,  0} },
    { { 8,  4,  4,  0}, { 0,  4,  4,  0}, { 0,  4,  4,  0}, { 0,  4,  4,  0} },
    { { 0,  4,  4,  8}, { 0,  8,  8, 12}, { 0, 13, 13, 17}, { 0, 17, 17, 21} },
    { { 3,  1,  1,  0}, { 0,  4,  4,  2}, { 0,  6,  6,  5}, { 0,  9,  9,  7} },
};
 256
 257 static const int qscale_tab[MAX_QUANT+1] = {
 258     4,     5,     6,     7,     8,    10,    11,    13,
 259     16,    19,    23,    27,    32,    38,    45,    54,
 260     64,    76,    91,   108,   128,   152,   181,   215,
 261     256,   304,   362,   431,   512,   609,   724,   861,
 262     1024,  1218,  1448,  1722,  2048,  2435,  2896,  3444,
 263     4096,  4871,  5793,  6889,  8192,  9742, 11585, 13777,
 264     16384, 19484, 23170, 27554, 32768, 38968, 46341, 55109,
 265     65536, 77936
 266 };
 267
 268 static const int qoffset_intra_tab[MAX_QUANT+1] = {
 269     1,     2,     3,     4,     4,     5,     6,     7,
 270     8,    10,    12,    14,    16,    19,    23,    27,
 271     32,    38,    46,    54,    64,    76,    91,   108,
 272     128,   152,   181,   216,   256,   305,   362,   431,
 273     512,   609,   724,   861,  1024,  1218,  1448,  1722,
 274     2048,  2436,  2897,  3445,  4096,  4871,  5793,  6889,
 275     8192,  9742, 11585, 13777, 16384, 19484, 23171, 27555,
 276     32768, 38968
 277 };
 278
 279 static const int qoffset_inter_tab[MAX_QUANT+1] = {
 280     1,     2,     2,     3,     3,     4,     4,     5,
 281     6,     7,     9,    10,    12,    14,    17,    20,
 282     24,    29,    34,    41,    48,    57,    68,    81,
 283     96,   114,   136,   162,   192,   228,   272,   323,
 284     384,   457,   543,   646,   768,   913,  1086,  1292,
 285     1536,  1827,  2172,  2583,  3072,  3653,  4344,  5166,
 286     6144,  7307,  8689, 10333, 12288, 14613, 17378, 20666,
 287     24576, 29226
 288 };
 289
 290 /* magic number division by 3 from schroedinger */
 291 static inline int divide3(int x)
 292 {
 293     return ((x+1)*21845 + 10922) >> 16;
 294 }
 295
 296 static DiracFrame *remove_frame(DiracFrame *framelist[], int picnum)
 297 {
 298     DiracFrame *remove_pic = NULL;
 299     int i, remove_idx = -1;
 300
 301     for (i = 0; framelist[i]; i++)
 302         if (framelist[i]->avframe->display_picture_number == picnum) {
 303             remove_pic = framelist[i];
 304             remove_idx = i;
 305         }
 306
 307     if (remove_pic)
 308         for (i = remove_idx; framelist[i]; i++)
 309             framelist[i] = framelist[i+1];
 310
 311     return remove_pic;
 312 }
 313
 314 static int add_frame(DiracFrame *framelist[], int maxframes, DiracFrame *frame)
 315 {
 316     int i;
 317     for (i = 0; i < maxframes; i++)
 318         if (!framelist[i]) {
 319             framelist[i] = frame;
 320             return 0;
 321         }
 322     return -1;
 323 }
 324
 325 static int alloc_sequence_buffers(DiracContext *s)
 326 {
 327     int sbwidth  = DIVRNDUP(s->source.width,  4);
 328     int sbheight = DIVRNDUP(s->source.height, 4);
 329     int i, w, h, top_padding;
 330
 331     /* todo: think more about this / use or set Plane here */
 332     for (i = 0; i < 3; i++) {
 333         int max_xblen = MAX_BLOCKSIZE >> (i ? s->chroma_x_shift : 0);
 334         int max_yblen = MAX_BLOCKSIZE >> (i ? s->chroma_y_shift : 0);
 335         w = s->source.width  >> (i ? s->chroma_x_shift : 0);
 336         h = s->source.height >> (i ? s->chroma_y_shift : 0);
 337
 338         /* we allocate the max we support here since num decompositions can
 339          * change from frame to frame. Stride is aligned to 16 for SIMD, and
 340          * 1<<MAX_DWT_LEVELS top padding to avoid if(y>0) in arith decoding
 341          * MAX_BLOCKSIZE padding for MC: blocks can spill up to half of that
 342          * on each side */
 343         top_padding = FFMAX(1<<MAX_DWT_LEVELS, max_yblen/2);
 344         w = FFALIGN(CALC_PADDING(w, MAX_DWT_LEVELS), 8); /* FIXME: Should this be 16 for SSE??? */
 345         h = top_padding + CALC_PADDING(h, MAX_DWT_LEVELS) + max_yblen/2;
 346
 347         s->plane[i].idwt_buf_base = av_mallocz_array((w+max_xblen), h * (2 << s->pshift));
 348         s->plane[i].idwt_tmp      = av_malloc_array((w+16), 2 << s->pshift);
 349         s->plane[i].idwt_buf      = s->plane[i].idwt_buf_base + (top_padding*w)*(2 << s->pshift);
 350         if (!s->plane[i].idwt_buf_base || !s->plane[i].idwt_tmp)
 351             return AVERROR(ENOMEM);
 352     }
 353
 354     /* fixme: allocate using real stride here */
 355     s->sbsplit  = av_malloc_array(sbwidth, sbheight);
 356     s->blmotion = av_malloc_array(sbwidth, sbheight * 16 * sizeof(*s->blmotion));
 357
 358     if (!s->sbsplit || !s->blmotion)
 359         return AVERROR(ENOMEM);
 360     return 0;
 361 }
 362
 363 static int alloc_buffers(DiracContext *s, int stride)
 364 {
 365     int w = s->source.width;
 366     int h = s->source.height;
 367
 368     av_assert0(stride >= w);
 369     stride += 64;
 370
 371     if (s->buffer_stride >= stride)
 372         return 0;
 373     s->buffer_stride = 0;
 374
 375     av_freep(&s->edge_emu_buffer_base);
 376     memset(s->edge_emu_buffer, 0, sizeof(s->edge_emu_buffer));
 377     av_freep(&s->mctmp);
 378     av_freep(&s->mcscratch);
 379
 380     s->edge_emu_buffer_base = av_malloc_array(stride, MAX_BLOCKSIZE);
 381
 382     s->mctmp     = av_malloc_array((stride+MAX_BLOCKSIZE), (h+MAX_BLOCKSIZE) * sizeof(*s->mctmp));
 383     s->mcscratch = av_malloc_array(stride, MAX_BLOCKSIZE);
 384
 385     if (!s->edge_emu_buffer_base || !s->mctmp || !s->mcscratch)
 386         return AVERROR(ENOMEM);
 387
 388     s->buffer_stride = stride;
 389     return 0;
 390 }
 391
 392 static void free_sequence_buffers(DiracContext *s)
 393 {
 394     int i, j, k;
 395
 396     for (i = 0; i < MAX_FRAMES; i++) {
 397         if (s->all_frames[i].avframe->data[0]) {
 398             av_frame_unref(s->all_frames[i].avframe);
 399             memset(s->all_frames[i].interpolated, 0, sizeof(s->all_frames[i].interpolated));
 400         }
 401
 402         for (j = 0; j < 3; j++)
 403             for (k = 1; k < 4; k++)
 404                 av_freep(&s->all_frames[i].hpel_base[j][k]);
 405     }
 406
 407     memset(s->ref_frames, 0, sizeof(s->ref_frames));
 408     memset(s->delay_frames, 0, sizeof(s->delay_frames));
 409
 410     for (i = 0; i < 3; i++) {
 411         av_freep(&s->plane[i].idwt_buf_base);
 412         av_freep(&s->plane[i].idwt_tmp);
 413     }
 414
 415     s->buffer_stride = 0;
 416     av_freep(&s->sbsplit);
 417     av_freep(&s->blmotion);
 418     av_freep(&s->edge_emu_buffer_base);
 419
 420     av_freep(&s->mctmp);
 421     av_freep(&s->mcscratch);
 422 }
 423
 424 static av_cold int dirac_decode_init(AVCodecContext *avctx)
 425 {
 426     DiracContext *s = avctx->priv_data;
 427     int i;
 428
 429     s->avctx = avctx;
 430     s->frame_number = -1;
 431
 432     ff_diracdsp_init(&s->diracdsp);
 433     ff_mpegvideoencdsp_init(&s->mpvencdsp, avctx);
 434     ff_videodsp_init(&s->vdsp, 8);
 435
 436     for (i = 0; i < MAX_FRAMES; i++) {
 437         s->all_frames[i].avframe = av_frame_alloc();
 438         if (!s->all_frames[i].avframe) {
 439             while (i > 0)
 440                 av_frame_free(&s->all_frames[--i].avframe);
 441             return AVERROR(ENOMEM);
 442         }
 443     }
 444
 445     return 0;
 446 }
 447
 448 static void dirac_decode_flush(AVCodecContext *avctx)
 449 {
 450     DiracContext *s = avctx->priv_data;
 451     free_sequence_buffers(s);
 452     s->seen_sequence_header = 0;
 453     s->frame_number = -1;
 454 }
 455
 456 static av_cold int dirac_decode_end(AVCodecContext *avctx)
 457 {
 458     DiracContext *s = avctx->priv_data;
 459     int i;
 460
 461     dirac_decode_flush(avctx);
 462     for (i = 0; i < MAX_FRAMES; i++)
 463         av_frame_free(&s->all_frames[i].avframe);
 464
 465     return 0;
 466 }
 467
/* Arithmetic coding context for a sign bit, chosen from the sign of the
 * predictor x: CTX_SIGN_ZERO - 1 for negative, CTX_SIGN_ZERO for zero and
 * CTX_SIGN_ZERO + 1 for positive values. */
#define SIGN_CTX(x) (CTX_SIGN_ZERO + ((x) > 0) - ((x) < 0))
 469
 470 static inline int coeff_unpack_golomb(GetBitContext *gb, int qfactor, int qoffset)
 471 {
 472     int sign, coeff;
 473
 474     coeff = svq3_get_ue_golomb(gb);
 475     if (coeff) {
 476         coeff = (coeff * qfactor + qoffset + 2) >> 2;
 477         sign  = get_bits1(gb);
 478         coeff = (coeff ^ -sign) + sign;
 479     }
 480     return coeff;
 481 }
 482
/*
 * Generates coeff_unpack_arith_<n>(), which decodes one arithmetic-coded
 * coefficient at position (x, y) of band b and stores it in *buf
 * (coefficients are of the given type).
 *
 * The magnitude context depends on whether the co-located parent
 * coefficient is nonzero and on whether the already decoded neighbours
 * (left, above and above-left; only above when x == 0) are all zero.
 * The sign context is predicted from the coefficient above for HL bands
 * and from the one to the left for LH bands (the latter only when x != 0).
 * Nonzero magnitudes are dequantised as (m * qfactor + qoffset + 2) >> 2.
 *
 * Note that buf[mstride] (the row above) is read even for the first row,
 * so the buffer must have readable memory above it.
 */
#define UNPACK_ARITH(n, type) \
    static inline void coeff_unpack_arith_##n(DiracArith *c, int qfactor, int qoffset, \
                                              SubBand *b, type *buf, int x, int y) \
    { \
        int coeff, sign, sign_pred = 0, pred_ctx = CTX_ZPZN_F1; \
        const int mstride = -(b->stride >> (1+b->pshift)); \
        if (b->parent) { \
            const type *pbuf = (type *)b->parent->ibuf; \
            const int stride = b->parent->stride >> (1+b->parent->pshift); \
            pred_ctx += !!pbuf[stride * (y>>1) + (x>>1)] << 1; \
        } \
        if (b->orientation == subband_hl) \
            sign_pred = buf[mstride]; \
        if (x) { \
            pred_ctx += !(buf[-1] | buf[mstride] | buf[-1 + mstride]); \
            if (b->orientation == subband_lh) \
                sign_pred = buf[-1]; \
        } else { \
            pred_ctx += !buf[mstride]; \
        } \
        coeff = dirac_get_arith_uint(c, pred_ctx, CTX_COEFF_DATA); \
        if (coeff) { \
            coeff = (coeff * qfactor + qoffset + 2) >> 2; \
            sign  = dirac_get_arith_bit(c, SIGN_CTX(sign_pred)); \
            coeff = (coeff ^ -sign) + sign; \
        } \
        *buf = coeff; \
    } \

/* 16-bit coefficients (pshift == 0) and 32-bit coefficients (pshift != 0) */
UNPACK_ARITH(8, int16_t)
UNPACK_ARITH(10, int32_t)
 514
 515 /**
 516  * Decode the coeffs in the rectangle defined by left, right, top, bottom
 517  * [DIRAC_STD] 13.4.3.2 Codeblock unpacking loop. codeblock()
 518  */
 519 static inline void codeblock(DiracContext *s, SubBand *b,
 520                              GetBitContext *gb, DiracArith *c,
 521                              int left, int right, int top, int bottom,
 522                              int blockcnt_one, int is_arith)
 523 {
 524     int x, y, zero_block;
 525     int qoffset, qfactor;
 526     uint8_t *buf;
 527
 528     /* check for any coded coefficients in this codeblock */
 529     if (!blockcnt_one) {
 530         if (is_arith)
 531             zero_block = dirac_get_arith_bit(c, CTX_ZERO_BLOCK);
 532         else
 533             zero_block = get_bits1(gb);
 534
 535         if (zero_block)
 536             return;
 537     }
 538
 539     if (s->codeblock_mode && !(s->old_delta_quant && blockcnt_one)) {
 540         int quant = b->quant;
 541         if (is_arith)
 542             quant += dirac_get_arith_int(c, CTX_DELTA_Q_F, CTX_DELTA_Q_DATA);
 543         else
 544             quant += dirac_get_se_golomb(gb);
 545         if (quant < 0) {
 546             av_log(s->avctx, AV_LOG_ERROR, "Invalid quant\n");
 547             return;
 548         }
 549         b->quant = quant;
 550     }
 551
 552     b->quant = FFMIN(b->quant, MAX_QUANT);
 553
 554     qfactor = qscale_tab[b->quant];
 555     /* TODO: context pointer? */
 556     if (!s->num_refs)
 557         qoffset = qoffset_intra_tab[b->quant];
 558     else
 559         qoffset = qoffset_inter_tab[b->quant];
 560
 561     buf = b->ibuf + top * b->stride;
 562     if (is_arith) {
 563         for (y = top; y < bottom; y++) {
 564             for (x = left; x < right; x++) {
 565                 if (b->pshift) {
 566                     coeff_unpack_arith_10(c, qfactor, qoffset, b, (int32_t*)(buf)+x, x, y);
 567                 } else {
 568                     coeff_unpack_arith_8(c, qfactor, qoffset, b, (int16_t*)(buf)+x, x, y);
 569                 }
 570             }
 571             buf += b->stride;
 572         }
 573     } else {
 574         for (y = top; y < bottom; y++) {
 575             for (x = left; x < right; x++) {
 576                 int val = coeff_unpack_golomb(gb, qfactor, qoffset);
 577                 if (b->pshift) {
 578                     AV_WN32(&buf[4*x], val);
 579                 } else {
 580                     AV_WN16(&buf[2*x], val);
 581                 }
 582             }
 583             buf += b->stride;
 584          }
 585      }
 586 }
 587
/**
 * Dirac Specification ->
 * 13.3 intra_dc_prediction(band)
 *
 * Generates intra_dc_prediction_<n>(), which undoes the DC prediction of
 * band b in place (coefficients are of the given type):
 *  - first row: each value is predicted from its left neighbour;
 *  - first column: each value is predicted from the value above;
 *  - elsewhere: the prediction is divide3() of the sum of the left, above
 *    and above-left neighbours.
 * b->stride >> (1+b->pshift) converts the byte stride to an element stride.
 */
#define INTRA_DC_PRED(n, type) \
    static inline void intra_dc_prediction_##n(SubBand *b) \
    { \
        type *buf = (type*)b->ibuf; \
        int x, y; \
        \
        for (x = 1; x < b->width; x++) \
            buf[x] += buf[x-1]; \
        buf += (b->stride >> (1+b->pshift)); \
        \
        for (y = 1; y < b->height; y++) { \
            buf[0] += buf[-(b->stride >> (1+b->pshift))]; \
            \
            for (x = 1; x < b->width; x++) { \
                int pred = buf[x - 1] + buf[x - (b->stride >> (1+b->pshift))] + buf[x - (b->stride >> (1+b->pshift))-1]; \
                buf[x]  += divide3(pred); \
            } \
            buf += (b->stride >> (1+b->pshift)); \
        } \
    } \

/* 16-bit and 32-bit coefficient variants */
INTRA_DC_PRED(8, int16_t)
INTRA_DC_PRED(10, int32_t)
 615
 616 /**
 617  * Dirac Specification ->
 618  * 13.4.2 Non-skipped subbands.  subband_coeffs()
 619  */
 620 static av_always_inline void decode_subband_internal(DiracContext *s, SubBand *b, int is_arith)
 621 {
 622     int cb_x, cb_y, left, right, top, bottom;
 623     DiracArith c;
 624     GetBitContext gb;
 625     int cb_width  = s->codeblock[b->level + (b->orientation != subband_ll)].width;
 626     int cb_height = s->codeblock[b->level + (b->orientation != subband_ll)].height;
 627     int blockcnt_one = (cb_width + cb_height) == 2;
 628
 629     if (!b->length)
 630         return;
 631
 632     init_get_bits8(&gb, b->coeff_data, b->length);
 633
 634     if (is_arith)
 635         ff_dirac_init_arith_decoder(&c, &gb, b->length);
 636
 637     top = 0;
 638     for (cb_y = 0; cb_y < cb_height; cb_y++) {
 639         bottom = (b->height * (cb_y+1LL)) / cb_height;
 640         left = 0;
 641         for (cb_x = 0; cb_x < cb_width; cb_x++) {
 642             right = (b->width * (cb_x+1LL)) / cb_width;
 643             codeblock(s, b, &gb, &c, left, right, top, bottom, blockcnt_one, is_arith);
 644             left = right;
 645         }
 646         top = bottom;
 647     }
 648
 649     if (b->orientation == subband_ll && s->num_refs == 0) {
 650         if (s->pshift) {
 651             intra_dc_prediction_10(b);
 652         } else {
 653             intra_dc_prediction_8(b);
 654         }
 655     }
 656 }
 657
 658 static int decode_subband_arith(AVCodecContext *avctx, void *b)
 659 {
 660     DiracContext *s = avctx->priv_data;
 661     decode_subband_internal(s, b, 1);
 662     return 0;
 663 }
 664
 665 static int decode_subband_golomb(AVCodecContext *avctx, void *arg)
 666 {
 667     DiracContext *s = avctx->priv_data;
 668     SubBand **b     = arg;
 669     decode_subband_internal(s, *b, 0);
 670     return 0;
 671 }
 672
 673 /**
 674  * Dirac Specification ->
 675  * [DIRAC_STD] 13.4.1 core_transform_data()
 676  */
 677 static void decode_component(DiracContext *s, int comp)
 678 {
 679     AVCodecContext *avctx = s->avctx;
 680     SubBand *bands[3*MAX_DWT_LEVELS+1];
 681     enum dirac_subband orientation;
 682     int level, num_bands = 0;
 683
 684     /* Unpack all subbands at all levels. */
 685     for (level = 0; level < s->wavelet_depth; level++) {
 686         for (orientation = !!level; orientation < 4; orientation++) {
 687             SubBand *b = &s->plane[comp].band[level][orientation];
 688             bands[num_bands++] = b;
 689
 690             align_get_bits(&s->gb);
 691             /* [DIRAC_STD] 13.4.2 subband() */
 692             b->length = svq3_get_ue_golomb(&s->gb);
 693             if (b->length) {
 694                 b->quant = svq3_get_ue_golomb(&s->gb);
 695                 align_get_bits(&s->gb);
 696                 b->coeff_data = s->gb.buffer + get_bits_count(&s->gb)/8;
 697                 b->length = FFMIN(b->length, FFMAX(get_bits_left(&s->gb)/8, 0));
 698                 skip_bits_long(&s->gb, b->length*8);
 699             }
 700         }
 701         /* arithmetic coding has inter-level dependencies, so we can only execute one level at a time */
 702         if (s->is_arith)
 703             avctx->execute(avctx, decode_subband_arith, &s->plane[comp].band[level][!!level],
 704                            NULL, 4-!!level, sizeof(SubBand));
 705     }
 706     /* golomb coding has no inter-level dependencies, so we can execute all subbands in parallel */
 707     if (!s->is_arith)
 708         avctx->execute(avctx, decode_subband_golomb, bands, NULL, num_bands, sizeof(SubBand*));
 709 }
 710
/*
 * Read one coefficient for buf1 (and one for buf2, if it is not NULL) at
 * column x, storing them as the given type.
 *
 * This macro is not self-contained:
 *  - it uses the variables qfactor and qoffset of the enclosing function;
 *  - it declares a local named buf, so it must be expanded inside its own
 *    block;
 *  - it returns from the enclosing function as soon as the bit position
 *    reaches ebits.
 */
#define PARSE_VALUES(type, x, gb, ebits, buf1, buf2) \
    type *buf = (type *)buf1; \
    buf[x] = coeff_unpack_golomb(gb, qfactor, qoffset); \
    if (get_bits_count(gb) >= ebits) \
        return; \
    if (buf2) { \
        buf = (type *)buf2; \
        buf[x] = coeff_unpack_golomb(gb, qfactor, qoffset); \
        if (get_bits_count(gb) >= ebits) \
            return; \
    } \

/**
 * Decode the part of one subband (or of a pair of subbands with
 * interleaved coefficients) that belongs to a low delay slice.
 *
 * @param s        decoder context
 * @param gb       bit reader positioned at the slice data
 * @param quant    quantiser index, clamped to MAX_QUANT for the table lookup
 * @param slice_x  horizontal slice index
 * @param slice_y  vertical slice index
 * @param bits_end bit position at which decoding stops; coefficients not
 *                 reached by then are not written
 * @param b1       first subband
 * @param b2       optional second subband (may be NULL); the slice rectangle
 *                 is computed from b1 and applied to both
 */
static void decode_subband(DiracContext *s, GetBitContext *gb, int quant,
                           int slice_x, int slice_y, int bits_end,
                           SubBand *b1, SubBand *b2)
{
    /* rectangle of the band covered by this slice */
    int left   = b1->width  * slice_x    / s->num_x;
    int right  = b1->width  *(slice_x+1) / s->num_x;
    int top    = b1->height * slice_y    / s->num_y;
    int bottom = b1->height *(slice_y+1) / s->num_y;

    /* qfactor and qoffset are referenced by name inside PARSE_VALUES */
    int qfactor = qscale_tab[FFMIN(quant, MAX_QUANT)];
    int qoffset = qoffset_intra_tab[FFMIN(quant, MAX_QUANT)];

    uint8_t *buf1 =      b1->ibuf + top * b1->stride;
    uint8_t *buf2 = b2 ? b2->ibuf + top * b2->stride: NULL;
    int x, y;
    /* we have to constantly check for overread since the spec explicitly
       requires this, with the meaning that all remaining coeffs are set to 0 */
    if (get_bits_count(gb) >= bits_end)
        return;

    if (s->pshift) {
        for (y = top; y < bottom; y++) {
            for (x = left; x < right; x++) {
                /* may return from this function, see PARSE_VALUES */
                PARSE_VALUES(int32_t, x, gb, bits_end, buf1, buf2);
            }
            buf1 += b1->stride;
            if (buf2)
                buf2 += b2->stride;
        }
    }
    else {
        for (y = top; y < bottom; y++) {
            for (x = left; x < right; x++) {
                /* may return from this function, see PARSE_VALUES */
                PARSE_VALUES(int16_t, x, gb, bits_end, buf1, buf2);
            }
            buf1 += b1->stride;
            if (buf2)
                buf2 += b2->stride;
        }
    }
}
 764
/* Used by Low Delay and High Quality profiles.
 * One entry per slice; an array of these is handed to avctx->execute(). */
typedef struct DiracSlice {
    GetBitContext gb;   /* bit reader starting at this slice's data */
    int slice_x;        /* horizontal slice index */
    int slice_y;        /* vertical slice index */
    int bytes;          /* size of the slice in bytes */
} DiracSlice;
 772
 773
 774 /**
 775  * Dirac Specification ->
 776  * 13.5.2 Slices. slice(sx,sy)
 777  */
 778 static int decode_lowdelay_slice(AVCodecContext *avctx, void *arg)
 779 {
 780     DiracContext *s = avctx->priv_data;
 781     DiracSlice *slice = arg;
 782     GetBitContext *gb = &slice->gb;
 783     enum dirac_subband orientation;
 784     int level, quant, chroma_bits, chroma_end;
 785
 786     int quant_base  = get_bits(gb, 7); /*[DIRAC_STD] qindex */
 787     int length_bits = av_log2(8 * slice->bytes)+1;
 788     int luma_bits   = get_bits_long(gb, length_bits);
 789     int luma_end    = get_bits_count(gb) + FFMIN(luma_bits, get_bits_left(gb));
 790
 791     /* [DIRAC_STD] 13.5.5.2 luma_slice_band */
 792     for (level = 0; level < s->wavelet_depth; level++)
 793         for (orientation = !!level; orientation < 4; orientation++) {
 794             quant = FFMAX(quant_base - s->lowdelay.quant[level][orientation], 0);
 795             decode_subband(s, gb, quant, slice->slice_x, slice->slice_y, luma_end,
 796                            &s->plane[0].band[level][orientation], NULL);
 797         }
 798
 799     /* consume any unused bits from luma */
 800     skip_bits_long(gb, get_bits_count(gb) - luma_end);
 801
 802     chroma_bits = 8*slice->bytes - 7 - length_bits - luma_bits;
 803     chroma_end  = get_bits_count(gb) + FFMIN(chroma_bits, get_bits_left(gb));
 804     /* [DIRAC_STD] 13.5.5.3 chroma_slice_band */
 805     for (level = 0; level < s->wavelet_depth; level++)
 806         for (orientation = !!level; orientation < 4; orientation++) {
 807             quant = FFMAX(quant_base - s->lowdelay.quant[level][orientation], 0);
 808             decode_subband(s, gb, quant, slice->slice_x, slice->slice_y, chroma_end,
 809                            &s->plane[1].band[level][orientation],
 810                            &s->plane[2].band[level][orientation]);
 811         }
 812
 813     return 0;
 814 }
 815
 816 /**
 817  * Dirac Specification ->
 818  * 13.5.1 low_delay_transform_data()
 819  */
 820 static int decode_lowdelay(DiracContext *s)
 821 {
 822     AVCodecContext *avctx = s->avctx;
 823     int slice_x, slice_y, bytes, bufsize;
 824     const uint8_t *buf;
 825     DiracSlice *slices;
 826     int slice_num = 0;
 827
 828     slices = av_mallocz_array(s->num_x, s->num_y * sizeof(DiracSlice));
 829     if (!slices)
 830         return AVERROR(ENOMEM);
 831
 832     align_get_bits(&s->gb);
 833     /*[DIRAC_STD] 13.5.2 Slices. slice(sx,sy) */
 834     buf = s->gb.buffer + get_bits_count(&s->gb)/8;
 835     bufsize = get_bits_left(&s->gb);
 836
 837     for (slice_y = 0; bufsize > 0 && slice_y < s->num_y; slice_y++) {
 838         for (slice_x = 0; bufsize > 0 && slice_x < s->num_x; slice_x++) {
 839             bytes = (slice_num+1) * s->lowdelay.bytes.num / s->lowdelay.bytes.den
 840                 - slice_num    * s->lowdelay.bytes.num / s->lowdelay.bytes.den;
 841             slices[slice_num].bytes   = bytes;
 842             slices[slice_num].slice_x = slice_x;
 843             slices[slice_num].slice_y = slice_y;
 844             init_get_bits(&slices[slice_num].gb, buf, bufsize);
 845             slice_num++;
 846
 847             buf     += bytes;
 848             if (bufsize/8 >= bytes)
 849                 bufsize -= bytes*8;
 850             else
 851                 bufsize = 0;
 852         }
 853     }
 854
 855     avctx->execute(avctx, decode_lowdelay_slice, slices, NULL, slice_num,
 856                    sizeof(struct DiracSlice)); /* [DIRAC_STD] 13.5.2 Slices */
 857     if (s->pshift) {
 858         intra_dc_prediction_10(&s->plane[0].band[0][0]);
 859         intra_dc_prediction_10(&s->plane[1].band[0][0]);
 860         intra_dc_prediction_10(&s->plane[2].band[0][0]);
 861     } else {
 862         intra_dc_prediction_8(&s->plane[0].band[0][0]);
 863         intra_dc_prediction_8(&s->plane[1].band[0][0]);
 864         intra_dc_prediction_8(&s->plane[2].band[0][0]);
 865     }
 866     av_free(slices);
 867     return 0;
 868 }
 869
 870 static void init_planes(DiracContext *s)
 871 {
 872     int i, w, h, level, orientation;
 873
 874     for (i = 0; i < 3; i++) {
 875         Plane *p = &s->plane[i];
 876
 877         p->width       = s->source.width  >> (i ? s->chroma_x_shift : 0);
 878         p->height      = s->source.height >> (i ? s->chroma_y_shift : 0);
 879         p->idwt_width  = w = CALC_PADDING(p->width , s->wavelet_depth);
 880         p->idwt_height = h = CALC_PADDING(p->height, s->wavelet_depth);
 881         p->idwt_stride = FFALIGN(p->idwt_width << (1 + s->pshift), 8);
 882
 883         for (level = s->wavelet_depth-1; level >= 0; level--) {
 884             w = w>>1;
 885             h = h>>1;
 886             for (orientation = !!level; orientation < 4; orientation++) {
 887                 SubBand *b = &p->band[level][orientation];
 888
 889                 b->pshift = s->pshift;
 890                 b->ibuf   = p->idwt_buf;
 891                 b->level  = level;
 892                 b->stride = p->idwt_stride << (s->wavelet_depth - level);
 893                 b->width  = w;
 894                 b->height = h;
 895                 b->orientation = orientation;
 896
 897                 if (orientation & 1)
 898                     b->ibuf += w << (1+b->pshift);
 899                 if (orientation > 1)
 900                     b->ibuf += (b->stride>>1);
 901
 902                 if (level)
 903                     b->parent = &p->band[level-1][orientation];
 904             }
 905         }
 906
 907         if (i > 0) {
 908             p->xblen = s->plane[0].xblen >> s->chroma_x_shift;
 909             p->yblen = s->plane[0].yblen >> s->chroma_y_shift;
 910             p->xbsep = s->plane[0].xbsep >> s->chroma_x_shift;
 911             p->ybsep = s->plane[0].ybsep >> s->chroma_y_shift;
 912         }
 913
 914         p->xoffset = (p->xblen - p->xbsep)/2;
 915         p->yoffset = (p->yblen - p->ybsep)/2;
 916     }
 917 }
 918
 919 /**
 920  * Unpack the motion compensation parameters
 921  * Dirac Specification ->
 922  * 11.2 Picture prediction data. picture_prediction()
 923  */
 924 static int dirac_unpack_prediction_parameters(DiracContext *s)
 925 {
 926     static const uint8_t default_blen[] = { 4, 12, 16, 24 };
 927
 928     GetBitContext *gb = &s->gb;
 929     unsigned idx, ref;
 930
 931     align_get_bits(gb);
 932     /* [DIRAC_STD] 11.2.2 Block parameters. block_parameters() */
 933     /* Luma and Chroma are equal. 11.2.3 */
 934     idx = svq3_get_ue_golomb(gb); /* [DIRAC_STD] index */
 935
 936     if (idx > 4) {
 937         av_log(s->avctx, AV_LOG_ERROR, "Block prediction index too high\n");
 938         return AVERROR_INVALIDDATA;
 939     }
 940
 941     if (idx == 0) {
 942         s->plane[0].xblen = svq3_get_ue_golomb(gb);
 943         s->plane[0].yblen = svq3_get_ue_golomb(gb);
 944         s->plane[0].xbsep = svq3_get_ue_golomb(gb);
 945         s->plane[0].ybsep = svq3_get_ue_golomb(gb);
 946     } else {
 947         /*[DIRAC_STD] preset_block_params(index). Table 11.1 */
 948         s->plane[0].xblen = default_blen[idx-1];
 949         s->plane[0].yblen = default_blen[idx-1];
 950         s->plane[0].xbsep = 4 * idx;
 951         s->plane[0].ybsep = 4 * idx;
 952     }
 953     /*[DIRAC_STD] 11.2.4 motion_data_dimensions()
 954       Calculated in function dirac_unpack_block_motion_data */
 955
 956     if (s->plane[0].xblen % (1 << s->chroma_x_shift) != 0 ||
 957         s->plane[0].yblen % (1 << s->chroma_y_shift) != 0 ||
 958         !s->plane[0].xblen || !s->plane[0].yblen) {
 959         av_log(s->avctx, AV_LOG_ERROR,
 960                "invalid x/y block length (%d/%d) for x/y chroma shift (%d/%d)\n",
 961                s->plane[0].xblen, s->plane[0].yblen, s->chroma_x_shift, s->chroma_y_shift);
 962         return AVERROR_INVALIDDATA;
 963     }
 964     if (!s->plane[0].xbsep || !s->plane[0].ybsep || s->plane[0].xbsep < s->plane[0].xblen/2 || s->plane[0].ybsep < s->plane[0].yblen/2) {
 965         av_log(s->avctx, AV_LOG_ERROR, "Block separation too small\n");
 966         return AVERROR_INVALIDDATA;
 967     }
 968     if (s->plane[0].xbsep > s->plane[0].xblen || s->plane[0].ybsep > s->plane[0].yblen) {
 969         av_log(s->avctx, AV_LOG_ERROR, "Block separation greater than size\n");
 970         return AVERROR_INVALIDDATA;
 971     }
 972     if (FFMAX(s->plane[0].xblen, s->plane[0].yblen) > MAX_BLOCKSIZE) {
 973         av_log(s->avctx, AV_LOG_ERROR, "Unsupported large block size\n");
 974         return AVERROR_PATCHWELCOME;
 975     }
 976
 977     /*[DIRAC_STD] 11.2.5 Motion vector precision. motion_vector_precision()
 978       Read motion vector precision */
 979     s->mv_precision = svq3_get_ue_golomb(gb);
 980     if (s->mv_precision > 3) {
 981         av_log(s->avctx, AV_LOG_ERROR, "MV precision finer than eighth-pel\n");
 982         return AVERROR_INVALIDDATA;
 983     }
 984
 985     /*[DIRAC_STD] 11.2.6 Global motion. global_motion()
 986       Read the global motion compensation parameters */
 987     s->globalmc_flag = get_bits1(gb);
 988     if (s->globalmc_flag) {
 989         memset(s->globalmc, 0, sizeof(s->globalmc));
 990         /* [DIRAC_STD] pan_tilt(gparams) */
 991         for (ref = 0; ref < s->num_refs; ref++) {
 992             if (get_bits1(gb)) {
 993                 s->globalmc[ref].pan_tilt[0] = dirac_get_se_golomb(gb);
 994                 s->globalmc[ref].pan_tilt[1] = dirac_get_se_golomb(gb);
 995             }
 996             /* [DIRAC_STD] zoom_rotate_shear(gparams)
 997                zoom/rotation/shear parameters */
 998             if (get_bits1(gb)) {
 999                 s->globalmc[ref].zrs_exp   = svq3_get_ue_golomb(gb);
1000                 s->globalmc[ref].zrs[0][0] = dirac_get_se_golomb(gb);
1001                 s->globalmc[ref].zrs[0][1] = dirac_get_se_golomb(gb);
1002                 s->globalmc[ref].zrs[1][0] = dirac_get_se_golomb(gb);
1003                 s->globalmc[ref].zrs[1][1] = dirac_get_se_golomb(gb);
1004             } else {
1005                 s->globalmc[ref].zrs[0][0] = 1;
1006                 s->globalmc[ref].zrs[1][1] = 1;
1007             }
1008             /* [DIRAC_STD] perspective(gparams) */
1009             if (get_bits1(gb)) {
1010                 s->globalmc[ref].perspective_exp = svq3_get_ue_golomb(gb);
1011                 s->globalmc[ref].perspective[0]  = dirac_get_se_golomb(gb);
1012                 s->globalmc[ref].perspective[1]  = dirac_get_se_golomb(gb);
1013             }
1014         }
1015     }
1016
1017     /*[DIRAC_STD] 11.2.7 Picture prediction mode. prediction_mode()
1018       Picture prediction mode, not currently used. */
1019     if (svq3_get_ue_golomb(gb)) {
1020         av_log(s->avctx, AV_LOG_ERROR, "Unknown picture prediction mode\n");
1021         return AVERROR_INVALIDDATA;
1022     }
1023
1024     /* [DIRAC_STD] 11.2.8 Reference picture weight. reference_picture_weights()
1025        just data read, weight calculation will be done later on. */
1026     s->weight_log2denom = 1;
1027     s->weight[0]        = 1;
1028     s->weight[1]        = 1;
1029
1030     if (get_bits1(gb)) {
1031         s->weight_log2denom = svq3_get_ue_golomb(gb);
1032         s->weight[0] = dirac_get_se_golomb(gb);
1033         if (s->num_refs == 2)
1034             s->weight[1] = dirac_get_se_golomb(gb);
1035     }
1036     return 0;
1037 }
1038
/**
 * Dirac Specification ->
 * 11.3 Wavelet transform data. wavelet_transform()
 *
 * Reads the zero-residual flag, the wavelet index and depth and then either
 * the codeblock parameters (when s->low_delay is not set) or the slice
 * parameters and quantisation matrix (when s->low_delay is set).
 *
 * @return 0 on success (also when zero_res is set, in which case nothing
 *         else is read), AVERROR_INVALIDDATA if a value fails its check
 */
static int dirac_unpack_idwt_params(DiracContext *s)
{
    GetBitContext *gb = &s->gb;
    int i, level;
    unsigned tmp;

/* Read one unsigned Golomb value into tmp; if cond (which may refer to tmp)
 * holds, log errmsg and return AVERROR_INVALIDDATA from the enclosing
 * function, otherwise store tmp in dst.
 * Expands to several statements, so it must not be used as the unbraced body
 * of an if/for. */
#define CHECKEDREAD(dst, cond, errmsg) \
    tmp = svq3_get_ue_golomb(gb); \
    if (cond) { \
        av_log(s->avctx, AV_LOG_ERROR, errmsg); \
        return AVERROR_INVALIDDATA; \
    }\
    dst = tmp;

    align_get_bits(gb);

    /* the zero_res flag is only coded when there are reference pictures */
    s->zero_res = s->num_refs ? get_bits1(gb) : 0;
    if (s->zero_res)
        return 0;

    /*[DIRAC_STD] 11.3.1 Transform parameters. transform_parameters() */
    CHECKEDREAD(s->wavelet_idx, tmp > 6, "wavelet_idx is too big\n")

    CHECKEDREAD(s->wavelet_depth, tmp > MAX_DWT_LEVELS || tmp < 1, "invalid number of DWT decompositions\n")

    if (!s->low_delay) {
        /* Codeblock parameters (core syntax only) */
        if (get_bits1(gb)) {
            /* wavelet_depth + 1 entries; each count is limited to the picture
               dimension shifted right by (wavelet_depth - i) */
            for (i = 0; i <= s->wavelet_depth; i++) {
                CHECKEDREAD(s->codeblock[i].width , tmp < 1 || tmp > (s->avctx->width >>s->wavelet_depth-i), "codeblock width invalid\n")
                CHECKEDREAD(s->codeblock[i].height, tmp < 1 || tmp > (s->avctx->height>>s->wavelet_depth-i), "codeblock height invalid\n")
            }

            CHECKEDREAD(s->codeblock_mode, tmp > 1, "unknown codeblock mode\n")
        } else
            /* not coded: 1x1 for every entry */
            for (i = 0; i <= s->wavelet_depth; i++)
                s->codeblock[i].width = s->codeblock[i].height = 1;
    } else {
        /* Slice parameters + quantization matrix*/
        /*[DIRAC_STD] 11.3.4 Slice coding Parameters (low delay syntax only). slice_parameters() */
        /* NOTE(review): num_x, num_y and bytes.num are stored without any
           range check here; only bytes.den is validated. decode_lowdelay()
           sizes its slice table from num_x * num_y -- confirm whether zero or
           very large counts need to be rejected at this point. */
        s->num_x     = svq3_get_ue_golomb(gb);
        s->num_y     = svq3_get_ue_golomb(gb);
        s->lowdelay.bytes.num = svq3_get_ue_golomb(gb);
        s->lowdelay.bytes.den = svq3_get_ue_golomb(gb);

        /* bytes.den is used as a divisor in decode_lowdelay() */
        if (s->lowdelay.bytes.den <= 0) {
            av_log(s->avctx,AV_LOG_ERROR,"Invalid lowdelay.bytes.den\n");
            return AVERROR_INVALIDDATA;
        }

        /* [DIRAC_STD] 11.3.5 Quantisation matrices (low-delay syntax). quant_matrix() */
        if (get_bits1(gb)) {
            av_log(s->avctx,AV_LOG_DEBUG,"Low Delay: Has Custom Quantization Matrix!\n");
            /* custom quantization matrix */
            /* one value for entry [0][0], then entries 1..3 of every level */
            s->lowdelay.quant[0][0] = svq3_get_ue_golomb(gb);
            for (level = 0; level < s->wavelet_depth; level++) {
                s->lowdelay.quant[level][1] = svq3_get_ue_golomb(gb);
                s->lowdelay.quant[level][2] = svq3_get_ue_golomb(gb);
                s->lowdelay.quant[level][3] = svq3_get_ue_golomb(gb);
            }
        } else {
            /* the default table is only used for depths up to 4 */
            if (s->wavelet_depth > 4) {
                av_log(s->avctx,AV_LOG_ERROR,"Mandatory custom low delay matrix missing for depth %d\n", s->wavelet_depth);
                return AVERROR_INVALIDDATA;
            }
            /* default quantization matrix */
            for (level = 0; level < s->wavelet_depth; level++)
                for (i = 0; i < 4; i++) {
                    s->lowdelay.quant[level][i] = default_qmat[s->wavelet_idx][level][i];
                    /* haar with no shift differs for different depths */
                    if (s->wavelet_idx == 3)
                        s->lowdelay.quant[level][i] += 4*(s->wavelet_depth-1 - level);
                }
        }
    }
    return 0;
}
1120
/**
 * Predict the split mode of the superblock at (x, y) from its already
 * decoded neighbours.
 *
 * @param sbsplit pointer to the entry of the current superblock
 * @param stride  number of entries per row
 * @return 0 for the top-left superblock, the left neighbour on the first
 *         row, the upper neighbour on the first column, otherwise the
 *         rounded mean of the left, upper and upper-left neighbours
 */
static inline int pred_sbsplit(uint8_t *sbsplit, int stride, int x, int y)
{
    /* sum of three split modes (0..6) -> rounded mean */
    static const uint8_t avgsplit[7] = { 0, 0, 1, 1, 1, 2, 2 };
    int left, above, above_left;

    if (x == 0 && y == 0)
        return 0;
    if (y == 0)
        return sbsplit[-1];
    if (x == 0)
        return sbsplit[-stride];

    left       = sbsplit[-1];
    above      = sbsplit[-stride];
    above_left = sbsplit[-stride-1];

    return avgsplit[left + above + above_left];
}
1134
1135 static inline int pred_block_mode(DiracBlock *block, int stride, int x, int y, int refmask)
1136 {
1137     int pred;
1138
1139     if (!(x|y))
1140         return 0;
1141     else if (!y)
1142         return block[-1].ref & refmask;
1143     else if (!x)
1144         return block[-stride].ref & refmask;
1145
1146     /* return the majority */
1147     pred = (block[-1].ref & refmask) + (block[-stride].ref & refmask) + (block[-stride-1].ref & refmask);
1148     return (pred >> 1) & refmask;
1149 }
1150
1151 static inline void pred_block_dc(DiracBlock *block, int stride, int x, int y)
1152 {
1153     int i, n = 0;
1154
1155     memset(block->u.dc, 0, sizeof(block->u.dc));
1156
1157     if (x && !(block[-1].ref & 3)) {
1158         for (i = 0; i < 3; i++)
1159             block->u.dc[i] += block[-1].u.dc[i];
1160         n++;
1161     }
1162
1163     if (y && !(block[-stride].ref & 3)) {
1164         for (i = 0; i < 3; i++)
1165             block->u.dc[i] += block[-stride].u.dc[i];
1166         n++;
1167     }
1168
1169     if (x && y && !(block[-1-stride].ref & 3)) {
1170         for (i = 0; i < 3; i++)
1171             block->u.dc[i] += block[-1-stride].u.dc[i];
1172         n++;
1173     }
1174
1175     if (n == 2) {
1176         for (i = 0; i < 3; i++)
1177             block->u.dc[i] = (block->u.dc[i]+1)>>1;
1178     } else if (n == 3) {
1179         for (i = 0; i < 3; i++)
1180             block->u.dc[i] = divide3(block->u.dc[i]);
1181     }
1182 }
1183
1184 static inline void pred_mv(DiracBlock *block, int stride, int x, int y, int ref)
1185 {
1186     int16_t *pred[3];
1187     int refmask = ref+1;
1188     int mask = refmask | DIRAC_REF_MASK_GLOBAL; /*  exclude gmc blocks */
1189     int n = 0;
1190
1191     if (x && (block[-1].ref & mask) == refmask)
1192         pred[n++] = block[-1].u.mv[ref];
1193
1194     if (y && (block[-stride].ref & mask) == refmask)
1195         pred[n++] = block[-stride].u.mv[ref];
1196
1197     if (x && y && (block[-stride-1].ref & mask) == refmask)
1198         pred[n++] = block[-stride-1].u.mv[ref];
1199
1200     switch (n) {
1201     case 0:
1202         block->u.mv[ref][0] = 0;
1203         block->u.mv[ref][1] = 0;
1204         break;
1205     case 1:
1206         block->u.mv[ref][0] = pred[0][0];
1207         block->u.mv[ref][1] = pred[0][1];
1208         break;
1209     case 2:
1210         block->u.mv[ref][0] = (pred[0][0] + pred[1][0] + 1) >> 1;
1211         block->u.mv[ref][1] = (pred[0][1] + pred[1][1] + 1) >> 1;
1212         break;
1213     case 3:
1214         block->u.mv[ref][0] = mid_pred(pred[0][0], pred[1][0], pred[2][0]);
1215         block->u.mv[ref][1] = mid_pred(pred[0][1], pred[1][1], pred[2][1]);
1216         break;
1217     }
1218 }
1219
1220 static void global_mv(DiracContext *s, DiracBlock *block, int x, int y, int ref)
1221 {
1222     int ez      = s->globalmc[ref].zrs_exp;
1223     int ep      = s->globalmc[ref].perspective_exp;
1224     int (*A)[2] = s->globalmc[ref].zrs;
1225     int *b      = s->globalmc[ref].pan_tilt;
1226     int *c      = s->globalmc[ref].perspective;
1227
1228     int m       = (1<<ep) - (c[0]*x + c[1]*y);
1229     int mx      = m * ((A[0][0] * x + A[0][1]*y) + (1<<ez) * b[0]);
1230     int my      = m * ((A[1][0] * x + A[1][1]*y) + (1<<ez) * b[1]);
1231
1232     block->u.mv[ref][0] = (mx + (1<<(ez+ep))) >> (ez+ep);
1233     block->u.mv[ref][1] = (my + (1<<(ez+ep))) >> (ez+ep);
1234 }
1235
1236 static void decode_block_params(DiracContext *s, DiracArith arith[8], DiracBlock *block,
1237                                 int stride, int x, int y)
1238 {
1239     int i;
1240
1241     block->ref  = pred_block_mode(block, stride, x, y, DIRAC_REF_MASK_REF1);
1242     block->ref ^= dirac_get_arith_bit(arith, CTX_PMODE_REF1);
1243
1244     if (s->num_refs == 2) {
1245         block->ref |= pred_block_mode(block, stride, x, y, DIRAC_REF_MASK_REF2);
1246         block->ref ^= dirac_get_arith_bit(arith, CTX_PMODE_REF2) << 1;
1247     }
1248
1249     if (!block->ref) {
1250         pred_block_dc(block, stride, x, y);
1251         for (i = 0; i < 3; i++)
1252             block->u.dc[i] += dirac_get_arith_int(arith+1+i, CTX_DC_F1, CTX_DC_DATA);
1253         return;
1254     }
1255
1256     if (s->globalmc_flag) {
1257         block->ref |= pred_block_mode(block, stride, x, y, DIRAC_REF_MASK_GLOBAL);
1258         block->ref ^= dirac_get_arith_bit(arith, CTX_GLOBAL_BLOCK) << 2;
1259     }
1260
1261     for (i = 0; i < s->num_refs; i++)
1262         if (block->ref & (i+1)) {
1263             if (block->ref & DIRAC_REF_MASK_GLOBAL) {
1264                 global_mv(s, block, x, y, i);
1265             } else {
1266                 pred_mv(block, stride, x, y, i);
1267                 block->u.mv[i][0] += dirac_get_arith_int(arith + 4 + 2 * i, CTX_MV_F1, CTX_MV_DATA);
1268                 block->u.mv[i][1] += dirac_get_arith_int(arith + 5 + 2 * i, CTX_MV_F1, CTX_MV_DATA);
1269             }
1270         }
1271 }
1272
1273 /**
1274  * Copies the current block to the other blocks covered by the current superblock split mode
1275  */
1276 static void propagate_block_data(DiracBlock *block, int stride, int size)
1277 {
1278     int x, y;
1279     DiracBlock *dst = block;
1280
1281     for (x = 1; x < size; x++)
1282         dst[x] = *block;
1283
1284     for (y = 1; y < size; y++) {
1285         dst += stride;
1286         for (x = 0; x < size; x++)
1287             dst[x] = *block;
1288     }
1289 }
1290
1291 /**
1292  * Dirac Specification ->
1293  * 12. Block motion data syntax
1294  */
1295 static int dirac_unpack_block_motion_data(DiracContext *s)
1296 {
1297     GetBitContext *gb = &s->gb;
1298     uint8_t *sbsplit = s->sbsplit;
1299     int i, x, y, q, p;
1300     DiracArith arith[8];
1301
1302     align_get_bits(gb);
1303
1304     /* [DIRAC_STD] 11.2.4 and 12.2.1 Number of blocks and superblocks */
1305     s->sbwidth  = DIVRNDUP(s->source.width,  4*s->plane[0].xbsep);
1306     s->sbheight = DIVRNDUP(s->source.height, 4*s->plane[0].ybsep);
1307     s->blwidth  = 4 * s->sbwidth;
1308     s->blheight = 4 * s->sbheight;
1309
1310     /* [DIRAC_STD] 12.3.1 Superblock splitting modes. superblock_split_modes()
1311        decode superblock split modes */
1312     ff_dirac_init_arith_decoder(arith, gb, svq3_get_ue_golomb(gb));     /* svq3_get_ue_golomb(gb) is the length */
1313     for (y = 0; y < s->sbheight; y++) {
1314         for (x = 0; x < s->sbwidth; x++) {
1315             unsigned int split  = dirac_get_arith_uint(arith, CTX_SB_F1, CTX_SB_DATA);
1316             if (split > 2)
1317                 return AVERROR_INVALIDDATA;
1318             sbsplit[x] = (split + pred_sbsplit(sbsplit+x, s->sbwidth, x, y)) % 3;
1319         }
1320         sbsplit += s->sbwidth;
1321     }
1322
1323     /* setup arith decoding */
1324     ff_dirac_init_arith_decoder(arith, gb, svq3_get_ue_golomb(gb));
1325     for (i = 0; i < s->num_refs; i++) {
1326         ff_dirac_init_arith_decoder(arith + 4 + 2 * i, gb, svq3_get_ue_golomb(gb));
1327         ff_dirac_init_arith_decoder(arith + 5 + 2 * i, gb, svq3_get_ue_golomb(gb));
1328     }
1329     for (i = 0; i < 3; i++)
1330         ff_dirac_init_arith_decoder(arith+1+i, gb, svq3_get_ue_golomb(gb));
1331
1332     for (y = 0; y < s->sbheight; y++)
1333         for (x = 0; x < s->sbwidth; x++) {
1334             int blkcnt = 1 << s->sbsplit[y * s->sbwidth + x];
1335             int step   = 4 >> s->sbsplit[y * s->sbwidth + x];
1336
1337             for (q = 0; q < blkcnt; q++)
1338                 for (p = 0; p < blkcnt; p++) {
1339                     int bx = 4 * x + p*step;
1340                     int by = 4 * y + q*step;
1341                     DiracBlock *block = &s->blmotion[by*s->blwidth + bx];
1342                     decode_block_params(s, arith, block, s->blwidth, bx, by);
1343                     propagate_block_data(block, s->blwidth, step);
1344                 }
1345         }
1346
1347     return 0;
1348 }
1349
/**
 * One-dimensional overlapped-block weight for position i of a block.
 *
 * @param i      position inside the block
 * @param blen   block length
 * @param offset overlap offset
 * @return 8 outside the two ramps of width 2*offset at either end of the
 *         block, otherwise the ramp value for the distance to that end
 */
static int weight(int i, int blen, int offset)
{
#define ROLLOFF(i) offset == 1 ? ((i) ? 5 : 3) :        \
    (1 + (6*(i) + offset - 1) / (2*offset - 1))

    int dist;

    if (i < 2*offset)
        dist = i;               /* ramp at the start of the block */
    else if (i > blen-1 - 2*offset)
        dist = blen-1 - i;      /* ramp at the end, mirrored */
    else
        return 8;

    return ROLLOFF(dist);
}
1361
1362 static void init_obmc_weight_row(Plane *p, uint8_t *obmc_weight, int stride,
1363                                  int left, int right, int wy)
1364 {
1365     int x;
1366     for (x = 0; left && x < p->xblen >> 1; x++)
1367         obmc_weight[x] = wy*8;
1368     for (; x < p->xblen >> right; x++)
1369         obmc_weight[x] = wy*weight(x, p->xblen, p->xoffset);
1370     for (; x < p->xblen; x++)
1371         obmc_weight[x] = wy*8;
1372     for (; x < stride; x++)
1373         obmc_weight[x] = 0;
1374 }
1375
1376 static void init_obmc_weight(Plane *p, uint8_t *obmc_weight, int stride,
1377                              int left, int right, int top, int bottom)
1378 {
1379     int y;
1380     for (y = 0; top && y < p->yblen >> 1; y++) {
1381         init_obmc_weight_row(p, obmc_weight, stride, left, right, 8);
1382         obmc_weight += stride;
1383     }
1384     for (; y < p->yblen >> bottom; y++) {
1385         int wy = weight(y, p->yblen, p->yoffset);
1386         init_obmc_weight_row(p, obmc_weight, stride, left, right, wy);
1387         obmc_weight += stride;
1388     }
1389     for (; y < p->yblen; y++) {
1390         init_obmc_weight_row(p, obmc_weight, stride, left, right, 8);
1391         obmc_weight += stride;
1392     }
1393 }
1394
1395 static void init_obmc_weights(DiracContext *s, Plane *p, int by)
1396 {
1397     int top = !by;
1398     int bottom = by == s->blheight-1;
1399
1400     /* don't bother re-initing for rows 2 to blheight-2, the weights don't change */
1401     if (top || bottom || by == 1) {
1402         init_obmc_weight(p, s->obmc_weight[0], MAX_BLOCKSIZE, 1, 0, top, bottom);
1403         init_obmc_weight(p, s->obmc_weight[1], MAX_BLOCKSIZE, 0, 0, top, bottom);
1404         init_obmc_weight(p, s->obmc_weight[2], MAX_BLOCKSIZE, 0, 1, top, bottom);
1405     }
1406 }
1407
/**
 * Blend weights for eighth-pel interpolation.
 * mc_subpel() looks this table up as epel_weights[my&3][mx&3] and passes the
 * resulting 4-entry row on in src[4].
 * The four weights of every row sum to 16.
 * NOTE(review): presumably weight k applies to src[k] -- confirm against the
 * put/avg pixel functions that consume src[4].
 */
static const uint8_t epel_weights[4][4][4] = {
    {{ 16,  0,  0,  0 },
     { 12,  4,  0,  0 },
     {  8,  8,  0,  0 },
     {  4, 12,  0,  0 }},
    {{ 12,  0,  4,  0 },
     {  9,  3,  3,  1 },
     {  6,  6,  2,  2 },
     {  3,  9,  1,  3 }},
    {{  8,  0,  8,  0 },
     {  6,  2,  6,  2 },
     {  4,  4,  4,  4 },
     {  2,  6,  2,  6 }},
    {{  4,  0, 12,  0 },
     {  3,  1,  9,  3 },
     {  2,  2,  6,  6 },
     {  1,  3,  3,  9 }}
};
1426
/**
 * For block x,y, determine which of the hpel planes to do bilinear
 * interpolation from and set src[] to the location in each hpel plane
 * to MC from.
 *
 * @param s      decoder context
 * @param block  block whose motion vector for 'ref' is used
 * @param src    output: src[0..3] receive the source pointers (only as many
 *               as are needed); src[4] receives the epel weight row when the
 *               vector has an eighth-pel component
 * @param x      horizontal position of the block in the plane
 * @param y      vertical position of the block in the plane
 * @param ref    index into s->ref_pics
 * @param plane  plane index; for non-zero planes the vector is scaled by the
 *               chroma shifts
 *
 * @return the index of the put_dirac_pixels_tab function to use
 *  0 for 1 plane (fpel,hpel), 1 for 2 planes (qpel), 2 for 4 planes (qpel), and 3 for epel
 */
static int mc_subpel(DiracContext *s, DiracBlock *block, const uint8_t *src[5],
                     int x, int y, int ref, int plane)
{
    Plane *p = &s->plane[plane];
    uint8_t **ref_hpel = s->ref_pics[ref]->hpel[plane];
    int motion_x = block->u.mv[ref][0];
    int motion_y = block->u.mv[ref][1];
    int mx, my, i, epel, nplanes = 0;

    if (plane) {
        motion_x >>= s->chroma_x_shift;
        motion_y >>= s->chroma_y_shift;
    }

    /* split the vector: mx/my keep the low mv_precision (fractional) bits,
       motion_x/motion_y become the integer-pel displacement */
    mx         = motion_x & ~(-1U << s->mv_precision);
    my         = motion_y & ~(-1U << s->mv_precision);
    motion_x >>= s->mv_precision;
    motion_y >>= s->mv_precision;
    /* normalize subpel coordinates to epel */
    /* TODO: template this function? */
    mx      <<= 3 - s->mv_precision;
    my      <<= 3 - s->mv_precision;

    x += motion_x;
    y += motion_y;
    /* an odd eighth-pel coordinate means real epel interpolation is needed */
    epel = (mx|my)&1;

    /* hpel position */
    if (!((mx|my)&3)) {
        /* mx and my are each 0 or 4 here, so (my>>1)+(mx>>2) selects one of
           the four hpel planes */
        nplanes = 1;
        src[0] = ref_hpel[(my>>1)+(mx>>2)] + y*p->stride + x;
    } else {
        /* qpel or epel */
        nplanes = 4;
        for (i = 0; i < 4; i++)
            src[i] = ref_hpel[i] + y*p->stride + x;

        /* if we're interpolating in the right/bottom halves, adjust the planes as needed
           we increment x/y because the edge changes for half of the pixels */
        if (mx > 4) {
            src[0] += 1;
            src[2] += 1;
            x++;
        }
        if (my > 4) {
            src[0] += p->stride;
            src[1] += p->stride;
            y++;
        }

        /* hpel planes are:
           [0]: F  [1]: H
           [2]: V  [3]: C */
        if (!epel) {
            /* check if we really only need 2 planes since either mx or my is
               a hpel position. (epel weights of 0 handle this there) */
            if (!(mx&3)) {
                /* mx == 0: average [0] and [2]
                   mx == 4: average [1] and [3] */
                src[!mx] = src[2 + !!mx];
                nplanes = 2;
            } else if (!(my&3)) {
                /* my == 0: average [0] and [1]
                   my == 4: average [2] and [3] */
                src[0] = src[(my>>1)  ];
                src[1] = src[(my>>1)+1];
                nplanes = 2;
            }
        } else {
            /* adjust the ordering if needed so the weights work */
            if (mx > 4) {
                FFSWAP(const uint8_t *, src[0], src[1]);
                FFSWAP(const uint8_t *, src[2], src[3]);
            }
            if (my > 4) {
                FFSWAP(const uint8_t *, src[0], src[2]);
                FFSWAP(const uint8_t *, src[1], src[3]);
            }
            /* the fifth slot carries the weight row, not pixel data */
            src[4] = epel_weights[my&3][mx&3];
        }
    }

    /* fixme: v/h _edge_pos */
    /* the block reaches outside the area covered by the plane plus its drawn
       edge: build the source block(s) in the edge emulation buffers instead */
    if (x + p->xblen > p->width +EDGE_WIDTH/2 ||
        y + p->yblen > p->height+EDGE_WIDTH/2 ||
        x < 0 || y < 0) {
        for (i = 0; i < nplanes; i++) {
            s->vdsp.emulated_edge_mc(s->edge_emu_buffer[i], src[i],
                                     p->stride, p->stride,
                                     p->xblen, p->yblen, x, y,
                                     p->width+EDGE_WIDTH/2, p->height+EDGE_WIDTH/2);
            src[i] = s->edge_emu_buffer[i];
        }
    }
    /* nplanes 1/2/4 -> 0/1/2, plus one for the epel variant (nplanes == 4) */
    return (nplanes>>1) + epel;
}
1529
1530 static void add_dc(uint16_t *dst, int dc, int stride,
1531                    uint8_t *obmc_weight, int xblen, int yblen)
1532 {
1533     int x, y;
1534     dc += 128;
1535
1536     for (y = 0; y < yblen; y++) {
1537         for (x = 0; x < xblen; x += 2) {
1538             dst[x  ] += dc * obmc_weight[x  ];
1539             dst[x+1] += dc * obmc_weight[x+1];
1540         }
1541         dst          += stride;
1542         obmc_weight  += MAX_BLOCKSIZE;
1543     }
1544 }
1545
1546 static void block_mc(DiracContext *s, DiracBlock *block,
1547                      uint16_t *mctmp, uint8_t *obmc_weight,
1548                      int plane, int dstx, int dsty)
1549 {
1550     Plane *p = &s->plane[plane];
1551     const uint8_t *src[5];
1552     int idx;
1553
1554     switch (block->ref&3) {
1555     case 0: /* DC */
1556         add_dc(mctmp, block->u.dc[plane], p->stride, obmc_weight, p->xblen, p->yblen);
1557         return;
1558     case 1:
1559     case 2:
1560         idx = mc_subpel(s, block, src, dstx, dsty, (block->ref&3)-1, plane);
1561         s->put_pixels_tab[idx](s->mcscratch, src, p->stride, p->yblen);
1562         if (s->weight_func)
1563             s->weight_func(s->mcscratch, p->stride, s->weight_log2denom,
1564                            s->weight[0] + s->weight[1], p->yblen);
1565         break;
1566     case 3:
1567         idx = mc_subpel(s, block, src, dstx, dsty, 0, plane);
1568         s->put_pixels_tab[idx](s->mcscratch, src, p->stride, p->yblen);
1569         idx = mc_subpel(s, block, src, dstx, dsty, 1, plane);
1570         if (s->biweight_func) {
1571             /* fixme: +32 is a quick hack */
1572             s->put_pixels_tab[idx](s->mcscratch + 32, src, p->stride, p->yblen);
1573             s->biweight_func(s->mcscratch, s->mcscratch+32, p->stride, s->weight_log2denom,
1574                              s->weight[0], s->weight[1], p->yblen);
1575         } else
1576             s->avg_pixels_tab[idx](s->mcscratch, src, p->stride, p->yblen);
1577         break;
1578     }
1579     s->add_obmc(mctmp, s->mcscratch, p->stride, obmc_weight, p->yblen);
1580 }
1581
1582 static void mc_row(DiracContext *s, DiracBlock *block, uint16_t *mctmp, int plane, int dsty)
1583 {
1584     Plane *p = &s->plane[plane];
1585     int x, dstx = p->xbsep - p->xoffset;
1586
1587     block_mc(s, block, mctmp, s->obmc_weight[0], plane, -p->xoffset, dsty);
1588     mctmp += p->xbsep;
1589
1590     for (x = 1; x < s->blwidth-1; x++) {
1591         block_mc(s, block+x, mctmp, s->obmc_weight[1], plane, dstx, dsty);
1592         dstx  += p->xbsep;
1593         mctmp += p->xbsep;
1594     }
1595     block_mc(s, block+x, mctmp, s->obmc_weight[2], plane, dstx, dsty);
1596 }
1597
1598 static void select_dsp_funcs(DiracContext *s, int width, int height, int xblen, int yblen)
1599 {
1600     int idx = 0;
1601     if (xblen > 8)
1602         idx = 1;
1603     if (xblen > 16)
1604         idx = 2;
1605
1606     memcpy(s->put_pixels_tab, s->diracdsp.put_dirac_pixels_tab[idx], sizeof(s->put_pixels_tab));
1607     memcpy(s->avg_pixels_tab, s->diracdsp.avg_dirac_pixels_tab[idx], sizeof(s->avg_pixels_tab));
1608     s->add_obmc = s->diracdsp.add_dirac_obmc[idx];
1609     if (s->weight_log2denom > 1 || s->weight[0] != 1 || s->weight[1] != 1) {
1610         s->weight_func   = s->diracdsp.weight_dirac_pixels_tab[idx];
1611         s->biweight_func = s->diracdsp.biweight_dirac_pixels_tab[idx];
1612     } else {
1613         s->weight_func   = NULL;
1614         s->biweight_func = NULL;
1615     }
1616 }
1617
1618 static int interpolate_refplane(DiracContext *s, DiracFrame *ref, int plane, int width, int height)
1619 {
1620     /* chroma allocates an edge of 8 when subsampled
1621        which for 4:2:2 means an h edge of 16 and v edge of 8
1622        just use 8 for everything for the moment */
1623     int i, edge = EDGE_WIDTH/2;
1624
1625     ref->hpel[plane][0] = ref->avframe->data[plane];
1626     s->mpvencdsp.draw_edges(ref->hpel[plane][0], ref->avframe->linesize[plane], width, height, edge, edge, EDGE_TOP | EDGE_BOTTOM); /* EDGE_TOP | EDGE_BOTTOM values just copied to make it build, this needs to be ensured */
1627
1628     /* no need for hpel if we only have fpel vectors */
1629     if (!s->mv_precision)
1630         return 0;
1631
1632     for (i = 1; i < 4; i++) {
1633         if (!ref->hpel_base[plane][i])
1634             ref->hpel_base[plane][i] = av_malloc((height+2*edge) * ref->avframe->linesize[plane] + 32);
1635         if (!ref->hpel_base[plane][i]) {
1636             return AVERROR(ENOMEM);
1637         }
1638         /* we need to be 16-byte aligned even for chroma */
1639         ref->hpel[plane][i] = ref->hpel_base[plane][i] + edge*ref->avframe->linesize[plane] + 16;
1640     }
1641
1642     if (!ref->interpolated[plane]) {
1643         s->diracdsp.dirac_hpel_filter(ref->hpel[plane][1], ref->hpel[plane][2],
1644                                       ref->hpel[plane][3], ref->hpel[plane][0],
1645                                       ref->avframe->linesize[plane], width, height);
1646         s->mpvencdsp.draw_edges(ref->hpel[plane][1], ref->avframe->linesize[plane], width, height, edge, edge, EDGE_TOP | EDGE_BOTTOM);
1647         s->mpvencdsp.draw_edges(ref->hpel[plane][2], ref->avframe->linesize[plane], width, height, edge, edge, EDGE_TOP | EDGE_BOTTOM);
1648         s->mpvencdsp.draw_edges(ref->hpel[plane][3], ref->avframe->linesize[plane], width, height, edge, edge, EDGE_TOP | EDGE_BOTTOM);
1649     }
1650     ref->interpolated[plane] = 1;
1651
1652     return 0;
1653 }
1654
1655 /**
1656  * Dirac Specification ->
1657  * 13.0 Transform data syntax. transform_data()
1658  */
1659 static int dirac_decode_frame_internal(DiracContext *s)
1660 {
1661     DWTContext d;
1662     int y, i, comp, dsty;
1663     int ret;
1664
1665     if (s->low_delay) {
1666         /* [DIRAC_STD] 13.5.1 low_delay_transform_data() */
1667         for (comp = 0; comp < 3; comp++) {
1668             Plane *p = &s->plane[comp];
1669             memset(p->idwt_buf, 0, p->idwt_stride * p->idwt_height);
1670         }
1671         if (!s->zero_res) {
1672             if ((ret = decode_lowdelay(s)) < 0)
1673                 return ret;
1674         }
1675     }
1676
1677     for (comp = 0; comp < 3; comp++) {
1678         Plane *p       = &s->plane[comp];
1679         uint8_t *frame = s->current_picture->avframe->data[comp];
1680
1681         /* FIXME: small resolutions */
1682         for (i = 0; i < 4; i++)
1683             s->edge_emu_buffer[i] = s->edge_emu_buffer_base + i*FFALIGN(p->width, 16);
1684
1685         if (!s->zero_res && !s->low_delay)
1686         {
1687             memset(p->idwt_buf, 0, p->idwt_stride * p->idwt_height);
1688             decode_component(s, comp); /* [DIRAC_STD] 13.4.1 core_transform_data() */
1689         }
1690         ret = ff_spatial_idwt_init2(&d, p->idwt_buf, p->idwt_width, p->idwt_height, p->idwt_stride,
1691                                     s->wavelet_idx+2, s->wavelet_depth, p->idwt_tmp, s->bit_depth);
1692         if (ret < 0)
1693             return ret;
1694
1695         if (!s->num_refs) { /* intra */
1696             for (y = 0; y < p->height; y += 16) {
1697                 ff_spatial_idwt_slice2(&d, y+16); /* decode */
1698                 s->diracdsp.put_signed_rect_clamped[s->pshift](frame + y*p->stride, p->stride,
1699                                                                p->idwt_buf + y*p->idwt_stride, p->idwt_stride, p->width, 16);
1700             }
1701         } else { /* inter */
1702             int rowheight = p->ybsep*p->stride;
1703
1704             select_dsp_funcs(s, p->width, p->height, p->xblen, p->yblen);
1705
1706             for (i = 0; i < s->num_refs; i++) {
1707                 int ret = interpolate_refplane(s, s->ref_pics[i], comp, p->width, p->height);
1708                 if (ret < 0)
1709                     return ret;
1710             }
1711
1712             memset(s->mctmp, 0, 4*p->yoffset*p->stride);
1713
1714             dsty = -p->yoffset;
1715             for (y = 0; y < s->blheight; y++) {
1716                 int h     = 0,
1717                     start = FFMAX(dsty, 0);
1718                 uint16_t *mctmp    = s->mctmp + y*rowheight;
1719                 DiracBlock *blocks = s->blmotion + y*s->blwidth;
1720
1721                 init_obmc_weights(s, p, y);
1722
1723                 if (y == s->blheight-1 || start+p->ybsep > p->height)
1724                     h = p->height - start;
1725                 else
1726                     h = p->ybsep - (start - dsty);
1727                 if (h < 0)
1728                     break;
1729
1730                 memset(mctmp+2*p->yoffset*p->stride, 0, 2*rowheight);
1731                 mc_row(s, blocks, mctmp, comp, dsty);
1732
1733                 mctmp += (start - dsty)*p->stride + p->xoffset;
1734                 ff_spatial_idwt_slice2(&d, start + h); /* decode */
1735                 /* NOTE: add_rect_clamped hasn't been templated hence the shifts.
1736                  * idwt_stride is passed as pixels, not in bytes as in the rest of the decoder */
1737                 s->diracdsp.add_rect_clamped(frame + start*p->stride, mctmp, p->stride,
1738                                              (int16_t*)(p->idwt_buf) + start*(p->idwt_stride >> 1), (p->idwt_stride >> 1), p->width, h);
1739
1740                 dsty += p->ybsep;
1741             }
1742         }
1743     }
1744
1745
1746     return 0;
1747 }
1748
1749 static int get_buffer_with_edge(AVCodecContext *avctx, AVFrame *f, int flags)
1750 {
1751     int ret, i;
1752     int chroma_x_shift, chroma_y_shift;
1753     avcodec_get_chroma_sub_sample(avctx->pix_fmt, &chroma_x_shift, &chroma_y_shift);
1754
1755     f->width  = avctx->width  + 2 * EDGE_WIDTH;
1756     f->height = avctx->height + 2 * EDGE_WIDTH + 2;
1757     ret = ff_get_buffer(avctx, f, flags);
1758     if (ret < 0)
1759         return ret;
1760
1761     for (i = 0; f->data[i]; i++) {
1762         int offset = (EDGE_WIDTH >> (i && i<3 ? chroma_y_shift : 0)) *
1763                      f->linesize[i] + 32;
1764         f->data[i] += offset;
1765     }
1766     f->width  = avctx->width;
1767     f->height = avctx->height;
1768
1769     return 0;
1770 }
1771
1772 /**
1773  * Dirac Specification ->
1774  * 11.1.1 Picture Header. picture_header()
1775  */
1776 static int dirac_decode_picture_header(DiracContext *s)
1777 {
1778     unsigned retire, picnum;
1779     int i, j, ret;
1780     int64_t refdist, refnum;
1781     GetBitContext *gb = &s->gb;
1782
1783     /* [DIRAC_STD] 11.1.1 Picture Header. picture_header() PICTURE_NUM */
1784     picnum = s->current_picture->avframe->display_picture_number = get_bits_long(gb, 32);
1785
1786
1787     av_log(s->avctx,AV_LOG_DEBUG,"PICTURE_NUM: %d\n",picnum);
1788
1789     /* if this is the first keyframe after a sequence header, start our
1790        reordering from here */
1791     if (s->frame_number < 0)
1792         s->frame_number = picnum;
1793
1794     s->ref_pics[0] = s->ref_pics[1] = NULL;
1795     for (i = 0; i < s->num_refs; i++) {
1796         refnum = (picnum + dirac_get_se_golomb(gb)) & 0xFFFFFFFF;
1797         refdist = INT64_MAX;
1798
1799         /* find the closest reference to the one we want */
1800         /* Jordi: this is needed if the referenced picture hasn't yet arrived */
1801         for (j = 0; j < MAX_REFERENCE_FRAMES && refdist; j++)
1802             if (s->ref_frames[j]
1803                 && FFABS(s->ref_frames[j]->avframe->display_picture_number - refnum) < refdist) {
1804                 s->ref_pics[i] = s->ref_frames[j];
1805                 refdist = FFABS(s->ref_frames[j]->avframe->display_picture_number - refnum);
1806             }
1807
1808         if (!s->ref_pics[i] || refdist)
1809             av_log(s->avctx, AV_LOG_DEBUG, "Reference not found\n");
1810
1811         /* if there were no references at all, allocate one */
1812         if (!s->ref_pics[i])
1813             for (j = 0; j < MAX_FRAMES; j++)
1814                 if (!s->all_frames[j].avframe->data[0]) {
1815                     s->ref_pics[i] = &s->all_frames[j];
1816                     get_buffer_with_edge(s->avctx, s->ref_pics[i]->avframe, AV_GET_BUFFER_FLAG_REF);
1817                     break;
1818                 }
1819
1820         if (!s->ref_pics[i]) {
1821             av_log(s->avctx, AV_LOG_ERROR, "Reference could not be allocated\n");
1822             return AVERROR_INVALIDDATA;
1823         }
1824
1825     }
1826
1827     /* retire the reference frames that are not used anymore */
1828     if (s->current_picture->reference) {
1829         retire = (picnum + dirac_get_se_golomb(gb)) & 0xFFFFFFFF;
1830         if (retire != picnum) {
1831             DiracFrame *retire_pic = remove_frame(s->ref_frames, retire);
1832
1833             if (retire_pic)
1834                 retire_pic->reference &= DELAYED_PIC_REF;
1835             else
1836                 av_log(s->avctx, AV_LOG_DEBUG, "Frame to retire not found\n");
1837         }
1838
1839         /* if reference array is full, remove the oldest as per the spec */
1840         while (add_frame(s->ref_frames, MAX_REFERENCE_FRAMES, s->current_picture)) {
1841             av_log(s->avctx, AV_LOG_ERROR, "Reference frame overflow\n");
1842             remove_frame(s->ref_frames, s->ref_frames[0]->avframe->display_picture_number)->reference &= DELAYED_PIC_REF;
1843         }
1844     }
1845
1846     if (s->num_refs) {
1847         ret = dirac_unpack_prediction_parameters(s);  /* [DIRAC_STD] 11.2 Picture Prediction Data. picture_prediction() */
1848         if (ret < 0)
1849             return ret;
1850         ret = dirac_unpack_block_motion_data(s);      /* [DIRAC_STD] 12. Block motion data syntax                       */
1851         if (ret < 0)
1852             return ret;
1853     }
1854     ret = dirac_unpack_idwt_params(s);                /* [DIRAC_STD] 11.3 Wavelet transform data                        */
1855     if (ret < 0)
1856         return ret;
1857
1858     init_planes(s);
1859     return 0;
1860 }
1861
1862 static int get_delayed_pic(DiracContext *s, AVFrame *picture, int *got_frame)
1863 {
1864     DiracFrame *out = s->delay_frames[0];
1865     int i, out_idx  = 0;
1866     int ret;
1867
1868     /* find frame with lowest picture number */
1869     for (i = 1; s->delay_frames[i]; i++)
1870         if (s->delay_frames[i]->avframe->display_picture_number < out->avframe->display_picture_number) {
1871             out     = s->delay_frames[i];
1872             out_idx = i;
1873         }
1874
1875     for (i = out_idx; s->delay_frames[i]; i++)
1876         s->delay_frames[i] = s->delay_frames[i+1];
1877
1878     if (out) {
1879         out->reference ^= DELAYED_PIC_REF;
1880         *got_frame = 1;
1881         if((ret = av_frame_ref(picture, out->avframe)) < 0)
1882             return ret;
1883     }
1884
1885     return 0;
1886 }
1887
1888 /**
1889  * Dirac Specification ->
1890  * 9.6 Parse Info Header Syntax. parse_info()
1891  * 4 byte start code + byte parse code + 4 byte size + 4 byte previous size
1892  */
1893 #define DATA_UNIT_HEADER_SIZE 13
1894
1895 /* [DIRAC_STD] dirac_decode_data_unit makes reference to the while defined in 9.3
1896    inside the function parse_sequence() */
1897 static int dirac_decode_data_unit(AVCodecContext *avctx, const uint8_t *buf, int size)
1898 {
1899     DiracContext *s   = avctx->priv_data;
1900     DiracFrame *pic   = NULL;
1901     int ret, i, parse_code;
1902     unsigned tmp;
1903
1904     if (size < DATA_UNIT_HEADER_SIZE)
1905         return AVERROR_INVALIDDATA;
1906
1907     parse_code = buf[4];
1908
1909     init_get_bits(&s->gb, &buf[13], 8*(size - DATA_UNIT_HEADER_SIZE));
1910
1911     if (parse_code == pc_seq_header) {
1912         if (s->seen_sequence_header)
1913             return 0;
1914
1915         /* [DIRAC_STD] 10. Sequence header */
1916         ret = avpriv_dirac_parse_sequence_header(avctx, &s->gb, &s->source,
1917                                                  &s->bit_depth);
1918         if (ret < 0)
1919             return ret;
1920
1921         s->pshift = s->bit_depth > 8;
1922
1923         avcodec_get_chroma_sub_sample(avctx->pix_fmt, &s->chroma_x_shift, &s->chroma_y_shift);
1924
1925         ret = alloc_sequence_buffers(s);
1926         if (ret < 0)
1927             return ret;
1928
1929         s->seen_sequence_header = 1;
1930     } else if (parse_code == pc_eos) { /* [DIRAC_STD] End of Sequence */
1931         free_sequence_buffers(s);
1932         s->seen_sequence_header = 0;
1933     } else if (parse_code == pc_aux_data) {
1934         if (buf[13] == 1) {     /* encoder implementation/version */
1935             int ver[3];
1936             /* versions older than 1.0.8 don't store quant delta for
1937                subbands with only one codeblock */
1938             if (sscanf(buf+14, "Schroedinger %d.%d.%d", ver, ver+1, ver+2) == 3)
1939                 if (ver[0] == 1 && ver[1] == 0 && ver[2] <= 7)
1940                     s->old_delta_quant = 1;
1941         }
1942     } else if (parse_code & 0x8) {  /* picture data unit */
1943         if (!s->seen_sequence_header) {
1944             av_log(avctx, AV_LOG_DEBUG, "Dropping frame without sequence header\n");
1945             return AVERROR_INVALIDDATA;
1946         }
1947
1948         /* find an unused frame */
1949         for (i = 0; i < MAX_FRAMES; i++)
1950             if (s->all_frames[i].avframe->data[0] == NULL)
1951                 pic = &s->all_frames[i];
1952         if (!pic) {
1953             av_log(avctx, AV_LOG_ERROR, "framelist full\n");
1954             return AVERROR_INVALIDDATA;
1955         }
1956
1957         av_frame_unref(pic->avframe);
1958
1959         /* [DIRAC_STD] Defined in 9.6.1 ... */
1960         tmp            =  parse_code & 0x03;                   /* [DIRAC_STD] num_refs()      */
1961         if (tmp > 2) {
1962             av_log(avctx, AV_LOG_ERROR, "num_refs of 3\n");
1963             return AVERROR_INVALIDDATA;
1964         }
1965         s->num_refs    = tmp;
1966         s->is_arith    = (parse_code & 0x48) == 0x08;          /* [DIRAC_STD] using_ac()      */
1967         s->low_delay   = (parse_code & 0x88) == 0x88;          /* [DIRAC_STD] is_low_delay()  */
1968         pic->reference = (parse_code & 0x0C) == 0x0C;  /* [DIRAC_STD]  is_reference() */
1969         pic->avframe->key_frame = s->num_refs == 0;             /* [DIRAC_STD] is_intra()      */
1970         pic->avframe->pict_type = s->num_refs + 1;              /* Definition of AVPictureType in avutil.h */
1971
1972         if ((ret = get_buffer_with_edge(avctx, pic->avframe, (parse_code & 0x0C) == 0x0C ? AV_GET_BUFFER_FLAG_REF : 0)) < 0)
1973             return ret;
1974         s->current_picture = pic;
1975         s->plane[0].stride = pic->avframe->linesize[0];
1976         s->plane[1].stride = pic->avframe->linesize[1];
1977         s->plane[2].stride = pic->avframe->linesize[2];
1978
1979         if (alloc_buffers(s, FFMAX3(FFABS(s->plane[0].stride), FFABS(s->plane[1].stride), FFABS(s->plane[2].stride))) < 0)
1980             return AVERROR(ENOMEM);
1981
1982         /* [DIRAC_STD] 11.1 Picture parse. picture_parse() */
1983         ret = dirac_decode_picture_header(s);
1984         if (ret < 0)
1985             return ret;
1986
1987         /* [DIRAC_STD] 13.0 Transform data syntax. transform_data() */
1988         ret = dirac_decode_frame_internal(s);
1989         if (ret < 0)
1990             return ret;
1991     }
1992     return 0;
1993 }
1994
1995 static int dirac_decode_frame(AVCodecContext *avctx, void *data, int *got_frame, AVPacket *pkt)
1996 {
1997     DiracContext *s     = avctx->priv_data;
1998     AVFrame *picture    = data;
1999     uint8_t *buf        = pkt->data;
2000     int buf_size        = pkt->size;
2001     int i, buf_idx      = 0;
2002     int ret;
2003     unsigned data_unit_size;
2004
2005     /* release unused frames */
2006     for (i = 0; i < MAX_FRAMES; i++)
2007         if (s->all_frames[i].avframe->data[0] && !s->all_frames[i].reference) {
2008             av_frame_unref(s->all_frames[i].avframe);
2009             memset(s->all_frames[i].interpolated, 0, sizeof(s->all_frames[i].interpolated));
2010         }
2011
2012     s->current_picture = NULL;
2013     *got_frame = 0;
2014
2015     /* end of stream, so flush delayed pics */
2016     if (buf_size == 0)
2017         return get_delayed_pic(s, (AVFrame *)data, got_frame);
2018
2019     for (;;) {
2020         /*[DIRAC_STD] Here starts the code from parse_info() defined in 9.6
2021           [DIRAC_STD] PARSE_INFO_PREFIX = "BBCD" as defined in ISO/IEC 646
2022           BBCD start code search */
2023         for (; buf_idx + DATA_UNIT_HEADER_SIZE < buf_size; buf_idx++) {
2024             if (buf[buf_idx  ] == 'B' && buf[buf_idx+1] == 'B' &&
2025                 buf[buf_idx+2] == 'C' && buf[buf_idx+3] == 'D')
2026                 break;
2027         }
2028         /* BBCD found or end of data */
2029         if (buf_idx + DATA_UNIT_HEADER_SIZE >= buf_size)
2030             break;
2031
2032         data_unit_size = AV_RB32(buf+buf_idx+5);
2033         if (data_unit_size > buf_size - buf_idx || !data_unit_size) {
2034             if(data_unit_size > buf_size - buf_idx)
2035             av_log(s->avctx, AV_LOG_ERROR,
2036                    "Data unit with size %d is larger than input buffer, discarding\n",
2037                    data_unit_size);
2038             buf_idx += 4;
2039             continue;
2040         }
2041         /* [DIRAC_STD] dirac_decode_data_unit makes reference to the while defined in 9.3 inside the function parse_sequence() */
2042         ret = dirac_decode_data_unit(avctx, buf+buf_idx, data_unit_size);
2043         if (ret < 0)
2044         {
2045             av_log(s->avctx, AV_LOG_ERROR,"Error in dirac_decode_data_unit\n");
2046             return ret;
2047         }
2048         buf_idx += data_unit_size;
2049     }
2050
2051     if (!s->current_picture)
2052         return buf_size;
2053
2054     if (s->current_picture->avframe->display_picture_number > s->frame_number) {
2055         DiracFrame *delayed_frame = remove_frame(s->delay_frames, s->frame_number);
2056
2057         s->current_picture->reference |= DELAYED_PIC_REF;
2058
2059         if (add_frame(s->delay_frames, MAX_DELAY, s->current_picture)) {
2060             int min_num = s->delay_frames[0]->avframe->display_picture_number;
2061             /* Too many delayed frames, so we display the frame with the lowest pts */
2062             av_log(avctx, AV_LOG_ERROR, "Delay frame overflow\n");
2063
2064             for (i = 1; s->delay_frames[i]; i++)
2065                 if (s->delay_frames[i]->avframe->display_picture_number < min_num)
2066                     min_num = s->delay_frames[i]->avframe->display_picture_number;
2067
2068             delayed_frame = remove_frame(s->delay_frames, min_num);
2069             add_frame(s->delay_frames, MAX_DELAY, s->current_picture);
2070         }
2071
2072         if (delayed_frame) {
2073             delayed_frame->reference ^= DELAYED_PIC_REF;
2074             if((ret=av_frame_ref(data, delayed_frame->avframe)) < 0)
2075                 return ret;
2076             *got_frame = 1;
2077         }
2078     } else if (s->current_picture->avframe->display_picture_number == s->frame_number) {
2079         /* The right frame at the right time :-) */
2080         if((ret=av_frame_ref(data, s->current_picture->avframe)) < 0)
2081             return ret;
2082         *got_frame = 1;
2083     }
2084
2085     if (*got_frame)
2086         s->frame_number = picture->display_picture_number + 1;
2087
2088     return buf_idx;
2089 }
2090
2091 AVCodec ff_dirac_decoder = {
2092     .name           = "dirac",
2093     .long_name      = NULL_IF_CONFIG_SMALL("BBC Dirac VC-2"),
2094     .type           = AVMEDIA_TYPE_VIDEO,
2095     .id             = AV_CODEC_ID_DIRAC,
2096     .priv_data_size = sizeof(DiracContext),
2097     .init           = dirac_decode_init,
2098     .close          = dirac_decode_end,
2099     .decode         = dirac_decode_frame,
2100     .capabilities   = AV_CODEC_CAP_DELAY,
2101     .flush          = dirac_decode_flush,
2102 };