git.sesse.net Git - ffmpeg/blob - libavcodec/diracdec.c

   1 /*
   2  * Copyright (C) 2007 Marco Gerards <marco@gnu.org>
   3  * Copyright (C) 2009 David Conrad
   4  * Copyright (C) 2011 Jordi Ortiz
   5  *
   6  * This file is part of FFmpeg.
   7  *
   8  * FFmpeg is free software; you can redistribute it and/or
   9  * modify it under the terms of the GNU Lesser General Public
  10  * License as published by the Free Software Foundation; either
  11  * version 2.1 of the License, or (at your option) any later version.
  12  *
  13  * FFmpeg is distributed in the hope that it will be useful,
  14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  16  * Lesser General Public License for more details.
  17  *
  18  * You should have received a copy of the GNU Lesser General Public
  19  * License along with FFmpeg; if not, write to the Free Software
  20  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  21  */
  22
  23 /**
  24  * @file
  25  * Dirac Decoder
  26  * @author Marco Gerards <marco@gnu.org>, David Conrad, Jordi Ortiz <nenjordi@gmail.com>
  27  */
  28
  29 #include "avcodec.h"
  30 #include "dsputil.h"
  31 #include "get_bits.h"
  32 #include "bytestream.h"
  33 #include "internal.h"
  34 #include "golomb.h"
  35 #include "dirac_arith.h"
  36 #include "mpeg12data.h"
  37 #include "dirac_dwt.h"
  38 #include "dirac.h"
  39 #include "diracdsp.h"
  40 #include "videodsp.h" // for ff_emulated_edge_mc_8
  41
  42 /**
  43  * The spec limits the number of wavelet decompositions to 4 for both
  44  * level 1 (VC-2) and 128 (long-gop default).
  45  * 5 decompositions is the maximum before >16-bit buffers are needed.
  46  * Schroedinger allows this for DD 9,7 and 13,7 wavelets only, limiting
  47  * the others to 4 decompositions (or 3 for the fidelity filter).
  48  *
  49  * We use this instead of MAX_DECOMPOSITIONS to save some memory.
  50  */
  51 #define MAX_DWT_LEVELS 5
  52
  53 /**
  54  * The spec limits this to 3 for frame coding, but in practice can be as high as 6
  55  */
  56 #define MAX_REFERENCE_FRAMES 8
  57 #define MAX_DELAY 5         /* limit for main profile for frame coding (TODO: field coding) */
  58 #define MAX_FRAMES (MAX_REFERENCE_FRAMES + MAX_DELAY + 1)
  59 #define MAX_QUANT 68        /* max quant for VC-2 */
  60 #define MAX_BLOCKSIZE 32    /* maximum xblen/yblen we support */
  61
  62 /**
  63  * DiracBlock->ref flags, if set then the block does MC from the given ref
  64  */
  65 #define DIRAC_REF_MASK_REF1   1
  66 #define DIRAC_REF_MASK_REF2   2
  67 #define DIRAC_REF_MASK_GLOBAL 4
  68
  69 /**
  70  * Value of Picture.reference when Picture is not a reference picture, but
  71  * is held for delayed output.
  72  */
  73 #define DELAYED_PIC_REF 4
  74
  75 #define ff_emulated_edge_mc ff_emulated_edge_mc_8 /* Fix: change the calls to this function regarding bit depth */
  76
  77 #define CALC_PADDING(size, depth)                       \
  78     (((size + (1 << depth) - 1) >> depth) << depth)
  79
  80 #define DIVRNDUP(a, b) (((a) + (b) - 1) / (b))
  81
  82 typedef struct {
  83     AVFrame *avframe;
  84     int interpolated[3];    /* 1 if hpel[] is valid */
  85     uint8_t *hpel[3][4];
  86     uint8_t *hpel_base[3][4];
  87 } DiracFrame;
  88
  89 typedef struct {
  90     union {
  91         int16_t mv[2][2];
  92         int16_t dc[3];
  93     } u; /* anonymous unions aren't in C99 :( */
  94     uint8_t ref;
  95 } DiracBlock;
  96
  97 typedef struct SubBand {
  98     int level;
  99     int orientation;
 100     int stride;
 101     int width;
 102     int height;
 103     int quant;
 104     IDWTELEM *ibuf;
 105     struct SubBand *parent;
 106
 107     /* for low delay */
 108     unsigned length;
 109     const uint8_t *coeff_data;
 110 } SubBand;
 111
 112 typedef struct Plane {
 113     int width;
 114     int height;
 115     ptrdiff_t stride;
 116
 117     int idwt_width;
 118     int idwt_height;
 119     int idwt_stride;
 120     IDWTELEM *idwt_buf;
 121     IDWTELEM *idwt_buf_base;
 122     IDWTELEM *idwt_tmp;
 123
 124     /* block length */
 125     uint8_t xblen;
 126     uint8_t yblen;
 127     /* block separation (block n+1 starts after this many pixels in block n) */
 128     uint8_t xbsep;
 129     uint8_t ybsep;
 130     /* amount of overspill on each edge (half of the overlap between blocks) */
 131     uint8_t xoffset;
 132     uint8_t yoffset;
 133
 134     SubBand band[MAX_DWT_LEVELS][4];
 135 } Plane;
 136
 137 typedef struct DiracContext {
 138     AVCodecContext *avctx;
 139     DSPContext dsp;
 140     DiracDSPContext diracdsp;
 141     GetBitContext gb;
 142     dirac_source_params source;
 143     int seen_sequence_header;
 144     int frame_number;           /* number of the next frame to display       */
 145     Plane plane[3];
 146     int chroma_x_shift;
 147     int chroma_y_shift;
 148
 149     int zero_res;               /* zero residue flag                         */
 150     int is_arith;               /* whether coeffs use arith or golomb coding */
 151     int low_delay;              /* use the low delay syntax                  */
 152     int globalmc_flag;          /* use global motion compensation            */
 153     int num_refs;               /* number of reference pictures              */
 154
 155     /* wavelet decoding */
 156     unsigned wavelet_depth;     /* depth of the IDWT                         */
 157     unsigned wavelet_idx;
 158
 159     /**
 160      * schroedinger older than 1.0.8 doesn't store
 161      * quant delta if only one codebook exists in a band
 162      */
 163     unsigned old_delta_quant;
 164     unsigned codeblock_mode;
 165
 166     struct {
 167         unsigned width;
 168         unsigned height;
 169     } codeblock[MAX_DWT_LEVELS+1];
 170
 171     struct {
 172         unsigned num_x;         /* number of horizontal slices               */
 173         unsigned num_y;         /* number of vertical slices                 */
 174         AVRational bytes;       /* average bytes per slice                   */
 175         uint8_t quant[MAX_DWT_LEVELS][4]; /* [DIRAC_STD] E.1 */
 176     } lowdelay;
 177
 178     struct {
 179         int pan_tilt[2];        /* pan/tilt vector                           */
 180         int zrs[2][2];          /* zoom/rotate/shear matrix                  */
 181         int perspective[2];     /* perspective vector                        */
 182         unsigned zrs_exp;
 183         unsigned perspective_exp;
 184     } globalmc[2];
 185
 186     /* motion compensation */
 187     uint8_t mv_precision;       /* [DIRAC_STD] REFS_WT_PRECISION             */
 188     int16_t weight[2];          /* [DIRAC_STD] REF1_WT and REF2_WT           */
 189     unsigned weight_log2denom;  /* [DIRAC_STD] REFS_WT_PRECISION             */
 190
 191     int blwidth;                /* number of blocks (horizontally)           */
 192     int blheight;               /* number of blocks (vertically)             */
 193     int sbwidth;                /* number of superblocks (horizontally)      */
 194     int sbheight;               /* number of superblocks (vertically)        */
 195
 196     uint8_t *sbsplit;
 197     DiracBlock *blmotion;
 198
 199     uint8_t *edge_emu_buffer[4];
 200     uint8_t *edge_emu_buffer_base;
 201
 202     uint16_t *mctmp;            /* buffer holding the MC data multipled by OBMC weights */
 203     uint8_t *mcscratch;
 204     int buffer_stride;
 205
 206     DECLARE_ALIGNED(16, uint8_t, obmc_weight)[3][MAX_BLOCKSIZE*MAX_BLOCKSIZE];
 207
 208     void (*put_pixels_tab[4])(uint8_t *dst, const uint8_t *src[5], int stride, int h);
 209     void (*avg_pixels_tab[4])(uint8_t *dst, const uint8_t *src[5], int stride, int h);
 210     void (*add_obmc)(uint16_t *dst, const uint8_t *src, int stride, const uint8_t *obmc_weight, int yblen);
 211     dirac_weight_func weight_func;
 212     dirac_biweight_func biweight_func;
 213
 214     DiracFrame *current_picture;
 215     DiracFrame *ref_pics[2];
 216
 217     DiracFrame *ref_frames[MAX_REFERENCE_FRAMES+1];
 218     DiracFrame *delay_frames[MAX_DELAY+1];
 219     DiracFrame all_frames[MAX_FRAMES];
 220 } DiracContext;
 221
 222 /**
 223  * Dirac Specification ->
 224  * Parse code values. 9.6.1 Table 9.1
 225  */
 226 enum dirac_parse_code {
 227     pc_seq_header         = 0x00,
 228     pc_eos                = 0x10,
 229     pc_aux_data           = 0x20,
 230     pc_padding            = 0x30,
 231 };
 232
 233 enum dirac_subband {
 234     subband_ll = 0,
 235     subband_hl = 1,
 236     subband_lh = 2,
 237     subband_hh = 3
 238 };
 239
 240 static const uint8_t default_qmat[][4][4] = {
 241     { { 5,  3,  3,  0}, { 0,  4,  4,  1}, { 0,  5,  5,  2}, { 0,  6,  6,  3} },
 242     { { 4,  2,  2,  0}, { 0,  4,  4,  2}, { 0,  5,  5,  3}, { 0,  7,  7,  5} },
 243     { { 5,  3,  3,  0}, { 0,  4,  4,  1}, { 0,  5,  5,  2}, { 0,  6,  6,  3} },
 244     { { 8,  4,  4,  0}, { 0,  4,  4,  0}, { 0,  4,  4,  0}, { 0,  4,  4,  0} },
 245     { { 8,  4,  4,  0}, { 0,  4,  4,  0}, { 0,  4,  4,  0}, { 0,  4,  4,  0} },
 246     { { 0,  4,  4,  8}, { 0,  8,  8, 12}, { 0, 13, 13, 17}, { 0, 17, 17, 21} },
 247     { { 3,  1,  1,  0}, { 0,  4,  4,  2}, { 0,  6,  6,  5}, { 0,  9,  9,  7} },
 248 };
 249
 250 static const int qscale_tab[MAX_QUANT+1] = {
 251     4,     5,     6,     7,     8,    10,    11,    13,
 252     16,    19,    23,    27,    32,    38,    45,    54,
 253     64,    76,    91,   108,   128,   152,   181,   215,
 254     256,   304,   362,   431,   512,   609,   724,   861,
 255     1024,  1218,  1448,  1722,  2048,  2435,  2896,  3444,
 256     4096,  4871,  5793,  6889,  8192,  9742, 11585, 13777,
 257     16384, 19484, 23170, 27554, 32768, 38968, 46341, 55109,
 258     65536, 77936
 259 };
 260
 261 static const int qoffset_intra_tab[MAX_QUANT+1] = {
 262     1,     2,     3,     4,     4,     5,     6,     7,
 263     8,    10,    12,    14,    16,    19,    23,    27,
 264     32,    38,    46,    54,    64,    76,    91,   108,
 265     128,   152,   181,   216,   256,   305,   362,   431,
 266     512,   609,   724,   861,  1024,  1218,  1448,  1722,
 267     2048,  2436,  2897,  3445,  4096,  4871,  5793,  6889,
 268     8192,  9742, 11585, 13777, 16384, 19484, 23171, 27555,
 269     32768, 38968
 270 };
 271
 272 static const int qoffset_inter_tab[MAX_QUANT+1] = {
 273     1,     2,     2,     3,     3,     4,     4,     5,
 274     6,     7,     9,    10,    12,    14,    17,    20,
 275     24,    29,    34,    41,    48,    57,    68,    81,
 276     96,   114,   136,   162,   192,   228,   272,   323,
 277     384,   457,   543,   646,   768,   913,  1086,  1292,
 278     1536,  1827,  2172,  2583,  3072,  3653,  4344,  5166,
 279     6144,  7307,  8689, 10333, 12288, 14613, 17378, 20666,
 280     24576, 29226
 281 };
 282
 283 /* magic number division by 3 from schroedinger */
 284 static inline int divide3(int x)
 285 {
 286     return ((x+1)*21845 + 10922) >> 16;
 287 }
 288
 289 static DiracFrame *remove_frame(DiracFrame *framelist[], int picnum)
 290 {
 291     DiracFrame *remove_pic = NULL;
 292     int i, remove_idx = -1;
 293
 294     for (i = 0; framelist[i]; i++)
 295         if (framelist[i]->avframe->display_picture_number == picnum) {
 296             remove_pic = framelist[i];
 297             remove_idx = i;
 298         }
 299
 300     if (remove_pic)
 301         for (i = remove_idx; framelist[i]; i++)
 302             framelist[i] = framelist[i+1];
 303
 304     return remove_pic;
 305 }
 306
 307 static int add_frame(DiracFrame *framelist[], int maxframes, DiracFrame *frame)
 308 {
 309     int i;
 310     for (i = 0; i < maxframes; i++)
 311         if (!framelist[i]) {
 312             framelist[i] = frame;
 313             return 0;
 314         }
 315     return -1;
 316 }
 317
 318 static int alloc_sequence_buffers(DiracContext *s)
 319 {
 320     int sbwidth  = DIVRNDUP(s->source.width,  4);
 321     int sbheight = DIVRNDUP(s->source.height, 4);
 322     int i, w, h, top_padding;
 323
 324     /* todo: think more about this / use or set Plane here */
 325     for (i = 0; i < 3; i++) {
 326         int max_xblen = MAX_BLOCKSIZE >> (i ? s->chroma_x_shift : 0);
 327         int max_yblen = MAX_BLOCKSIZE >> (i ? s->chroma_y_shift : 0);
 328         w = s->source.width  >> (i ? s->chroma_x_shift : 0);
 329         h = s->source.height >> (i ? s->chroma_y_shift : 0);
 330
 331         /* we allocate the max we support here since num decompositions can
 332          * change from frame to frame. Stride is aligned to 16 for SIMD, and
 333          * 1<<MAX_DWT_LEVELS top padding to avoid if(y>0) in arith decoding
 334          * MAX_BLOCKSIZE padding for MC: blocks can spill up to half of that
 335          * on each side */
 336         top_padding = FFMAX(1<<MAX_DWT_LEVELS, max_yblen/2);
 337         w = FFALIGN(CALC_PADDING(w, MAX_DWT_LEVELS), 8); /* FIXME: Should this be 16 for SSE??? */
 338         h = top_padding + CALC_PADDING(h, MAX_DWT_LEVELS) + max_yblen/2;
 339
 340         s->plane[i].idwt_buf_base = av_mallocz_array((w+max_xblen), h * sizeof(IDWTELEM));
 341         s->plane[i].idwt_tmp      = av_malloc_array((w+16), sizeof(IDWTELEM));
 342         s->plane[i].idwt_buf      = s->plane[i].idwt_buf_base + top_padding*w;
 343         if (!s->plane[i].idwt_buf_base || !s->plane[i].idwt_tmp)
 344             return AVERROR(ENOMEM);
 345     }
 346
 347     /* fixme: allocate using real stride here */
 348     s->sbsplit  = av_malloc_array(sbwidth, sbheight);
 349     s->blmotion = av_malloc_array(sbwidth, sbheight * 16 * sizeof(*s->blmotion));
 350
 351     if (!s->sbsplit || !s->blmotion)
 352         return AVERROR(ENOMEM);
 353     return 0;
 354 }
 355
 356 static int alloc_buffers(DiracContext *s, int stride)
 357 {
 358     int w = s->source.width;
 359     int h = s->source.height;
 360
 361     av_assert0(stride >= w);
 362     stride += 64;
 363
 364     if (s->buffer_stride >= stride)
 365         return 0;
 366     s->buffer_stride = 0;
 367
 368     av_freep(&s->edge_emu_buffer_base);
 369     memset(s->edge_emu_buffer, 0, sizeof(s->edge_emu_buffer));
 370     av_freep(&s->mctmp);
 371     av_freep(&s->mcscratch);
 372
 373     s->edge_emu_buffer_base = av_malloc_array(stride, MAX_BLOCKSIZE);
 374
 375     s->mctmp     = av_malloc_array((stride+MAX_BLOCKSIZE), (h+MAX_BLOCKSIZE) * sizeof(*s->mctmp));
 376     s->mcscratch = av_malloc_array(stride, MAX_BLOCKSIZE);
 377
 378     if (!s->edge_emu_buffer_base || !s->mctmp || !s->mcscratch)
 379         return AVERROR(ENOMEM);
 380
 381     s->buffer_stride = stride;
 382     return 0;
 383 }
 384
 385 static void free_sequence_buffers(DiracContext *s)
 386 {
 387     int i, j, k;
 388
 389     for (i = 0; i < MAX_FRAMES; i++) {
 390         if (s->all_frames[i].avframe->data[0]) {
 391             av_frame_unref(s->all_frames[i].avframe);
 392             memset(s->all_frames[i].interpolated, 0, sizeof(s->all_frames[i].interpolated));
 393         }
 394
 395         for (j = 0; j < 3; j++)
 396             for (k = 1; k < 4; k++)
 397                 av_freep(&s->all_frames[i].hpel_base[j][k]);
 398     }
 399
 400     memset(s->ref_frames, 0, sizeof(s->ref_frames));
 401     memset(s->delay_frames, 0, sizeof(s->delay_frames));
 402
 403     for (i = 0; i < 3; i++) {
 404         av_freep(&s->plane[i].idwt_buf_base);
 405         av_freep(&s->plane[i].idwt_tmp);
 406     }
 407
 408     s->buffer_stride = 0;
 409     av_freep(&s->sbsplit);
 410     av_freep(&s->blmotion);
 411     av_freep(&s->edge_emu_buffer_base);
 412
 413     av_freep(&s->mctmp);
 414     av_freep(&s->mcscratch);
 415 }
 416
 417 static av_cold int dirac_decode_init(AVCodecContext *avctx)
 418 {
 419     DiracContext *s = avctx->priv_data;
 420     int i;
 421
 422     s->avctx = avctx;
 423     s->frame_number = -1;
 424
 425     ff_dsputil_init(&s->dsp, avctx);
 426     ff_diracdsp_init(&s->diracdsp);
 427
 428     for (i = 0; i < MAX_FRAMES; i++) {
 429         s->all_frames[i].avframe = av_frame_alloc();
 430         if (!s->all_frames[i].avframe) {
 431             while (i > 0)
 432                 av_frame_free(&s->all_frames[--i].avframe);
 433             return AVERROR(ENOMEM);
 434         }
 435     }
 436
 437     return 0;
 438 }
 439
 440 static void dirac_decode_flush(AVCodecContext *avctx)
 441 {
 442     DiracContext *s = avctx->priv_data;
 443     free_sequence_buffers(s);
 444     s->seen_sequence_header = 0;
 445     s->frame_number = -1;
 446 }
 447
 448 static av_cold int dirac_decode_end(AVCodecContext *avctx)
 449 {
 450     DiracContext *s = avctx->priv_data;
 451     int i;
 452
 453     dirac_decode_flush(avctx);
 454     for (i = 0; i < MAX_FRAMES; i++)
 455         av_frame_free(&s->all_frames[i].avframe);
 456
 457     return 0;
 458 }
 459
 460 #define SIGN_CTX(x) (CTX_SIGN_ZERO + ((x) > 0) - ((x) < 0))
 461
 462 static inline void coeff_unpack_arith(DiracArith *c, int qfactor, int qoffset,
 463                                       SubBand *b, IDWTELEM *buf, int x, int y)
 464 {
 465     int coeff, sign;
 466     int sign_pred = 0;
 467     int pred_ctx = CTX_ZPZN_F1;
 468
 469     /* Check if the parent subband has a 0 in the corresponding position */
 470     if (b->parent)
 471         pred_ctx += !!b->parent->ibuf[b->parent->stride * (y>>1) + (x>>1)] << 1;
 472
 473     if (b->orientation == subband_hl)
 474         sign_pred = buf[-b->stride];
 475
 476     /* Determine if the pixel has only zeros in its neighbourhood */
 477     if (x) {
 478         pred_ctx += !(buf[-1] | buf[-b->stride] | buf[-1-b->stride]);
 479         if (b->orientation == subband_lh)
 480             sign_pred = buf[-1];
 481     } else {
 482         pred_ctx += !buf[-b->stride];
 483     }
 484
 485     coeff = dirac_get_arith_uint(c, pred_ctx, CTX_COEFF_DATA);
 486     if (coeff) {
 487         coeff = (coeff * qfactor + qoffset + 2) >> 2;
 488         sign  = dirac_get_arith_bit(c, SIGN_CTX(sign_pred));
 489         coeff = (coeff ^ -sign) + sign;
 490     }
 491     *buf = coeff;
 492 }
 493
 494 static inline int coeff_unpack_golomb(GetBitContext *gb, int qfactor, int qoffset)
 495 {
 496     int sign, coeff;
 497
 498     coeff = svq3_get_ue_golomb(gb);
 499     if (coeff) {
 500         coeff = (coeff * qfactor + qoffset + 2) >> 2;
 501         sign  = get_bits1(gb);
 502         coeff = (coeff ^ -sign) + sign;
 503     }
 504     return coeff;
 505 }
 506
 507 /**
 508  * Decode the coeffs in the rectangle defined by left, right, top, bottom
 509  * [DIRAC_STD] 13.4.3.2 Codeblock unpacking loop. codeblock()
 510  */
 511 static inline void codeblock(DiracContext *s, SubBand *b,
 512                              GetBitContext *gb, DiracArith *c,
 513                              int left, int right, int top, int bottom,
 514                              int blockcnt_one, int is_arith)
 515 {
 516     int x, y, zero_block;
 517     int qoffset, qfactor;
 518     IDWTELEM *buf;
 519
 520     /* check for any coded coefficients in this codeblock */
 521     if (!blockcnt_one) {
 522         if (is_arith)
 523             zero_block = dirac_get_arith_bit(c, CTX_ZERO_BLOCK);
 524         else
 525             zero_block = get_bits1(gb);
 526
 527         if (zero_block)
 528             return;
 529     }
 530
 531     if (s->codeblock_mode && !(s->old_delta_quant && blockcnt_one)) {
 532         int quant = b->quant;
 533         if (is_arith)
 534             quant += dirac_get_arith_int(c, CTX_DELTA_Q_F, CTX_DELTA_Q_DATA);
 535         else
 536             quant += dirac_get_se_golomb(gb);
 537         if (quant < 0) {
 538             av_log(s->avctx, AV_LOG_ERROR, "Invalid quant\n");
 539             return;
 540         }
 541         b->quant = quant;
 542     }
 543
 544     b->quant = FFMIN(b->quant, MAX_QUANT);
 545
 546     qfactor = qscale_tab[b->quant];
 547     /* TODO: context pointer? */
 548     if (!s->num_refs)
 549         qoffset = qoffset_intra_tab[b->quant];
 550     else
 551         qoffset = qoffset_inter_tab[b->quant];
 552
 553     buf = b->ibuf + top * b->stride;
 554     for (y = top; y < bottom; y++) {
 555         for (x = left; x < right; x++) {
 556             /* [DIRAC_STD] 13.4.4 Subband coefficients. coeff_unpack() */
 557             if (is_arith)
 558                 coeff_unpack_arith(c, qfactor, qoffset, b, buf+x, x, y);
 559             else
 560                 buf[x] = coeff_unpack_golomb(gb, qfactor, qoffset);
 561         }
 562         buf += b->stride;
 563     }
 564 }
 565
 566 /**
 567  * Dirac Specification ->
 568  * 13.3 intra_dc_prediction(band)
 569  */
 570 static inline void intra_dc_prediction(SubBand *b)
 571 {
 572     IDWTELEM *buf = b->ibuf;
 573     int x, y;
 574
 575     for (x = 1; x < b->width; x++)
 576         buf[x] += buf[x-1];
 577     buf += b->stride;
 578
 579     for (y = 1; y < b->height; y++) {
 580         buf[0] += buf[-b->stride];
 581
 582         for (x = 1; x < b->width; x++) {
 583             int pred = buf[x - 1] + buf[x - b->stride] + buf[x - b->stride-1];
 584             buf[x]  += divide3(pred);
 585         }
 586         buf += b->stride;
 587     }
 588 }
 589
 590 /**
 591  * Dirac Specification ->
 592  * 13.4.2 Non-skipped subbands.  subband_coeffs()
 593  */
 594 static av_always_inline void decode_subband_internal(DiracContext *s, SubBand *b, int is_arith)
 595 {
 596     int cb_x, cb_y, left, right, top, bottom;
 597     DiracArith c;
 598     GetBitContext gb;
 599     int cb_width  = s->codeblock[b->level + (b->orientation != subband_ll)].width;
 600     int cb_height = s->codeblock[b->level + (b->orientation != subband_ll)].height;
 601     int blockcnt_one = (cb_width + cb_height) == 2;
 602
 603     if (!b->length)
 604         return;
 605
 606     init_get_bits8(&gb, b->coeff_data, b->length);
 607
 608     if (is_arith)
 609         ff_dirac_init_arith_decoder(&c, &gb, b->length);
 610
 611     top = 0;
 612     for (cb_y = 0; cb_y < cb_height; cb_y++) {
 613         bottom = (b->height * (cb_y+1)) / cb_height;
 614         left = 0;
 615         for (cb_x = 0; cb_x < cb_width; cb_x++) {
 616             right = (b->width * (cb_x+1)) / cb_width;
 617             codeblock(s, b, &gb, &c, left, right, top, bottom, blockcnt_one, is_arith);
 618             left = right;
 619         }
 620         top = bottom;
 621     }
 622
 623     if (b->orientation == subband_ll && s->num_refs == 0)
 624         intra_dc_prediction(b);
 625 }
 626
 627 static int decode_subband_arith(AVCodecContext *avctx, void *b)
 628 {
 629     DiracContext *s = avctx->priv_data;
 630     decode_subband_internal(s, b, 1);
 631     return 0;
 632 }
 633
 634 static int decode_subband_golomb(AVCodecContext *avctx, void *arg)
 635 {
 636     DiracContext *s = avctx->priv_data;
 637     SubBand **b     = arg;
 638     decode_subband_internal(s, *b, 0);
 639     return 0;
 640 }
 641
 642 /**
 643  * Dirac Specification ->
 644  * [DIRAC_STD] 13.4.1 core_transform_data()
 645  */
 646 static void decode_component(DiracContext *s, int comp)
 647 {
 648     AVCodecContext *avctx = s->avctx;
 649     SubBand *bands[3*MAX_DWT_LEVELS+1];
 650     enum dirac_subband orientation;
 651     int level, num_bands = 0;
 652
 653     /* Unpack all subbands at all levels. */
 654     for (level = 0; level < s->wavelet_depth; level++) {
 655         for (orientation = !!level; orientation < 4; orientation++) {
 656             SubBand *b = &s->plane[comp].band[level][orientation];
 657             bands[num_bands++] = b;
 658
 659             align_get_bits(&s->gb);
 660             /* [DIRAC_STD] 13.4.2 subband() */
 661             b->length = svq3_get_ue_golomb(&s->gb);
 662             if (b->length) {
 663                 b->quant = svq3_get_ue_golomb(&s->gb);
 664                 align_get_bits(&s->gb);
 665                 b->coeff_data = s->gb.buffer + get_bits_count(&s->gb)/8;
 666                 b->length = FFMIN(b->length, FFMAX(get_bits_left(&s->gb)/8, 0));
 667                 skip_bits_long(&s->gb, b->length*8);
 668             }
 669         }
 670         /* arithmetic coding has inter-level dependencies, so we can only execute one level at a time */
 671         if (s->is_arith)
 672             avctx->execute(avctx, decode_subband_arith, &s->plane[comp].band[level][!!level],
 673                            NULL, 4-!!level, sizeof(SubBand));
 674     }
 675     /* golomb coding has no inter-level dependencies, so we can execute all subbands in parallel */
 676     if (!s->is_arith)
 677         avctx->execute(avctx, decode_subband_golomb, bands, NULL, num_bands, sizeof(SubBand*));
 678 }
 679
 680 /* [DIRAC_STD] 13.5.5.2 Luma slice subband data. luma_slice_band(level,orient,sx,sy) --> if b2 == NULL */
 681 /* [DIRAC_STD] 13.5.5.3 Chroma slice subband data. chroma_slice_band(level,orient,sx,sy) --> if b2 != NULL */
 682 static void lowdelay_subband(DiracContext *s, GetBitContext *gb, int quant,
 683                              int slice_x, int slice_y, int bits_end,
 684                              SubBand *b1, SubBand *b2)
 685 {
 686     int left   = b1->width  * slice_x    / s->lowdelay.num_x;
 687     int right  = b1->width  *(slice_x+1) / s->lowdelay.num_x;
 688     int top    = b1->height * slice_y    / s->lowdelay.num_y;
 689     int bottom = b1->height *(slice_y+1) / s->lowdelay.num_y;
 690
 691     int qfactor = qscale_tab[FFMIN(quant, MAX_QUANT)];
 692     int qoffset = qoffset_intra_tab[FFMIN(quant, MAX_QUANT)];
 693
 694     IDWTELEM *buf1 =      b1->ibuf + top * b1->stride;
 695     IDWTELEM *buf2 = b2 ? b2->ibuf + top * b2->stride : NULL;
 696     int x, y;
 697     /* we have to constantly check for overread since the spec explictly
 698        requires this, with the meaning that all remaining coeffs are set to 0 */
 699     if (get_bits_count(gb) >= bits_end)
 700         return;
 701
 702     for (y = top; y < bottom; y++) {
 703         for (x = left; x < right; x++) {
 704             buf1[x] = coeff_unpack_golomb(gb, qfactor, qoffset);
 705             if (get_bits_count(gb) >= bits_end)
 706                 return;
 707             if (buf2) {
 708                 buf2[x] = coeff_unpack_golomb(gb, qfactor, qoffset);
 709                 if (get_bits_count(gb) >= bits_end)
 710                     return;
 711             }
 712         }
 713         buf1 += b1->stride;
 714         if (buf2)
 715             buf2 += b2->stride;
 716     }
 717 }
 718
 719 struct lowdelay_slice {
 720     GetBitContext gb;
 721     int slice_x;
 722     int slice_y;
 723     int bytes;
 724 };
 725
 726
 727 /**
 728  * Dirac Specification ->
 729  * 13.5.2 Slices. slice(sx,sy)
 730  */
 731 static int decode_lowdelay_slice(AVCodecContext *avctx, void *arg)
 732 {
 733     DiracContext *s = avctx->priv_data;
 734     struct lowdelay_slice *slice = arg;
 735     GetBitContext *gb = &slice->gb;
 736     enum dirac_subband orientation;
 737     int level, quant, chroma_bits, chroma_end;
 738
 739     int quant_base  = get_bits(gb, 7); /*[DIRAC_STD] qindex */
 740     int length_bits = av_log2(8 * slice->bytes)+1;
 741     int luma_bits   = get_bits_long(gb, length_bits);
 742     int luma_end    = get_bits_count(gb) + FFMIN(luma_bits, get_bits_left(gb));
 743
 744     /* [DIRAC_STD] 13.5.5.2 luma_slice_band */
 745     for (level = 0; level < s->wavelet_depth; level++)
 746         for (orientation = !!level; orientation < 4; orientation++) {
 747             quant = FFMAX(quant_base - s->lowdelay.quant[level][orientation], 0);
 748             lowdelay_subband(s, gb, quant, slice->slice_x, slice->slice_y, luma_end,
 749                              &s->plane[0].band[level][orientation], NULL);
 750         }
 751
 752     /* consume any unused bits from luma */
 753     skip_bits_long(gb, get_bits_count(gb) - luma_end);
 754
 755     chroma_bits = 8*slice->bytes - 7 - length_bits - luma_bits;
 756     chroma_end  = get_bits_count(gb) + FFMIN(chroma_bits, get_bits_left(gb));
 757     /* [DIRAC_STD] 13.5.5.3 chroma_slice_band */
 758     for (level = 0; level < s->wavelet_depth; level++)
 759         for (orientation = !!level; orientation < 4; orientation++) {
 760             quant = FFMAX(quant_base - s->lowdelay.quant[level][orientation], 0);
 761             lowdelay_subband(s, gb, quant, slice->slice_x, slice->slice_y, chroma_end,
 762                              &s->plane[1].band[level][orientation],
 763                              &s->plane[2].band[level][orientation]);
 764         }
 765
 766     return 0;
 767 }
 768
 769 /**
 770  * Dirac Specification ->
 771  * 13.5.1 low_delay_transform_data()
 772  */
 773 static void decode_lowdelay(DiracContext *s)
 774 {
 775     AVCodecContext *avctx = s->avctx;
 776     int slice_x, slice_y, bytes, bufsize;
 777     const uint8_t *buf;
 778     struct lowdelay_slice *slices;
 779     int slice_num = 0;
 780
 781     slices = av_mallocz_array(s->lowdelay.num_x, s->lowdelay.num_y * sizeof(struct lowdelay_slice));
 782
 783     align_get_bits(&s->gb);
 784     /*[DIRAC_STD] 13.5.2 Slices. slice(sx,sy) */
 785     buf = s->gb.buffer + get_bits_count(&s->gb)/8;
 786     bufsize = get_bits_left(&s->gb);
 787
 788     for (slice_y = 0; bufsize > 0 && slice_y < s->lowdelay.num_y; slice_y++)
 789         for (slice_x = 0; bufsize > 0 && slice_x < s->lowdelay.num_x; slice_x++) {
 790             bytes = (slice_num+1) * s->lowdelay.bytes.num / s->lowdelay.bytes.den
 791                 - slice_num    * s->lowdelay.bytes.num / s->lowdelay.bytes.den;
 792
 793             slices[slice_num].bytes   = bytes;
 794             slices[slice_num].slice_x = slice_x;
 795             slices[slice_num].slice_y = slice_y;
 796             init_get_bits(&slices[slice_num].gb, buf, bufsize);
 797             slice_num++;
 798
 799             buf     += bytes;
 800             bufsize -= bytes*8;
 801         }
 802
 803     avctx->execute(avctx, decode_lowdelay_slice, slices, NULL, slice_num,
 804                    sizeof(struct lowdelay_slice)); /* [DIRAC_STD] 13.5.2 Slices */
 805     intra_dc_prediction(&s->plane[0].band[0][0]);  /* [DIRAC_STD] 13.3 intra_dc_prediction() */
 806     intra_dc_prediction(&s->plane[1].band[0][0]);  /* [DIRAC_STD] 13.3 intra_dc_prediction() */
 807     intra_dc_prediction(&s->plane[2].band[0][0]);  /* [DIRAC_STD] 13.3 intra_dc_prediction() */
 808     av_free(slices);
 809 }
 810
 811 static void init_planes(DiracContext *s)
 812 {
 813     int i, w, h, level, orientation;
 814
 815     for (i = 0; i < 3; i++) {
 816         Plane *p = &s->plane[i];
 817
 818         p->width       = s->source.width  >> (i ? s->chroma_x_shift : 0);
 819         p->height      = s->source.height >> (i ? s->chroma_y_shift : 0);
 820         p->idwt_width  = w = CALC_PADDING(p->width , s->wavelet_depth);
 821         p->idwt_height = h = CALC_PADDING(p->height, s->wavelet_depth);
 822         p->idwt_stride = FFALIGN(p->idwt_width, 8);
 823
 824         for (level = s->wavelet_depth-1; level >= 0; level--) {
 825             w = w>>1;
 826             h = h>>1;
 827             for (orientation = !!level; orientation < 4; orientation++) {
 828                 SubBand *b = &p->band[level][orientation];
 829
 830                 b->ibuf   = p->idwt_buf;
 831                 b->level  = level;
 832                 b->stride = p->idwt_stride << (s->wavelet_depth - level);
 833                 b->width  = w;
 834                 b->height = h;
 835                 b->orientation = orientation;
 836
 837                 if (orientation & 1)
 838                     b->ibuf += w;
 839                 if (orientation > 1)
 840                     b->ibuf += b->stride>>1;
 841
 842                 if (level)
 843                     b->parent = &p->band[level-1][orientation];
 844             }
 845         }
 846
 847         if (i > 0) {
 848             p->xblen = s->plane[0].xblen >> s->chroma_x_shift;
 849             p->yblen = s->plane[0].yblen >> s->chroma_y_shift;
 850             p->xbsep = s->plane[0].xbsep >> s->chroma_x_shift;
 851             p->ybsep = s->plane[0].ybsep >> s->chroma_y_shift;
 852         }
 853
 854         p->xoffset = (p->xblen - p->xbsep)/2;
 855         p->yoffset = (p->yblen - p->ybsep)/2;
 856     }
 857 }
 858
 859 /**
 860  * Unpack the motion compensation parameters
 861  * Dirac Specification ->
 862  * 11.2 Picture prediction data. picture_prediction()
 863  */
 864 static int dirac_unpack_prediction_parameters(DiracContext *s)
 865 {
 866     static const uint8_t default_blen[] = { 4, 12, 16, 24 };
 867     static const uint8_t default_bsep[] = { 4,  8, 12, 16 };
 868
 869     GetBitContext *gb = &s->gb;
 870     unsigned idx, ref;
 871
 872     align_get_bits(gb);
 873     /* [DIRAC_STD] 11.2.2 Block parameters. block_parameters() */
 874     /* Luma and Chroma are equal. 11.2.3 */
 875     idx = svq3_get_ue_golomb(gb); /* [DIRAC_STD] index */
 876
 877     if (idx > 4) {
 878         av_log(s->avctx, AV_LOG_ERROR, "Block prediction index too high\n");
 879         return -1;
 880     }
 881
 882     if (idx == 0) {
 883         s->plane[0].xblen = svq3_get_ue_golomb(gb);
 884         s->plane[0].yblen = svq3_get_ue_golomb(gb);
 885         s->plane[0].xbsep = svq3_get_ue_golomb(gb);
 886         s->plane[0].ybsep = svq3_get_ue_golomb(gb);
 887     } else {
 888         /*[DIRAC_STD] preset_block_params(index). Table 11.1 */
 889         s->plane[0].xblen = default_blen[idx-1];
 890         s->plane[0].yblen = default_blen[idx-1];
 891         s->plane[0].xbsep = default_bsep[idx-1];
 892         s->plane[0].ybsep = default_bsep[idx-1];
 893     }
 894     /*[DIRAC_STD] 11.2.4 motion_data_dimensions()
 895       Calculated in function dirac_unpack_block_motion_data */
 896
 897     if (!s->plane[0].xbsep || !s->plane[0].ybsep || s->plane[0].xbsep < s->plane[0].xblen/2 || s->plane[0].ybsep < s->plane[0].yblen/2) {
 898         av_log(s->avctx, AV_LOG_ERROR, "Block separation too small\n");
 899         return -1;
 900     }
 901     if (s->plane[0].xbsep > s->plane[0].xblen || s->plane[0].ybsep > s->plane[0].yblen) {
 902         av_log(s->avctx, AV_LOG_ERROR, "Block separation greater than size\n");
 903         return -1;
 904     }
 905     if (FFMAX(s->plane[0].xblen, s->plane[0].yblen) > MAX_BLOCKSIZE) {
 906         av_log(s->avctx, AV_LOG_ERROR, "Unsupported large block size\n");
 907         return -1;
 908     }
 909
 910     /*[DIRAC_STD] 11.2.5 Motion vector precision. motion_vector_precision()
 911       Read motion vector precision */
 912     s->mv_precision = svq3_get_ue_golomb(gb);
 913     if (s->mv_precision > 3) {
 914         av_log(s->avctx, AV_LOG_ERROR, "MV precision finer than eighth-pel\n");
 915         return -1;
 916     }
 917
 918     /*[DIRAC_STD] 11.2.6 Global motion. global_motion()
 919       Read the global motion compensation parameters */
 920     s->globalmc_flag = get_bits1(gb);
 921     if (s->globalmc_flag) {
 922         memset(s->globalmc, 0, sizeof(s->globalmc));
 923         /* [DIRAC_STD] pan_tilt(gparams) */
 924         for (ref = 0; ref < s->num_refs; ref++) {
 925             if (get_bits1(gb)) {
 926                 s->globalmc[ref].pan_tilt[0] = dirac_get_se_golomb(gb);
 927                 s->globalmc[ref].pan_tilt[1] = dirac_get_se_golomb(gb);
 928             }
 929             /* [DIRAC_STD] zoom_rotate_shear(gparams)
 930                zoom/rotation/shear parameters */
 931             if (get_bits1(gb)) {
 932                 s->globalmc[ref].zrs_exp   = svq3_get_ue_golomb(gb);
 933                 s->globalmc[ref].zrs[0][0] = dirac_get_se_golomb(gb);
 934                 s->globalmc[ref].zrs[0][1] = dirac_get_se_golomb(gb);
 935                 s->globalmc[ref].zrs[1][0] = dirac_get_se_golomb(gb);
 936                 s->globalmc[ref].zrs[1][1] = dirac_get_se_golomb(gb);
 937             } else {
 938                 s->globalmc[ref].zrs[0][0] = 1;
 939                 s->globalmc[ref].zrs[1][1] = 1;
 940             }
 941             /* [DIRAC_STD] perspective(gparams) */
 942             if (get_bits1(gb)) {
 943                 s->globalmc[ref].perspective_exp = svq3_get_ue_golomb(gb);
 944                 s->globalmc[ref].perspective[0]  = dirac_get_se_golomb(gb);
 945                 s->globalmc[ref].perspective[1]  = dirac_get_se_golomb(gb);
 946             }
 947         }
 948     }
 949
 950     /*[DIRAC_STD] 11.2.7 Picture prediction mode. prediction_mode()
 951       Picture prediction mode, not currently used. */
 952     if (svq3_get_ue_golomb(gb)) {
 953         av_log(s->avctx, AV_LOG_ERROR, "Unknown picture prediction mode\n");
 954         return -1;
 955     }
 956
 957     /* [DIRAC_STD] 11.2.8 Reference picture weight. reference_picture_weights()
 958        just data read, weight calculation will be done later on. */
 959     s->weight_log2denom = 1;
 960     s->weight[0]        = 1;
 961     s->weight[1]        = 1;
 962
 963     if (get_bits1(gb)) {
 964         s->weight_log2denom = svq3_get_ue_golomb(gb);
 965         s->weight[0] = dirac_get_se_golomb(gb);
 966         if (s->num_refs == 2)
 967             s->weight[1] = dirac_get_se_golomb(gb);
 968     }
 969     return 0;
 970 }
 971
 972 /**
 973  * Dirac Specification ->
 974  * 11.3 Wavelet transform data. wavelet_transform()
 975  */
 976 static int dirac_unpack_idwt_params(DiracContext *s)
 977 {
 978     GetBitContext *gb = &s->gb;
 979     int i, level;
 980     unsigned tmp;
 981
 982 #define CHECKEDREAD(dst, cond, errmsg) \
 983     tmp = svq3_get_ue_golomb(gb); \
 984     if (cond) { \
 985         av_log(s->avctx, AV_LOG_ERROR, errmsg); \
 986         return -1; \
 987     }\
 988     dst = tmp;
 989
 990     align_get_bits(gb);
 991
 992     s->zero_res = s->num_refs ? get_bits1(gb) : 0;
 993     if (s->zero_res)
 994         return 0;
 995
 996     /*[DIRAC_STD] 11.3.1 Transform parameters. transform_parameters() */
 997     CHECKEDREAD(s->wavelet_idx, tmp > 6, "wavelet_idx is too big\n")
 998
 999     CHECKEDREAD(s->wavelet_depth, tmp > MAX_DWT_LEVELS || tmp < 1, "invalid number of DWT decompositions\n")
1000
1001     if (!s->low_delay) {
1002         /* Codeblock parameters (core syntax only) */
1003         if (get_bits1(gb)) {
1004             for (i = 0; i <= s->wavelet_depth; i++) {
1005                 CHECKEDREAD(s->codeblock[i].width , tmp < 1, "codeblock width invalid\n")
1006                 CHECKEDREAD(s->codeblock[i].height, tmp < 1, "codeblock height invalid\n")
1007             }
1008
1009             CHECKEDREAD(s->codeblock_mode, tmp > 1, "unknown codeblock mode\n")
1010         } else
1011             for (i = 0; i <= s->wavelet_depth; i++)
1012                 s->codeblock[i].width = s->codeblock[i].height = 1;
1013     } else {
1014         /* Slice parameters + quantization matrix*/
1015         /*[DIRAC_STD] 11.3.4 Slice coding Parameters (low delay syntax only). slice_parameters() */
1016         s->lowdelay.num_x     = svq3_get_ue_golomb(gb);
1017         s->lowdelay.num_y     = svq3_get_ue_golomb(gb);
1018         s->lowdelay.bytes.num = svq3_get_ue_golomb(gb);
1019         s->lowdelay.bytes.den = svq3_get_ue_golomb(gb);
1020
1021         if (s->lowdelay.bytes.den <= 0) {
1022             av_log(s->avctx,AV_LOG_ERROR,"Invalid lowdelay.bytes.den\n");
1023             return AVERROR_INVALIDDATA;
1024         }
1025
1026         /* [DIRAC_STD] 11.3.5 Quantisation matrices (low-delay syntax). quant_matrix() */
1027         if (get_bits1(gb)) {
1028             av_log(s->avctx,AV_LOG_DEBUG,"Low Delay: Has Custom Quantization Matrix!\n");
1029             /* custom quantization matrix */
1030             s->lowdelay.quant[0][0] = svq3_get_ue_golomb(gb);
1031             for (level = 0; level < s->wavelet_depth; level++) {
1032                 s->lowdelay.quant[level][1] = svq3_get_ue_golomb(gb);
1033                 s->lowdelay.quant[level][2] = svq3_get_ue_golomb(gb);
1034                 s->lowdelay.quant[level][3] = svq3_get_ue_golomb(gb);
1035             }
1036         } else {
1037             if (s->wavelet_depth > 4) {
1038                 av_log(s->avctx,AV_LOG_ERROR,"Mandatory custom low delay matrix missing for depth %d\n", s->wavelet_depth);
1039                 return AVERROR_INVALIDDATA;
1040             }
1041             /* default quantization matrix */
1042             for (level = 0; level < s->wavelet_depth; level++)
1043                 for (i = 0; i < 4; i++) {
1044                     s->lowdelay.quant[level][i] = default_qmat[s->wavelet_idx][level][i];
1045                     /* haar with no shift differs for different depths */
1046                     if (s->wavelet_idx == 3)
1047                         s->lowdelay.quant[level][i] += 4*(s->wavelet_depth-1 - level);
1048                 }
1049         }
1050     }
1051     return 0;
1052 }
1053
/**
 * Predict the split level of a superblock from its already decoded
 * neighbours.
 *
 * @param sbsplit points at the entry being predicted inside the split map
 * @param stride  number of entries per row of the split map
 * @param x, y    superblock coordinates, used to detect the picture edges
 * @return 0 for the top-left superblock, the single available neighbour
 *         along the first row/column, otherwise the rounded mean of the
 *         left, top and top-left neighbours
 */
static inline int pred_sbsplit(uint8_t *sbsplit, int stride, int x, int y)
{
    /* rounded mean of three values, indexed by their sum */
    static const uint8_t mean_of_three[7] = { 0, 0, 1, 1, 1, 2, 2 };
    int left, above, diag;

    if (x == 0 && y == 0)
        return 0;
    if (y == 0)
        return sbsplit[-1];
    if (x == 0)
        return sbsplit[-stride];

    left  = sbsplit[-1];
    above = sbsplit[-stride];
    diag  = sbsplit[-stride-1];
    return mean_of_three[left + above + diag];
}
1067
1068 static inline int pred_block_mode(DiracBlock *block, int stride, int x, int y, int refmask)
1069 {
1070     int pred;
1071
1072     if (!(x|y))
1073         return 0;
1074     else if (!y)
1075         return block[-1].ref & refmask;
1076     else if (!x)
1077         return block[-stride].ref & refmask;
1078
1079     /* return the majority */
1080     pred = (block[-1].ref & refmask) + (block[-stride].ref & refmask) + (block[-stride-1].ref & refmask);
1081     return (pred >> 1) & refmask;
1082 }
1083
1084 static inline void pred_block_dc(DiracBlock *block, int stride, int x, int y)
1085 {
1086     int i, n = 0;
1087
1088     memset(block->u.dc, 0, sizeof(block->u.dc));
1089
1090     if (x && !(block[-1].ref & 3)) {
1091         for (i = 0; i < 3; i++)
1092             block->u.dc[i] += block[-1].u.dc[i];
1093         n++;
1094     }
1095
1096     if (y && !(block[-stride].ref & 3)) {
1097         for (i = 0; i < 3; i++)
1098             block->u.dc[i] += block[-stride].u.dc[i];
1099         n++;
1100     }
1101
1102     if (x && y && !(block[-1-stride].ref & 3)) {
1103         for (i = 0; i < 3; i++)
1104             block->u.dc[i] += block[-1-stride].u.dc[i];
1105         n++;
1106     }
1107
1108     if (n == 2) {
1109         for (i = 0; i < 3; i++)
1110             block->u.dc[i] = (block->u.dc[i]+1)>>1;
1111     } else if (n == 3) {
1112         for (i = 0; i < 3; i++)
1113             block->u.dc[i] = divide3(block->u.dc[i]);
1114     }
1115 }
1116
1117 static inline void pred_mv(DiracBlock *block, int stride, int x, int y, int ref)
1118 {
1119     int16_t *pred[3];
1120     int refmask = ref+1;
1121     int mask = refmask | DIRAC_REF_MASK_GLOBAL; /*  exclude gmc blocks */
1122     int n = 0;
1123
1124     if (x && (block[-1].ref & mask) == refmask)
1125         pred[n++] = block[-1].u.mv[ref];
1126
1127     if (y && (block[-stride].ref & mask) == refmask)
1128         pred[n++] = block[-stride].u.mv[ref];
1129
1130     if (x && y && (block[-stride-1].ref & mask) == refmask)
1131         pred[n++] = block[-stride-1].u.mv[ref];
1132
1133     switch (n) {
1134     case 0:
1135         block->u.mv[ref][0] = 0;
1136         block->u.mv[ref][1] = 0;
1137         break;
1138     case 1:
1139         block->u.mv[ref][0] = pred[0][0];
1140         block->u.mv[ref][1] = pred[0][1];
1141         break;
1142     case 2:
1143         block->u.mv[ref][0] = (pred[0][0] + pred[1][0] + 1) >> 1;
1144         block->u.mv[ref][1] = (pred[0][1] + pred[1][1] + 1) >> 1;
1145         break;
1146     case 3:
1147         block->u.mv[ref][0] = mid_pred(pred[0][0], pred[1][0], pred[2][0]);
1148         block->u.mv[ref][1] = mid_pred(pred[0][1], pred[1][1], pred[2][1]);
1149         break;
1150     }
1151 }
1152
1153 static void global_mv(DiracContext *s, DiracBlock *block, int x, int y, int ref)
1154 {
1155     int ez      = s->globalmc[ref].zrs_exp;
1156     int ep      = s->globalmc[ref].perspective_exp;
1157     int (*A)[2] = s->globalmc[ref].zrs;
1158     int *b      = s->globalmc[ref].pan_tilt;
1159     int *c      = s->globalmc[ref].perspective;
1160
1161     int m       = (1<<ep) - (c[0]*x + c[1]*y);
1162     int mx      = m * ((A[0][0] * x + A[0][1]*y) + (1<<ez) * b[0]);
1163     int my      = m * ((A[1][0] * x + A[1][1]*y) + (1<<ez) * b[1]);
1164
1165     block->u.mv[ref][0] = (mx + (1<<(ez+ep))) >> (ez+ep);
1166     block->u.mv[ref][1] = (my + (1<<(ez+ep))) >> (ez+ep);
1167 }
1168
1169 static void decode_block_params(DiracContext *s, DiracArith arith[8], DiracBlock *block,
1170                                 int stride, int x, int y)
1171 {
1172     int i;
1173
1174     block->ref  = pred_block_mode(block, stride, x, y, DIRAC_REF_MASK_REF1);
1175     block->ref ^= dirac_get_arith_bit(arith, CTX_PMODE_REF1);
1176
1177     if (s->num_refs == 2) {
1178         block->ref |= pred_block_mode(block, stride, x, y, DIRAC_REF_MASK_REF2);
1179         block->ref ^= dirac_get_arith_bit(arith, CTX_PMODE_REF2) << 1;
1180     }
1181
1182     if (!block->ref) {
1183         pred_block_dc(block, stride, x, y);
1184         for (i = 0; i < 3; i++)
1185             block->u.dc[i] += dirac_get_arith_int(arith+1+i, CTX_DC_F1, CTX_DC_DATA);
1186         return;
1187     }
1188
1189     if (s->globalmc_flag) {
1190         block->ref |= pred_block_mode(block, stride, x, y, DIRAC_REF_MASK_GLOBAL);
1191         block->ref ^= dirac_get_arith_bit(arith, CTX_GLOBAL_BLOCK) << 2;
1192     }
1193
1194     for (i = 0; i < s->num_refs; i++)
1195         if (block->ref & (i+1)) {
1196             if (block->ref & DIRAC_REF_MASK_GLOBAL) {
1197                 global_mv(s, block, x, y, i);
1198             } else {
1199                 pred_mv(block, stride, x, y, i);
1200                 block->u.mv[i][0] += dirac_get_arith_int(arith + 4 + 2 * i, CTX_MV_F1, CTX_MV_DATA);
1201                 block->u.mv[i][1] += dirac_get_arith_int(arith + 5 + 2 * i, CTX_MV_F1, CTX_MV_DATA);
1202             }
1203         }
1204 }
1205
1206 /**
1207  * Copies the current block to the other blocks covered by the current superblock split mode
1208  */
1209 static void propagate_block_data(DiracBlock *block, int stride, int size)
1210 {
1211     int x, y;
1212     DiracBlock *dst = block;
1213
1214     for (x = 1; x < size; x++)
1215         dst[x] = *block;
1216
1217     for (y = 1; y < size; y++) {
1218         dst += stride;
1219         for (x = 0; x < size; x++)
1220             dst[x] = *block;
1221     }
1222 }
1223
1224 /**
1225  * Dirac Specification ->
1226  * 12. Block motion data syntax
1227  */
1228 static int dirac_unpack_block_motion_data(DiracContext *s)
1229 {
1230     GetBitContext *gb = &s->gb;
1231     uint8_t *sbsplit = s->sbsplit;
1232     int i, x, y, q, p;
1233     DiracArith arith[8];
1234
1235     align_get_bits(gb);
1236
1237     /* [DIRAC_STD] 11.2.4 and 12.2.1 Number of blocks and superblocks */
1238     s->sbwidth  = DIVRNDUP(s->source.width,  4*s->plane[0].xbsep);
1239     s->sbheight = DIVRNDUP(s->source.height, 4*s->plane[0].ybsep);
1240     s->blwidth  = 4 * s->sbwidth;
1241     s->blheight = 4 * s->sbheight;
1242
1243     /* [DIRAC_STD] 12.3.1 Superblock splitting modes. superblock_split_modes()
1244        decode superblock split modes */
1245     ff_dirac_init_arith_decoder(arith, gb, svq3_get_ue_golomb(gb));     /* svq3_get_ue_golomb(gb) is the length */
1246     for (y = 0; y < s->sbheight; y++) {
1247         for (x = 0; x < s->sbwidth; x++) {
1248             unsigned int split  = dirac_get_arith_uint(arith, CTX_SB_F1, CTX_SB_DATA);
1249             if (split > 2)
1250                 return -1;
1251             sbsplit[x] = (split + pred_sbsplit(sbsplit+x, s->sbwidth, x, y)) % 3;
1252         }
1253         sbsplit += s->sbwidth;
1254     }
1255
1256     /* setup arith decoding */
1257     ff_dirac_init_arith_decoder(arith, gb, svq3_get_ue_golomb(gb));
1258     for (i = 0; i < s->num_refs; i++) {
1259         ff_dirac_init_arith_decoder(arith + 4 + 2 * i, gb, svq3_get_ue_golomb(gb));
1260         ff_dirac_init_arith_decoder(arith + 5 + 2 * i, gb, svq3_get_ue_golomb(gb));
1261     }
1262     for (i = 0; i < 3; i++)
1263         ff_dirac_init_arith_decoder(arith+1+i, gb, svq3_get_ue_golomb(gb));
1264
1265     for (y = 0; y < s->sbheight; y++)
1266         for (x = 0; x < s->sbwidth; x++) {
1267             int blkcnt = 1 << s->sbsplit[y * s->sbwidth + x];
1268             int step   = 4 >> s->sbsplit[y * s->sbwidth + x];
1269
1270             for (q = 0; q < blkcnt; q++)
1271                 for (p = 0; p < blkcnt; p++) {
1272                     int bx = 4 * x + p*step;
1273                     int by = 4 * y + q*step;
1274                     DiracBlock *block = &s->blmotion[by*s->blwidth + bx];
1275                     decode_block_params(s, arith, block, s->blwidth, bx, by);
1276                     propagate_block_data(block, s->blwidth, step);
1277                 }
1278         }
1279
1280     return 0;
1281 }
1282
/**
 * One-dimensional overlapped block weight.
 *
 * @param i      position inside the block, 0 .. blen-1
 * @param blen   block length
 * @param offset half of the overlap with the neighbouring block
 * @return 8 in the flat middle part of the block, otherwise the ramp
 *         value for the distance of i from the nearer block edge
 */
static int weight(int i, int blen, int offset)
{
    int dist;

    /* the ramps cover the first and the last 2*offset positions */
    if (i < 2*offset)
        dist = i;
    else if (i > blen-1 - 2*offset)
        dist = blen-1 - i;
    else
        return 8;

    /* an overlap of one pixel uses a fixed two-step ramp */
    if (offset == 1)
        return dist ? 5 : 3;
    return 1 + (6*dist + offset - 1) / (2*offset - 1);
}
1294
1295 static void init_obmc_weight_row(Plane *p, uint8_t *obmc_weight, int stride,
1296                                  int left, int right, int wy)
1297 {
1298     int x;
1299     for (x = 0; left && x < p->xblen >> 1; x++)
1300         obmc_weight[x] = wy*8;
1301     for (; x < p->xblen >> right; x++)
1302         obmc_weight[x] = wy*weight(x, p->xblen, p->xoffset);
1303     for (; x < p->xblen; x++)
1304         obmc_weight[x] = wy*8;
1305     for (; x < stride; x++)
1306         obmc_weight[x] = 0;
1307 }
1308
1309 static void init_obmc_weight(Plane *p, uint8_t *obmc_weight, int stride,
1310                              int left, int right, int top, int bottom)
1311 {
1312     int y;
1313     for (y = 0; top && y < p->yblen >> 1; y++) {
1314         init_obmc_weight_row(p, obmc_weight, stride, left, right, 8);
1315         obmc_weight += stride;
1316     }
1317     for (; y < p->yblen >> bottom; y++) {
1318         int wy = weight(y, p->yblen, p->yoffset);
1319         init_obmc_weight_row(p, obmc_weight, stride, left, right, wy);
1320         obmc_weight += stride;
1321     }
1322     for (; y < p->yblen; y++) {
1323         init_obmc_weight_row(p, obmc_weight, stride, left, right, 8);
1324         obmc_weight += stride;
1325     }
1326 }
1327
1328 static void init_obmc_weights(DiracContext *s, Plane *p, int by)
1329 {
1330     int top = !by;
1331     int bottom = by == s->blheight-1;
1332
1333     /* don't bother re-initing for rows 2 to blheight-2, the weights don't change */
1334     if (top || bottom || by == 1) {
1335         init_obmc_weight(p, s->obmc_weight[0], MAX_BLOCKSIZE, 1, 0, top, bottom);
1336         init_obmc_weight(p, s->obmc_weight[1], MAX_BLOCKSIZE, 0, 0, top, bottom);
1337         init_obmc_weight(p, s->obmc_weight[2], MAX_BLOCKSIZE, 0, 1, top, bottom);
1338     }
1339 }
1340
/**
 * Blending weights for eighth-pel motion compensation.
 *
 * Indexed as [my & 3][mx & 3]; the innermost array holds one weight for
 * each of the four source planes selected by mc_subpel(), and every such
 * set of four sums to 16.
 */
static const uint8_t epel_weights[4][4][4] = {
    {{ 16,  0,  0,  0 },
     { 12,  4,  0,  0 },
     {  8,  8,  0,  0 },
     {  4, 12,  0,  0 }},
    {{ 12,  0,  4,  0 },
     {  9,  3,  3,  1 },
     {  6,  6,  2,  2 },
     {  3,  9,  1,  3 }},
    {{  8,  0,  8,  0 },
     {  6,  2,  6,  2 },
     {  4,  4,  4,  4 },
     {  2,  6,  2,  6 }},
    {{  4,  0, 12,  0 },
     {  3,  1,  9,  3 },
     {  2,  2,  6,  6 },
     {  1,  3,  3,  9 }}
};
1359
/**
 * For block x,y, determine which of the hpel planes to do bilinear
 * interpolation from and set src[] to the location in each hpel plane
 * to MC from.
 *
 * @param s     decoder context
 * @param block block whose motion vector for reference ref is used
 * @param src   receives up to four source pointers; for eighth-pel
 *              positions src[4] additionally receives the weight set
 * @param x, y  position of the block in the plane, before motion
 * @param ref   index of the reference picture
 * @param plane index of the plane
 * @return the index of the put_dirac_pixels_tab function to use
 *  0 for 1 plane (fpel,hpel), 1 for 2 planes (qpel), 2 for 4 planes (qpel), and 3 for epel
 */
static int mc_subpel(DiracContext *s, DiracBlock *block, const uint8_t *src[5],
                     int x, int y, int ref, int plane)
{
    Plane *p = &s->plane[plane];
    uint8_t **ref_hpel = s->ref_pics[ref]->hpel[plane];
    int motion_x = block->u.mv[ref][0];
    int motion_y = block->u.mv[ref][1];
    int mx, my, i, epel, nplanes = 0;

    /* for planes other than 0 the vector is scaled by the chroma shifts */
    if (plane) {
        motion_x >>= s->chroma_x_shift;
        motion_y >>= s->chroma_y_shift;
    }

    /* split the vector into its fractional (mx, my) and integer parts */
    mx         = motion_x & ~(-1U << s->mv_precision);
    my         = motion_y & ~(-1U << s->mv_precision);
    motion_x >>= s->mv_precision;
    motion_y >>= s->mv_precision;
    /* normalize subpel coordinates to epel */
    /* TODO: template this function? */
    mx      <<= 3 - s->mv_precision;
    my      <<= 3 - s->mv_precision;

    x += motion_x;
    y += motion_y;
    /* an odd eighth-pel fraction needs the weighted four plane blend */
    epel = (mx|my)&1;

    /* hpel position */
    if (!((mx|my)&3)) {
        nplanes = 1;
        /* mx and my are each 0 or 4 here, selecting one of the four planes */
        src[0] = ref_hpel[(my>>1)+(mx>>2)] + y*p->stride + x;
    } else {
        /* qpel or epel */
        nplanes = 4;
        for (i = 0; i < 4; i++)
            src[i] = ref_hpel[i] + y*p->stride + x;

        /* if we're interpolating in the right/bottom halves, adjust the planes as needed
           we increment x/y because the edge changes for half of the pixels */
        if (mx > 4) {
            src[0] += 1;
            src[2] += 1;
            x++;
        }
        if (my > 4) {
            src[0] += p->stride;
            src[1] += p->stride;
            y++;
        }

        /* hpel planes are:
           [0]: F  [1]: H
           [2]: V  [3]: C */
        if (!epel) {
            /* check if we really only need 2 planes since either mx or my is
               a hpel position. (epel weights of 0 handle this there) */
            if (!(mx&3)) {
                /* mx == 0: average [0] and [2]
                   mx == 4: average [1] and [3] */
                src[!mx] = src[2 + !!mx];
                nplanes = 2;
            } else if (!(my&3)) {
                src[0] = src[(my>>1)  ];
                src[1] = src[(my>>1)+1];
                nplanes = 2;
            }
        } else {
            /* adjust the ordering if needed so the weights work */
            if (mx > 4) {
                FFSWAP(const uint8_t *, src[0], src[1]);
                FFSWAP(const uint8_t *, src[2], src[3]);
            }
            if (my > 4) {
                FFSWAP(const uint8_t *, src[0], src[2]);
                FFSWAP(const uint8_t *, src[1], src[3]);
            }
            /* the weight set travels as a fifth "source" pointer */
            src[4] = epel_weights[my&3][mx&3];
        }
    }

    /* fixme: v/h _edge_pos */
    /* the block reaches outside the plane plus its drawn edge: read each
       source through a copy built by ff_emulated_edge_mc() instead */
    if (x + p->xblen > p->width +EDGE_WIDTH/2 ||
        y + p->yblen > p->height+EDGE_WIDTH/2 ||
        x < 0 || y < 0) {
        for (i = 0; i < nplanes; i++) {
            ff_emulated_edge_mc(s->edge_emu_buffer[i], src[i],
                                p->stride, p->stride,
                                p->xblen, p->yblen, x, y,
                                p->width+EDGE_WIDTH/2, p->height+EDGE_WIDTH/2);
            src[i] = s->edge_emu_buffer[i];
        }
    }
    /* 1 plane -> 0, 2 planes -> 1, 4 planes -> 2, 4 planes + epel -> 3 */
    return (nplanes>>1) + epel;
}
1462
1463 static void add_dc(uint16_t *dst, int dc, int stride,
1464                    uint8_t *obmc_weight, int xblen, int yblen)
1465 {
1466     int x, y;
1467     dc += 128;
1468
1469     for (y = 0; y < yblen; y++) {
1470         for (x = 0; x < xblen; x += 2) {
1471             dst[x  ] += dc * obmc_weight[x  ];
1472             dst[x+1] += dc * obmc_weight[x+1];
1473         }
1474         dst          += stride;
1475         obmc_weight  += MAX_BLOCKSIZE;
1476     }
1477 }
1478
1479 static void block_mc(DiracContext *s, DiracBlock *block,
1480                      uint16_t *mctmp, uint8_t *obmc_weight,
1481                      int plane, int dstx, int dsty)
1482 {
1483     Plane *p = &s->plane[plane];
1484     const uint8_t *src[5];
1485     int idx;
1486
1487     switch (block->ref&3) {
1488     case 0: /* DC */
1489         add_dc(mctmp, block->u.dc[plane], p->stride, obmc_weight, p->xblen, p->yblen);
1490         return;
1491     case 1:
1492     case 2:
1493         idx = mc_subpel(s, block, src, dstx, dsty, (block->ref&3)-1, plane);
1494         s->put_pixels_tab[idx](s->mcscratch, src, p->stride, p->yblen);
1495         if (s->weight_func)
1496             s->weight_func(s->mcscratch, p->stride, s->weight_log2denom,
1497                            s->weight[0] + s->weight[1], p->yblen);
1498         break;
1499     case 3:
1500         idx = mc_subpel(s, block, src, dstx, dsty, 0, plane);
1501         s->put_pixels_tab[idx](s->mcscratch, src, p->stride, p->yblen);
1502         idx = mc_subpel(s, block, src, dstx, dsty, 1, plane);
1503         if (s->biweight_func) {
1504             /* fixme: +32 is a quick hack */
1505             s->put_pixels_tab[idx](s->mcscratch + 32, src, p->stride, p->yblen);
1506             s->biweight_func(s->mcscratch, s->mcscratch+32, p->stride, s->weight_log2denom,
1507                              s->weight[0], s->weight[1], p->yblen);
1508         } else
1509             s->avg_pixels_tab[idx](s->mcscratch, src, p->stride, p->yblen);
1510         break;
1511     }
1512     s->add_obmc(mctmp, s->mcscratch, p->stride, obmc_weight, p->yblen);
1513 }
1514
1515 static void mc_row(DiracContext *s, DiracBlock *block, uint16_t *mctmp, int plane, int dsty)
1516 {
1517     Plane *p = &s->plane[plane];
1518     int x, dstx = p->xbsep - p->xoffset;
1519
1520     block_mc(s, block, mctmp, s->obmc_weight[0], plane, -p->xoffset, dsty);
1521     mctmp += p->xbsep;
1522
1523     for (x = 1; x < s->blwidth-1; x++) {
1524         block_mc(s, block+x, mctmp, s->obmc_weight[1], plane, dstx, dsty);
1525         dstx  += p->xbsep;
1526         mctmp += p->xbsep;
1527     }
1528     block_mc(s, block+x, mctmp, s->obmc_weight[2], plane, dstx, dsty);
1529 }
1530
1531 static void select_dsp_funcs(DiracContext *s, int width, int height, int xblen, int yblen)
1532 {
1533     int idx = 0;
1534     if (xblen > 8)
1535         idx = 1;
1536     if (xblen > 16)
1537         idx = 2;
1538
1539     memcpy(s->put_pixels_tab, s->diracdsp.put_dirac_pixels_tab[idx], sizeof(s->put_pixels_tab));
1540     memcpy(s->avg_pixels_tab, s->diracdsp.avg_dirac_pixels_tab[idx], sizeof(s->avg_pixels_tab));
1541     s->add_obmc = s->diracdsp.add_dirac_obmc[idx];
1542     if (s->weight_log2denom > 1 || s->weight[0] != 1 || s->weight[1] != 1) {
1543         s->weight_func   = s->diracdsp.weight_dirac_pixels_tab[idx];
1544         s->biweight_func = s->diracdsp.biweight_dirac_pixels_tab[idx];
1545     } else {
1546         s->weight_func   = NULL;
1547         s->biweight_func = NULL;
1548     }
1549 }
1550
1551 static void interpolate_refplane(DiracContext *s, DiracFrame *ref, int plane, int width, int height)
1552 {
1553     /* chroma allocates an edge of 8 when subsampled
1554        which for 4:2:2 means an h edge of 16 and v edge of 8
1555        just use 8 for everything for the moment */
1556     int i, edge = EDGE_WIDTH/2;
1557
1558     ref->hpel[plane][0] = ref->avframe->data[plane];
1559     s->dsp.draw_edges(ref->hpel[plane][0], ref->avframe->linesize[plane], width, height, edge, edge, EDGE_TOP | EDGE_BOTTOM); /* EDGE_TOP | EDGE_BOTTOM values just copied to make it build, this needs to be ensured */
1560
1561     /* no need for hpel if we only have fpel vectors */
1562     if (!s->mv_precision)
1563         return;
1564
1565     for (i = 1; i < 4; i++) {
1566         if (!ref->hpel_base[plane][i])
1567             ref->hpel_base[plane][i] = av_malloc((height+2*edge) * ref->avframe->linesize[plane] + 32);
1568         /* we need to be 16-byte aligned even for chroma */
1569         ref->hpel[plane][i] = ref->hpel_base[plane][i] + edge*ref->avframe->linesize[plane] + 16;
1570     }
1571
1572     if (!ref->interpolated[plane]) {
1573         s->diracdsp.dirac_hpel_filter(ref->hpel[plane][1], ref->hpel[plane][2],
1574                                       ref->hpel[plane][3], ref->hpel[plane][0],
1575                                       ref->avframe->linesize[plane], width, height);
1576         s->dsp.draw_edges(ref->hpel[plane][1], ref->avframe->linesize[plane], width, height, edge, edge, EDGE_TOP | EDGE_BOTTOM);
1577         s->dsp.draw_edges(ref->hpel[plane][2], ref->avframe->linesize[plane], width, height, edge, edge, EDGE_TOP | EDGE_BOTTOM);
1578         s->dsp.draw_edges(ref->hpel[plane][3], ref->avframe->linesize[plane], width, height, edge, edge, EDGE_TOP | EDGE_BOTTOM);
1579     }
1580     ref->interpolated[plane] = 1;
1581 }
1582
1583 /**
1584  * Dirac Specification ->
1585  * 13.0 Transform data syntax. transform_data()
1586  */
1587 static int dirac_decode_frame_internal(DiracContext *s)
1588 {
1589     DWTContext d;
1590     int y, i, comp, dsty;
1591
1592     if (s->low_delay) {
1593         /* [DIRAC_STD] 13.5.1 low_delay_transform_data() */
1594         for (comp = 0; comp < 3; comp++) {
1595             Plane *p = &s->plane[comp];
1596             memset(p->idwt_buf, 0, p->idwt_stride * p->idwt_height * sizeof(IDWTELEM));
1597         }
1598         if (!s->zero_res)
1599             decode_lowdelay(s);
1600     }
1601
1602     for (comp = 0; comp < 3; comp++) {
1603         Plane *p       = &s->plane[comp];
1604         uint8_t *frame = s->current_picture->avframe->data[comp];
1605
1606         /* FIXME: small resolutions */
1607         for (i = 0; i < 4; i++)
1608             s->edge_emu_buffer[i] = s->edge_emu_buffer_base + i*FFALIGN(p->width, 16);
1609
1610         if (!s->zero_res && !s->low_delay)
1611         {
1612             memset(p->idwt_buf, 0, p->idwt_stride * p->idwt_height * sizeof(IDWTELEM));
1613             decode_component(s, comp); /* [DIRAC_STD] 13.4.1 core_transform_data() */
1614         }
1615         if (ff_spatial_idwt_init2(&d, p->idwt_buf, p->idwt_width, p->idwt_height, p->idwt_stride,
1616                                   s->wavelet_idx+2, s->wavelet_depth, p->idwt_tmp))
1617             return -1;
1618
1619         if (!s->num_refs) { /* intra */
1620             for (y = 0; y < p->height; y += 16) {
1621                 ff_spatial_idwt_slice2(&d, y+16); /* decode */
1622                 s->diracdsp.put_signed_rect_clamped(frame + y*p->stride, p->stride,
1623                                                     p->idwt_buf + y*p->idwt_stride, p->idwt_stride, p->width, 16);
1624             }
1625         } else { /* inter */
1626             int rowheight = p->ybsep*p->stride;
1627
1628             select_dsp_funcs(s, p->width, p->height, p->xblen, p->yblen);
1629
1630             for (i = 0; i < s->num_refs; i++)
1631                 interpolate_refplane(s, s->ref_pics[i], comp, p->width, p->height);
1632
1633             memset(s->mctmp, 0, 4*p->yoffset*p->stride);
1634
1635             dsty = -p->yoffset;
1636             for (y = 0; y < s->blheight; y++) {
1637                 int h     = 0,
1638                     start = FFMAX(dsty, 0);
1639                 uint16_t *mctmp    = s->mctmp + y*rowheight;
1640                 DiracBlock *blocks = s->blmotion + y*s->blwidth;
1641
1642                 init_obmc_weights(s, p, y);
1643
1644                 if (y == s->blheight-1 || start+p->ybsep > p->height)
1645                     h = p->height - start;
1646                 else
1647                     h = p->ybsep - (start - dsty);
1648                 if (h < 0)
1649                     break;
1650
1651                 memset(mctmp+2*p->yoffset*p->stride, 0, 2*rowheight);
1652                 mc_row(s, blocks, mctmp, comp, dsty);
1653
1654                 mctmp += (start - dsty)*p->stride + p->xoffset;
1655                 ff_spatial_idwt_slice2(&d, start + h); /* decode */
1656                 s->diracdsp.add_rect_clamped(frame + start*p->stride, mctmp, p->stride,
1657                                              p->idwt_buf + start*p->idwt_stride, p->idwt_stride, p->width, h);
1658
1659                 dsty += p->ybsep;
1660             }
1661         }
1662     }
1663
1664
1665     return 0;
1666 }
1667
1668 static int get_buffer_with_edge(AVCodecContext *avctx, AVFrame *f, int flags)
1669 {
1670     int ret, i;
1671     int chroma_x_shift, chroma_y_shift;
1672     avcodec_get_chroma_sub_sample(avctx->pix_fmt, &chroma_x_shift, &chroma_y_shift);
1673
1674     f->width  = avctx->width  + 2 * EDGE_WIDTH;
1675     f->height = avctx->height + 2 * EDGE_WIDTH + 2;
1676     ret = ff_get_buffer(avctx, f, flags);
1677     if (ret < 0)
1678         return ret;
1679
1680     for (i = 0; f->data[i]; i++) {
1681         int offset = (EDGE_WIDTH >> (i && i<3 ? chroma_y_shift : 0)) *
1682                      f->linesize[i] + 32;
1683         f->data[i] += offset;
1684     }
1685     f->width  = avctx->width;
1686     f->height = avctx->height;
1687
1688     return 0;
1689 }
1690
1691 /**
1692  * Dirac Specification ->
1693  * 11.1.1 Picture Header. picture_header()
1694  */
1695 static int dirac_decode_picture_header(DiracContext *s)
1696 {
1697     int retire, picnum;
1698     int i, j, refnum, refdist;
1699     GetBitContext *gb = &s->gb;
1700
1701     /* [DIRAC_STD] 11.1.1 Picture Header. picture_header() PICTURE_NUM */
1702     picnum = s->current_picture->avframe->display_picture_number = get_bits_long(gb, 32);
1703
1704
1705     av_log(s->avctx,AV_LOG_DEBUG,"PICTURE_NUM: %d\n",picnum);
1706
1707     /* if this is the first keyframe after a sequence header, start our
1708        reordering from here */
1709     if (s->frame_number < 0)
1710         s->frame_number = picnum;
1711
1712     s->ref_pics[0] = s->ref_pics[1] = NULL;
1713     for (i = 0; i < s->num_refs; i++) {
1714         refnum = picnum + dirac_get_se_golomb(gb);
1715         refdist = INT_MAX;
1716
1717         /* find the closest reference to the one we want */
1718         /* Jordi: this is needed if the referenced picture hasn't yet arrived */
1719         for (j = 0; j < MAX_REFERENCE_FRAMES && refdist; j++)
1720             if (s->ref_frames[j]
1721                 && FFABS(s->ref_frames[j]->avframe->display_picture_number - refnum) < refdist) {
1722                 s->ref_pics[i] = s->ref_frames[j];
1723                 refdist = FFABS(s->ref_frames[j]->avframe->display_picture_number - refnum);
1724             }
1725
1726         if (!s->ref_pics[i] || refdist)
1727             av_log(s->avctx, AV_LOG_DEBUG, "Reference not found\n");
1728
1729         /* if there were no references at all, allocate one */
1730         if (!s->ref_pics[i])
1731             for (j = 0; j < MAX_FRAMES; j++)
1732                 if (!s->all_frames[j].avframe->data[0]) {
1733                     s->ref_pics[i] = &s->all_frames[j];
1734                     get_buffer_with_edge(s->avctx, s->ref_pics[i]->avframe, AV_GET_BUFFER_FLAG_REF);
1735                     break;
1736                 }
1737     }
1738
1739     /* retire the reference frames that are not used anymore */
1740     if (s->current_picture->avframe->reference) {
1741         retire = picnum + dirac_get_se_golomb(gb);
1742         if (retire != picnum) {
1743             DiracFrame *retire_pic = remove_frame(s->ref_frames, retire);
1744
1745             if (retire_pic)
1746                 retire_pic->avframe->reference &= DELAYED_PIC_REF;
1747             else
1748                 av_log(s->avctx, AV_LOG_DEBUG, "Frame to retire not found\n");
1749         }
1750
1751         /* if reference array is full, remove the oldest as per the spec */
1752         while (add_frame(s->ref_frames, MAX_REFERENCE_FRAMES, s->current_picture)) {
1753             av_log(s->avctx, AV_LOG_ERROR, "Reference frame overflow\n");
1754             remove_frame(s->ref_frames, s->ref_frames[0]->avframe->display_picture_number)->avframe->reference &= DELAYED_PIC_REF;
1755         }
1756     }
1757
1758     if (s->num_refs) {
1759         if (dirac_unpack_prediction_parameters(s))  /* [DIRAC_STD] 11.2 Picture Prediction Data. picture_prediction() */
1760             return -1;
1761         if (dirac_unpack_block_motion_data(s))      /* [DIRAC_STD] 12. Block motion data syntax                       */
1762             return -1;
1763     }
1764     if (dirac_unpack_idwt_params(s))                /* [DIRAC_STD] 11.3 Wavelet transform data                        */
1765         return -1;
1766
1767     init_planes(s);
1768     return 0;
1769 }
1770
1771 static int get_delayed_pic(DiracContext *s, AVFrame *picture, int *got_frame)
1772 {
1773     DiracFrame *out = s->delay_frames[0];
1774     int i, out_idx  = 0;
1775     int ret;
1776
1777     /* find frame with lowest picture number */
1778     for (i = 1; s->delay_frames[i]; i++)
1779         if (s->delay_frames[i]->avframe->display_picture_number < out->avframe->display_picture_number) {
1780             out     = s->delay_frames[i];
1781             out_idx = i;
1782         }
1783
1784     for (i = out_idx; s->delay_frames[i]; i++)
1785         s->delay_frames[i] = s->delay_frames[i+1];
1786
1787     if (out) {
1788         out->avframe->reference ^= DELAYED_PIC_REF;
1789         *got_frame = 1;
1790         if((ret = av_frame_ref(picture, out->avframe)) < 0)
1791             return ret;
1792     }
1793
1794     return 0;
1795 }
1796
/**
 * Dirac Specification ->
 * 9.6 Parse Info Header Syntax. parse_info()
 *
 * Size in bytes of the header that precedes every data unit:
 * 4 byte start code + 1 byte parse code + 4 byte size + 4 byte previous size
 */
#define DATA_UNIT_HEADER_SIZE (4 + 1 + 4 + 4)
1803
1804 /* [DIRAC_STD] dirac_decode_data_unit makes reference to the while defined in 9.3
1805    inside the function parse_sequence() */
1806 static int dirac_decode_data_unit(AVCodecContext *avctx, const uint8_t *buf, int size)
1807 {
1808     DiracContext *s   = avctx->priv_data;
1809     DiracFrame *pic   = NULL;
1810     int ret, i, parse_code = buf[4];
1811     unsigned tmp;
1812
1813     if (size < DATA_UNIT_HEADER_SIZE)
1814         return -1;
1815
1816     init_get_bits(&s->gb, &buf[13], 8*(size - DATA_UNIT_HEADER_SIZE));
1817
1818     if (parse_code == pc_seq_header) {
1819         if (s->seen_sequence_header)
1820             return 0;
1821
1822         /* [DIRAC_STD] 10. Sequence header */
1823         if (avpriv_dirac_parse_sequence_header(avctx, &s->gb, &s->source))
1824             return -1;
1825
1826         avcodec_get_chroma_sub_sample(avctx->pix_fmt, &s->chroma_x_shift, &s->chroma_y_shift);
1827
1828         if (alloc_sequence_buffers(s))
1829             return -1;
1830
1831         s->seen_sequence_header = 1;
1832     } else if (parse_code == pc_eos) { /* [DIRAC_STD] End of Sequence */
1833         free_sequence_buffers(s);
1834         s->seen_sequence_header = 0;
1835     } else if (parse_code == pc_aux_data) {
1836         if (buf[13] == 1) {     /* encoder implementation/version */
1837             int ver[3];
1838             /* versions older than 1.0.8 don't store quant delta for
1839                subbands with only one codeblock */
1840             if (sscanf(buf+14, "Schroedinger %d.%d.%d", ver, ver+1, ver+2) == 3)
1841                 if (ver[0] == 1 && ver[1] == 0 && ver[2] <= 7)
1842                     s->old_delta_quant = 1;
1843         }
1844     } else if (parse_code & 0x8) {  /* picture data unit */
1845         if (!s->seen_sequence_header) {
1846             av_log(avctx, AV_LOG_DEBUG, "Dropping frame without sequence header\n");
1847             return -1;
1848         }
1849
1850         /* find an unused frame */
1851         for (i = 0; i < MAX_FRAMES; i++)
1852             if (s->all_frames[i].avframe->data[0] == NULL)
1853                 pic = &s->all_frames[i];
1854         if (!pic) {
1855             av_log(avctx, AV_LOG_ERROR, "framelist full\n");
1856             return -1;
1857         }
1858
1859         av_frame_unref(pic->avframe);
1860
1861         /* [DIRAC_STD] Defined in 9.6.1 ... */
1862         tmp            =  parse_code & 0x03;                   /* [DIRAC_STD] num_refs()      */
1863         if (tmp > 2) {
1864             av_log(avctx, AV_LOG_ERROR, "num_refs of 3\n");
1865             return -1;
1866         }
1867         s->num_refs    = tmp;
1868         s->is_arith    = (parse_code & 0x48) == 0x08;          /* [DIRAC_STD] using_ac()      */
1869         s->low_delay   = (parse_code & 0x88) == 0x88;          /* [DIRAC_STD] is_low_delay()  */
1870         pic->avframe->reference = (parse_code & 0x0C) == 0x0C;  /* [DIRAC_STD]  is_reference() */
1871         pic->avframe->key_frame = s->num_refs == 0;             /* [DIRAC_STD] is_intra()      */
1872         pic->avframe->pict_type = s->num_refs + 1;              /* Definition of AVPictureType in avutil.h */
1873
1874         if ((ret = get_buffer_with_edge(avctx, pic->avframe, (parse_code & 0x0C) == 0x0C ? AV_GET_BUFFER_FLAG_REF : 0)) < 0)
1875             return ret;
1876         s->current_picture = pic;
1877         s->plane[0].stride = pic->avframe->linesize[0];
1878         s->plane[1].stride = pic->avframe->linesize[1];
1879         s->plane[2].stride = pic->avframe->linesize[2];
1880
1881         if (alloc_buffers(s, FFMAX3(FFABS(s->plane[0].stride), FFABS(s->plane[1].stride), FFABS(s->plane[2].stride))) < 0)
1882             return AVERROR(ENOMEM);
1883
1884         /* [DIRAC_STD] 11.1 Picture parse. picture_parse() */
1885         if (dirac_decode_picture_header(s))
1886             return -1;
1887
1888         /* [DIRAC_STD] 13.0 Transform data syntax. transform_data() */
1889         if (dirac_decode_frame_internal(s))
1890             return -1;
1891     }
1892     return 0;
1893 }
1894
1895 static int dirac_decode_frame(AVCodecContext *avctx, void *data, int *got_frame, AVPacket *pkt)
1896 {
1897     DiracContext *s     = avctx->priv_data;
1898     AVFrame *picture    = data;
1899     uint8_t *buf        = pkt->data;
1900     int buf_size        = pkt->size;
1901     int i, data_unit_size, buf_idx = 0;
1902     int ret;
1903
1904     /* release unused frames */
1905     for (i = 0; i < MAX_FRAMES; i++)
1906         if (s->all_frames[i].avframe->data[0] && !s->all_frames[i].avframe->reference) {
1907             av_frame_unref(s->all_frames[i].avframe);
1908             memset(s->all_frames[i].interpolated, 0, sizeof(s->all_frames[i].interpolated));
1909         }
1910
1911     s->current_picture = NULL;
1912     *got_frame = 0;
1913
1914     /* end of stream, so flush delayed pics */
1915     if (buf_size == 0)
1916         return get_delayed_pic(s, (AVFrame *)data, got_frame);
1917
1918     for (;;) {
1919         /*[DIRAC_STD] Here starts the code from parse_info() defined in 9.6
1920           [DIRAC_STD] PARSE_INFO_PREFIX = "BBCD" as defined in ISO/IEC 646
1921           BBCD start code search */
1922         for (; buf_idx + DATA_UNIT_HEADER_SIZE < buf_size; buf_idx++) {
1923             if (buf[buf_idx  ] == 'B' && buf[buf_idx+1] == 'B' &&
1924                 buf[buf_idx+2] == 'C' && buf[buf_idx+3] == 'D')
1925                 break;
1926         }
1927         /* BBCD found or end of data */
1928         if (buf_idx + DATA_UNIT_HEADER_SIZE >= buf_size)
1929             break;
1930
1931         data_unit_size = AV_RB32(buf+buf_idx+5);
1932         if (buf_idx + data_unit_size > buf_size || !data_unit_size) {
1933             if(buf_idx + data_unit_size > buf_size)
1934             av_log(s->avctx, AV_LOG_ERROR,
1935                    "Data unit with size %d is larger than input buffer, discarding\n",
1936                    data_unit_size);
1937             buf_idx += 4;
1938             continue;
1939         }
1940         /* [DIRAC_STD] dirac_decode_data_unit makes reference to the while defined in 9.3 inside the function parse_sequence() */
1941         if (dirac_decode_data_unit(avctx, buf+buf_idx, data_unit_size))
1942         {
1943             av_log(s->avctx, AV_LOG_ERROR,"Error in dirac_decode_data_unit\n");
1944             return -1;
1945         }
1946         buf_idx += data_unit_size;
1947     }
1948
1949     if (!s->current_picture)
1950         return buf_size;
1951
1952     if (s->current_picture->avframe->display_picture_number > s->frame_number) {
1953         DiracFrame *delayed_frame = remove_frame(s->delay_frames, s->frame_number);
1954
1955         s->current_picture->avframe->reference |= DELAYED_PIC_REF;
1956
1957         if (add_frame(s->delay_frames, MAX_DELAY, s->current_picture)) {
1958             int min_num = s->delay_frames[0]->avframe->display_picture_number;
1959             /* Too many delayed frames, so we display the frame with the lowest pts */
1960             av_log(avctx, AV_LOG_ERROR, "Delay frame overflow\n");
1961             delayed_frame = s->delay_frames[0];
1962
1963             for (i = 1; s->delay_frames[i]; i++)
1964                 if (s->delay_frames[i]->avframe->display_picture_number < min_num)
1965                     min_num = s->delay_frames[i]->avframe->display_picture_number;
1966
1967             delayed_frame = remove_frame(s->delay_frames, min_num);
1968             add_frame(s->delay_frames, MAX_DELAY, s->current_picture);
1969         }
1970
1971         if (delayed_frame) {
1972             delayed_frame->avframe->reference ^= DELAYED_PIC_REF;
1973             if((ret=av_frame_ref(data, delayed_frame->avframe)) < 0)
1974                 return ret;
1975             *got_frame = 1;
1976         }
1977     } else if (s->current_picture->avframe->display_picture_number == s->frame_number) {
1978         /* The right frame at the right time :-) */
1979         if((ret=av_frame_ref(data, s->current_picture->avframe)) < 0)
1980             return ret;
1981         *got_frame = 1;
1982     }
1983
1984     if (*got_frame)
1985         s->frame_number = picture->display_picture_number + 1;
1986
1987     return buf_idx;
1988 }
1989
1990 AVCodec ff_dirac_decoder = {
1991     .name           = "dirac",
1992     .long_name      = NULL_IF_CONFIG_SMALL("BBC Dirac VC-2"),
1993     .type           = AVMEDIA_TYPE_VIDEO,
1994     .id             = AV_CODEC_ID_DIRAC,
1995     .priv_data_size = sizeof(DiracContext),
1996     .init           = dirac_decode_init,
1997     .close          = dirac_decode_end,
1998     .decode         = dirac_decode_frame,
1999     .capabilities   = CODEC_CAP_DELAY,
2000     .flush          = dirac_decode_flush,
2001 };