git.sesse.net Git - ffmpeg/blob - libavcodec/diracdec.c

   1 /*
   2  * Copyright (C) 2007 Marco Gerards <marco@gnu.org>
   3  * Copyright (C) 2009 David Conrad
   4  * Copyright (C) 2011 Jordi Ortiz
   5  *
   6  * This file is part of FFmpeg.
   7  *
   8  * FFmpeg is free software; you can redistribute it and/or
   9  * modify it under the terms of the GNU Lesser General Public
  10  * License as published by the Free Software Foundation; either
  11  * version 2.1 of the License, or (at your option) any later version.
  12  *
  13  * FFmpeg is distributed in the hope that it will be useful,
  14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  16  * Lesser General Public License for more details.
  17  *
  18  * You should have received a copy of the GNU Lesser General Public
  19  * License along with FFmpeg; if not, write to the Free Software
  20  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  21  */
  22
  23 /**
  24  * @file
  25  * Dirac Decoder
  26  * @author Marco Gerards <marco@gnu.org>, David Conrad, Jordi Ortiz <nenjordi@gmail.com>
  27  */
  28
  29 #include "avcodec.h"
  30 #include "get_bits.h"
  31 #include "bytestream.h"
  32 #include "internal.h"
  33 #include "golomb.h"
  34 #include "dirac_arith.h"
  35 #include "mpeg12data.h"
  36 #include "libavcodec/mpegvideo.h"
  37 #include "mpegvideoencdsp.h"
  38 #include "dirac_dwt.h"
  39 #include "dirac.h"
  40 #include "diracdsp.h"
  41 #include "videodsp.h"
  42
  43 /**
  44  * The spec limits the number of wavelet decompositions to 4 for both
  45  * level 1 (VC-2) and 128 (long-gop default).
  46  * 5 decompositions is the maximum before >16-bit buffers are needed.
  47  * Schroedinger allows this for DD 9,7 and 13,7 wavelets only, limiting
  48  * the others to 4 decompositions (or 3 for the fidelity filter).
  49  *
  50  * We use this instead of MAX_DECOMPOSITIONS to save some memory.
  51  */
  52 #define MAX_DWT_LEVELS 5
  53
  54 /**
  55  * The spec limits this to 3 for frame coding, but in practice can be as high as 6
  56  */
  57 #define MAX_REFERENCE_FRAMES 8
  58 #define MAX_DELAY 5         /* limit for main profile for frame coding (TODO: field coding) */
  59 #define MAX_FRAMES (MAX_REFERENCE_FRAMES + MAX_DELAY + 1)
  60 #define MAX_QUANT 68        /* max quant for VC-2 */
  61 #define MAX_BLOCKSIZE 32    /* maximum xblen/yblen we support */
  62
  63 /**
  64  * DiracBlock->ref flags, if set then the block does MC from the given ref
  65  */
  66 #define DIRAC_REF_MASK_REF1   1
  67 #define DIRAC_REF_MASK_REF2   2
  68 #define DIRAC_REF_MASK_GLOBAL 4
  69
  70 /**
  71  * Value of Picture.reference when Picture is not a reference picture, but
  72  * is held for delayed output.
  73  */
  74 #define DELAYED_PIC_REF 4
  75
  76 #define CALC_PADDING(size, depth)                       \
  77     (((size + (1 << depth) - 1) >> depth) << depth)
  78
  79 #define DIVRNDUP(a, b) (((a) + (b) - 1) / (b))
  80
/**
 * A picture held by the decoder together with its hpel[] planes.
 */
typedef struct {
    AVFrame *avframe;           /* allocated in dirac_decode_init(), freed in dirac_decode_end() */
    int interpolated[3];    /* 1 if hpel[] is valid */
    uint8_t *hpel[3][4];        /* NOTE(review): presumably [plane][half-pel position] -- confirm */
    uint8_t *hpel_base[3][4];   /* entries [plane][1..3] are released in free_sequence_buffers() */
} DiracFrame;
  87
/**
 * Per-block prediction data: motion vectors or DC values (sharing storage)
 * plus the DIRAC_REF_MASK_* flags.
 */
typedef struct {
    union {
        int16_t mv[2][2];   /* NOTE(review): presumably [reference][x,y] -- confirm against users */
        int16_t dc[3];      /* NOTE(review): presumably one DC value per plane -- confirm */
    } u; /* anonymous unions aren't in C99 :( */
    uint8_t ref;            /* combination of DIRAC_REF_MASK_* flags */
} DiracBlock;
  95
/**
 * One wavelet subband of a plane. The geometry fields are filled in by
 * init_planes(); the coded-data fields by decode_component().
 */
typedef struct SubBand {
    int level;              /* decomposition level this band belongs to */
    int orientation;        /* an enum dirac_subband value */
    int stride;             /* element distance between rows of this band inside the plane's IDWT buffer */
    int width;
    int height;
    int quant;              /* quantiser index, used to look up qscale_tab / qoffset_*_tab */
    IDWTELEM *ibuf;         /* first coefficient of this band inside the plane's idwt_buf */
    struct SubBand *parent; /* same-orientation band of the previous level; not assigned for level 0 */

    /* coded subband data located by decode_component() */
    unsigned length;            /* number of bytes of coded data, 0 if the band is skipped */
    const uint8_t *coeff_data;  /* start of the coded data inside the input bitstream */
} SubBand;
 110
/**
 * Per-component state: picture dimensions, IDWT buffers, motion block
 * geometry and the subbands of every decomposition level.
 */
typedef struct Plane {
    int width;              /* source width, shifted by the chroma shift for planes 1 and 2 */
    int height;             /* source height, shifted by the chroma shift for planes 1 and 2 */
    ptrdiff_t stride;

    int idwt_width;         /* width padded to a multiple of 1 << wavelet_depth */
    int idwt_height;        /* height padded to a multiple of 1 << wavelet_depth */
    int idwt_stride;        /* idwt_width aligned up to a multiple of 8 */
    IDWTELEM *idwt_buf;     /* idwt_buf_base advanced past the top padding rows */
    IDWTELEM *idwt_buf_base;/* start of the allocation made in alloc_sequence_buffers() */
    IDWTELEM *idwt_tmp;     /* scratch line allocated in alloc_sequence_buffers() */

    /* block length */
    uint8_t xblen;
    uint8_t yblen;
    /* block separation (block n+1 starts after this many pixels in block n) */
    uint8_t xbsep;
    uint8_t ybsep;
    /* amount of overspill on each edge (half of the overlap between blocks) */
    uint8_t xoffset;
    uint8_t yoffset;

    /* band[level][orientation]; at level 0 all four orientations are used,
     * at higher levels only orientations 1..3 are initialised */
    SubBand band[MAX_DWT_LEVELS][4];
} Plane;
 135
/**
 * Decoder private context (AVCodecContext.priv_data).
 */
typedef struct DiracContext {
    AVCodecContext *avctx;
    MpegvideoEncDSPContext mpvencdsp;
    VideoDSPContext vdsp;
    DiracDSPContext diracdsp;
    GetBitContext gb;
    dirac_source_params source;
    int seen_sequence_header;   /* cleared by dirac_decode_flush()               */
    int frame_number;           /* number of the next frame to display       */
    Plane plane[3];
    int chroma_x_shift;
    int chroma_y_shift;

    int zero_res;               /* zero residue flag                         */
    int is_arith;               /* whether coeffs use arith or golomb coding */
    int low_delay;              /* use the low delay syntax                  */
    int globalmc_flag;          /* use global motion compensation            */
    int num_refs;               /* number of reference pictures              */

    /* wavelet decoding */
    unsigned wavelet_depth;     /* depth of the IDWT                         */
    unsigned wavelet_idx;

    /**
     * schroedinger older than 1.0.8 doesn't store
     * quant delta if only one codebook exists in a band
     */
    unsigned old_delta_quant;
    unsigned codeblock_mode;    /* non-zero: each codeblock may carry a quantiser delta */

    /* number of codeblocks per subband in each direction, indexed by
     * level + (orientation != subband_ll) */
    struct {
        unsigned width;
        unsigned height;
    } codeblock[MAX_DWT_LEVELS+1];

    struct {
        unsigned num_x;         /* number of horizontal slices               */
        unsigned num_y;         /* number of vertical slices                 */
        AVRational bytes;       /* average bytes per slice                   */
        uint8_t quant[MAX_DWT_LEVELS][4]; /* [DIRAC_STD] E.1 */
    } lowdelay;

    struct {
        int pan_tilt[2];        /* pan/tilt vector                           */
        int zrs[2][2];          /* zoom/rotate/shear matrix                  */
        int perspective[2];     /* perspective vector                        */
        unsigned zrs_exp;
        unsigned perspective_exp;
    } globalmc[2];

    /* motion compensation */
    /* NOTE(review): the label on mv_precision duplicates the one on
     * weight_log2denom; it probably should name the motion vector precision
     * field of the spec instead -- confirm */
    uint8_t mv_precision;       /* [DIRAC_STD] REFS_WT_PRECISION             */
    int16_t weight[2];          /* [DIRAC_STD] REF1_WT and REF2_WT           */
    unsigned weight_log2denom;  /* [DIRAC_STD] REFS_WT_PRECISION             */

    int blwidth;                /* number of blocks (horizontally)           */
    int blheight;               /* number of blocks (vertically)             */
    int sbwidth;                /* number of superblocks (horizontally)      */
    int sbheight;               /* number of superblocks (vertically)        */

    uint8_t *sbsplit;           /* allocated in alloc_sequence_buffers()     */
    DiracBlock *blmotion;       /* allocated in alloc_sequence_buffers()     */

    uint8_t *edge_emu_buffer[4];
    uint8_t *edge_emu_buffer_base; /* allocated in alloc_buffers()           */

    uint16_t *mctmp;            /* buffer holding the MC data multiplied by OBMC weights */
    uint8_t *mcscratch;
    int buffer_stride;          /* stride the buffers of alloc_buffers() were sized for, 0 if none */

    DECLARE_ALIGNED(16, uint8_t, obmc_weight)[3][MAX_BLOCKSIZE*MAX_BLOCKSIZE];

    void (*put_pixels_tab[4])(uint8_t *dst, const uint8_t *src[5], int stride, int h);
    void (*avg_pixels_tab[4])(uint8_t *dst, const uint8_t *src[5], int stride, int h);
    void (*add_obmc)(uint16_t *dst, const uint8_t *src, int stride, const uint8_t *obmc_weight, int yblen);
    dirac_weight_func weight_func;
    dirac_biweight_func biweight_func;

    DiracFrame *current_picture;
    DiracFrame *ref_pics[2];

    /* NULL-terminated lists pointing into all_frames[] */
    DiracFrame *ref_frames[MAX_REFERENCE_FRAMES+1];
    DiracFrame *delay_frames[MAX_DELAY+1];
    DiracFrame all_frames[MAX_FRAMES];
} DiracContext;
 221
 222 /**
 223  * Dirac Specification ->
 224  * Parse code values. 9.6.1 Table 9.1
 225  */
/* Parse codes of the data units that do not carry picture data. */
enum dirac_parse_code {
    pc_seq_header         = 0x00,
    pc_eos                = 0x10,
    pc_aux_data           = 0x20,
    pc_padding            = 0x30,
};
 232
/**
 * Subband orientations; the values double as the second index of
 * Plane.band[][] and are stored in SubBand.orientation.
 */
enum dirac_subband {
    subband_ll = 0,
    subband_hl = 1,
    subband_lh = 2,
    subband_hh = 3,
    subband_nb,     /* number of orientations */
};
 240
/**
 * Default quantisation matrices: seven sets of 4x4 values.
 * NOTE(review): presumably indexed [wavelet_idx][level][orientation];
 * the user of this table is not visible here -- confirm.
 */
static const uint8_t default_qmat[][4][4] = {
    { { 5,  3,  3,  0}, { 0,  4,  4,  1}, { 0,  5,  5,  2}, { 0,  6,  6,  3} },
    { { 4,  2,  2,  0}, { 0,  4,  4,  2}, { 0,  5,  5,  3}, { 0,  7,  7,  5} },
    { { 5,  3,  3,  0}, { 0,  4,  4,  1}, { 0,  5,  5,  2}, { 0,  6,  6,  3} },
    { { 8,  4,  4,  0}, { 0,  4,  4,  0}, { 0,  4,  4,  0}, { 0,  4,  4,  0} },
    { { 8,  4,  4,  0}, { 0,  4,  4,  0}, { 0,  4,  4,  0}, { 0,  4,  4,  0} },
    { { 0,  4,  4,  8}, { 0,  8,  8, 12}, { 0, 13, 13, 17}, { 0, 17, 17, 21} },
    { { 3,  1,  1,  0}, { 0,  4,  4,  2}, { 0,  6,  6,  5}, { 0,  9,  9,  7} },
};
 250
 251 static const int qscale_tab[MAX_QUANT+1] = {
 252     4,     5,     6,     7,     8,    10,    11,    13,
 253     16,    19,    23,    27,    32,    38,    45,    54,
 254     64,    76,    91,   108,   128,   152,   181,   215,
 255     256,   304,   362,   431,   512,   609,   724,   861,
 256     1024,  1218,  1448,  1722,  2048,  2435,  2896,  3444,
 257     4096,  4871,  5793,  6889,  8192,  9742, 11585, 13777,
 258     16384, 19484, 23170, 27554, 32768, 38968, 46341, 55109,
 259     65536, 77936
 260 };
 261
 262 static const int qoffset_intra_tab[MAX_QUANT+1] = {
 263     1,     2,     3,     4,     4,     5,     6,     7,
 264     8,    10,    12,    14,    16,    19,    23,    27,
 265     32,    38,    46,    54,    64,    76,    91,   108,
 266     128,   152,   181,   216,   256,   305,   362,   431,
 267     512,   609,   724,   861,  1024,  1218,  1448,  1722,
 268     2048,  2436,  2897,  3445,  4096,  4871,  5793,  6889,
 269     8192,  9742, 11585, 13777, 16384, 19484, 23171, 27555,
 270     32768, 38968
 271 };
 272
 273 static const int qoffset_inter_tab[MAX_QUANT+1] = {
 274     1,     2,     2,     3,     3,     4,     4,     5,
 275     6,     7,     9,    10,    12,    14,    17,    20,
 276     24,    29,    34,    41,    48,    57,    68,    81,
 277     96,   114,   136,   162,   192,   228,   272,   323,
 278     384,   457,   543,   646,   768,   913,  1086,  1292,
 279     1536,  1827,  2172,  2583,  3072,  3653,  4344,  5166,
 280     6144,  7307,  8689, 10333, 12288, 14613, 17378, 20666,
 281     24576, 29226
 282 };
 283
 284 /* magic number division by 3 from schroedinger */
 285 static inline int divide3(int x)
 286 {
 287     return ((x+1)*21845 + 10922) >> 16;
 288 }
 289
 290 static DiracFrame *remove_frame(DiracFrame *framelist[], int picnum)
 291 {
 292     DiracFrame *remove_pic = NULL;
 293     int i, remove_idx = -1;
 294
 295     for (i = 0; framelist[i]; i++)
 296         if (framelist[i]->avframe->display_picture_number == picnum) {
 297             remove_pic = framelist[i];
 298             remove_idx = i;
 299         }
 300
 301     if (remove_pic)
 302         for (i = remove_idx; framelist[i]; i++)
 303             framelist[i] = framelist[i+1];
 304
 305     return remove_pic;
 306 }
 307
 308 static int add_frame(DiracFrame *framelist[], int maxframes, DiracFrame *frame)
 309 {
 310     int i;
 311     for (i = 0; i < maxframes; i++)
 312         if (!framelist[i]) {
 313             framelist[i] = frame;
 314             return 0;
 315         }
 316     return -1;
 317 }
 318
 319 static int alloc_sequence_buffers(DiracContext *s)
 320 {
 321     int sbwidth  = DIVRNDUP(s->source.width,  4);
 322     int sbheight = DIVRNDUP(s->source.height, 4);
 323     int i, w, h, top_padding;
 324
 325     /* todo: think more about this / use or set Plane here */
 326     for (i = 0; i < 3; i++) {
 327         int max_xblen = MAX_BLOCKSIZE >> (i ? s->chroma_x_shift : 0);
 328         int max_yblen = MAX_BLOCKSIZE >> (i ? s->chroma_y_shift : 0);
 329         w = s->source.width  >> (i ? s->chroma_x_shift : 0);
 330         h = s->source.height >> (i ? s->chroma_y_shift : 0);
 331
 332         /* we allocate the max we support here since num decompositions can
 333          * change from frame to frame. Stride is aligned to 16 for SIMD, and
 334          * 1<<MAX_DWT_LEVELS top padding to avoid if(y>0) in arith decoding
 335          * MAX_BLOCKSIZE padding for MC: blocks can spill up to half of that
 336          * on each side */
 337         top_padding = FFMAX(1<<MAX_DWT_LEVELS, max_yblen/2);
 338         w = FFALIGN(CALC_PADDING(w, MAX_DWT_LEVELS), 8); /* FIXME: Should this be 16 for SSE??? */
 339         h = top_padding + CALC_PADDING(h, MAX_DWT_LEVELS) + max_yblen/2;
 340
 341         s->plane[i].idwt_buf_base = av_mallocz_array((w+max_xblen), h * sizeof(IDWTELEM));
 342         s->plane[i].idwt_tmp      = av_malloc_array((w+16), sizeof(IDWTELEM));
 343         s->plane[i].idwt_buf      = s->plane[i].idwt_buf_base + top_padding*w;
 344         if (!s->plane[i].idwt_buf_base || !s->plane[i].idwt_tmp)
 345             return AVERROR(ENOMEM);
 346     }
 347
 348     /* fixme: allocate using real stride here */
 349     s->sbsplit  = av_malloc_array(sbwidth, sbheight);
 350     s->blmotion = av_malloc_array(sbwidth, sbheight * 16 * sizeof(*s->blmotion));
 351
 352     if (!s->sbsplit || !s->blmotion)
 353         return AVERROR(ENOMEM);
 354     return 0;
 355 }
 356
 357 static int alloc_buffers(DiracContext *s, int stride)
 358 {
 359     int w = s->source.width;
 360     int h = s->source.height;
 361
 362     av_assert0(stride >= w);
 363     stride += 64;
 364
 365     if (s->buffer_stride >= stride)
 366         return 0;
 367     s->buffer_stride = 0;
 368
 369     av_freep(&s->edge_emu_buffer_base);
 370     memset(s->edge_emu_buffer, 0, sizeof(s->edge_emu_buffer));
 371     av_freep(&s->mctmp);
 372     av_freep(&s->mcscratch);
 373
 374     s->edge_emu_buffer_base = av_malloc_array(stride, MAX_BLOCKSIZE);
 375
 376     s->mctmp     = av_malloc_array((stride+MAX_BLOCKSIZE), (h+MAX_BLOCKSIZE) * sizeof(*s->mctmp));
 377     s->mcscratch = av_malloc_array(stride, MAX_BLOCKSIZE);
 378
 379     if (!s->edge_emu_buffer_base || !s->mctmp || !s->mcscratch)
 380         return AVERROR(ENOMEM);
 381
 382     s->buffer_stride = stride;
 383     return 0;
 384 }
 385
 386 static void free_sequence_buffers(DiracContext *s)
 387 {
 388     int i, j, k;
 389
 390     for (i = 0; i < MAX_FRAMES; i++) {
 391         if (s->all_frames[i].avframe->data[0]) {
 392             av_frame_unref(s->all_frames[i].avframe);
 393             memset(s->all_frames[i].interpolated, 0, sizeof(s->all_frames[i].interpolated));
 394         }
 395
 396         for (j = 0; j < 3; j++)
 397             for (k = 1; k < 4; k++)
 398                 av_freep(&s->all_frames[i].hpel_base[j][k]);
 399     }
 400
 401     memset(s->ref_frames, 0, sizeof(s->ref_frames));
 402     memset(s->delay_frames, 0, sizeof(s->delay_frames));
 403
 404     for (i = 0; i < 3; i++) {
 405         av_freep(&s->plane[i].idwt_buf_base);
 406         av_freep(&s->plane[i].idwt_tmp);
 407     }
 408
 409     s->buffer_stride = 0;
 410     av_freep(&s->sbsplit);
 411     av_freep(&s->blmotion);
 412     av_freep(&s->edge_emu_buffer_base);
 413
 414     av_freep(&s->mctmp);
 415     av_freep(&s->mcscratch);
 416 }
 417
 418 static av_cold int dirac_decode_init(AVCodecContext *avctx)
 419 {
 420     DiracContext *s = avctx->priv_data;
 421     int i;
 422
 423     s->avctx = avctx;
 424     s->frame_number = -1;
 425
 426     ff_diracdsp_init(&s->diracdsp);
 427     ff_mpegvideoencdsp_init(&s->mpvencdsp, avctx);
 428     ff_videodsp_init(&s->vdsp, 8);
 429
 430     for (i = 0; i < MAX_FRAMES; i++) {
 431         s->all_frames[i].avframe = av_frame_alloc();
 432         if (!s->all_frames[i].avframe) {
 433             while (i > 0)
 434                 av_frame_free(&s->all_frames[--i].avframe);
 435             return AVERROR(ENOMEM);
 436         }
 437     }
 438
 439     return 0;
 440 }
 441
 442 static void dirac_decode_flush(AVCodecContext *avctx)
 443 {
 444     DiracContext *s = avctx->priv_data;
 445     free_sequence_buffers(s);
 446     s->seen_sequence_header = 0;
 447     s->frame_number = -1;
 448 }
 449
 450 static av_cold int dirac_decode_end(AVCodecContext *avctx)
 451 {
 452     DiracContext *s = avctx->priv_data;
 453     int i;
 454
 455     dirac_decode_flush(avctx);
 456     for (i = 0; i < MAX_FRAMES; i++)
 457         av_frame_free(&s->all_frames[i].avframe);
 458
 459     return 0;
 460 }
 461
/* Select the sign context from a prediction value x: CTX_SIGN_ZERO for
 * x == 0, one above it for x > 0 and one below it for x < 0. */
#define SIGN_CTX(x) (CTX_SIGN_ZERO + ((x) > 0) - ((x) < 0))
 463
 464 static inline void coeff_unpack_arith(DiracArith *c, int qfactor, int qoffset,
 465                                       SubBand *b, IDWTELEM *buf, int x, int y)
 466 {
 467     int coeff, sign;
 468     int sign_pred = 0;
 469     int pred_ctx = CTX_ZPZN_F1;
 470
 471     /* Check if the parent subband has a 0 in the corresponding position */
 472     if (b->parent)
 473         pred_ctx += !!b->parent->ibuf[b->parent->stride * (y>>1) + (x>>1)] << 1;
 474
 475     if (b->orientation == subband_hl)
 476         sign_pred = buf[-b->stride];
 477
 478     /* Determine if the pixel has only zeros in its neighbourhood */
 479     if (x) {
 480         pred_ctx += !(buf[-1] | buf[-b->stride] | buf[-1-b->stride]);
 481         if (b->orientation == subband_lh)
 482             sign_pred = buf[-1];
 483     } else {
 484         pred_ctx += !buf[-b->stride];
 485     }
 486
 487     coeff = dirac_get_arith_uint(c, pred_ctx, CTX_COEFF_DATA);
 488     if (coeff) {
 489         coeff = (coeff * qfactor + qoffset + 2) >> 2;
 490         sign  = dirac_get_arith_bit(c, SIGN_CTX(sign_pred));
 491         coeff = (coeff ^ -sign) + sign;
 492     }
 493     *buf = coeff;
 494 }
 495
 496 static inline int coeff_unpack_golomb(GetBitContext *gb, int qfactor, int qoffset)
 497 {
 498     int sign, coeff;
 499
 500     coeff = svq3_get_ue_golomb(gb);
 501     if (coeff) {
 502         coeff = (coeff * qfactor + qoffset + 2) >> 2;
 503         sign  = get_bits1(gb);
 504         coeff = (coeff ^ -sign) + sign;
 505     }
 506     return coeff;
 507 }
 508
 509 /**
 510  * Decode the coeffs in the rectangle defined by left, right, top, bottom
 511  * [DIRAC_STD] 13.4.3.2 Codeblock unpacking loop. codeblock()
 512  */
 513 static inline void codeblock(DiracContext *s, SubBand *b,
 514                              GetBitContext *gb, DiracArith *c,
 515                              int left, int right, int top, int bottom,
 516                              int blockcnt_one, int is_arith)
 517 {
 518     int x, y, zero_block;
 519     int qoffset, qfactor;
 520     IDWTELEM *buf;
 521
 522     /* check for any coded coefficients in this codeblock */
 523     if (!blockcnt_one) {
 524         if (is_arith)
 525             zero_block = dirac_get_arith_bit(c, CTX_ZERO_BLOCK);
 526         else
 527             zero_block = get_bits1(gb);
 528
 529         if (zero_block)
 530             return;
 531     }
 532
 533     if (s->codeblock_mode && !(s->old_delta_quant && blockcnt_one)) {
 534         int quant = b->quant;
 535         if (is_arith)
 536             quant += dirac_get_arith_int(c, CTX_DELTA_Q_F, CTX_DELTA_Q_DATA);
 537         else
 538             quant += dirac_get_se_golomb(gb);
 539         if (quant < 0) {
 540             av_log(s->avctx, AV_LOG_ERROR, "Invalid quant\n");
 541             return;
 542         }
 543         b->quant = quant;
 544     }
 545
 546     b->quant = FFMIN(b->quant, MAX_QUANT);
 547
 548     qfactor = qscale_tab[b->quant];
 549     /* TODO: context pointer? */
 550     if (!s->num_refs)
 551         qoffset = qoffset_intra_tab[b->quant];
 552     else
 553         qoffset = qoffset_inter_tab[b->quant];
 554
 555     buf = b->ibuf + top * b->stride;
 556     for (y = top; y < bottom; y++) {
 557         for (x = left; x < right; x++) {
 558             /* [DIRAC_STD] 13.4.4 Subband coefficients. coeff_unpack() */
 559             if (is_arith)
 560                 coeff_unpack_arith(c, qfactor, qoffset, b, buf+x, x, y);
 561             else
 562                 buf[x] = coeff_unpack_golomb(gb, qfactor, qoffset);
 563         }
 564         buf += b->stride;
 565     }
 566 }
 567
 568 /**
 569  * Dirac Specification ->
 570  * 13.3 intra_dc_prediction(band)
 571  */
 572 static inline void intra_dc_prediction(SubBand *b)
 573 {
 574     IDWTELEM *buf = b->ibuf;
 575     int x, y;
 576
 577     for (x = 1; x < b->width; x++)
 578         buf[x] += buf[x-1];
 579     buf += b->stride;
 580
 581     for (y = 1; y < b->height; y++) {
 582         buf[0] += buf[-b->stride];
 583
 584         for (x = 1; x < b->width; x++) {
 585             int pred = buf[x - 1] + buf[x - b->stride] + buf[x - b->stride-1];
 586             buf[x]  += divide3(pred);
 587         }
 588         buf += b->stride;
 589     }
 590 }
 591
 592 /**
 593  * Dirac Specification ->
 594  * 13.4.2 Non-skipped subbands.  subband_coeffs()
 595  */
 596 static av_always_inline void decode_subband_internal(DiracContext *s, SubBand *b, int is_arith)
 597 {
 598     int cb_x, cb_y, left, right, top, bottom;
 599     DiracArith c;
 600     GetBitContext gb;
 601     int cb_width  = s->codeblock[b->level + (b->orientation != subband_ll)].width;
 602     int cb_height = s->codeblock[b->level + (b->orientation != subband_ll)].height;
 603     int blockcnt_one = (cb_width + cb_height) == 2;
 604
 605     if (!b->length)
 606         return;
 607
 608     init_get_bits8(&gb, b->coeff_data, b->length);
 609
 610     if (is_arith)
 611         ff_dirac_init_arith_decoder(&c, &gb, b->length);
 612
 613     top = 0;
 614     for (cb_y = 0; cb_y < cb_height; cb_y++) {
 615         bottom = (b->height * (cb_y+1LL)) / cb_height;
 616         left = 0;
 617         for (cb_x = 0; cb_x < cb_width; cb_x++) {
 618             right = (b->width * (cb_x+1LL)) / cb_width;
 619             codeblock(s, b, &gb, &c, left, right, top, bottom, blockcnt_one, is_arith);
 620             left = right;
 621         }
 622         top = bottom;
 623     }
 624
 625     if (b->orientation == subband_ll && s->num_refs == 0)
 626         intra_dc_prediction(b);
 627 }
 628
 629 static int decode_subband_arith(AVCodecContext *avctx, void *b)
 630 {
 631     DiracContext *s = avctx->priv_data;
 632     decode_subband_internal(s, b, 1);
 633     return 0;
 634 }
 635
 636 static int decode_subband_golomb(AVCodecContext *avctx, void *arg)
 637 {
 638     DiracContext *s = avctx->priv_data;
 639     SubBand **b     = arg;
 640     decode_subband_internal(s, *b, 0);
 641     return 0;
 642 }
 643
 644 /**
 645  * Dirac Specification ->
 646  * [DIRAC_STD] 13.4.1 core_transform_data()
 647  */
 648 static void decode_component(DiracContext *s, int comp)
 649 {
 650     AVCodecContext *avctx = s->avctx;
 651     SubBand *bands[3*MAX_DWT_LEVELS+1];
 652     enum dirac_subband orientation;
 653     int level, num_bands = 0;
 654
 655     /* Unpack all subbands at all levels. */
 656     for (level = 0; level < s->wavelet_depth; level++) {
 657         for (orientation = !!level; orientation < 4; orientation++) {
 658             SubBand *b = &s->plane[comp].band[level][orientation];
 659             bands[num_bands++] = b;
 660
 661             align_get_bits(&s->gb);
 662             /* [DIRAC_STD] 13.4.2 subband() */
 663             b->length = svq3_get_ue_golomb(&s->gb);
 664             if (b->length) {
 665                 b->quant = svq3_get_ue_golomb(&s->gb);
 666                 align_get_bits(&s->gb);
 667                 b->coeff_data = s->gb.buffer + get_bits_count(&s->gb)/8;
 668                 b->length = FFMIN(b->length, FFMAX(get_bits_left(&s->gb)/8, 0));
 669                 skip_bits_long(&s->gb, b->length*8);
 670             }
 671         }
 672         /* arithmetic coding has inter-level dependencies, so we can only execute one level at a time */
 673         if (s->is_arith)
 674             avctx->execute(avctx, decode_subband_arith, &s->plane[comp].band[level][!!level],
 675                            NULL, 4-!!level, sizeof(SubBand));
 676     }
 677     /* golomb coding has no inter-level dependencies, so we can execute all subbands in parallel */
 678     if (!s->is_arith)
 679         avctx->execute(avctx, decode_subband_golomb, bands, NULL, num_bands, sizeof(SubBand*));
 680 }
 681
 682 /* [DIRAC_STD] 13.5.5.2 Luma slice subband data. luma_slice_band(level,orient,sx,sy) --> if b2 == NULL */
 683 /* [DIRAC_STD] 13.5.5.3 Chroma slice subband data. chroma_slice_band(level,orient,sx,sy) --> if b2 != NULL */
 684 static void lowdelay_subband(DiracContext *s, GetBitContext *gb, int quant,
 685                              int slice_x, int slice_y, int bits_end,
 686                              SubBand *b1, SubBand *b2)
 687 {
 688     int left   = b1->width  * slice_x    / s->lowdelay.num_x;
 689     int right  = b1->width  *(slice_x+1) / s->lowdelay.num_x;
 690     int top    = b1->height * slice_y    / s->lowdelay.num_y;
 691     int bottom = b1->height *(slice_y+1) / s->lowdelay.num_y;
 692
 693     int qfactor = qscale_tab[FFMIN(quant, MAX_QUANT)];
 694     int qoffset = qoffset_intra_tab[FFMIN(quant, MAX_QUANT)];
 695
 696     IDWTELEM *buf1 =      b1->ibuf + top * b1->stride;
 697     IDWTELEM *buf2 = b2 ? b2->ibuf + top * b2->stride : NULL;
 698     int x, y;
 699     /* we have to constantly check for overread since the spec explicitly
 700        requires this, with the meaning that all remaining coeffs are set to 0 */
 701     if (get_bits_count(gb) >= bits_end)
 702         return;
 703
 704     for (y = top; y < bottom; y++) {
 705         for (x = left; x < right; x++) {
 706             buf1[x] = coeff_unpack_golomb(gb, qfactor, qoffset);
 707             if (get_bits_count(gb) >= bits_end)
 708                 return;
 709             if (buf2) {
 710                 buf2[x] = coeff_unpack_golomb(gb, qfactor, qoffset);
 711                 if (get_bits_count(gb) >= bits_end)
 712                     return;
 713             }
 714         }
 715         buf1 += b1->stride;
 716         if (buf2)
 717             buf2 += b2->stride;
 718     }
 719 }
 720
/* Work item describing one low-delay slice; filled in by decode_lowdelay()
 * and consumed by decode_lowdelay_slice(). */
struct lowdelay_slice {
    GetBitContext gb;   /* reader positioned at the first byte of the slice */
    int slice_x;        /* horizontal slice index */
    int slice_y;        /* vertical slice index */
    int bytes;          /* size of the slice in bytes */
};
 727
 728
 729 /**
 730  * Dirac Specification ->
 731  * 13.5.2 Slices. slice(sx,sy)
 732  */
 733 static int decode_lowdelay_slice(AVCodecContext *avctx, void *arg)
 734 {
 735     DiracContext *s = avctx->priv_data;
 736     struct lowdelay_slice *slice = arg;
 737     GetBitContext *gb = &slice->gb;
 738     enum dirac_subband orientation;
 739     int level, quant, chroma_bits, chroma_end;
 740
 741     int quant_base  = get_bits(gb, 7); /*[DIRAC_STD] qindex */
 742     int length_bits = av_log2(8 * slice->bytes)+1;
 743     int luma_bits   = get_bits_long(gb, length_bits);
 744     int luma_end    = get_bits_count(gb) + FFMIN(luma_bits, get_bits_left(gb));
 745
 746     /* [DIRAC_STD] 13.5.5.2 luma_slice_band */
 747     for (level = 0; level < s->wavelet_depth; level++)
 748         for (orientation = !!level; orientation < 4; orientation++) {
 749             quant = FFMAX(quant_base - s->lowdelay.quant[level][orientation], 0);
 750             lowdelay_subband(s, gb, quant, slice->slice_x, slice->slice_y, luma_end,
 751                              &s->plane[0].band[level][orientation], NULL);
 752         }
 753
 754     /* consume any unused bits from luma */
 755     skip_bits_long(gb, get_bits_count(gb) - luma_end);
 756
 757     chroma_bits = 8*slice->bytes - 7 - length_bits - luma_bits;
 758     chroma_end  = get_bits_count(gb) + FFMIN(chroma_bits, get_bits_left(gb));
 759     /* [DIRAC_STD] 13.5.5.3 chroma_slice_band */
 760     for (level = 0; level < s->wavelet_depth; level++)
 761         for (orientation = !!level; orientation < 4; orientation++) {
 762             quant = FFMAX(quant_base - s->lowdelay.quant[level][orientation], 0);
 763             lowdelay_subband(s, gb, quant, slice->slice_x, slice->slice_y, chroma_end,
 764                              &s->plane[1].band[level][orientation],
 765                              &s->plane[2].band[level][orientation]);
 766         }
 767
 768     return 0;
 769 }
 770
 771 /**
 772  * Dirac Specification ->
 773  * 13.5.1 low_delay_transform_data()
 774  */
 775 static int decode_lowdelay(DiracContext *s)
 776 {
 777     AVCodecContext *avctx = s->avctx;
 778     int slice_x, slice_y, bytes, bufsize;
 779     const uint8_t *buf;
 780     struct lowdelay_slice *slices;
 781     int slice_num = 0;
 782
 783     slices = av_mallocz_array(s->lowdelay.num_x, s->lowdelay.num_y * sizeof(struct lowdelay_slice));
 784     if (!slices)
 785         return AVERROR(ENOMEM);
 786
 787     align_get_bits(&s->gb);
 788     /*[DIRAC_STD] 13.5.2 Slices. slice(sx,sy) */
 789     buf = s->gb.buffer + get_bits_count(&s->gb)/8;
 790     bufsize = get_bits_left(&s->gb);
 791
 792     for (slice_y = 0; bufsize > 0 && slice_y < s->lowdelay.num_y; slice_y++)
 793         for (slice_x = 0; bufsize > 0 && slice_x < s->lowdelay.num_x; slice_x++) {
 794             bytes = (slice_num+1) * s->lowdelay.bytes.num / s->lowdelay.bytes.den
 795                 - slice_num    * s->lowdelay.bytes.num / s->lowdelay.bytes.den;
 796
 797             slices[slice_num].bytes   = bytes;
 798             slices[slice_num].slice_x = slice_x;
 799             slices[slice_num].slice_y = slice_y;
 800             init_get_bits(&slices[slice_num].gb, buf, bufsize);
 801             slice_num++;
 802
 803             buf     += bytes;
 804             bufsize -= bytes*8;
 805         }
 806
 807     avctx->execute(avctx, decode_lowdelay_slice, slices, NULL, slice_num,
 808                    sizeof(struct lowdelay_slice)); /* [DIRAC_STD] 13.5.2 Slices */
 809     intra_dc_prediction(&s->plane[0].band[0][0]);  /* [DIRAC_STD] 13.3 intra_dc_prediction() */
 810     intra_dc_prediction(&s->plane[1].band[0][0]);  /* [DIRAC_STD] 13.3 intra_dc_prediction() */
 811     intra_dc_prediction(&s->plane[2].band[0][0]);  /* [DIRAC_STD] 13.3 intra_dc_prediction() */
 812     av_free(slices);
 813     return 0;
 814 }
 815
 816 static void init_planes(DiracContext *s)
 817 {
 818     int i, w, h, level, orientation;
 819
 820     for (i = 0; i < 3; i++) {
 821         Plane *p = &s->plane[i];
 822
 823         p->width       = s->source.width  >> (i ? s->chroma_x_shift : 0);
 824         p->height      = s->source.height >> (i ? s->chroma_y_shift : 0);
 825         p->idwt_width  = w = CALC_PADDING(p->width , s->wavelet_depth);
 826         p->idwt_height = h = CALC_PADDING(p->height, s->wavelet_depth);
 827         p->idwt_stride = FFALIGN(p->idwt_width, 8);
 828
 829         for (level = s->wavelet_depth-1; level >= 0; level--) {
 830             w = w>>1;
 831             h = h>>1;
 832             for (orientation = !!level; orientation < 4; orientation++) {
 833                 SubBand *b = &p->band[level][orientation];
 834
 835                 b->ibuf   = p->idwt_buf;
 836                 b->level  = level;
 837                 b->stride = p->idwt_stride << (s->wavelet_depth - level);
 838                 b->width  = w;
 839                 b->height = h;
 840                 b->orientation = orientation;
 841
 842                 if (orientation & 1)
 843                     b->ibuf += w;
 844                 if (orientation > 1)
 845                     b->ibuf += b->stride>>1;
 846
 847                 if (level)
 848                     b->parent = &p->band[level-1][orientation];
 849             }
 850         }
 851
 852         if (i > 0) {
 853             p->xblen = s->plane[0].xblen >> s->chroma_x_shift;
 854             p->yblen = s->plane[0].yblen >> s->chroma_y_shift;
 855             p->xbsep = s->plane[0].xbsep >> s->chroma_x_shift;
 856             p->ybsep = s->plane[0].ybsep >> s->chroma_y_shift;
 857         }
 858
 859         p->xoffset = (p->xblen - p->xbsep)/2;
 860         p->yoffset = (p->yblen - p->ybsep)/2;
 861     }
 862 }
 863
 864 /**
 865  * Unpack the motion compensation parameters
 866  * Dirac Specification ->
 867  * 11.2 Picture prediction data. picture_prediction()
 868  */
 869 static int dirac_unpack_prediction_parameters(DiracContext *s)
 870 {
 871     static const uint8_t default_blen[] = { 4, 12, 16, 24 };
 872     static const uint8_t default_bsep[] = { 4,  8, 12, 16 };
 873
 874     GetBitContext *gb = &s->gb;
 875     unsigned idx, ref;
 876
 877     align_get_bits(gb);
 878     /* [DIRAC_STD] 11.2.2 Block parameters. block_parameters() */
 879     /* Luma and Chroma are equal. 11.2.3 */
 880     idx = svq3_get_ue_golomb(gb); /* [DIRAC_STD] index */
 881
 882     if (idx > 4) {
 883         av_log(s->avctx, AV_LOG_ERROR, "Block prediction index too high\n");
 884         return -1;
 885     }
 886
 887     if (idx == 0) {
 888         s->plane[0].xblen = svq3_get_ue_golomb(gb);
 889         s->plane[0].yblen = svq3_get_ue_golomb(gb);
 890         s->plane[0].xbsep = svq3_get_ue_golomb(gb);
 891         s->plane[0].ybsep = svq3_get_ue_golomb(gb);
 892     } else {
 893         /*[DIRAC_STD] preset_block_params(index). Table 11.1 */
 894         s->plane[0].xblen = default_blen[idx-1];
 895         s->plane[0].yblen = default_blen[idx-1];
 896         s->plane[0].xbsep = default_bsep[idx-1];
 897         s->plane[0].ybsep = default_bsep[idx-1];
 898     }
 899     /*[DIRAC_STD] 11.2.4 motion_data_dimensions()
 900       Calculated in function dirac_unpack_block_motion_data */
 901
 902     if (!s->plane[0].xbsep || !s->plane[0].ybsep || s->plane[0].xbsep < s->plane[0].xblen/2 || s->plane[0].ybsep < s->plane[0].yblen/2) {
 903         av_log(s->avctx, AV_LOG_ERROR, "Block separation too small\n");
 904         return -1;
 905     }
 906     if (s->plane[0].xbsep > s->plane[0].xblen || s->plane[0].ybsep > s->plane[0].yblen) {
 907         av_log(s->avctx, AV_LOG_ERROR, "Block separation greater than size\n");
 908         return -1;
 909     }
 910     if (FFMAX(s->plane[0].xblen, s->plane[0].yblen) > MAX_BLOCKSIZE) {
 911         av_log(s->avctx, AV_LOG_ERROR, "Unsupported large block size\n");
 912         return -1;
 913     }
 914
 915     /*[DIRAC_STD] 11.2.5 Motion vector precision. motion_vector_precision()
 916       Read motion vector precision */
 917     s->mv_precision = svq3_get_ue_golomb(gb);
 918     if (s->mv_precision > 3) {
 919         av_log(s->avctx, AV_LOG_ERROR, "MV precision finer than eighth-pel\n");
 920         return -1;
 921     }
 922
 923     /*[DIRAC_STD] 11.2.6 Global motion. global_motion()
 924       Read the global motion compensation parameters */
 925     s->globalmc_flag = get_bits1(gb);
 926     if (s->globalmc_flag) {
 927         memset(s->globalmc, 0, sizeof(s->globalmc));
 928         /* [DIRAC_STD] pan_tilt(gparams) */
 929         for (ref = 0; ref < s->num_refs; ref++) {
 930             if (get_bits1(gb)) {
 931                 s->globalmc[ref].pan_tilt[0] = dirac_get_se_golomb(gb);
 932                 s->globalmc[ref].pan_tilt[1] = dirac_get_se_golomb(gb);
 933             }
 934             /* [DIRAC_STD] zoom_rotate_shear(gparams)
 935                zoom/rotation/shear parameters */
 936             if (get_bits1(gb)) {
 937                 s->globalmc[ref].zrs_exp   = svq3_get_ue_golomb(gb);
 938                 s->globalmc[ref].zrs[0][0] = dirac_get_se_golomb(gb);
 939                 s->globalmc[ref].zrs[0][1] = dirac_get_se_golomb(gb);
 940                 s->globalmc[ref].zrs[1][0] = dirac_get_se_golomb(gb);
 941                 s->globalmc[ref].zrs[1][1] = dirac_get_se_golomb(gb);
 942             } else {
 943                 s->globalmc[ref].zrs[0][0] = 1;
 944                 s->globalmc[ref].zrs[1][1] = 1;
 945             }
 946             /* [DIRAC_STD] perspective(gparams) */
 947             if (get_bits1(gb)) {
 948                 s->globalmc[ref].perspective_exp = svq3_get_ue_golomb(gb);
 949                 s->globalmc[ref].perspective[0]  = dirac_get_se_golomb(gb);
 950                 s->globalmc[ref].perspective[1]  = dirac_get_se_golomb(gb);
 951             }
 952         }
 953     }
 954
 955     /*[DIRAC_STD] 11.2.7 Picture prediction mode. prediction_mode()
 956       Picture prediction mode, not currently used. */
 957     if (svq3_get_ue_golomb(gb)) {
 958         av_log(s->avctx, AV_LOG_ERROR, "Unknown picture prediction mode\n");
 959         return -1;
 960     }
 961
 962     /* [DIRAC_STD] 11.2.8 Reference picture weight. reference_picture_weights()
 963        just data read, weight calculation will be done later on. */
 964     s->weight_log2denom = 1;
 965     s->weight[0]        = 1;
 966     s->weight[1]        = 1;
 967
 968     if (get_bits1(gb)) {
 969         s->weight_log2denom = svq3_get_ue_golomb(gb);
 970         s->weight[0] = dirac_get_se_golomb(gb);
 971         if (s->num_refs == 2)
 972             s->weight[1] = dirac_get_se_golomb(gb);
 973     }
 974     return 0;
 975 }
 976
 977 /**
 978  * Dirac Specification ->
 979  * 11.3 Wavelet transform data. wavelet_transform()
 980  */
 981 static int dirac_unpack_idwt_params(DiracContext *s)
 982 {
 983     GetBitContext *gb = &s->gb;
 984     int i, level;
 985     unsigned tmp;
 986
 987 #define CHECKEDREAD(dst, cond, errmsg) \
 988     tmp = svq3_get_ue_golomb(gb); \
 989     if (cond) { \
 990         av_log(s->avctx, AV_LOG_ERROR, errmsg); \
 991         return -1; \
 992     }\
 993     dst = tmp;
 994
 995     align_get_bits(gb);
 996
 997     s->zero_res = s->num_refs ? get_bits1(gb) : 0;
 998     if (s->zero_res)
 999         return 0;
1000
1001     /*[DIRAC_STD] 11.3.1 Transform parameters. transform_parameters() */
1002     CHECKEDREAD(s->wavelet_idx, tmp > 6, "wavelet_idx is too big\n")
1003
1004     CHECKEDREAD(s->wavelet_depth, tmp > MAX_DWT_LEVELS || tmp < 1, "invalid number of DWT decompositions\n")
1005
1006     if (!s->low_delay) {
1007         /* Codeblock parameters (core syntax only) */
1008         if (get_bits1(gb)) {
1009             for (i = 0; i <= s->wavelet_depth; i++) {
1010                 CHECKEDREAD(s->codeblock[i].width , tmp < 1 || tmp > (s->avctx->width >>s->wavelet_depth-i), "codeblock width invalid\n")
1011                 CHECKEDREAD(s->codeblock[i].height, tmp < 1 || tmp > (s->avctx->height>>s->wavelet_depth-i), "codeblock height invalid\n")
1012             }
1013
1014             CHECKEDREAD(s->codeblock_mode, tmp > 1, "unknown codeblock mode\n")
1015         } else
1016             for (i = 0; i <= s->wavelet_depth; i++)
1017                 s->codeblock[i].width = s->codeblock[i].height = 1;
1018     } else {
1019         /* Slice parameters + quantization matrix*/
1020         /*[DIRAC_STD] 11.3.4 Slice coding Parameters (low delay syntax only). slice_parameters() */
1021         s->lowdelay.num_x     = svq3_get_ue_golomb(gb);
1022         s->lowdelay.num_y     = svq3_get_ue_golomb(gb);
1023         s->lowdelay.bytes.num = svq3_get_ue_golomb(gb);
1024         s->lowdelay.bytes.den = svq3_get_ue_golomb(gb);
1025
1026         if (s->lowdelay.bytes.den <= 0) {
1027             av_log(s->avctx,AV_LOG_ERROR,"Invalid lowdelay.bytes.den\n");
1028             return AVERROR_INVALIDDATA;
1029         }
1030
1031         /* [DIRAC_STD] 11.3.5 Quantisation matrices (low-delay syntax). quant_matrix() */
1032         if (get_bits1(gb)) {
1033             av_log(s->avctx,AV_LOG_DEBUG,"Low Delay: Has Custom Quantization Matrix!\n");
1034             /* custom quantization matrix */
1035             s->lowdelay.quant[0][0] = svq3_get_ue_golomb(gb);
1036             for (level = 0; level < s->wavelet_depth; level++) {
1037                 s->lowdelay.quant[level][1] = svq3_get_ue_golomb(gb);
1038                 s->lowdelay.quant[level][2] = svq3_get_ue_golomb(gb);
1039                 s->lowdelay.quant[level][3] = svq3_get_ue_golomb(gb);
1040             }
1041         } else {
1042             if (s->wavelet_depth > 4) {
1043                 av_log(s->avctx,AV_LOG_ERROR,"Mandatory custom low delay matrix missing for depth %d\n", s->wavelet_depth);
1044                 return AVERROR_INVALIDDATA;
1045             }
1046             /* default quantization matrix */
1047             for (level = 0; level < s->wavelet_depth; level++)
1048                 for (i = 0; i < 4; i++) {
1049                     s->lowdelay.quant[level][i] = default_qmat[s->wavelet_idx][level][i];
1050                     /* haar with no shift differs for different depths */
1051                     if (s->wavelet_idx == 3)
1052                         s->lowdelay.quant[level][i] += 4*(s->wavelet_depth-1 - level);
1053                 }
1054         }
1055     }
1056     return 0;
1057 }
1058
1059 static inline int pred_sbsplit(uint8_t *sbsplit, int stride, int x, int y)
1060 {
1061     static const uint8_t avgsplit[7] = { 0, 0, 1, 1, 1, 2, 2 };
1062
1063     if (!(x|y))
1064         return 0;
1065     else if (!y)
1066         return sbsplit[-1];
1067     else if (!x)
1068         return sbsplit[-stride];
1069
1070     return avgsplit[sbsplit[-1] + sbsplit[-stride] + sbsplit[-stride-1]];
1071 }
1072
1073 static inline int pred_block_mode(DiracBlock *block, int stride, int x, int y, int refmask)
1074 {
1075     int pred;
1076
1077     if (!(x|y))
1078         return 0;
1079     else if (!y)
1080         return block[-1].ref & refmask;
1081     else if (!x)
1082         return block[-stride].ref & refmask;
1083
1084     /* return the majority */
1085     pred = (block[-1].ref & refmask) + (block[-stride].ref & refmask) + (block[-stride-1].ref & refmask);
1086     return (pred >> 1) & refmask;
1087 }
1088
1089 static inline void pred_block_dc(DiracBlock *block, int stride, int x, int y)
1090 {
1091     int i, n = 0;
1092
1093     memset(block->u.dc, 0, sizeof(block->u.dc));
1094
1095     if (x && !(block[-1].ref & 3)) {
1096         for (i = 0; i < 3; i++)
1097             block->u.dc[i] += block[-1].u.dc[i];
1098         n++;
1099     }
1100
1101     if (y && !(block[-stride].ref & 3)) {
1102         for (i = 0; i < 3; i++)
1103             block->u.dc[i] += block[-stride].u.dc[i];
1104         n++;
1105     }
1106
1107     if (x && y && !(block[-1-stride].ref & 3)) {
1108         for (i = 0; i < 3; i++)
1109             block->u.dc[i] += block[-1-stride].u.dc[i];
1110         n++;
1111     }
1112
1113     if (n == 2) {
1114         for (i = 0; i < 3; i++)
1115             block->u.dc[i] = (block->u.dc[i]+1)>>1;
1116     } else if (n == 3) {
1117         for (i = 0; i < 3; i++)
1118             block->u.dc[i] = divide3(block->u.dc[i]);
1119     }
1120 }
1121
1122 static inline void pred_mv(DiracBlock *block, int stride, int x, int y, int ref)
1123 {
1124     int16_t *pred[3];
1125     int refmask = ref+1;
1126     int mask = refmask | DIRAC_REF_MASK_GLOBAL; /*  exclude gmc blocks */
1127     int n = 0;
1128
1129     if (x && (block[-1].ref & mask) == refmask)
1130         pred[n++] = block[-1].u.mv[ref];
1131
1132     if (y && (block[-stride].ref & mask) == refmask)
1133         pred[n++] = block[-stride].u.mv[ref];
1134
1135     if (x && y && (block[-stride-1].ref & mask) == refmask)
1136         pred[n++] = block[-stride-1].u.mv[ref];
1137
1138     switch (n) {
1139     case 0:
1140         block->u.mv[ref][0] = 0;
1141         block->u.mv[ref][1] = 0;
1142         break;
1143     case 1:
1144         block->u.mv[ref][0] = pred[0][0];
1145         block->u.mv[ref][1] = pred[0][1];
1146         break;
1147     case 2:
1148         block->u.mv[ref][0] = (pred[0][0] + pred[1][0] + 1) >> 1;
1149         block->u.mv[ref][1] = (pred[0][1] + pred[1][1] + 1) >> 1;
1150         break;
1151     case 3:
1152         block->u.mv[ref][0] = mid_pred(pred[0][0], pred[1][0], pred[2][0]);
1153         block->u.mv[ref][1] = mid_pred(pred[0][1], pred[1][1], pred[2][1]);
1154         break;
1155     }
1156 }
1157
1158 static void global_mv(DiracContext *s, DiracBlock *block, int x, int y, int ref)
1159 {
1160     int ez      = s->globalmc[ref].zrs_exp;
1161     int ep      = s->globalmc[ref].perspective_exp;
1162     int (*A)[2] = s->globalmc[ref].zrs;
1163     int *b      = s->globalmc[ref].pan_tilt;
1164     int *c      = s->globalmc[ref].perspective;
1165
1166     int m       = (1<<ep) - (c[0]*x + c[1]*y);
1167     int mx      = m * ((A[0][0] * x + A[0][1]*y) + (1<<ez) * b[0]);
1168     int my      = m * ((A[1][0] * x + A[1][1]*y) + (1<<ez) * b[1]);
1169
1170     block->u.mv[ref][0] = (mx + (1<<(ez+ep))) >> (ez+ep);
1171     block->u.mv[ref][1] = (my + (1<<(ez+ep))) >> (ez+ep);
1172 }
1173
1174 static void decode_block_params(DiracContext *s, DiracArith arith[8], DiracBlock *block,
1175                                 int stride, int x, int y)
1176 {
1177     int i;
1178
1179     block->ref  = pred_block_mode(block, stride, x, y, DIRAC_REF_MASK_REF1);
1180     block->ref ^= dirac_get_arith_bit(arith, CTX_PMODE_REF1);
1181
1182     if (s->num_refs == 2) {
1183         block->ref |= pred_block_mode(block, stride, x, y, DIRAC_REF_MASK_REF2);
1184         block->ref ^= dirac_get_arith_bit(arith, CTX_PMODE_REF2) << 1;
1185     }
1186
1187     if (!block->ref) {
1188         pred_block_dc(block, stride, x, y);
1189         for (i = 0; i < 3; i++)
1190             block->u.dc[i] += dirac_get_arith_int(arith+1+i, CTX_DC_F1, CTX_DC_DATA);
1191         return;
1192     }
1193
1194     if (s->globalmc_flag) {
1195         block->ref |= pred_block_mode(block, stride, x, y, DIRAC_REF_MASK_GLOBAL);
1196         block->ref ^= dirac_get_arith_bit(arith, CTX_GLOBAL_BLOCK) << 2;
1197     }
1198
1199     for (i = 0; i < s->num_refs; i++)
1200         if (block->ref & (i+1)) {
1201             if (block->ref & DIRAC_REF_MASK_GLOBAL) {
1202                 global_mv(s, block, x, y, i);
1203             } else {
1204                 pred_mv(block, stride, x, y, i);
1205                 block->u.mv[i][0] += dirac_get_arith_int(arith + 4 + 2 * i, CTX_MV_F1, CTX_MV_DATA);
1206                 block->u.mv[i][1] += dirac_get_arith_int(arith + 5 + 2 * i, CTX_MV_F1, CTX_MV_DATA);
1207             }
1208         }
1209 }
1210
1211 /**
1212  * Copies the current block to the other blocks covered by the current superblock split mode
1213  */
1214 static void propagate_block_data(DiracBlock *block, int stride, int size)
1215 {
1216     int x, y;
1217     DiracBlock *dst = block;
1218
1219     for (x = 1; x < size; x++)
1220         dst[x] = *block;
1221
1222     for (y = 1; y < size; y++) {
1223         dst += stride;
1224         for (x = 0; x < size; x++)
1225             dst[x] = *block;
1226     }
1227 }
1228
1229 /**
1230  * Dirac Specification ->
1231  * 12. Block motion data syntax
1232  */
1233 static int dirac_unpack_block_motion_data(DiracContext *s)
1234 {
1235     GetBitContext *gb = &s->gb;
1236     uint8_t *sbsplit = s->sbsplit;
1237     int i, x, y, q, p;
1238     DiracArith arith[8];
1239
1240     align_get_bits(gb);
1241
1242     /* [DIRAC_STD] 11.2.4 and 12.2.1 Number of blocks and superblocks */
1243     s->sbwidth  = DIVRNDUP(s->source.width,  4*s->plane[0].xbsep);
1244     s->sbheight = DIVRNDUP(s->source.height, 4*s->plane[0].ybsep);
1245     s->blwidth  = 4 * s->sbwidth;
1246     s->blheight = 4 * s->sbheight;
1247
1248     /* [DIRAC_STD] 12.3.1 Superblock splitting modes. superblock_split_modes()
1249        decode superblock split modes */
1250     ff_dirac_init_arith_decoder(arith, gb, svq3_get_ue_golomb(gb));     /* svq3_get_ue_golomb(gb) is the length */
1251     for (y = 0; y < s->sbheight; y++) {
1252         for (x = 0; x < s->sbwidth; x++) {
1253             unsigned int split  = dirac_get_arith_uint(arith, CTX_SB_F1, CTX_SB_DATA);
1254             if (split > 2)
1255                 return -1;
1256             sbsplit[x] = (split + pred_sbsplit(sbsplit+x, s->sbwidth, x, y)) % 3;
1257         }
1258         sbsplit += s->sbwidth;
1259     }
1260
1261     /* setup arith decoding */
1262     ff_dirac_init_arith_decoder(arith, gb, svq3_get_ue_golomb(gb));
1263     for (i = 0; i < s->num_refs; i++) {
1264         ff_dirac_init_arith_decoder(arith + 4 + 2 * i, gb, svq3_get_ue_golomb(gb));
1265         ff_dirac_init_arith_decoder(arith + 5 + 2 * i, gb, svq3_get_ue_golomb(gb));
1266     }
1267     for (i = 0; i < 3; i++)
1268         ff_dirac_init_arith_decoder(arith+1+i, gb, svq3_get_ue_golomb(gb));
1269
1270     for (y = 0; y < s->sbheight; y++)
1271         for (x = 0; x < s->sbwidth; x++) {
1272             int blkcnt = 1 << s->sbsplit[y * s->sbwidth + x];
1273             int step   = 4 >> s->sbsplit[y * s->sbwidth + x];
1274
1275             for (q = 0; q < blkcnt; q++)
1276                 for (p = 0; p < blkcnt; p++) {
1277                     int bx = 4 * x + p*step;
1278                     int by = 4 * y + q*step;
1279                     DiracBlock *block = &s->blmotion[by*s->blwidth + bx];
1280                     decode_block_params(s, arith, block, s->blwidth, bx, by);
1281                     propagate_block_data(block, s->blwidth, step);
1282                 }
1283         }
1284
1285     return 0;
1286 }
1287
/**
 * One-dimensional OBMC weight of sample i in a block of length blen.
 *
 * The first and last 2*offset samples get a weight that depends on their
 * distance to the nearest block edge; all other samples get 8.
 */
static int weight(int i, int blen, int offset)
{
    int dist; /* distance to the nearest block edge */

    if (i < 2*offset)
        dist = i;
    else if (i > blen-1 - 2*offset)
        dist = blen-1 - i;
    else
        return 8;

    /* with an offset of 1 there are only two distinct weights */
    if (offset == 1)
        return dist ? 5 : 3;

    return 1 + (6*dist + offset - 1) / (2*offset - 1);
}
1299
1300 static void init_obmc_weight_row(Plane *p, uint8_t *obmc_weight, int stride,
1301                                  int left, int right, int wy)
1302 {
1303     int x;
1304     for (x = 0; left && x < p->xblen >> 1; x++)
1305         obmc_weight[x] = wy*8;
1306     for (; x < p->xblen >> right; x++)
1307         obmc_weight[x] = wy*weight(x, p->xblen, p->xoffset);
1308     for (; x < p->xblen; x++)
1309         obmc_weight[x] = wy*8;
1310     for (; x < stride; x++)
1311         obmc_weight[x] = 0;
1312 }
1313
1314 static void init_obmc_weight(Plane *p, uint8_t *obmc_weight, int stride,
1315                              int left, int right, int top, int bottom)
1316 {
1317     int y;
1318     for (y = 0; top && y < p->yblen >> 1; y++) {
1319         init_obmc_weight_row(p, obmc_weight, stride, left, right, 8);
1320         obmc_weight += stride;
1321     }
1322     for (; y < p->yblen >> bottom; y++) {
1323         int wy = weight(y, p->yblen, p->yoffset);
1324         init_obmc_weight_row(p, obmc_weight, stride, left, right, wy);
1325         obmc_weight += stride;
1326     }
1327     for (; y < p->yblen; y++) {
1328         init_obmc_weight_row(p, obmc_weight, stride, left, right, 8);
1329         obmc_weight += stride;
1330     }
1331 }
1332
1333 static void init_obmc_weights(DiracContext *s, Plane *p, int by)
1334 {
1335     int top = !by;
1336     int bottom = by == s->blheight-1;
1337
1338     /* don't bother re-initing for rows 2 to blheight-2, the weights don't change */
1339     if (top || bottom || by == 1) {
1340         init_obmc_weight(p, s->obmc_weight[0], MAX_BLOCKSIZE, 1, 0, top, bottom);
1341         init_obmc_weight(p, s->obmc_weight[1], MAX_BLOCKSIZE, 0, 0, top, bottom);
1342         init_obmc_weight(p, s->obmc_weight[2], MAX_BLOCKSIZE, 0, 1, top, bottom);
1343     }
1344 }
1345
/**
 * Weights of the four source planes for eighth-pel interpolation.
 * mc_subpel() indexes the table as [my & 3][mx & 3]; every set of four
 * weights sums to 16.
 */
static const uint8_t epel_weights[4][4][4] = {
    {
        { 16,  0,  0,  0 },
        { 12,  4,  0,  0 },
        {  8,  8,  0,  0 },
        {  4, 12,  0,  0 },
    },
    {
        { 12,  0,  4,  0 },
        {  9,  3,  3,  1 },
        {  6,  6,  2,  2 },
        {  3,  9,  1,  3 },
    },
    {
        {  8,  0,  8,  0 },
        {  6,  2,  6,  2 },
        {  4,  4,  4,  4 },
        {  2,  6,  2,  6 },
    },
    {
        {  4,  0, 12,  0 },
        {  3,  1,  9,  3 },
        {  2,  2,  6,  6 },
        {  1,  3,  3,  9 },
    },
};
1364
1365 /**
1366  * For block x,y, determine which of the hpel planes to do bilinear
1367  * interpolation from and set src[] to the location in each hpel plane
1368  * to MC from.
1369  *
1370  * @return the index of the put_dirac_pixels_tab function to use
1371  *  0 for 1 plane (fpel,hpel), 1 for 2 planes (qpel), 2 for 4 planes (qpel), and 3 for epel
1372  */
1373 static int mc_subpel(DiracContext *s, DiracBlock *block, const uint8_t *src[5],
1374                      int x, int y, int ref, int plane)
1375 {
1376     Plane *p = &s->plane[plane];
1377     uint8_t **ref_hpel = s->ref_pics[ref]->hpel[plane];
1378     int motion_x = block->u.mv[ref][0];
1379     int motion_y = block->u.mv[ref][1];
1380     int mx, my, i, epel, nplanes = 0;
1381
1382     if (plane) {
1383         motion_x >>= s->chroma_x_shift;
1384         motion_y >>= s->chroma_y_shift;
1385     }
1386
1387     mx         = motion_x & ~(-1U << s->mv_precision);
1388     my         = motion_y & ~(-1U << s->mv_precision);
1389     motion_x >>= s->mv_precision;
1390     motion_y >>= s->mv_precision;
1391     /* normalize subpel coordinates to epel */
1392     /* TODO: template this function? */
1393     mx      <<= 3 - s->mv_precision;
1394     my      <<= 3 - s->mv_precision;
1395
1396     x += motion_x;
1397     y += motion_y;
1398     epel = (mx|my)&1;
1399
1400     /* hpel position */
1401     if (!((mx|my)&3)) {
1402         nplanes = 1;
1403         src[0] = ref_hpel[(my>>1)+(mx>>2)] + y*p->stride + x;
1404     } else {
1405         /* qpel or epel */
1406         nplanes = 4;
1407         for (i = 0; i < 4; i++)
1408             src[i] = ref_hpel[i] + y*p->stride + x;
1409
1410         /* if we're interpolating in the right/bottom halves, adjust the planes as needed
1411            we increment x/y because the edge changes for half of the pixels */
1412         if (mx > 4) {
1413             src[0] += 1;
1414             src[2] += 1;
1415             x++;
1416         }
1417         if (my > 4) {
1418             src[0] += p->stride;
1419             src[1] += p->stride;
1420             y++;
1421         }
1422
1423         /* hpel planes are:
1424            [0]: F  [1]: H
1425            [2]: V  [3]: C */
1426         if (!epel) {
1427             /* check if we really only need 2 planes since either mx or my is
1428                a hpel position. (epel weights of 0 handle this there) */
1429             if (!(mx&3)) {
1430                 /* mx == 0: average [0] and [2]
1431                    mx == 4: average [1] and [3] */
1432                 src[!mx] = src[2 + !!mx];
1433                 nplanes = 2;
1434             } else if (!(my&3)) {
1435                 src[0] = src[(my>>1)  ];
1436                 src[1] = src[(my>>1)+1];
1437                 nplanes = 2;
1438             }
1439         } else {
1440             /* adjust the ordering if needed so the weights work */
1441             if (mx > 4) {
1442                 FFSWAP(const uint8_t *, src[0], src[1]);
1443                 FFSWAP(const uint8_t *, src[2], src[3]);
1444             }
1445             if (my > 4) {
1446                 FFSWAP(const uint8_t *, src[0], src[2]);
1447                 FFSWAP(const uint8_t *, src[1], src[3]);
1448             }
1449             src[4] = epel_weights[my&3][mx&3];
1450         }
1451     }
1452
1453     /* fixme: v/h _edge_pos */
1454     if (x + p->xblen > p->width +EDGE_WIDTH/2 ||
1455         y + p->yblen > p->height+EDGE_WIDTH/2 ||
1456         x < 0 || y < 0) {
1457         for (i = 0; i < nplanes; i++) {
1458             s->vdsp.emulated_edge_mc(s->edge_emu_buffer[i], src[i],
1459                                      p->stride, p->stride,
1460                                      p->xblen, p->yblen, x, y,
1461                                      p->width+EDGE_WIDTH/2, p->height+EDGE_WIDTH/2);
1462             src[i] = s->edge_emu_buffer[i];
1463         }
1464     }
1465     return (nplanes>>1) + epel;
1466 }
1467
1468 static void add_dc(uint16_t *dst, int dc, int stride,
1469                    uint8_t *obmc_weight, int xblen, int yblen)
1470 {
1471     int x, y;
1472     dc += 128;
1473
1474     for (y = 0; y < yblen; y++) {
1475         for (x = 0; x < xblen; x += 2) {
1476             dst[x  ] += dc * obmc_weight[x  ];
1477             dst[x+1] += dc * obmc_weight[x+1];
1478         }
1479         dst          += stride;
1480         obmc_weight  += MAX_BLOCKSIZE;
1481     }
1482 }
1483
1484 static void block_mc(DiracContext *s, DiracBlock *block,
1485                      uint16_t *mctmp, uint8_t *obmc_weight,
1486                      int plane, int dstx, int dsty)
1487 {
1488     Plane *p = &s->plane[plane];
1489     const uint8_t *src[5];
1490     int idx;
1491
1492     switch (block->ref&3) {
1493     case 0: /* DC */
1494         add_dc(mctmp, block->u.dc[plane], p->stride, obmc_weight, p->xblen, p->yblen);
1495         return;
1496     case 1:
1497     case 2:
1498         idx = mc_subpel(s, block, src, dstx, dsty, (block->ref&3)-1, plane);
1499         s->put_pixels_tab[idx](s->mcscratch, src, p->stride, p->yblen);
1500         if (s->weight_func)
1501             s->weight_func(s->mcscratch, p->stride, s->weight_log2denom,
1502                            s->weight[0] + s->weight[1], p->yblen);
1503         break;
1504     case 3:
1505         idx = mc_subpel(s, block, src, dstx, dsty, 0, plane);
1506         s->put_pixels_tab[idx](s->mcscratch, src, p->stride, p->yblen);
1507         idx = mc_subpel(s, block, src, dstx, dsty, 1, plane);
1508         if (s->biweight_func) {
1509             /* fixme: +32 is a quick hack */
1510             s->put_pixels_tab[idx](s->mcscratch + 32, src, p->stride, p->yblen);
1511             s->biweight_func(s->mcscratch, s->mcscratch+32, p->stride, s->weight_log2denom,
1512                              s->weight[0], s->weight[1], p->yblen);
1513         } else
1514             s->avg_pixels_tab[idx](s->mcscratch, src, p->stride, p->yblen);
1515         break;
1516     }
1517     s->add_obmc(mctmp, s->mcscratch, p->stride, obmc_weight, p->yblen);
1518 }
1519
1520 static void mc_row(DiracContext *s, DiracBlock *block, uint16_t *mctmp, int plane, int dsty)
1521 {
1522     Plane *p = &s->plane[plane];
1523     int x, dstx = p->xbsep - p->xoffset;
1524
1525     block_mc(s, block, mctmp, s->obmc_weight[0], plane, -p->xoffset, dsty);
1526     mctmp += p->xbsep;
1527
1528     for (x = 1; x < s->blwidth-1; x++) {
1529         block_mc(s, block+x, mctmp, s->obmc_weight[1], plane, dstx, dsty);
1530         dstx  += p->xbsep;
1531         mctmp += p->xbsep;
1532     }
1533     block_mc(s, block+x, mctmp, s->obmc_weight[2], plane, dstx, dsty);
1534 }
1535
1536 static void select_dsp_funcs(DiracContext *s, int width, int height, int xblen, int yblen)
1537 {
1538     int idx = 0;
1539     if (xblen > 8)
1540         idx = 1;
1541     if (xblen > 16)
1542         idx = 2;
1543
1544     memcpy(s->put_pixels_tab, s->diracdsp.put_dirac_pixels_tab[idx], sizeof(s->put_pixels_tab));
1545     memcpy(s->avg_pixels_tab, s->diracdsp.avg_dirac_pixels_tab[idx], sizeof(s->avg_pixels_tab));
1546     s->add_obmc = s->diracdsp.add_dirac_obmc[idx];
1547     if (s->weight_log2denom > 1 || s->weight[0] != 1 || s->weight[1] != 1) {
1548         s->weight_func   = s->diracdsp.weight_dirac_pixels_tab[idx];
1549         s->biweight_func = s->diracdsp.biweight_dirac_pixels_tab[idx];
1550     } else {
1551         s->weight_func   = NULL;
1552         s->biweight_func = NULL;
1553     }
1554 }
1555
1556 static void interpolate_refplane(DiracContext *s, DiracFrame *ref, int plane, int width, int height)
1557 {
1558     /* chroma allocates an edge of 8 when subsampled
1559        which for 4:2:2 means an h edge of 16 and v edge of 8
1560        just use 8 for everything for the moment */
1561     int i, edge = EDGE_WIDTH/2;
1562
1563     ref->hpel[plane][0] = ref->avframe->data[plane];
1564     s->mpvencdsp.draw_edges(ref->hpel[plane][0], ref->avframe->linesize[plane], width, height, edge, edge, EDGE_TOP | EDGE_BOTTOM); /* EDGE_TOP | EDGE_BOTTOM values just copied to make it build, this needs to be ensured */
1565
1566     /* no need for hpel if we only have fpel vectors */
1567     if (!s->mv_precision)
1568         return;
1569
1570     for (i = 1; i < 4; i++) {
1571         if (!ref->hpel_base[plane][i])
1572             ref->hpel_base[plane][i] = av_malloc((height+2*edge) * ref->avframe->linesize[plane] + 32);
1573         /* we need to be 16-byte aligned even for chroma */
1574         ref->hpel[plane][i] = ref->hpel_base[plane][i] + edge*ref->avframe->linesize[plane] + 16;
1575     }
1576
1577     if (!ref->interpolated[plane]) {
1578         s->diracdsp.dirac_hpel_filter(ref->hpel[plane][1], ref->hpel[plane][2],
1579                                       ref->hpel[plane][3], ref->hpel[plane][0],
1580                                       ref->avframe->linesize[plane], width, height);
1581         s->mpvencdsp.draw_edges(ref->hpel[plane][1], ref->avframe->linesize[plane], width, height, edge, edge, EDGE_TOP | EDGE_BOTTOM);
1582         s->mpvencdsp.draw_edges(ref->hpel[plane][2], ref->avframe->linesize[plane], width, height, edge, edge, EDGE_TOP | EDGE_BOTTOM);
1583         s->mpvencdsp.draw_edges(ref->hpel[plane][3], ref->avframe->linesize[plane], width, height, edge, edge, EDGE_TOP | EDGE_BOTTOM);
1584     }
1585     ref->interpolated[plane] = 1;
1586 }
1587
1588 /**
1589  * Dirac Specification ->
1590  * 13.0 Transform data syntax. transform_data()
1591  */
1592 static int dirac_decode_frame_internal(DiracContext *s)
1593 {
1594     DWTContext d;
1595     int y, i, comp, dsty;
1596     int ret;
1597
1598     if (s->low_delay) {
1599         /* [DIRAC_STD] 13.5.1 low_delay_transform_data() */
1600         for (comp = 0; comp < 3; comp++) {
1601             Plane *p = &s->plane[comp];
1602             memset(p->idwt_buf, 0, p->idwt_stride * p->idwt_height * sizeof(IDWTELEM));
1603         }
1604         if (!s->zero_res) {
1605             if ((ret = decode_lowdelay(s)) < 0)
1606                 return ret;
1607         }
1608     }
1609
1610     for (comp = 0; comp < 3; comp++) {
1611         Plane *p       = &s->plane[comp];
1612         uint8_t *frame = s->current_picture->avframe->data[comp];
1613
1614         /* FIXME: small resolutions */
1615         for (i = 0; i < 4; i++)
1616             s->edge_emu_buffer[i] = s->edge_emu_buffer_base + i*FFALIGN(p->width, 16);
1617
1618         if (!s->zero_res && !s->low_delay)
1619         {
1620             memset(p->idwt_buf, 0, p->idwt_stride * p->idwt_height * sizeof(IDWTELEM));
1621             decode_component(s, comp); /* [DIRAC_STD] 13.4.1 core_transform_data() */
1622         }
1623         if (ff_spatial_idwt_init2(&d, p->idwt_buf, p->idwt_width, p->idwt_height, p->idwt_stride,
1624                                   s->wavelet_idx+2, s->wavelet_depth, p->idwt_tmp))
1625             return -1;
1626
1627         if (!s->num_refs) { /* intra */
1628             for (y = 0; y < p->height; y += 16) {
1629                 ff_spatial_idwt_slice2(&d, y+16); /* decode */
1630                 s->diracdsp.put_signed_rect_clamped(frame + y*p->stride, p->stride,
1631                                                     p->idwt_buf + y*p->idwt_stride, p->idwt_stride, p->width, 16);
1632             }
1633         } else { /* inter */
1634             int rowheight = p->ybsep*p->stride;
1635
1636             select_dsp_funcs(s, p->width, p->height, p->xblen, p->yblen);
1637
1638             for (i = 0; i < s->num_refs; i++)
1639                 interpolate_refplane(s, s->ref_pics[i], comp, p->width, p->height);
1640
1641             memset(s->mctmp, 0, 4*p->yoffset*p->stride);
1642
1643             dsty = -p->yoffset;
1644             for (y = 0; y < s->blheight; y++) {
1645                 int h     = 0,
1646                     start = FFMAX(dsty, 0);
1647                 uint16_t *mctmp    = s->mctmp + y*rowheight;
1648                 DiracBlock *blocks = s->blmotion + y*s->blwidth;
1649
1650                 init_obmc_weights(s, p, y);
1651
1652                 if (y == s->blheight-1 || start+p->ybsep > p->height)
1653                     h = p->height - start;
1654                 else
1655                     h = p->ybsep - (start - dsty);
1656                 if (h < 0)
1657                     break;
1658
1659                 memset(mctmp+2*p->yoffset*p->stride, 0, 2*rowheight);
1660                 mc_row(s, blocks, mctmp, comp, dsty);
1661
1662                 mctmp += (start - dsty)*p->stride + p->xoffset;
1663                 ff_spatial_idwt_slice2(&d, start + h); /* decode */
1664                 s->diracdsp.add_rect_clamped(frame + start*p->stride, mctmp, p->stride,
1665                                              p->idwt_buf + start*p->idwt_stride, p->idwt_stride, p->width, h);
1666
1667                 dsty += p->ybsep;
1668             }
1669         }
1670     }
1671
1672
1673     return 0;
1674 }
1675
1676 static int get_buffer_with_edge(AVCodecContext *avctx, AVFrame *f, int flags)
1677 {
1678     int ret, i;
1679     int chroma_x_shift, chroma_y_shift;
1680     avcodec_get_chroma_sub_sample(avctx->pix_fmt, &chroma_x_shift, &chroma_y_shift);
1681
1682     f->width  = avctx->width  + 2 * EDGE_WIDTH;
1683     f->height = avctx->height + 2 * EDGE_WIDTH + 2;
1684     ret = ff_get_buffer(avctx, f, flags);
1685     if (ret < 0)
1686         return ret;
1687
1688     for (i = 0; f->data[i]; i++) {
1689         int offset = (EDGE_WIDTH >> (i && i<3 ? chroma_y_shift : 0)) *
1690                      f->linesize[i] + 32;
1691         f->data[i] += offset;
1692     }
1693     f->width  = avctx->width;
1694     f->height = avctx->height;
1695
1696     return 0;
1697 }
1698
1699 /**
1700  * Dirac Specification ->
1701  * 11.1.1 Picture Header. picture_header()
1702  */
1703 static int dirac_decode_picture_header(DiracContext *s)
1704 {
1705     int retire, picnum;
1706     int i, j, refnum, refdist;
1707     GetBitContext *gb = &s->gb;
1708
1709     /* [DIRAC_STD] 11.1.1 Picture Header. picture_header() PICTURE_NUM */
1710     picnum = s->current_picture->avframe->display_picture_number = get_bits_long(gb, 32);
1711
1712
1713     av_log(s->avctx,AV_LOG_DEBUG,"PICTURE_NUM: %d\n",picnum);
1714
1715     /* if this is the first keyframe after a sequence header, start our
1716        reordering from here */
1717     if (s->frame_number < 0)
1718         s->frame_number = picnum;
1719
1720     s->ref_pics[0] = s->ref_pics[1] = NULL;
1721     for (i = 0; i < s->num_refs; i++) {
1722         refnum = picnum + dirac_get_se_golomb(gb);
1723         refdist = INT_MAX;
1724
1725         /* find the closest reference to the one we want */
1726         /* Jordi: this is needed if the referenced picture hasn't yet arrived */
1727         for (j = 0; j < MAX_REFERENCE_FRAMES && refdist; j++)
1728             if (s->ref_frames[j]
1729                 && FFABS(s->ref_frames[j]->avframe->display_picture_number - refnum) < refdist) {
1730                 s->ref_pics[i] = s->ref_frames[j];
1731                 refdist = FFABS(s->ref_frames[j]->avframe->display_picture_number - refnum);
1732             }
1733
1734         if (!s->ref_pics[i] || refdist)
1735             av_log(s->avctx, AV_LOG_DEBUG, "Reference not found\n");
1736
1737         /* if there were no references at all, allocate one */
1738         if (!s->ref_pics[i])
1739             for (j = 0; j < MAX_FRAMES; j++)
1740                 if (!s->all_frames[j].avframe->data[0]) {
1741                     s->ref_pics[i] = &s->all_frames[j];
1742                     get_buffer_with_edge(s->avctx, s->ref_pics[i]->avframe, AV_GET_BUFFER_FLAG_REF);
1743                     break;
1744                 }
1745     }
1746
1747     /* retire the reference frames that are not used anymore */
1748     if (s->current_picture->avframe->reference) {
1749         retire = picnum + dirac_get_se_golomb(gb);
1750         if (retire != picnum) {
1751             DiracFrame *retire_pic = remove_frame(s->ref_frames, retire);
1752
1753             if (retire_pic)
1754                 retire_pic->avframe->reference &= DELAYED_PIC_REF;
1755             else
1756                 av_log(s->avctx, AV_LOG_DEBUG, "Frame to retire not found\n");
1757         }
1758
1759         /* if reference array is full, remove the oldest as per the spec */
1760         while (add_frame(s->ref_frames, MAX_REFERENCE_FRAMES, s->current_picture)) {
1761             av_log(s->avctx, AV_LOG_ERROR, "Reference frame overflow\n");
1762             remove_frame(s->ref_frames, s->ref_frames[0]->avframe->display_picture_number)->avframe->reference &= DELAYED_PIC_REF;
1763         }
1764     }
1765
1766     if (s->num_refs) {
1767         if (dirac_unpack_prediction_parameters(s))  /* [DIRAC_STD] 11.2 Picture Prediction Data. picture_prediction() */
1768             return -1;
1769         if (dirac_unpack_block_motion_data(s))      /* [DIRAC_STD] 12. Block motion data syntax                       */
1770             return -1;
1771     }
1772     if (dirac_unpack_idwt_params(s))                /* [DIRAC_STD] 11.3 Wavelet transform data                        */
1773         return -1;
1774
1775     init_planes(s);
1776     return 0;
1777 }
1778
1779 static int get_delayed_pic(DiracContext *s, AVFrame *picture, int *got_frame)
1780 {
1781     DiracFrame *out = s->delay_frames[0];
1782     int i, out_idx  = 0;
1783     int ret;
1784
1785     /* find frame with lowest picture number */
1786     for (i = 1; s->delay_frames[i]; i++)
1787         if (s->delay_frames[i]->avframe->display_picture_number < out->avframe->display_picture_number) {
1788             out     = s->delay_frames[i];
1789             out_idx = i;
1790         }
1791
1792     for (i = out_idx; s->delay_frames[i]; i++)
1793         s->delay_frames[i] = s->delay_frames[i+1];
1794
1795     if (out) {
1796         out->avframe->reference ^= DELAYED_PIC_REF;
1797         *got_frame = 1;
1798         if((ret = av_frame_ref(picture, out->avframe)) < 0)
1799             return ret;
1800     }
1801
1802     return 0;
1803 }
1804
1805 /**
1806  * Dirac Specification ->
1807  * 9.6 Parse Info Header Syntax. parse_info()
1808  * 4 byte start code + byte parse code + 4 byte size + 4 byte previous size
1809  */
1810 #define DATA_UNIT_HEADER_SIZE 13
1811
1812 /* [DIRAC_STD] dirac_decode_data_unit makes reference to the while defined in 9.3
1813    inside the function parse_sequence() */
1814 static int dirac_decode_data_unit(AVCodecContext *avctx, const uint8_t *buf, int size)
1815 {
1816     DiracContext *s   = avctx->priv_data;
1817     DiracFrame *pic   = NULL;
1818     int ret, i, parse_code = buf[4];
1819     unsigned tmp;
1820
1821     if (size < DATA_UNIT_HEADER_SIZE)
1822         return -1;
1823
1824     init_get_bits(&s->gb, &buf[13], 8*(size - DATA_UNIT_HEADER_SIZE));
1825
1826     if (parse_code == pc_seq_header) {
1827         if (s->seen_sequence_header)
1828             return 0;
1829
1830         /* [DIRAC_STD] 10. Sequence header */
1831         if (avpriv_dirac_parse_sequence_header(avctx, &s->gb, &s->source))
1832             return -1;
1833
1834         avcodec_get_chroma_sub_sample(avctx->pix_fmt, &s->chroma_x_shift, &s->chroma_y_shift);
1835
1836         if (alloc_sequence_buffers(s))
1837             return -1;
1838
1839         s->seen_sequence_header = 1;
1840     } else if (parse_code == pc_eos) { /* [DIRAC_STD] End of Sequence */
1841         free_sequence_buffers(s);
1842         s->seen_sequence_header = 0;
1843     } else if (parse_code == pc_aux_data) {
1844         if (buf[13] == 1) {     /* encoder implementation/version */
1845             int ver[3];
1846             /* versions older than 1.0.8 don't store quant delta for
1847                subbands with only one codeblock */
1848             if (sscanf(buf+14, "Schroedinger %d.%d.%d", ver, ver+1, ver+2) == 3)
1849                 if (ver[0] == 1 && ver[1] == 0 && ver[2] <= 7)
1850                     s->old_delta_quant = 1;
1851         }
1852     } else if (parse_code & 0x8) {  /* picture data unit */
1853         if (!s->seen_sequence_header) {
1854             av_log(avctx, AV_LOG_DEBUG, "Dropping frame without sequence header\n");
1855             return -1;
1856         }
1857
1858         /* find an unused frame */
1859         for (i = 0; i < MAX_FRAMES; i++)
1860             if (s->all_frames[i].avframe->data[0] == NULL)
1861                 pic = &s->all_frames[i];
1862         if (!pic) {
1863             av_log(avctx, AV_LOG_ERROR, "framelist full\n");
1864             return -1;
1865         }
1866
1867         av_frame_unref(pic->avframe);
1868
1869         /* [DIRAC_STD] Defined in 9.6.1 ... */
1870         tmp            =  parse_code & 0x03;                   /* [DIRAC_STD] num_refs()      */
1871         if (tmp > 2) {
1872             av_log(avctx, AV_LOG_ERROR, "num_refs of 3\n");
1873             return -1;
1874         }
1875         s->num_refs    = tmp;
1876         s->is_arith    = (parse_code & 0x48) == 0x08;          /* [DIRAC_STD] using_ac()      */
1877         s->low_delay   = (parse_code & 0x88) == 0x88;          /* [DIRAC_STD] is_low_delay()  */
1878         pic->avframe->reference = (parse_code & 0x0C) == 0x0C;  /* [DIRAC_STD]  is_reference() */
1879         pic->avframe->key_frame = s->num_refs == 0;             /* [DIRAC_STD] is_intra()      */
1880         pic->avframe->pict_type = s->num_refs + 1;              /* Definition of AVPictureType in avutil.h */
1881
1882         if ((ret = get_buffer_with_edge(avctx, pic->avframe, (parse_code & 0x0C) == 0x0C ? AV_GET_BUFFER_FLAG_REF : 0)) < 0)
1883             return ret;
1884         s->current_picture = pic;
1885         s->plane[0].stride = pic->avframe->linesize[0];
1886         s->plane[1].stride = pic->avframe->linesize[1];
1887         s->plane[2].stride = pic->avframe->linesize[2];
1888
1889         if (alloc_buffers(s, FFMAX3(FFABS(s->plane[0].stride), FFABS(s->plane[1].stride), FFABS(s->plane[2].stride))) < 0)
1890             return AVERROR(ENOMEM);
1891
1892         /* [DIRAC_STD] 11.1 Picture parse. picture_parse() */
1893         if (dirac_decode_picture_header(s))
1894             return -1;
1895
1896         /* [DIRAC_STD] 13.0 Transform data syntax. transform_data() */
1897         if (dirac_decode_frame_internal(s))
1898             return -1;
1899     }
1900     return 0;
1901 }
1902
1903 static int dirac_decode_frame(AVCodecContext *avctx, void *data, int *got_frame, AVPacket *pkt)
1904 {
1905     DiracContext *s     = avctx->priv_data;
1906     AVFrame *picture    = data;
1907     uint8_t *buf        = pkt->data;
1908     int buf_size        = pkt->size;
1909     int i, data_unit_size, buf_idx = 0;
1910     int ret;
1911
1912     /* release unused frames */
1913     for (i = 0; i < MAX_FRAMES; i++)
1914         if (s->all_frames[i].avframe->data[0] && !s->all_frames[i].avframe->reference) {
1915             av_frame_unref(s->all_frames[i].avframe);
1916             memset(s->all_frames[i].interpolated, 0, sizeof(s->all_frames[i].interpolated));
1917         }
1918
1919     s->current_picture = NULL;
1920     *got_frame = 0;
1921
1922     /* end of stream, so flush delayed pics */
1923     if (buf_size == 0)
1924         return get_delayed_pic(s, (AVFrame *)data, got_frame);
1925
1926     for (;;) {
1927         /*[DIRAC_STD] Here starts the code from parse_info() defined in 9.6
1928           [DIRAC_STD] PARSE_INFO_PREFIX = "BBCD" as defined in ISO/IEC 646
1929           BBCD start code search */
1930         for (; buf_idx + DATA_UNIT_HEADER_SIZE < buf_size; buf_idx++) {
1931             if (buf[buf_idx  ] == 'B' && buf[buf_idx+1] == 'B' &&
1932                 buf[buf_idx+2] == 'C' && buf[buf_idx+3] == 'D')
1933                 break;
1934         }
1935         /* BBCD found or end of data */
1936         if (buf_idx + DATA_UNIT_HEADER_SIZE >= buf_size)
1937             break;
1938
1939         data_unit_size = AV_RB32(buf+buf_idx+5);
1940         if (buf_idx + data_unit_size > buf_size || !data_unit_size) {
1941             if(buf_idx + data_unit_size > buf_size)
1942             av_log(s->avctx, AV_LOG_ERROR,
1943                    "Data unit with size %d is larger than input buffer, discarding\n",
1944                    data_unit_size);
1945             buf_idx += 4;
1946             continue;
1947         }
1948         /* [DIRAC_STD] dirac_decode_data_unit makes reference to the while defined in 9.3 inside the function parse_sequence() */
1949         if (dirac_decode_data_unit(avctx, buf+buf_idx, data_unit_size))
1950         {
1951             av_log(s->avctx, AV_LOG_ERROR,"Error in dirac_decode_data_unit\n");
1952             return -1;
1953         }
1954         buf_idx += data_unit_size;
1955     }
1956
1957     if (!s->current_picture)
1958         return buf_size;
1959
1960     if (s->current_picture->avframe->display_picture_number > s->frame_number) {
1961         DiracFrame *delayed_frame = remove_frame(s->delay_frames, s->frame_number);
1962
1963         s->current_picture->avframe->reference |= DELAYED_PIC_REF;
1964
1965         if (add_frame(s->delay_frames, MAX_DELAY, s->current_picture)) {
1966             int min_num = s->delay_frames[0]->avframe->display_picture_number;
1967             /* Too many delayed frames, so we display the frame with the lowest pts */
1968             av_log(avctx, AV_LOG_ERROR, "Delay frame overflow\n");
1969
1970             for (i = 1; s->delay_frames[i]; i++)
1971                 if (s->delay_frames[i]->avframe->display_picture_number < min_num)
1972                     min_num = s->delay_frames[i]->avframe->display_picture_number;
1973
1974             delayed_frame = remove_frame(s->delay_frames, min_num);
1975             add_frame(s->delay_frames, MAX_DELAY, s->current_picture);
1976         }
1977
1978         if (delayed_frame) {
1979             delayed_frame->avframe->reference ^= DELAYED_PIC_REF;
1980             if((ret=av_frame_ref(data, delayed_frame->avframe)) < 0)
1981                 return ret;
1982             *got_frame = 1;
1983         }
1984     } else if (s->current_picture->avframe->display_picture_number == s->frame_number) {
1985         /* The right frame at the right time :-) */
1986         if((ret=av_frame_ref(data, s->current_picture->avframe)) < 0)
1987             return ret;
1988         *got_frame = 1;
1989     }
1990
1991     if (*got_frame)
1992         s->frame_number = picture->display_picture_number + 1;
1993
1994     return buf_idx;
1995 }
1996
1997 AVCodec ff_dirac_decoder = {
1998     .name           = "dirac",
1999     .long_name      = NULL_IF_CONFIG_SMALL("BBC Dirac VC-2"),
2000     .type           = AVMEDIA_TYPE_VIDEO,
2001     .id             = AV_CODEC_ID_DIRAC,
2002     .priv_data_size = sizeof(DiracContext),
2003     .init           = dirac_decode_init,
2004     .close          = dirac_decode_end,
2005     .decode         = dirac_decode_frame,
2006     .capabilities   = CODEC_CAP_DELAY,
2007     .flush          = dirac_decode_flush,
2008 };