git.sesse.net Git - ffmpeg/blob - libavcodec/diracdec.c

   1 /*
   2  * Copyright (C) 2007 Marco Gerards <marco@gnu.org>
   3  * Copyright (C) 2009 David Conrad
   4  * Copyright (C) 2011 Jordi Ortiz
   5  *
   6  * This file is part of FFmpeg.
   7  *
   8  * FFmpeg is free software; you can redistribute it and/or
   9  * modify it under the terms of the GNU Lesser General Public
  10  * License as published by the Free Software Foundation; either
  11  * version 2.1 of the License, or (at your option) any later version.
  12  *
  13  * FFmpeg is distributed in the hope that it will be useful,
  14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  16  * Lesser General Public License for more details.
  17  *
  18  * You should have received a copy of the GNU Lesser General Public
  19  * License along with FFmpeg; if not, write to the Free Software
  20  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  21  */
  22
  23 /**
  24  * @file
  25  * Dirac Decoder
  26  * @author Marco Gerards <marco@gnu.org>, David Conrad, Jordi Ortiz <nenjordi@gmail.com>
  27  */
  28
  29 #include "avcodec.h"
  30 #include "get_bits.h"
  31 #include "bytestream.h"
  32 #include "internal.h"
  33 #include "golomb.h"
  34 #include "dirac_arith.h"
  35 #include "mpeg12data.h"
  36 #include "libavcodec/mpegvideo.h"
  37 #include "mpegvideoencdsp.h"
  38 #include "dirac_dwt.h"
  39 #include "dirac.h"
  40 #include "diracdsp.h"
  41 #include "videodsp.h"
  42
  43 /**
  44  * The spec limits the number of wavelet decompositions to 4 for both
  45  * level 1 (VC-2) and 128 (long-gop default).
  46  * 5 decompositions is the maximum before >16-bit buffers are needed.
  47  * Schroedinger allows this for DD 9,7 and 13,7 wavelets only, limiting
  48  * the others to 4 decompositions (or 3 for the fidelity filter).
  49  *
  50  * We use this instead of MAX_DECOMPOSITIONS to save some memory.
  51  */
  52 #define MAX_DWT_LEVELS 5
  53
  54 /**
  55  * The spec limits this to 3 for frame coding, but in practice can be as high as 6
  56  */
  57 #define MAX_REFERENCE_FRAMES 8
  58 #define MAX_DELAY 5         /* limit for main profile for frame coding (TODO: field coding) */
  59 #define MAX_FRAMES (MAX_REFERENCE_FRAMES + MAX_DELAY + 1)
  60 #define MAX_QUANT 68        /* max quant for VC-2 */
  61 #define MAX_BLOCKSIZE 32    /* maximum xblen/yblen we support */
  62
  63 /**
  64  * DiracBlock->ref flags, if set then the block does MC from the given ref
  65  */
  66 #define DIRAC_REF_MASK_REF1   1
  67 #define DIRAC_REF_MASK_REF2   2
  68 #define DIRAC_REF_MASK_GLOBAL 4
  69
  70 /**
  71  * Value of Picture.reference when Picture is not a reference picture, but
  72  * is held for delayed output.
  73  */
  74 #define DELAYED_PIC_REF 4
  75
  76 #define CALC_PADDING(size, depth)                       \
  77     (((size + (1 << depth) - 1) >> depth) << depth)
  78
  79 #define DIVRNDUP(a, b) (((a) + (b) - 1) / (b))
  80
  81 typedef struct {
  82     AVFrame *avframe;
  83     int interpolated[3];    /* 1 if hpel[] is valid */
  84     uint8_t *hpel[3][4];
  85     uint8_t *hpel_base[3][4];
  86 } DiracFrame;
  87
  88 typedef struct {
  89     union {
  90         int16_t mv[2][2];
  91         int16_t dc[3];
  92     } u; /* anonymous unions aren't in C99 :( */
  93     uint8_t ref;
  94 } DiracBlock;
  95
  96 typedef struct SubBand {
  97     int level;
  98     int orientation;
  99     int stride;
 100     int width;
 101     int height;
 102     int quant;
 103     IDWTELEM *ibuf;
 104     struct SubBand *parent;
 105
 106     /* for low delay */
 107     unsigned length;
 108     const uint8_t *coeff_data;
 109 } SubBand;
 110
 111 typedef struct Plane {
 112     int width;
 113     int height;
 114     ptrdiff_t stride;
 115
 116     int idwt_width;
 117     int idwt_height;
 118     int idwt_stride;
 119     IDWTELEM *idwt_buf;
 120     IDWTELEM *idwt_buf_base;
 121     IDWTELEM *idwt_tmp;
 122
 123     /* block length */
 124     uint8_t xblen;
 125     uint8_t yblen;
 126     /* block separation (block n+1 starts after this many pixels in block n) */
 127     uint8_t xbsep;
 128     uint8_t ybsep;
 129     /* amount of overspill on each edge (half of the overlap between blocks) */
 130     uint8_t xoffset;
 131     uint8_t yoffset;
 132
 133     SubBand band[MAX_DWT_LEVELS][4];
 134 } Plane;
 135
 136 typedef struct DiracContext {
 137     AVCodecContext *avctx;
 138     MpegvideoEncDSPContext mpvencdsp;
 139     VideoDSPContext vdsp;
 140     DiracDSPContext diracdsp;
 141     GetBitContext gb;
 142     dirac_source_params source;
 143     int seen_sequence_header;
 144     int frame_number;           /* number of the next frame to display       */
 145     Plane plane[3];
 146     int chroma_x_shift;
 147     int chroma_y_shift;
 148
 149     int zero_res;               /* zero residue flag                         */
 150     int is_arith;               /* whether coeffs use arith or golomb coding */
 151     int low_delay;              /* use the low delay syntax                  */
 152     int globalmc_flag;          /* use global motion compensation            */
 153     int num_refs;               /* number of reference pictures              */
 154
 155     /* wavelet decoding */
 156     unsigned wavelet_depth;     /* depth of the IDWT                         */
 157     unsigned wavelet_idx;
 158
 159     /**
 160      * schroedinger older than 1.0.8 doesn't store
 161      * quant delta if only one codebook exists in a band
 162      */
 163     unsigned old_delta_quant;
 164     unsigned codeblock_mode;
 165
 166     struct {
 167         unsigned width;
 168         unsigned height;
 169     } codeblock[MAX_DWT_LEVELS+1];
 170
 171     struct {
 172         unsigned num_x;         /* number of horizontal slices               */
 173         unsigned num_y;         /* number of vertical slices                 */
 174         AVRational bytes;       /* average bytes per slice                   */
 175         uint8_t quant[MAX_DWT_LEVELS][4]; /* [DIRAC_STD] E.1 */
 176     } lowdelay;
 177
 178     struct {
 179         int pan_tilt[2];        /* pan/tilt vector                           */
 180         int zrs[2][2];          /* zoom/rotate/shear matrix                  */
 181         int perspective[2];     /* perspective vector                        */
 182         unsigned zrs_exp;
 183         unsigned perspective_exp;
 184     } globalmc[2];
 185
 186     /* motion compensation */
 187     uint8_t mv_precision;       /* [DIRAC_STD] REFS_WT_PRECISION             */
 188     int16_t weight[2];          /* [DIRAC_STD] REF1_WT and REF2_WT           */
 189     unsigned weight_log2denom;  /* [DIRAC_STD] REFS_WT_PRECISION             */
 190
 191     int blwidth;                /* number of blocks (horizontally)           */
 192     int blheight;               /* number of blocks (vertically)             */
 193     int sbwidth;                /* number of superblocks (horizontally)      */
 194     int sbheight;               /* number of superblocks (vertically)        */
 195
 196     uint8_t *sbsplit;
 197     DiracBlock *blmotion;
 198
 199     uint8_t *edge_emu_buffer[4];
 200     uint8_t *edge_emu_buffer_base;
 201
 202     uint16_t *mctmp;            /* buffer holding the MC data multiplied by OBMC weights */
 203     uint8_t *mcscratch;
 204     int buffer_stride;
 205
 206     DECLARE_ALIGNED(16, uint8_t, obmc_weight)[3][MAX_BLOCKSIZE*MAX_BLOCKSIZE];
 207
 208     void (*put_pixels_tab[4])(uint8_t *dst, const uint8_t *src[5], int stride, int h);
 209     void (*avg_pixels_tab[4])(uint8_t *dst, const uint8_t *src[5], int stride, int h);
 210     void (*add_obmc)(uint16_t *dst, const uint8_t *src, int stride, const uint8_t *obmc_weight, int yblen);
 211     dirac_weight_func weight_func;
 212     dirac_biweight_func biweight_func;
 213
 214     DiracFrame *current_picture;
 215     DiracFrame *ref_pics[2];
 216
 217     DiracFrame *ref_frames[MAX_REFERENCE_FRAMES+1];
 218     DiracFrame *delay_frames[MAX_DELAY+1];
 219     DiracFrame all_frames[MAX_FRAMES];
 220 } DiracContext;
 221
 222 /**
 223  * Dirac Specification ->
 224  * Parse code values. 9.6.1 Table 9.1
 225  */
 226 enum dirac_parse_code {
 227     pc_seq_header         = 0x00,
 228     pc_eos                = 0x10,
 229     pc_aux_data           = 0x20,
 230     pc_padding            = 0x30,
 231 };
 232
 233 enum dirac_subband {
 234     subband_ll = 0,
 235     subband_hl = 1,
 236     subband_lh = 2,
 237     subband_hh = 3,
 238     subband_nb,
 239 };
 240
 241 static const uint8_t default_qmat[][4][4] = {
 242     { { 5,  3,  3,  0}, { 0,  4,  4,  1}, { 0,  5,  5,  2}, { 0,  6,  6,  3} },
 243     { { 4,  2,  2,  0}, { 0,  4,  4,  2}, { 0,  5,  5,  3}, { 0,  7,  7,  5} },
 244     { { 5,  3,  3,  0}, { 0,  4,  4,  1}, { 0,  5,  5,  2}, { 0,  6,  6,  3} },
 245     { { 8,  4,  4,  0}, { 0,  4,  4,  0}, { 0,  4,  4,  0}, { 0,  4,  4,  0} },
 246     { { 8,  4,  4,  0}, { 0,  4,  4,  0}, { 0,  4,  4,  0}, { 0,  4,  4,  0} },
 247     { { 0,  4,  4,  8}, { 0,  8,  8, 12}, { 0, 13, 13, 17}, { 0, 17, 17, 21} },
 248     { { 3,  1,  1,  0}, { 0,  4,  4,  2}, { 0,  6,  6,  5}, { 0,  9,  9,  7} },
 249 };
 250
 251 static const int qscale_tab[MAX_QUANT+1] = {
 252     4,     5,     6,     7,     8,    10,    11,    13,
 253     16,    19,    23,    27,    32,    38,    45,    54,
 254     64,    76,    91,   108,   128,   152,   181,   215,
 255     256,   304,   362,   431,   512,   609,   724,   861,
 256     1024,  1218,  1448,  1722,  2048,  2435,  2896,  3444,
 257     4096,  4871,  5793,  6889,  8192,  9742, 11585, 13777,
 258     16384, 19484, 23170, 27554, 32768, 38968, 46341, 55109,
 259     65536, 77936
 260 };
 261
 262 static const int qoffset_intra_tab[MAX_QUANT+1] = {
 263     1,     2,     3,     4,     4,     5,     6,     7,
 264     8,    10,    12,    14,    16,    19,    23,    27,
 265     32,    38,    46,    54,    64,    76,    91,   108,
 266     128,   152,   181,   216,   256,   305,   362,   431,
 267     512,   609,   724,   861,  1024,  1218,  1448,  1722,
 268     2048,  2436,  2897,  3445,  4096,  4871,  5793,  6889,
 269     8192,  9742, 11585, 13777, 16384, 19484, 23171, 27555,
 270     32768, 38968
 271 };
 272
 273 static const int qoffset_inter_tab[MAX_QUANT+1] = {
 274     1,     2,     2,     3,     3,     4,     4,     5,
 275     6,     7,     9,    10,    12,    14,    17,    20,
 276     24,    29,    34,    41,    48,    57,    68,    81,
 277     96,   114,   136,   162,   192,   228,   272,   323,
 278     384,   457,   543,   646,   768,   913,  1086,  1292,
 279     1536,  1827,  2172,  2583,  3072,  3653,  4344,  5166,
 280     6144,  7307,  8689, 10333, 12288, 14613, 17378, 20666,
 281     24576, 29226
 282 };
 283
 284 /* magic number division by 3 from schroedinger */
 285 static inline int divide3(int x)
 286 {
 287     return ((x+1)*21845 + 10922) >> 16;
 288 }
 289
 290 static DiracFrame *remove_frame(DiracFrame *framelist[], int picnum)
 291 {
 292     DiracFrame *remove_pic = NULL;
 293     int i, remove_idx = -1;
 294
 295     for (i = 0; framelist[i]; i++)
 296         if (framelist[i]->avframe->display_picture_number == picnum) {
 297             remove_pic = framelist[i];
 298             remove_idx = i;
 299         }
 300
 301     if (remove_pic)
 302         for (i = remove_idx; framelist[i]; i++)
 303             framelist[i] = framelist[i+1];
 304
 305     return remove_pic;
 306 }
 307
 308 static int add_frame(DiracFrame *framelist[], int maxframes, DiracFrame *frame)
 309 {
 310     int i;
 311     for (i = 0; i < maxframes; i++)
 312         if (!framelist[i]) {
 313             framelist[i] = frame;
 314             return 0;
 315         }
 316     return -1;
 317 }
 318
 319 static int alloc_sequence_buffers(DiracContext *s)
 320 {
 321     int sbwidth  = DIVRNDUP(s->source.width,  4);
 322     int sbheight = DIVRNDUP(s->source.height, 4);
 323     int i, w, h, top_padding;
 324
 325     /* todo: think more about this / use or set Plane here */
 326     for (i = 0; i < 3; i++) {
 327         int max_xblen = MAX_BLOCKSIZE >> (i ? s->chroma_x_shift : 0);
 328         int max_yblen = MAX_BLOCKSIZE >> (i ? s->chroma_y_shift : 0);
 329         w = s->source.width  >> (i ? s->chroma_x_shift : 0);
 330         h = s->source.height >> (i ? s->chroma_y_shift : 0);
 331
 332         /* we allocate the max we support here since num decompositions can
 333          * change from frame to frame. Stride is aligned to 16 for SIMD, and
 334          * 1<<MAX_DWT_LEVELS top padding to avoid if(y>0) in arith decoding
 335          * MAX_BLOCKSIZE padding for MC: blocks can spill up to half of that
 336          * on each side */
 337         top_padding = FFMAX(1<<MAX_DWT_LEVELS, max_yblen/2);
 338         w = FFALIGN(CALC_PADDING(w, MAX_DWT_LEVELS), 8); /* FIXME: Should this be 16 for SSE??? */
 339         h = top_padding + CALC_PADDING(h, MAX_DWT_LEVELS) + max_yblen/2;
 340
 341         s->plane[i].idwt_buf_base = av_mallocz_array((w+max_xblen), h * sizeof(IDWTELEM));
 342         s->plane[i].idwt_tmp      = av_malloc_array((w+16), sizeof(IDWTELEM));
 343         s->plane[i].idwt_buf      = s->plane[i].idwt_buf_base + top_padding*w;
 344         if (!s->plane[i].idwt_buf_base || !s->plane[i].idwt_tmp)
 345             return AVERROR(ENOMEM);
 346     }
 347
 348     /* fixme: allocate using real stride here */
 349     s->sbsplit  = av_malloc_array(sbwidth, sbheight);
 350     s->blmotion = av_malloc_array(sbwidth, sbheight * 16 * sizeof(*s->blmotion));
 351
 352     if (!s->sbsplit || !s->blmotion)
 353         return AVERROR(ENOMEM);
 354     return 0;
 355 }
 356
 357 static int alloc_buffers(DiracContext *s, int stride)
 358 {
 359     int w = s->source.width;
 360     int h = s->source.height;
 361
 362     av_assert0(stride >= w);
 363     stride += 64;
 364
 365     if (s->buffer_stride >= stride)
 366         return 0;
 367     s->buffer_stride = 0;
 368
 369     av_freep(&s->edge_emu_buffer_base);
 370     memset(s->edge_emu_buffer, 0, sizeof(s->edge_emu_buffer));
 371     av_freep(&s->mctmp);
 372     av_freep(&s->mcscratch);
 373
 374     s->edge_emu_buffer_base = av_malloc_array(stride, MAX_BLOCKSIZE);
 375
 376     s->mctmp     = av_malloc_array((stride+MAX_BLOCKSIZE), (h+MAX_BLOCKSIZE) * sizeof(*s->mctmp));
 377     s->mcscratch = av_malloc_array(stride, MAX_BLOCKSIZE);
 378
 379     if (!s->edge_emu_buffer_base || !s->mctmp || !s->mcscratch)
 380         return AVERROR(ENOMEM);
 381
 382     s->buffer_stride = stride;
 383     return 0;
 384 }
 385
 386 static void free_sequence_buffers(DiracContext *s)
 387 {
 388     int i, j, k;
 389
 390     for (i = 0; i < MAX_FRAMES; i++) {
 391         if (s->all_frames[i].avframe->data[0]) {
 392             av_frame_unref(s->all_frames[i].avframe);
 393             memset(s->all_frames[i].interpolated, 0, sizeof(s->all_frames[i].interpolated));
 394         }
 395
 396         for (j = 0; j < 3; j++)
 397             for (k = 1; k < 4; k++)
 398                 av_freep(&s->all_frames[i].hpel_base[j][k]);
 399     }
 400
 401     memset(s->ref_frames, 0, sizeof(s->ref_frames));
 402     memset(s->delay_frames, 0, sizeof(s->delay_frames));
 403
 404     for (i = 0; i < 3; i++) {
 405         av_freep(&s->plane[i].idwt_buf_base);
 406         av_freep(&s->plane[i].idwt_tmp);
 407     }
 408
 409     s->buffer_stride = 0;
 410     av_freep(&s->sbsplit);
 411     av_freep(&s->blmotion);
 412     av_freep(&s->edge_emu_buffer_base);
 413
 414     av_freep(&s->mctmp);
 415     av_freep(&s->mcscratch);
 416 }
 417
 418 static av_cold int dirac_decode_init(AVCodecContext *avctx)
 419 {
 420     DiracContext *s = avctx->priv_data;
 421     int i;
 422
 423     s->avctx = avctx;
 424     s->frame_number = -1;
 425
 426     ff_diracdsp_init(&s->diracdsp);
 427     ff_mpegvideoencdsp_init(&s->mpvencdsp, avctx);
 428     ff_videodsp_init(&s->vdsp, 8);
 429
 430     for (i = 0; i < MAX_FRAMES; i++) {
 431         s->all_frames[i].avframe = av_frame_alloc();
 432         if (!s->all_frames[i].avframe) {
 433             while (i > 0)
 434                 av_frame_free(&s->all_frames[--i].avframe);
 435             return AVERROR(ENOMEM);
 436         }
 437     }
 438
 439     return 0;
 440 }
 441
 442 static void dirac_decode_flush(AVCodecContext *avctx)
 443 {
 444     DiracContext *s = avctx->priv_data;
 445     free_sequence_buffers(s);
 446     s->seen_sequence_header = 0;
 447     s->frame_number = -1;
 448 }
 449
 450 static av_cold int dirac_decode_end(AVCodecContext *avctx)
 451 {
 452     DiracContext *s = avctx->priv_data;
 453     int i;
 454
 455     dirac_decode_flush(avctx);
 456     for (i = 0; i < MAX_FRAMES; i++)
 457         av_frame_free(&s->all_frames[i].avframe);
 458
 459     return 0;
 460 }
 461
 462 #define SIGN_CTX(x) (CTX_SIGN_ZERO + ((x) > 0) - ((x) < 0))
 463
 464 static inline void coeff_unpack_arith(DiracArith *c, int qfactor, int qoffset,
 465                                       SubBand *b, IDWTELEM *buf, int x, int y)
 466 {
 467     int coeff, sign;
 468     int sign_pred = 0;
 469     int pred_ctx = CTX_ZPZN_F1;
 470
 471     /* Check if the parent subband has a 0 in the corresponding position */
 472     if (b->parent)
 473         pred_ctx += !!b->parent->ibuf[b->parent->stride * (y>>1) + (x>>1)] << 1;
 474
 475     if (b->orientation == subband_hl)
 476         sign_pred = buf[-b->stride];
 477
 478     /* Determine if the pixel has only zeros in its neighbourhood */
 479     if (x) {
 480         pred_ctx += !(buf[-1] | buf[-b->stride] | buf[-1-b->stride]);
 481         if (b->orientation == subband_lh)
 482             sign_pred = buf[-1];
 483     } else {
 484         pred_ctx += !buf[-b->stride];
 485     }
 486
 487     coeff = dirac_get_arith_uint(c, pred_ctx, CTX_COEFF_DATA);
 488     if (coeff) {
 489         coeff = (coeff * qfactor + qoffset + 2) >> 2;
 490         sign  = dirac_get_arith_bit(c, SIGN_CTX(sign_pred));
 491         coeff = (coeff ^ -sign) + sign;
 492     }
 493     *buf = coeff;
 494 }
 495
 496 static inline int coeff_unpack_golomb(GetBitContext *gb, int qfactor, int qoffset)
 497 {
 498     int sign, coeff;
 499
 500     coeff = svq3_get_ue_golomb(gb);
 501     if (coeff) {
 502         coeff = (coeff * qfactor + qoffset + 2) >> 2;
 503         sign  = get_bits1(gb);
 504         coeff = (coeff ^ -sign) + sign;
 505     }
 506     return coeff;
 507 }
 508
 509 /**
 510  * Decode the coeffs in the rectangle defined by left, right, top, bottom
 511  * [DIRAC_STD] 13.4.3.2 Codeblock unpacking loop. codeblock()
 512  */
 513 static inline void codeblock(DiracContext *s, SubBand *b,
 514                              GetBitContext *gb, DiracArith *c,
 515                              int left, int right, int top, int bottom,
 516                              int blockcnt_one, int is_arith)
 517 {
 518     int x, y, zero_block;
 519     int qoffset, qfactor;
 520     IDWTELEM *buf;
 521
 522     /* check for any coded coefficients in this codeblock */
 523     if (!blockcnt_one) {
 524         if (is_arith)
 525             zero_block = dirac_get_arith_bit(c, CTX_ZERO_BLOCK);
 526         else
 527             zero_block = get_bits1(gb);
 528
 529         if (zero_block)
 530             return;
 531     }
 532
 533     if (s->codeblock_mode && !(s->old_delta_quant && blockcnt_one)) {
 534         int quant = b->quant;
 535         if (is_arith)
 536             quant += dirac_get_arith_int(c, CTX_DELTA_Q_F, CTX_DELTA_Q_DATA);
 537         else
 538             quant += dirac_get_se_golomb(gb);
 539         if (quant < 0) {
 540             av_log(s->avctx, AV_LOG_ERROR, "Invalid quant\n");
 541             return;
 542         }
 543         b->quant = quant;
 544     }
 545
 546     b->quant = FFMIN(b->quant, MAX_QUANT);
 547
 548     qfactor = qscale_tab[b->quant];
 549     /* TODO: context pointer? */
 550     if (!s->num_refs)
 551         qoffset = qoffset_intra_tab[b->quant];
 552     else
 553         qoffset = qoffset_inter_tab[b->quant];
 554
 555     buf = b->ibuf + top * b->stride;
 556     for (y = top; y < bottom; y++) {
 557         for (x = left; x < right; x++) {
 558             /* [DIRAC_STD] 13.4.4 Subband coefficients. coeff_unpack() */
 559             if (is_arith)
 560                 coeff_unpack_arith(c, qfactor, qoffset, b, buf+x, x, y);
 561             else
 562                 buf[x] = coeff_unpack_golomb(gb, qfactor, qoffset);
 563         }
 564         buf += b->stride;
 565     }
 566 }
 567
 568 /**
 569  * Dirac Specification ->
 570  * 13.3 intra_dc_prediction(band)
 571  */
 572 static inline void intra_dc_prediction(SubBand *b)
 573 {
 574     IDWTELEM *buf = b->ibuf;
 575     int x, y;
 576
 577     for (x = 1; x < b->width; x++)
 578         buf[x] += buf[x-1];
 579     buf += b->stride;
 580
 581     for (y = 1; y < b->height; y++) {
 582         buf[0] += buf[-b->stride];
 583
 584         for (x = 1; x < b->width; x++) {
 585             int pred = buf[x - 1] + buf[x - b->stride] + buf[x - b->stride-1];
 586             buf[x]  += divide3(pred);
 587         }
 588         buf += b->stride;
 589     }
 590 }
 591
 592 /**
 593  * Dirac Specification ->
 594  * 13.4.2 Non-skipped subbands.  subband_coeffs()
 595  */
 596 static av_always_inline void decode_subband_internal(DiracContext *s, SubBand *b, int is_arith)
 597 {
 598     int cb_x, cb_y, left, right, top, bottom;
 599     DiracArith c;
 600     GetBitContext gb;
 601     int cb_width  = s->codeblock[b->level + (b->orientation != subband_ll)].width;
 602     int cb_height = s->codeblock[b->level + (b->orientation != subband_ll)].height;
 603     int blockcnt_one = (cb_width + cb_height) == 2;
 604
 605     if (!b->length)
 606         return;
 607
 608     init_get_bits8(&gb, b->coeff_data, b->length);
 609
 610     if (is_arith)
 611         ff_dirac_init_arith_decoder(&c, &gb, b->length);
 612
 613     top = 0;
 614     for (cb_y = 0; cb_y < cb_height; cb_y++) {
 615         bottom = (b->height * (cb_y+1LL)) / cb_height;
 616         left = 0;
 617         for (cb_x = 0; cb_x < cb_width; cb_x++) {
 618             right = (b->width * (cb_x+1LL)) / cb_width;
 619             codeblock(s, b, &gb, &c, left, right, top, bottom, blockcnt_one, is_arith);
 620             left = right;
 621         }
 622         top = bottom;
 623     }
 624
 625     if (b->orientation == subband_ll && s->num_refs == 0)
 626         intra_dc_prediction(b);
 627 }
 628
 629 static int decode_subband_arith(AVCodecContext *avctx, void *b)
 630 {
 631     DiracContext *s = avctx->priv_data;
 632     decode_subband_internal(s, b, 1);
 633     return 0;
 634 }
 635
 636 static int decode_subband_golomb(AVCodecContext *avctx, void *arg)
 637 {
 638     DiracContext *s = avctx->priv_data;
 639     SubBand **b     = arg;
 640     decode_subband_internal(s, *b, 0);
 641     return 0;
 642 }
 643
 644 /**
 645  * Dirac Specification ->
 646  * [DIRAC_STD] 13.4.1 core_transform_data()
 647  */
 648 static void decode_component(DiracContext *s, int comp)
 649 {
 650     AVCodecContext *avctx = s->avctx;
 651     SubBand *bands[3*MAX_DWT_LEVELS+1];
 652     enum dirac_subband orientation;
 653     int level, num_bands = 0;
 654
 655     /* Unpack all subbands at all levels. */
 656     for (level = 0; level < s->wavelet_depth; level++) {
 657         for (orientation = !!level; orientation < 4; orientation++) {
 658             SubBand *b = &s->plane[comp].band[level][orientation];
 659             bands[num_bands++] = b;
 660
 661             align_get_bits(&s->gb);
 662             /* [DIRAC_STD] 13.4.2 subband() */
 663             b->length = svq3_get_ue_golomb(&s->gb);
 664             if (b->length) {
 665                 b->quant = svq3_get_ue_golomb(&s->gb);
 666                 align_get_bits(&s->gb);
 667                 b->coeff_data = s->gb.buffer + get_bits_count(&s->gb)/8;
 668                 b->length = FFMIN(b->length, FFMAX(get_bits_left(&s->gb)/8, 0));
 669                 skip_bits_long(&s->gb, b->length*8);
 670             }
 671         }
 672         /* arithmetic coding has inter-level dependencies, so we can only execute one level at a time */
 673         if (s->is_arith)
 674             avctx->execute(avctx, decode_subband_arith, &s->plane[comp].band[level][!!level],
 675                            NULL, 4-!!level, sizeof(SubBand));
 676     }
 677     /* golomb coding has no inter-level dependencies, so we can execute all subbands in parallel */
 678     if (!s->is_arith)
 679         avctx->execute(avctx, decode_subband_golomb, bands, NULL, num_bands, sizeof(SubBand*));
 680 }
 681
 682 /* [DIRAC_STD] 13.5.5.2 Luma slice subband data. luma_slice_band(level,orient,sx,sy) --> if b2 == NULL */
 683 /* [DIRAC_STD] 13.5.5.3 Chroma slice subband data. chroma_slice_band(level,orient,sx,sy) --> if b2 != NULL */
 684 static void lowdelay_subband(DiracContext *s, GetBitContext *gb, int quant,
 685                              int slice_x, int slice_y, int bits_end,
 686                              SubBand *b1, SubBand *b2)
 687 {
 688     int left   = b1->width  * slice_x    / s->lowdelay.num_x;
 689     int right  = b1->width  *(slice_x+1) / s->lowdelay.num_x;
 690     int top    = b1->height * slice_y    / s->lowdelay.num_y;
 691     int bottom = b1->height *(slice_y+1) / s->lowdelay.num_y;
 692
 693     int qfactor = qscale_tab[FFMIN(quant, MAX_QUANT)];
 694     int qoffset = qoffset_intra_tab[FFMIN(quant, MAX_QUANT)];
 695
 696     IDWTELEM *buf1 =      b1->ibuf + top * b1->stride;
 697     IDWTELEM *buf2 = b2 ? b2->ibuf + top * b2->stride : NULL;
 698     int x, y;
 699     /* we have to constantly check for overread since the spec explicitly
 700        requires this, with the meaning that all remaining coeffs are set to 0 */
 701     if (get_bits_count(gb) >= bits_end)
 702         return;
 703
 704     for (y = top; y < bottom; y++) {
 705         for (x = left; x < right; x++) {
 706             buf1[x] = coeff_unpack_golomb(gb, qfactor, qoffset);
 707             if (get_bits_count(gb) >= bits_end)
 708                 return;
 709             if (buf2) {
 710                 buf2[x] = coeff_unpack_golomb(gb, qfactor, qoffset);
 711                 if (get_bits_count(gb) >= bits_end)
 712                     return;
 713             }
 714         }
 715         buf1 += b1->stride;
 716         if (buf2)
 717             buf2 += b2->stride;
 718     }
 719 }
 720
 721 struct lowdelay_slice {
 722     GetBitContext gb;
 723     int slice_x;
 724     int slice_y;
 725     int bytes;
 726 };
 727
 728
 729 /**
 730  * Dirac Specification ->
 731  * 13.5.2 Slices. slice(sx,sy)
 732  */
 733 static int decode_lowdelay_slice(AVCodecContext *avctx, void *arg)
 734 {
 735     DiracContext *s = avctx->priv_data;
 736     struct lowdelay_slice *slice = arg;
 737     GetBitContext *gb = &slice->gb;
 738     enum dirac_subband orientation;
 739     int level, quant, chroma_bits, chroma_end;
 740
 741     int quant_base  = get_bits(gb, 7); /*[DIRAC_STD] qindex */
 742     int length_bits = av_log2(8 * slice->bytes)+1;
 743     int luma_bits   = get_bits_long(gb, length_bits);
 744     int luma_end    = get_bits_count(gb) + FFMIN(luma_bits, get_bits_left(gb));
 745
 746     /* [DIRAC_STD] 13.5.5.2 luma_slice_band */
 747     for (level = 0; level < s->wavelet_depth; level++)
 748         for (orientation = !!level; orientation < 4; orientation++) {
 749             quant = FFMAX(quant_base - s->lowdelay.quant[level][orientation], 0);
 750             lowdelay_subband(s, gb, quant, slice->slice_x, slice->slice_y, luma_end,
 751                              &s->plane[0].band[level][orientation], NULL);
 752         }
 753
 754     /* consume any unused bits from luma */
 755     skip_bits_long(gb, get_bits_count(gb) - luma_end);
 756
 757     chroma_bits = 8*slice->bytes - 7 - length_bits - luma_bits;
 758     chroma_end  = get_bits_count(gb) + FFMIN(chroma_bits, get_bits_left(gb));
 759     /* [DIRAC_STD] 13.5.5.3 chroma_slice_band */
 760     for (level = 0; level < s->wavelet_depth; level++)
 761         for (orientation = !!level; orientation < 4; orientation++) {
 762             quant = FFMAX(quant_base - s->lowdelay.quant[level][orientation], 0);
 763             lowdelay_subband(s, gb, quant, slice->slice_x, slice->slice_y, chroma_end,
 764                              &s->plane[1].band[level][orientation],
 765                              &s->plane[2].band[level][orientation]);
 766         }
 767
 768     return 0;
 769 }
 770
 771 /**
 772  * Dirac Specification ->
 773  * 13.5.1 low_delay_transform_data()
 774  */
 775 static int decode_lowdelay(DiracContext *s)
 776 {
 777     AVCodecContext *avctx = s->avctx;
 778     int slice_x, slice_y, bytes, bufsize;
 779     const uint8_t *buf;
 780     struct lowdelay_slice *slices;
 781     int slice_num = 0;
 782
 783     slices = av_mallocz_array(s->lowdelay.num_x, s->lowdelay.num_y * sizeof(struct lowdelay_slice));
 784     if (!slices)
 785         return AVERROR(ENOMEM);
 786
 787     align_get_bits(&s->gb);
 788     /*[DIRAC_STD] 13.5.2 Slices. slice(sx,sy) */
 789     buf = s->gb.buffer + get_bits_count(&s->gb)/8;
 790     bufsize = get_bits_left(&s->gb);
 791
 792     for (slice_y = 0; bufsize > 0 && slice_y < s->lowdelay.num_y; slice_y++)
 793         for (slice_x = 0; bufsize > 0 && slice_x < s->lowdelay.num_x; slice_x++) {
 794             bytes = (slice_num+1) * s->lowdelay.bytes.num / s->lowdelay.bytes.den
 795                 - slice_num    * s->lowdelay.bytes.num / s->lowdelay.bytes.den;
 796
 797             slices[slice_num].bytes   = bytes;
 798             slices[slice_num].slice_x = slice_x;
 799             slices[slice_num].slice_y = slice_y;
 800             init_get_bits(&slices[slice_num].gb, buf, bufsize);
 801             slice_num++;
 802
 803             buf     += bytes;
 804             if (bufsize/8 >= bytes)
 805                 bufsize -= bytes*8;
 806             else
 807                 bufsize = 0;
 808         }
 809
 810     avctx->execute(avctx, decode_lowdelay_slice, slices, NULL, slice_num,
 811                    sizeof(struct lowdelay_slice)); /* [DIRAC_STD] 13.5.2 Slices */
 812     intra_dc_prediction(&s->plane[0].band[0][0]);  /* [DIRAC_STD] 13.3 intra_dc_prediction() */
 813     intra_dc_prediction(&s->plane[1].band[0][0]);  /* [DIRAC_STD] 13.3 intra_dc_prediction() */
 814     intra_dc_prediction(&s->plane[2].band[0][0]);  /* [DIRAC_STD] 13.3 intra_dc_prediction() */
 815     av_free(slices);
 816     return 0;
 817 }
 818
 819 static void init_planes(DiracContext *s)
 820 {
 821     int i, w, h, level, orientation;
 822
 823     for (i = 0; i < 3; i++) {
 824         Plane *p = &s->plane[i];
 825
 826         p->width       = s->source.width  >> (i ? s->chroma_x_shift : 0);
 827         p->height      = s->source.height >> (i ? s->chroma_y_shift : 0);
 828         p->idwt_width  = w = CALC_PADDING(p->width , s->wavelet_depth);
 829         p->idwt_height = h = CALC_PADDING(p->height, s->wavelet_depth);
 830         p->idwt_stride = FFALIGN(p->idwt_width, 8);
 831
 832         for (level = s->wavelet_depth-1; level >= 0; level--) {
 833             w = w>>1;
 834             h = h>>1;
 835             for (orientation = !!level; orientation < 4; orientation++) {
 836                 SubBand *b = &p->band[level][orientation];
 837
 838                 b->ibuf   = p->idwt_buf;
 839                 b->level  = level;
 840                 b->stride = p->idwt_stride << (s->wavelet_depth - level);
 841                 b->width  = w;
 842                 b->height = h;
 843                 b->orientation = orientation;
 844
 845                 if (orientation & 1)
 846                     b->ibuf += w;
 847                 if (orientation > 1)
 848                     b->ibuf += b->stride>>1;
 849
 850                 if (level)
 851                     b->parent = &p->band[level-1][orientation];
 852             }
 853         }
 854
 855         if (i > 0) {
 856             p->xblen = s->plane[0].xblen >> s->chroma_x_shift;
 857             p->yblen = s->plane[0].yblen >> s->chroma_y_shift;
 858             p->xbsep = s->plane[0].xbsep >> s->chroma_x_shift;
 859             p->ybsep = s->plane[0].ybsep >> s->chroma_y_shift;
 860         }
 861
 862         p->xoffset = (p->xblen - p->xbsep)/2;
 863         p->yoffset = (p->yblen - p->ybsep)/2;
 864     }
 865 }
 866
 867 /**
 868  * Unpack the motion compensation parameters
 869  * Dirac Specification ->
 870  * 11.2 Picture prediction data. picture_prediction()
 871  */
 872 static int dirac_unpack_prediction_parameters(DiracContext *s)
 873 {
 874     static const uint8_t default_blen[] = { 4, 12, 16, 24 };
 875     static const uint8_t default_bsep[] = { 4,  8, 12, 16 };
 876
 877     GetBitContext *gb = &s->gb;
 878     unsigned idx, ref;
 879
 880     align_get_bits(gb);
 881     /* [DIRAC_STD] 11.2.2 Block parameters. block_parameters() */
 882     /* Luma and Chroma are equal. 11.2.3 */
 883     idx = svq3_get_ue_golomb(gb); /* [DIRAC_STD] index */
 884
 885     if (idx > 4) {
 886         av_log(s->avctx, AV_LOG_ERROR, "Block prediction index too high\n");
 887         return AVERROR_INVALIDDATA;
 888     }
 889
 890     if (idx == 0) {
 891         s->plane[0].xblen = svq3_get_ue_golomb(gb);
 892         s->plane[0].yblen = svq3_get_ue_golomb(gb);
 893         s->plane[0].xbsep = svq3_get_ue_golomb(gb);
 894         s->plane[0].ybsep = svq3_get_ue_golomb(gb);
 895     } else {
 896         /*[DIRAC_STD] preset_block_params(index). Table 11.1 */
 897         s->plane[0].xblen = default_blen[idx-1];
 898         s->plane[0].yblen = default_blen[idx-1];
 899         s->plane[0].xbsep = default_bsep[idx-1];
 900         s->plane[0].ybsep = default_bsep[idx-1];
 901     }
 902     /*[DIRAC_STD] 11.2.4 motion_data_dimensions()
 903       Calculated in function dirac_unpack_block_motion_data */
 904
 905     if (s->plane[0].xblen % (1 << s->chroma_x_shift) != 0 ||
 906         s->plane[0].yblen % (1 << s->chroma_y_shift) != 0 ||
 907         !s->plane[0].xblen || !s->plane[0].yblen) {
 908         av_log(s->avctx, AV_LOG_ERROR,
 909                "invalid x/y block length (%d/%d) for x/y chroma shift (%d/%d)\n",
 910                s->plane[0].xblen, s->plane[0].yblen, s->chroma_x_shift, s->chroma_y_shift);
 911         return AVERROR_INVALIDDATA;
 912     }
 913     if (!s->plane[0].xbsep || !s->plane[0].ybsep || s->plane[0].xbsep < s->plane[0].xblen/2 || s->plane[0].ybsep < s->plane[0].yblen/2) {
 914         av_log(s->avctx, AV_LOG_ERROR, "Block separation too small\n");
 915         return AVERROR_INVALIDDATA;
 916     }
 917     if (s->plane[0].xbsep > s->plane[0].xblen || s->plane[0].ybsep > s->plane[0].yblen) {
 918         av_log(s->avctx, AV_LOG_ERROR, "Block separation greater than size\n");
 919         return AVERROR_INVALIDDATA;
 920     }
 921     if (FFMAX(s->plane[0].xblen, s->plane[0].yblen) > MAX_BLOCKSIZE) {
 922         av_log(s->avctx, AV_LOG_ERROR, "Unsupported large block size\n");
 923         return AVERROR_PATCHWELCOME;
 924     }
 925
 926     /*[DIRAC_STD] 11.2.5 Motion vector precision. motion_vector_precision()
 927       Read motion vector precision */
 928     s->mv_precision = svq3_get_ue_golomb(gb);
 929     if (s->mv_precision > 3) {
 930         av_log(s->avctx, AV_LOG_ERROR, "MV precision finer than eighth-pel\n");
 931         return AVERROR_INVALIDDATA;
 932     }
 933
 934     /*[DIRAC_STD] 11.2.6 Global motion. global_motion()
 935       Read the global motion compensation parameters */
 936     s->globalmc_flag = get_bits1(gb);
 937     if (s->globalmc_flag) {
 938         memset(s->globalmc, 0, sizeof(s->globalmc));
 939         /* [DIRAC_STD] pan_tilt(gparams) */
 940         for (ref = 0; ref < s->num_refs; ref++) {
 941             if (get_bits1(gb)) {
 942                 s->globalmc[ref].pan_tilt[0] = dirac_get_se_golomb(gb);
 943                 s->globalmc[ref].pan_tilt[1] = dirac_get_se_golomb(gb);
 944             }
 945             /* [DIRAC_STD] zoom_rotate_shear(gparams)
 946                zoom/rotation/shear parameters */
 947             if (get_bits1(gb)) {
 948                 s->globalmc[ref].zrs_exp   = svq3_get_ue_golomb(gb);
 949                 s->globalmc[ref].zrs[0][0] = dirac_get_se_golomb(gb);
 950                 s->globalmc[ref].zrs[0][1] = dirac_get_se_golomb(gb);
 951                 s->globalmc[ref].zrs[1][0] = dirac_get_se_golomb(gb);
 952                 s->globalmc[ref].zrs[1][1] = dirac_get_se_golomb(gb);
 953             } else {
 954                 s->globalmc[ref].zrs[0][0] = 1;
 955                 s->globalmc[ref].zrs[1][1] = 1;
 956             }
 957             /* [DIRAC_STD] perspective(gparams) */
 958             if (get_bits1(gb)) {
 959                 s->globalmc[ref].perspective_exp = svq3_get_ue_golomb(gb);
 960                 s->globalmc[ref].perspective[0]  = dirac_get_se_golomb(gb);
 961                 s->globalmc[ref].perspective[1]  = dirac_get_se_golomb(gb);
 962             }
 963         }
 964     }
 965
 966     /*[DIRAC_STD] 11.2.7 Picture prediction mode. prediction_mode()
 967       Picture prediction mode, not currently used. */
 968     if (svq3_get_ue_golomb(gb)) {
 969         av_log(s->avctx, AV_LOG_ERROR, "Unknown picture prediction mode\n");
 970         return AVERROR_INVALIDDATA;
 971     }
 972
 973     /* [DIRAC_STD] 11.2.8 Reference picture weight. reference_picture_weights()
 974        just data read, weight calculation will be done later on. */
 975     s->weight_log2denom = 1;
 976     s->weight[0]        = 1;
 977     s->weight[1]        = 1;
 978
 979     if (get_bits1(gb)) {
 980         s->weight_log2denom = svq3_get_ue_golomb(gb);
 981         s->weight[0] = dirac_get_se_golomb(gb);
 982         if (s->num_refs == 2)
 983             s->weight[1] = dirac_get_se_golomb(gb);
 984     }
 985     return 0;
 986 }
 987
 988 /**
 989  * Dirac Specification ->
 990  * 11.3 Wavelet transform data. wavelet_transform()
 991  */
 992 static int dirac_unpack_idwt_params(DiracContext *s)
 993 {
 994     GetBitContext *gb = &s->gb;
 995     int i, level;
 996     unsigned tmp;
 997
 998 #define CHECKEDREAD(dst, cond, errmsg) \
 999     tmp = svq3_get_ue_golomb(gb); \
1000     if (cond) { \
1001         av_log(s->avctx, AV_LOG_ERROR, errmsg); \
1002         return AVERROR_INVALIDDATA; \
1003     }\
1004     dst = tmp;
1005
1006     align_get_bits(gb);
1007
1008     s->zero_res = s->num_refs ? get_bits1(gb) : 0;
1009     if (s->zero_res)
1010         return 0;
1011
1012     /*[DIRAC_STD] 11.3.1 Transform parameters. transform_parameters() */
1013     CHECKEDREAD(s->wavelet_idx, tmp > 6, "wavelet_idx is too big\n")
1014
1015     CHECKEDREAD(s->wavelet_depth, tmp > MAX_DWT_LEVELS || tmp < 1, "invalid number of DWT decompositions\n")
1016
1017     if (!s->low_delay) {
1018         /* Codeblock parameters (core syntax only) */
1019         if (get_bits1(gb)) {
1020             for (i = 0; i <= s->wavelet_depth; i++) {
1021                 CHECKEDREAD(s->codeblock[i].width , tmp < 1 || tmp > (s->avctx->width >>s->wavelet_depth-i), "codeblock width invalid\n")
1022                 CHECKEDREAD(s->codeblock[i].height, tmp < 1 || tmp > (s->avctx->height>>s->wavelet_depth-i), "codeblock height invalid\n")
1023             }
1024
1025             CHECKEDREAD(s->codeblock_mode, tmp > 1, "unknown codeblock mode\n")
1026         } else
1027             for (i = 0; i <= s->wavelet_depth; i++)
1028                 s->codeblock[i].width = s->codeblock[i].height = 1;
1029     } else {
1030         /* Slice parameters + quantization matrix*/
1031         /*[DIRAC_STD] 11.3.4 Slice coding Parameters (low delay syntax only). slice_parameters() */
1032         s->lowdelay.num_x     = svq3_get_ue_golomb(gb);
1033         s->lowdelay.num_y     = svq3_get_ue_golomb(gb);
1034         s->lowdelay.bytes.num = svq3_get_ue_golomb(gb);
1035         s->lowdelay.bytes.den = svq3_get_ue_golomb(gb);
1036
1037         if (s->lowdelay.bytes.den <= 0) {
1038             av_log(s->avctx,AV_LOG_ERROR,"Invalid lowdelay.bytes.den\n");
1039             return AVERROR_INVALIDDATA;
1040         }
1041
1042         /* [DIRAC_STD] 11.3.5 Quantisation matrices (low-delay syntax). quant_matrix() */
1043         if (get_bits1(gb)) {
1044             av_log(s->avctx,AV_LOG_DEBUG,"Low Delay: Has Custom Quantization Matrix!\n");
1045             /* custom quantization matrix */
1046             s->lowdelay.quant[0][0] = svq3_get_ue_golomb(gb);
1047             for (level = 0; level < s->wavelet_depth; level++) {
1048                 s->lowdelay.quant[level][1] = svq3_get_ue_golomb(gb);
1049                 s->lowdelay.quant[level][2] = svq3_get_ue_golomb(gb);
1050                 s->lowdelay.quant[level][3] = svq3_get_ue_golomb(gb);
1051             }
1052         } else {
1053             if (s->wavelet_depth > 4) {
1054                 av_log(s->avctx,AV_LOG_ERROR,"Mandatory custom low delay matrix missing for depth %d\n", s->wavelet_depth);
1055                 return AVERROR_INVALIDDATA;
1056             }
1057             /* default quantization matrix */
1058             for (level = 0; level < s->wavelet_depth; level++)
1059                 for (i = 0; i < 4; i++) {
1060                     s->lowdelay.quant[level][i] = default_qmat[s->wavelet_idx][level][i];
1061                     /* haar with no shift differs for different depths */
1062                     if (s->wavelet_idx == 3)
1063                         s->lowdelay.quant[level][i] += 4*(s->wavelet_depth-1 - level);
1064                 }
1065         }
1066     }
1067     return 0;
1068 }
1069
/**
 * Predict the split mode of the superblock at (x, y) from its already
 * decoded neighbours.
 *
 * @param sbsplit pointer to the entry of the current superblock
 * @param stride  number of entries per superblock row
 * @return 0 for the top-left superblock, the left/top neighbour along the
 *         first row/column, otherwise the table-mapped sum of the left,
 *         top and top-left neighbours
 */
static inline int pred_sbsplit(uint8_t *sbsplit, int stride, int x, int y)
{
    /* maps the sum of three split modes (0..6) to their rounded mean */
    static const uint8_t avgsplit[7] = { 0, 0, 1, 1, 1, 2, 2 };
    int left, top, topleft;

    if (x == 0 && y == 0)
        return 0;
    if (y == 0)
        return sbsplit[-1];
    if (x == 0)
        return sbsplit[-stride];

    left    = sbsplit[-1];
    top     = sbsplit[-stride];
    topleft = sbsplit[-stride-1];

    return avgsplit[left + top + topleft];
}
1083
1084 static inline int pred_block_mode(DiracBlock *block, int stride, int x, int y, int refmask)
1085 {
1086     int pred;
1087
1088     if (!(x|y))
1089         return 0;
1090     else if (!y)
1091         return block[-1].ref & refmask;
1092     else if (!x)
1093         return block[-stride].ref & refmask;
1094
1095     /* return the majority */
1096     pred = (block[-1].ref & refmask) + (block[-stride].ref & refmask) + (block[-stride-1].ref & refmask);
1097     return (pred >> 1) & refmask;
1098 }
1099
1100 static inline void pred_block_dc(DiracBlock *block, int stride, int x, int y)
1101 {
1102     int i, n = 0;
1103
1104     memset(block->u.dc, 0, sizeof(block->u.dc));
1105
1106     if (x && !(block[-1].ref & 3)) {
1107         for (i = 0; i < 3; i++)
1108             block->u.dc[i] += block[-1].u.dc[i];
1109         n++;
1110     }
1111
1112     if (y && !(block[-stride].ref & 3)) {
1113         for (i = 0; i < 3; i++)
1114             block->u.dc[i] += block[-stride].u.dc[i];
1115         n++;
1116     }
1117
1118     if (x && y && !(block[-1-stride].ref & 3)) {
1119         for (i = 0; i < 3; i++)
1120             block->u.dc[i] += block[-1-stride].u.dc[i];
1121         n++;
1122     }
1123
1124     if (n == 2) {
1125         for (i = 0; i < 3; i++)
1126             block->u.dc[i] = (block->u.dc[i]+1)>>1;
1127     } else if (n == 3) {
1128         for (i = 0; i < 3; i++)
1129             block->u.dc[i] = divide3(block->u.dc[i]);
1130     }
1131 }
1132
1133 static inline void pred_mv(DiracBlock *block, int stride, int x, int y, int ref)
1134 {
1135     int16_t *pred[3];
1136     int refmask = ref+1;
1137     int mask = refmask | DIRAC_REF_MASK_GLOBAL; /*  exclude gmc blocks */
1138     int n = 0;
1139
1140     if (x && (block[-1].ref & mask) == refmask)
1141         pred[n++] = block[-1].u.mv[ref];
1142
1143     if (y && (block[-stride].ref & mask) == refmask)
1144         pred[n++] = block[-stride].u.mv[ref];
1145
1146     if (x && y && (block[-stride-1].ref & mask) == refmask)
1147         pred[n++] = block[-stride-1].u.mv[ref];
1148
1149     switch (n) {
1150     case 0:
1151         block->u.mv[ref][0] = 0;
1152         block->u.mv[ref][1] = 0;
1153         break;
1154     case 1:
1155         block->u.mv[ref][0] = pred[0][0];
1156         block->u.mv[ref][1] = pred[0][1];
1157         break;
1158     case 2:
1159         block->u.mv[ref][0] = (pred[0][0] + pred[1][0] + 1) >> 1;
1160         block->u.mv[ref][1] = (pred[0][1] + pred[1][1] + 1) >> 1;
1161         break;
1162     case 3:
1163         block->u.mv[ref][0] = mid_pred(pred[0][0], pred[1][0], pred[2][0]);
1164         block->u.mv[ref][1] = mid_pred(pred[0][1], pred[1][1], pred[2][1]);
1165         break;
1166     }
1167 }
1168
1169 static void global_mv(DiracContext *s, DiracBlock *block, int x, int y, int ref)
1170 {
1171     int ez      = s->globalmc[ref].zrs_exp;
1172     int ep      = s->globalmc[ref].perspective_exp;
1173     int (*A)[2] = s->globalmc[ref].zrs;
1174     int *b      = s->globalmc[ref].pan_tilt;
1175     int *c      = s->globalmc[ref].perspective;
1176
1177     int m       = (1<<ep) - (c[0]*x + c[1]*y);
1178     int mx      = m * ((A[0][0] * x + A[0][1]*y) + (1<<ez) * b[0]);
1179     int my      = m * ((A[1][0] * x + A[1][1]*y) + (1<<ez) * b[1]);
1180
1181     block->u.mv[ref][0] = (mx + (1<<(ez+ep))) >> (ez+ep);
1182     block->u.mv[ref][1] = (my + (1<<(ez+ep))) >> (ez+ep);
1183 }
1184
1185 static void decode_block_params(DiracContext *s, DiracArith arith[8], DiracBlock *block,
1186                                 int stride, int x, int y)
1187 {
1188     int i;
1189
1190     block->ref  = pred_block_mode(block, stride, x, y, DIRAC_REF_MASK_REF1);
1191     block->ref ^= dirac_get_arith_bit(arith, CTX_PMODE_REF1);
1192
1193     if (s->num_refs == 2) {
1194         block->ref |= pred_block_mode(block, stride, x, y, DIRAC_REF_MASK_REF2);
1195         block->ref ^= dirac_get_arith_bit(arith, CTX_PMODE_REF2) << 1;
1196     }
1197
1198     if (!block->ref) {
1199         pred_block_dc(block, stride, x, y);
1200         for (i = 0; i < 3; i++)
1201             block->u.dc[i] += dirac_get_arith_int(arith+1+i, CTX_DC_F1, CTX_DC_DATA);
1202         return;
1203     }
1204
1205     if (s->globalmc_flag) {
1206         block->ref |= pred_block_mode(block, stride, x, y, DIRAC_REF_MASK_GLOBAL);
1207         block->ref ^= dirac_get_arith_bit(arith, CTX_GLOBAL_BLOCK) << 2;
1208     }
1209
1210     for (i = 0; i < s->num_refs; i++)
1211         if (block->ref & (i+1)) {
1212             if (block->ref & DIRAC_REF_MASK_GLOBAL) {
1213                 global_mv(s, block, x, y, i);
1214             } else {
1215                 pred_mv(block, stride, x, y, i);
1216                 block->u.mv[i][0] += dirac_get_arith_int(arith + 4 + 2 * i, CTX_MV_F1, CTX_MV_DATA);
1217                 block->u.mv[i][1] += dirac_get_arith_int(arith + 5 + 2 * i, CTX_MV_F1, CTX_MV_DATA);
1218             }
1219         }
1220 }
1221
1222 /**
1223  * Copies the current block to the other blocks covered by the current superblock split mode
1224  */
1225 static void propagate_block_data(DiracBlock *block, int stride, int size)
1226 {
1227     int x, y;
1228     DiracBlock *dst = block;
1229
1230     for (x = 1; x < size; x++)
1231         dst[x] = *block;
1232
1233     for (y = 1; y < size; y++) {
1234         dst += stride;
1235         for (x = 0; x < size; x++)
1236             dst[x] = *block;
1237     }
1238 }
1239
1240 /**
1241  * Dirac Specification ->
1242  * 12. Block motion data syntax
1243  */
1244 static int dirac_unpack_block_motion_data(DiracContext *s)
1245 {
1246     GetBitContext *gb = &s->gb;
1247     uint8_t *sbsplit = s->sbsplit;
1248     int i, x, y, q, p;
1249     DiracArith arith[8];
1250
1251     align_get_bits(gb);
1252
1253     /* [DIRAC_STD] 11.2.4 and 12.2.1 Number of blocks and superblocks */
1254     s->sbwidth  = DIVRNDUP(s->source.width,  4*s->plane[0].xbsep);
1255     s->sbheight = DIVRNDUP(s->source.height, 4*s->plane[0].ybsep);
1256     s->blwidth  = 4 * s->sbwidth;
1257     s->blheight = 4 * s->sbheight;
1258
1259     /* [DIRAC_STD] 12.3.1 Superblock splitting modes. superblock_split_modes()
1260        decode superblock split modes */
1261     ff_dirac_init_arith_decoder(arith, gb, svq3_get_ue_golomb(gb));     /* svq3_get_ue_golomb(gb) is the length */
1262     for (y = 0; y < s->sbheight; y++) {
1263         for (x = 0; x < s->sbwidth; x++) {
1264             unsigned int split  = dirac_get_arith_uint(arith, CTX_SB_F1, CTX_SB_DATA);
1265             if (split > 2)
1266                 return AVERROR_INVALIDDATA;
1267             sbsplit[x] = (split + pred_sbsplit(sbsplit+x, s->sbwidth, x, y)) % 3;
1268         }
1269         sbsplit += s->sbwidth;
1270     }
1271
1272     /* setup arith decoding */
1273     ff_dirac_init_arith_decoder(arith, gb, svq3_get_ue_golomb(gb));
1274     for (i = 0; i < s->num_refs; i++) {
1275         ff_dirac_init_arith_decoder(arith + 4 + 2 * i, gb, svq3_get_ue_golomb(gb));
1276         ff_dirac_init_arith_decoder(arith + 5 + 2 * i, gb, svq3_get_ue_golomb(gb));
1277     }
1278     for (i = 0; i < 3; i++)
1279         ff_dirac_init_arith_decoder(arith+1+i, gb, svq3_get_ue_golomb(gb));
1280
1281     for (y = 0; y < s->sbheight; y++)
1282         for (x = 0; x < s->sbwidth; x++) {
1283             int blkcnt = 1 << s->sbsplit[y * s->sbwidth + x];
1284             int step   = 4 >> s->sbsplit[y * s->sbwidth + x];
1285
1286             for (q = 0; q < blkcnt; q++)
1287                 for (p = 0; p < blkcnt; p++) {
1288                     int bx = 4 * x + p*step;
1289                     int by = 4 * y + q*step;
1290                     DiracBlock *block = &s->blmotion[by*s->blwidth + bx];
1291                     decode_block_params(s, arith, block, s->blwidth, bx, by);
1292                     propagate_block_data(block, s->blwidth, step);
1293                 }
1294         }
1295
1296     return 0;
1297 }
1298
/**
 * One-dimensional OBMC window weight for sample i of a block.
 *
 * The first and last 2*offset samples form a ramp; every other sample
 * gets the full weight 8.
 *
 * @param i      sample position within the block
 * @param blen   block length
 * @param offset overlap offset
 */
static int weight(int i, int blen, int offset)
{
    int dist; /* distance from the nearer block edge, inside a ramp */

    if (i < 2*offset)
        dist = i;
    else if (i > blen-1 - 2*offset)
        dist = blen-1 - i;
    else
        return 8;

    /* an offset of 1 uses the fixed two-step ramp 3, 5 */
    if (offset == 1)
        return dist ? 5 : 3;

    return 1 + (6*dist + offset - 1) / (2*offset - 1);
}
1310
1311 static void init_obmc_weight_row(Plane *p, uint8_t *obmc_weight, int stride,
1312                                  int left, int right, int wy)
1313 {
1314     int x;
1315     for (x = 0; left && x < p->xblen >> 1; x++)
1316         obmc_weight[x] = wy*8;
1317     for (; x < p->xblen >> right; x++)
1318         obmc_weight[x] = wy*weight(x, p->xblen, p->xoffset);
1319     for (; x < p->xblen; x++)
1320         obmc_weight[x] = wy*8;
1321     for (; x < stride; x++)
1322         obmc_weight[x] = 0;
1323 }
1324
1325 static void init_obmc_weight(Plane *p, uint8_t *obmc_weight, int stride,
1326                              int left, int right, int top, int bottom)
1327 {
1328     int y;
1329     for (y = 0; top && y < p->yblen >> 1; y++) {
1330         init_obmc_weight_row(p, obmc_weight, stride, left, right, 8);
1331         obmc_weight += stride;
1332     }
1333     for (; y < p->yblen >> bottom; y++) {
1334         int wy = weight(y, p->yblen, p->yoffset);
1335         init_obmc_weight_row(p, obmc_weight, stride, left, right, wy);
1336         obmc_weight += stride;
1337     }
1338     for (; y < p->yblen; y++) {
1339         init_obmc_weight_row(p, obmc_weight, stride, left, right, 8);
1340         obmc_weight += stride;
1341     }
1342 }
1343
1344 static void init_obmc_weights(DiracContext *s, Plane *p, int by)
1345 {
1346     int top = !by;
1347     int bottom = by == s->blheight-1;
1348
1349     /* don't bother re-initing for rows 2 to blheight-2, the weights don't change */
1350     if (top || bottom || by == 1) {
1351         init_obmc_weight(p, s->obmc_weight[0], MAX_BLOCKSIZE, 1, 0, top, bottom);
1352         init_obmc_weight(p, s->obmc_weight[1], MAX_BLOCKSIZE, 0, 0, top, bottom);
1353         init_obmc_weight(p, s->obmc_weight[2], MAX_BLOCKSIZE, 0, 1, top, bottom);
1354     }
1355 }
1356
/**
 * Bilinear interpolation weights for eighth-pel motion compensation.
 *
 * Indexed as epel_weights[my & 3][mx & 3] by mc_subpel(), which stores a
 * pointer to the selected quadruple in src[4]. With fy = my & 3 and
 * fx = mx & 3, each entry is
 *   { (4-fy)*(4-fx), (4-fy)*fx, fy*(4-fx), fy*fx }
 * so every quadruple sums to 16.
 * NOTE(review): presumably the four values weight src[0]..src[3] in the
 * epel put/avg DSP functions -- confirm there.
 */
static const uint8_t epel_weights[4][4][4] = {
    {{ 16,  0,  0,  0 },
     { 12,  4,  0,  0 },
     {  8,  8,  0,  0 },
     {  4, 12,  0,  0 }},
    {{ 12,  0,  4,  0 },
     {  9,  3,  3,  1 },
     {  6,  6,  2,  2 },
     {  3,  9,  1,  3 }},
    {{  8,  0,  8,  0 },
     {  6,  2,  6,  2 },
     {  4,  4,  4,  4 },
     {  2,  6,  2,  6 }},
    {{  4,  0, 12,  0 },
     {  3,  1,  9,  3 },
     {  2,  2,  6,  6 },
     {  1,  3,  3,  9 }}
};
1375
/**
 * For block x,y, determine which of the hpel planes to do bilinear
 * interpolation from and set src[] to the location in each hpel plane
 * to MC from.
 *
 * @param s     decoder context (mv precision, chroma shifts, reference
 *              pictures and edge emulation buffers are read from it)
 * @param block block whose motion vector for reference `ref` is used
 * @param src   output: src[0..3] receive the source pointers; in the epel
 *              case src[4] additionally receives a pointer to the
 *              epel_weights entry to use
 * @param x, y  position of the block in the plane, before motion
 * @param ref   index of the reference picture (into s->ref_pics)
 * @param plane plane index; for non-zero planes the vector is scaled down
 *              by the chroma shifts
 *
 * @return the index of the put_dirac_pixels_tab function to use
 *  0 for 1 plane (fpel,hpel), 1 for 2 planes (qpel), 2 for 4 planes (qpel), and 3 for epel
 */
static int mc_subpel(DiracContext *s, DiracBlock *block, const uint8_t *src[5],
                     int x, int y, int ref, int plane)
{
    Plane *p = &s->plane[plane];
    uint8_t **ref_hpel = s->ref_pics[ref]->hpel[plane];
    int motion_x = block->u.mv[ref][0];
    int motion_y = block->u.mv[ref][1];
    int mx, my, i, epel, nplanes = 0;

    if (plane) {
        motion_x >>= s->chroma_x_shift;
        motion_y >>= s->chroma_y_shift;
    }

    /* split the vector into its fractional part (mx, my: the low
       mv_precision bits) and its integer part (motion_x, motion_y) */
    mx         = motion_x & ~(-1U << s->mv_precision);
    my         = motion_y & ~(-1U << s->mv_precision);
    motion_x >>= s->mv_precision;
    motion_y >>= s->mv_precision;
    /* normalize subpel coordinates to epel */
    /* TODO: template this function? */
    mx      <<= 3 - s->mv_precision;
    my      <<= 3 - s->mv_precision;

    x += motion_x;
    y += motion_y;
    /* a set lowest bit means a true eighth-pel position */
    epel = (mx|my)&1;

    /* hpel position */
    if (!((mx|my)&3)) {
        /* mx and my are each 0 or 4 here, so the expression below selects
           one of the four hpel planes */
        nplanes = 1;
        src[0] = ref_hpel[(my>>1)+(mx>>2)] + y*p->stride + x;
    } else {
        /* qpel or epel */
        nplanes = 4;
        for (i = 0; i < 4; i++)
            src[i] = ref_hpel[i] + y*p->stride + x;

        /* if we're interpolating in the right/bottom halves, adjust the planes as needed
           we increment x/y because the edge changes for half of the pixels */
        if (mx > 4) {
            src[0] += 1;
            src[2] += 1;
            x++;
        }
        if (my > 4) {
            src[0] += p->stride;
            src[1] += p->stride;
            y++;
        }

        /* hpel planes are:
           [0]: F  [1]: H
           [2]: V  [3]: C */
        if (!epel) {
            /* check if we really only need 2 planes since either mx or my is
               a hpel position. (epel weights of 0 handle this there) */
            if (!(mx&3)) {
                /* mx == 0: average [0] and [2]
                   mx == 4: average [1] and [3] */
                src[!mx] = src[2 + !!mx];
                nplanes = 2;
            } else if (!(my&3)) {
                /* my == 0 keeps [0] and [1], my == 4 selects [2] and [3] */
                src[0] = src[(my>>1)  ];
                src[1] = src[(my>>1)+1];
                nplanes = 2;
            }
        } else {
            /* adjust the ordering if needed so the weights work */
            if (mx > 4) {
                FFSWAP(const uint8_t *, src[0], src[1]);
                FFSWAP(const uint8_t *, src[2], src[3]);
            }
            if (my > 4) {
                FFSWAP(const uint8_t *, src[0], src[2]);
                FFSWAP(const uint8_t *, src[1], src[3]);
            }
            /* fifth entry carries the bilinear weights for this position */
            src[4] = epel_weights[my&3][mx&3];
        }
    }

    /* fixme: v/h _edge_pos */
    /* if the block reaches outside the plane plus its drawn edge, copy each
       source plane into an edge emulation buffer and read from there */
    if (x + p->xblen > p->width +EDGE_WIDTH/2 ||
        y + p->yblen > p->height+EDGE_WIDTH/2 ||
        x < 0 || y < 0) {
        for (i = 0; i < nplanes; i++) {
            s->vdsp.emulated_edge_mc(s->edge_emu_buffer[i], src[i],
                                     p->stride, p->stride,
                                     p->xblen, p->yblen, x, y,
                                     p->width+EDGE_WIDTH/2, p->height+EDGE_WIDTH/2);
            src[i] = s->edge_emu_buffer[i];
        }
    }
    /* nplanes 1/2/4 -> 0/1/2; epel only occurs with 4 planes, giving 3 */
    return (nplanes>>1) + epel;
}
1478
1479 static void add_dc(uint16_t *dst, int dc, int stride,
1480                    uint8_t *obmc_weight, int xblen, int yblen)
1481 {
1482     int x, y;
1483     dc += 128;
1484
1485     for (y = 0; y < yblen; y++) {
1486         for (x = 0; x < xblen; x += 2) {
1487             dst[x  ] += dc * obmc_weight[x  ];
1488             dst[x+1] += dc * obmc_weight[x+1];
1489         }
1490         dst          += stride;
1491         obmc_weight  += MAX_BLOCKSIZE;
1492     }
1493 }
1494
1495 static void block_mc(DiracContext *s, DiracBlock *block,
1496                      uint16_t *mctmp, uint8_t *obmc_weight,
1497                      int plane, int dstx, int dsty)
1498 {
1499     Plane *p = &s->plane[plane];
1500     const uint8_t *src[5];
1501     int idx;
1502
1503     switch (block->ref&3) {
1504     case 0: /* DC */
1505         add_dc(mctmp, block->u.dc[plane], p->stride, obmc_weight, p->xblen, p->yblen);
1506         return;
1507     case 1:
1508     case 2:
1509         idx = mc_subpel(s, block, src, dstx, dsty, (block->ref&3)-1, plane);
1510         s->put_pixels_tab[idx](s->mcscratch, src, p->stride, p->yblen);
1511         if (s->weight_func)
1512             s->weight_func(s->mcscratch, p->stride, s->weight_log2denom,
1513                            s->weight[0] + s->weight[1], p->yblen);
1514         break;
1515     case 3:
1516         idx = mc_subpel(s, block, src, dstx, dsty, 0, plane);
1517         s->put_pixels_tab[idx](s->mcscratch, src, p->stride, p->yblen);
1518         idx = mc_subpel(s, block, src, dstx, dsty, 1, plane);
1519         if (s->biweight_func) {
1520             /* fixme: +32 is a quick hack */
1521             s->put_pixels_tab[idx](s->mcscratch + 32, src, p->stride, p->yblen);
1522             s->biweight_func(s->mcscratch, s->mcscratch+32, p->stride, s->weight_log2denom,
1523                              s->weight[0], s->weight[1], p->yblen);
1524         } else
1525             s->avg_pixels_tab[idx](s->mcscratch, src, p->stride, p->yblen);
1526         break;
1527     }
1528     s->add_obmc(mctmp, s->mcscratch, p->stride, obmc_weight, p->yblen);
1529 }
1530
1531 static void mc_row(DiracContext *s, DiracBlock *block, uint16_t *mctmp, int plane, int dsty)
1532 {
1533     Plane *p = &s->plane[plane];
1534     int x, dstx = p->xbsep - p->xoffset;
1535
1536     block_mc(s, block, mctmp, s->obmc_weight[0], plane, -p->xoffset, dsty);
1537     mctmp += p->xbsep;
1538
1539     for (x = 1; x < s->blwidth-1; x++) {
1540         block_mc(s, block+x, mctmp, s->obmc_weight[1], plane, dstx, dsty);
1541         dstx  += p->xbsep;
1542         mctmp += p->xbsep;
1543     }
1544     block_mc(s, block+x, mctmp, s->obmc_weight[2], plane, dstx, dsty);
1545 }
1546
1547 static void select_dsp_funcs(DiracContext *s, int width, int height, int xblen, int yblen)
1548 {
1549     int idx = 0;
1550     if (xblen > 8)
1551         idx = 1;
1552     if (xblen > 16)
1553         idx = 2;
1554
1555     memcpy(s->put_pixels_tab, s->diracdsp.put_dirac_pixels_tab[idx], sizeof(s->put_pixels_tab));
1556     memcpy(s->avg_pixels_tab, s->diracdsp.avg_dirac_pixels_tab[idx], sizeof(s->avg_pixels_tab));
1557     s->add_obmc = s->diracdsp.add_dirac_obmc[idx];
1558     if (s->weight_log2denom > 1 || s->weight[0] != 1 || s->weight[1] != 1) {
1559         s->weight_func   = s->diracdsp.weight_dirac_pixels_tab[idx];
1560         s->biweight_func = s->diracdsp.biweight_dirac_pixels_tab[idx];
1561     } else {
1562         s->weight_func   = NULL;
1563         s->biweight_func = NULL;
1564     }
1565 }
1566
1567 static void interpolate_refplane(DiracContext *s, DiracFrame *ref, int plane, int width, int height)
1568 {
1569     /* chroma allocates an edge of 8 when subsampled
1570        which for 4:2:2 means an h edge of 16 and v edge of 8
1571        just use 8 for everything for the moment */
1572     int i, edge = EDGE_WIDTH/2;
1573
1574     ref->hpel[plane][0] = ref->avframe->data[plane];
1575     s->mpvencdsp.draw_edges(ref->hpel[plane][0], ref->avframe->linesize[plane], width, height, edge, edge, EDGE_TOP | EDGE_BOTTOM); /* EDGE_TOP | EDGE_BOTTOM values just copied to make it build, this needs to be ensured */
1576
1577     /* no need for hpel if we only have fpel vectors */
1578     if (!s->mv_precision)
1579         return;
1580
1581     for (i = 1; i < 4; i++) {
1582         if (!ref->hpel_base[plane][i])
1583             ref->hpel_base[plane][i] = av_malloc((height+2*edge) * ref->avframe->linesize[plane] + 32);
1584         /* we need to be 16-byte aligned even for chroma */
1585         ref->hpel[plane][i] = ref->hpel_base[plane][i] + edge*ref->avframe->linesize[plane] + 16;
1586     }
1587
1588     if (!ref->interpolated[plane]) {
1589         s->diracdsp.dirac_hpel_filter(ref->hpel[plane][1], ref->hpel[plane][2],
1590                                       ref->hpel[plane][3], ref->hpel[plane][0],
1591                                       ref->avframe->linesize[plane], width, height);
1592         s->mpvencdsp.draw_edges(ref->hpel[plane][1], ref->avframe->linesize[plane], width, height, edge, edge, EDGE_TOP | EDGE_BOTTOM);
1593         s->mpvencdsp.draw_edges(ref->hpel[plane][2], ref->avframe->linesize[plane], width, height, edge, edge, EDGE_TOP | EDGE_BOTTOM);
1594         s->mpvencdsp.draw_edges(ref->hpel[plane][3], ref->avframe->linesize[plane], width, height, edge, edge, EDGE_TOP | EDGE_BOTTOM);
1595     }
1596     ref->interpolated[plane] = 1;
1597 }
1598
1599 /**
1600  * Dirac Specification ->
1601  * 13.0 Transform data syntax. transform_data()
1602  */
1603 static int dirac_decode_frame_internal(DiracContext *s)
1604 {
1605     DWTContext d;
1606     int y, i, comp, dsty;
1607     int ret;
1608
1609     if (s->low_delay) {
1610         /* [DIRAC_STD] 13.5.1 low_delay_transform_data() */
1611         for (comp = 0; comp < 3; comp++) {
1612             Plane *p = &s->plane[comp];
1613             memset(p->idwt_buf, 0, p->idwt_stride * p->idwt_height * sizeof(IDWTELEM));
1614         }
1615         if (!s->zero_res) {
1616             if ((ret = decode_lowdelay(s)) < 0)
1617                 return ret;
1618         }
1619     }
1620
1621     for (comp = 0; comp < 3; comp++) {
1622         Plane *p       = &s->plane[comp];
1623         uint8_t *frame = s->current_picture->avframe->data[comp];
1624
1625         /* FIXME: small resolutions */
1626         for (i = 0; i < 4; i++)
1627             s->edge_emu_buffer[i] = s->edge_emu_buffer_base + i*FFALIGN(p->width, 16);
1628
1629         if (!s->zero_res && !s->low_delay)
1630         {
1631             memset(p->idwt_buf, 0, p->idwt_stride * p->idwt_height * sizeof(IDWTELEM));
1632             decode_component(s, comp); /* [DIRAC_STD] 13.4.1 core_transform_data() */
1633         }
1634         ret = ff_spatial_idwt_init2(&d, p->idwt_buf, p->idwt_width, p->idwt_height, p->idwt_stride,
1635                                     s->wavelet_idx+2, s->wavelet_depth, p->idwt_tmp);
1636         if (ret < 0)
1637             return ret;
1638
1639         if (!s->num_refs) { /* intra */
1640             for (y = 0; y < p->height; y += 16) {
1641                 ff_spatial_idwt_slice2(&d, y+16); /* decode */
1642                 s->diracdsp.put_signed_rect_clamped(frame + y*p->stride, p->stride,
1643                                                     p->idwt_buf + y*p->idwt_stride, p->idwt_stride, p->width, 16);
1644             }
1645         } else { /* inter */
1646             int rowheight = p->ybsep*p->stride;
1647
1648             select_dsp_funcs(s, p->width, p->height, p->xblen, p->yblen);
1649
1650             for (i = 0; i < s->num_refs; i++)
1651                 interpolate_refplane(s, s->ref_pics[i], comp, p->width, p->height);
1652
1653             memset(s->mctmp, 0, 4*p->yoffset*p->stride);
1654
1655             dsty = -p->yoffset;
1656             for (y = 0; y < s->blheight; y++) {
1657                 int h     = 0,
1658                     start = FFMAX(dsty, 0);
1659                 uint16_t *mctmp    = s->mctmp + y*rowheight;
1660                 DiracBlock *blocks = s->blmotion + y*s->blwidth;
1661
1662                 init_obmc_weights(s, p, y);
1663
1664                 if (y == s->blheight-1 || start+p->ybsep > p->height)
1665                     h = p->height - start;
1666                 else
1667                     h = p->ybsep - (start - dsty);
1668                 if (h < 0)
1669                     break;
1670
1671                 memset(mctmp+2*p->yoffset*p->stride, 0, 2*rowheight);
1672                 mc_row(s, blocks, mctmp, comp, dsty);
1673
1674                 mctmp += (start - dsty)*p->stride + p->xoffset;
1675                 ff_spatial_idwt_slice2(&d, start + h); /* decode */
1676                 s->diracdsp.add_rect_clamped(frame + start*p->stride, mctmp, p->stride,
1677                                              p->idwt_buf + start*p->idwt_stride, p->idwt_stride, p->width, h);
1678
1679                 dsty += p->ybsep;
1680             }
1681         }
1682     }
1683
1684
1685     return 0;
1686 }
1687
1688 static int get_buffer_with_edge(AVCodecContext *avctx, AVFrame *f, int flags)
1689 {
1690     int ret, i;
1691     int chroma_x_shift, chroma_y_shift;
1692     avcodec_get_chroma_sub_sample(avctx->pix_fmt, &chroma_x_shift, &chroma_y_shift);
1693
1694     f->width  = avctx->width  + 2 * EDGE_WIDTH;
1695     f->height = avctx->height + 2 * EDGE_WIDTH + 2;
1696     ret = ff_get_buffer(avctx, f, flags);
1697     if (ret < 0)
1698         return ret;
1699
1700     for (i = 0; f->data[i]; i++) {
1701         int offset = (EDGE_WIDTH >> (i && i<3 ? chroma_y_shift : 0)) *
1702                      f->linesize[i] + 32;
1703         f->data[i] += offset;
1704     }
1705     f->width  = avctx->width;
1706     f->height = avctx->height;
1707
1708     return 0;
1709 }
1710
1711 /**
1712  * Dirac Specification ->
1713  * 11.1.1 Picture Header. picture_header()
1714  */
1715 static int dirac_decode_picture_header(DiracContext *s)
1716 {
1717     unsigned retire, picnum;
1718     int i, j, ret;
1719     int64_t refdist, refnum;
1720     GetBitContext *gb = &s->gb;
1721
1722     /* [DIRAC_STD] 11.1.1 Picture Header. picture_header() PICTURE_NUM */
1723     picnum = s->current_picture->avframe->display_picture_number = get_bits_long(gb, 32);
1724
1725
1726     av_log(s->avctx,AV_LOG_DEBUG,"PICTURE_NUM: %d\n",picnum);
1727
1728     /* if this is the first keyframe after a sequence header, start our
1729        reordering from here */
1730     if (s->frame_number < 0)
1731         s->frame_number = picnum;
1732
1733     s->ref_pics[0] = s->ref_pics[1] = NULL;
1734     for (i = 0; i < s->num_refs; i++) {
1735         refnum = (picnum + dirac_get_se_golomb(gb)) & 0xFFFFFFFF;
1736         refdist = INT64_MAX;
1737
1738         /* find the closest reference to the one we want */
1739         /* Jordi: this is needed if the referenced picture hasn't yet arrived */
1740         for (j = 0; j < MAX_REFERENCE_FRAMES && refdist; j++)
1741             if (s->ref_frames[j]
1742                 && FFABS(s->ref_frames[j]->avframe->display_picture_number - refnum) < refdist) {
1743                 s->ref_pics[i] = s->ref_frames[j];
1744                 refdist = FFABS(s->ref_frames[j]->avframe->display_picture_number - refnum);
1745             }
1746
1747         if (!s->ref_pics[i] || refdist)
1748             av_log(s->avctx, AV_LOG_DEBUG, "Reference not found\n");
1749
1750         /* if there were no references at all, allocate one */
1751         if (!s->ref_pics[i])
1752             for (j = 0; j < MAX_FRAMES; j++)
1753                 if (!s->all_frames[j].avframe->data[0]) {
1754                     s->ref_pics[i] = &s->all_frames[j];
1755                     get_buffer_with_edge(s->avctx, s->ref_pics[i]->avframe, AV_GET_BUFFER_FLAG_REF);
1756                     break;
1757                 }
1758
1759         if (!s->ref_pics[i]) {
1760             av_log(s->avctx, AV_LOG_ERROR, "Reference could not be allocated\n");
1761             return AVERROR_INVALIDDATA;
1762         }
1763
1764     }
1765
1766     /* retire the reference frames that are not used anymore */
1767     if (s->current_picture->avframe->reference) {
1768         retire = (picnum + dirac_get_se_golomb(gb)) & 0xFFFFFFFF;
1769         if (retire != picnum) {
1770             DiracFrame *retire_pic = remove_frame(s->ref_frames, retire);
1771
1772             if (retire_pic)
1773                 retire_pic->avframe->reference &= DELAYED_PIC_REF;
1774             else
1775                 av_log(s->avctx, AV_LOG_DEBUG, "Frame to retire not found\n");
1776         }
1777
1778         /* if reference array is full, remove the oldest as per the spec */
1779         while (add_frame(s->ref_frames, MAX_REFERENCE_FRAMES, s->current_picture)) {
1780             av_log(s->avctx, AV_LOG_ERROR, "Reference frame overflow\n");
1781             remove_frame(s->ref_frames, s->ref_frames[0]->avframe->display_picture_number)->avframe->reference &= DELAYED_PIC_REF;
1782         }
1783     }
1784
1785     if (s->num_refs) {
1786         ret = dirac_unpack_prediction_parameters(s);  /* [DIRAC_STD] 11.2 Picture Prediction Data. picture_prediction() */
1787         if (ret < 0)
1788             return ret;
1789         ret = dirac_unpack_block_motion_data(s);      /* [DIRAC_STD] 12. Block motion data syntax                       */
1790         if (ret < 0)
1791             return ret;
1792     }
1793     ret = dirac_unpack_idwt_params(s);                /* [DIRAC_STD] 11.3 Wavelet transform data                        */
1794     if (ret < 0)
1795         return ret;
1796
1797     init_planes(s);
1798     return 0;
1799 }
1800
1801 static int get_delayed_pic(DiracContext *s, AVFrame *picture, int *got_frame)
1802 {
1803     DiracFrame *out = s->delay_frames[0];
1804     int i, out_idx  = 0;
1805     int ret;
1806
1807     /* find frame with lowest picture number */
1808     for (i = 1; s->delay_frames[i]; i++)
1809         if (s->delay_frames[i]->avframe->display_picture_number < out->avframe->display_picture_number) {
1810             out     = s->delay_frames[i];
1811             out_idx = i;
1812         }
1813
1814     for (i = out_idx; s->delay_frames[i]; i++)
1815         s->delay_frames[i] = s->delay_frames[i+1];
1816
1817     if (out) {
1818         out->avframe->reference ^= DELAYED_PIC_REF;
1819         *got_frame = 1;
1820         if((ret = av_frame_ref(picture, out->avframe)) < 0)
1821             return ret;
1822     }
1823
1824     return 0;
1825 }
1826
/**
 * Dirac Specification ->
 * 9.6 Parse Info Header Syntax. parse_info()
 *
 * Size in bytes of the header that starts every data unit:
 * 4 byte start code + 1 byte parse code + 4 byte size + 4 byte previous size
 */
#define DATA_UNIT_HEADER_SIZE (4 + 1 + 4 + 4)
1833
1834 /* [DIRAC_STD] dirac_decode_data_unit makes reference to the while defined in 9.3
1835    inside the function parse_sequence() */
1836 static int dirac_decode_data_unit(AVCodecContext *avctx, const uint8_t *buf, int size)
1837 {
1838     DiracContext *s   = avctx->priv_data;
1839     DiracFrame *pic   = NULL;
1840     int ret, i, parse_code;
1841     unsigned tmp;
1842
1843     if (size < DATA_UNIT_HEADER_SIZE)
1844         return AVERROR_INVALIDDATA;
1845
1846     parse_code = buf[4];
1847
1848     init_get_bits(&s->gb, &buf[13], 8*(size - DATA_UNIT_HEADER_SIZE));
1849
1850     if (parse_code == pc_seq_header) {
1851         if (s->seen_sequence_header)
1852             return 0;
1853
1854         /* [DIRAC_STD] 10. Sequence header */
1855         ret = avpriv_dirac_parse_sequence_header(avctx, &s->gb, &s->source);
1856         if (ret < 0)
1857             return ret;
1858
1859         avcodec_get_chroma_sub_sample(avctx->pix_fmt, &s->chroma_x_shift, &s->chroma_y_shift);
1860
1861         ret = alloc_sequence_buffers(s);
1862         if (ret < 0)
1863             return ret;
1864
1865         s->seen_sequence_header = 1;
1866     } else if (parse_code == pc_eos) { /* [DIRAC_STD] End of Sequence */
1867         free_sequence_buffers(s);
1868         s->seen_sequence_header = 0;
1869     } else if (parse_code == pc_aux_data) {
1870         if (buf[13] == 1) {     /* encoder implementation/version */
1871             int ver[3];
1872             /* versions older than 1.0.8 don't store quant delta for
1873                subbands with only one codeblock */
1874             if (sscanf(buf+14, "Schroedinger %d.%d.%d", ver, ver+1, ver+2) == 3)
1875                 if (ver[0] == 1 && ver[1] == 0 && ver[2] <= 7)
1876                     s->old_delta_quant = 1;
1877         }
1878     } else if (parse_code & 0x8) {  /* picture data unit */
1879         if (!s->seen_sequence_header) {
1880             av_log(avctx, AV_LOG_DEBUG, "Dropping frame without sequence header\n");
1881             return AVERROR_INVALIDDATA;
1882         }
1883
1884         /* find an unused frame */
1885         for (i = 0; i < MAX_FRAMES; i++)
1886             if (s->all_frames[i].avframe->data[0] == NULL)
1887                 pic = &s->all_frames[i];
1888         if (!pic) {
1889             av_log(avctx, AV_LOG_ERROR, "framelist full\n");
1890             return AVERROR_INVALIDDATA;
1891         }
1892
1893         av_frame_unref(pic->avframe);
1894
1895         /* [DIRAC_STD] Defined in 9.6.1 ... */
1896         tmp            =  parse_code & 0x03;                   /* [DIRAC_STD] num_refs()      */
1897         if (tmp > 2) {
1898             av_log(avctx, AV_LOG_ERROR, "num_refs of 3\n");
1899             return AVERROR_INVALIDDATA;
1900         }
1901         s->num_refs    = tmp;
1902         s->is_arith    = (parse_code & 0x48) == 0x08;          /* [DIRAC_STD] using_ac()      */
1903         s->low_delay   = (parse_code & 0x88) == 0x88;          /* [DIRAC_STD] is_low_delay()  */
1904         pic->avframe->reference = (parse_code & 0x0C) == 0x0C;  /* [DIRAC_STD]  is_reference() */
1905         pic->avframe->key_frame = s->num_refs == 0;             /* [DIRAC_STD] is_intra()      */
1906         pic->avframe->pict_type = s->num_refs + 1;              /* Definition of AVPictureType in avutil.h */
1907
1908         if ((ret = get_buffer_with_edge(avctx, pic->avframe, (parse_code & 0x0C) == 0x0C ? AV_GET_BUFFER_FLAG_REF : 0)) < 0)
1909             return ret;
1910         s->current_picture = pic;
1911         s->plane[0].stride = pic->avframe->linesize[0];
1912         s->plane[1].stride = pic->avframe->linesize[1];
1913         s->plane[2].stride = pic->avframe->linesize[2];
1914
1915         if (alloc_buffers(s, FFMAX3(FFABS(s->plane[0].stride), FFABS(s->plane[1].stride), FFABS(s->plane[2].stride))) < 0)
1916             return AVERROR(ENOMEM);
1917
1918         /* [DIRAC_STD] 11.1 Picture parse. picture_parse() */
1919         ret = dirac_decode_picture_header(s);
1920         if (ret < 0)
1921             return ret;
1922
1923         /* [DIRAC_STD] 13.0 Transform data syntax. transform_data() */
1924         ret = dirac_decode_frame_internal(s);
1925         if (ret < 0)
1926             return ret;
1927     }
1928     return 0;
1929 }
1930
1931 static int dirac_decode_frame(AVCodecContext *avctx, void *data, int *got_frame, AVPacket *pkt)
1932 {
1933     DiracContext *s     = avctx->priv_data;
1934     AVFrame *picture    = data;
1935     uint8_t *buf        = pkt->data;
1936     int buf_size        = pkt->size;
1937     int i, data_unit_size, buf_idx = 0;
1938     int ret;
1939
1940     /* release unused frames */
1941     for (i = 0; i < MAX_FRAMES; i++)
1942         if (s->all_frames[i].avframe->data[0] && !s->all_frames[i].avframe->reference) {
1943             av_frame_unref(s->all_frames[i].avframe);
1944             memset(s->all_frames[i].interpolated, 0, sizeof(s->all_frames[i].interpolated));
1945         }
1946
1947     s->current_picture = NULL;
1948     *got_frame = 0;
1949
1950     /* end of stream, so flush delayed pics */
1951     if (buf_size == 0)
1952         return get_delayed_pic(s, (AVFrame *)data, got_frame);
1953
1954     for (;;) {
1955         /*[DIRAC_STD] Here starts the code from parse_info() defined in 9.6
1956           [DIRAC_STD] PARSE_INFO_PREFIX = "BBCD" as defined in ISO/IEC 646
1957           BBCD start code search */
1958         for (; buf_idx + DATA_UNIT_HEADER_SIZE < buf_size; buf_idx++) {
1959             if (buf[buf_idx  ] == 'B' && buf[buf_idx+1] == 'B' &&
1960                 buf[buf_idx+2] == 'C' && buf[buf_idx+3] == 'D')
1961                 break;
1962         }
1963         /* BBCD found or end of data */
1964         if (buf_idx + DATA_UNIT_HEADER_SIZE >= buf_size)
1965             break;
1966
1967         data_unit_size = AV_RB32(buf+buf_idx+5);
1968         if (data_unit_size > buf_size - buf_idx || !data_unit_size) {
1969             if(data_unit_size > buf_size - buf_idx)
1970             av_log(s->avctx, AV_LOG_ERROR,
1971                    "Data unit with size %d is larger than input buffer, discarding\n",
1972                    data_unit_size);
1973             buf_idx += 4;
1974             continue;
1975         }
1976         /* [DIRAC_STD] dirac_decode_data_unit makes reference to the while defined in 9.3 inside the function parse_sequence() */
1977         ret = dirac_decode_data_unit(avctx, buf+buf_idx, data_unit_size);
1978         if (ret < 0)
1979         {
1980             av_log(s->avctx, AV_LOG_ERROR,"Error in dirac_decode_data_unit\n");
1981             return ret;
1982         }
1983         buf_idx += data_unit_size;
1984     }
1985
1986     if (!s->current_picture)
1987         return buf_size;
1988
1989     if (s->current_picture->avframe->display_picture_number > s->frame_number) {
1990         DiracFrame *delayed_frame = remove_frame(s->delay_frames, s->frame_number);
1991
1992         s->current_picture->avframe->reference |= DELAYED_PIC_REF;
1993
1994         if (add_frame(s->delay_frames, MAX_DELAY, s->current_picture)) {
1995             int min_num = s->delay_frames[0]->avframe->display_picture_number;
1996             /* Too many delayed frames, so we display the frame with the lowest pts */
1997             av_log(avctx, AV_LOG_ERROR, "Delay frame overflow\n");
1998
1999             for (i = 1; s->delay_frames[i]; i++)
2000                 if (s->delay_frames[i]->avframe->display_picture_number < min_num)
2001                     min_num = s->delay_frames[i]->avframe->display_picture_number;
2002
2003             delayed_frame = remove_frame(s->delay_frames, min_num);
2004             add_frame(s->delay_frames, MAX_DELAY, s->current_picture);
2005         }
2006
2007         if (delayed_frame) {
2008             delayed_frame->avframe->reference ^= DELAYED_PIC_REF;
2009             if((ret=av_frame_ref(data, delayed_frame->avframe)) < 0)
2010                 return ret;
2011             *got_frame = 1;
2012         }
2013     } else if (s->current_picture->avframe->display_picture_number == s->frame_number) {
2014         /* The right frame at the right time :-) */
2015         if((ret=av_frame_ref(data, s->current_picture->avframe)) < 0)
2016             return ret;
2017         *got_frame = 1;
2018     }
2019
2020     if (*got_frame)
2021         s->frame_number = picture->display_picture_number + 1;
2022
2023     return buf_idx;
2024 }
2025
2026 AVCodec ff_dirac_decoder = {
2027     .name           = "dirac",
2028     .long_name      = NULL_IF_CONFIG_SMALL("BBC Dirac VC-2"),
2029     .type           = AVMEDIA_TYPE_VIDEO,
2030     .id             = AV_CODEC_ID_DIRAC,
2031     .priv_data_size = sizeof(DiracContext),
2032     .init           = dirac_decode_init,
2033     .close          = dirac_decode_end,
2034     .decode         = dirac_decode_frame,
2035     .capabilities   = CODEC_CAP_DELAY,
2036     .flush          = dirac_decode_flush,
2037 };