git.sesse.net Git - ffmpeg/blob - libavcodec/diracdec.c

   1 /*
   2  * Copyright (C) 2007 Marco Gerards <marco@gnu.org>
   3  * Copyright (C) 2009 David Conrad
   4  * Copyright (C) 2011 Jordi Ortiz
   5  *
   6  * This file is part of FFmpeg.
   7  *
   8  * FFmpeg is free software; you can redistribute it and/or
   9  * modify it under the terms of the GNU Lesser General Public
  10  * License as published by the Free Software Foundation; either
  11  * version 2.1 of the License, or (at your option) any later version.
  12  *
  13  * FFmpeg is distributed in the hope that it will be useful,
  14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  16  * Lesser General Public License for more details.
  17  *
  18  * You should have received a copy of the GNU Lesser General Public
  19  * License along with FFmpeg; if not, write to the Free Software
  20  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  21  */
  22
  23 /**
  24  * @file
  25  * Dirac Decoder
  26  * @author Marco Gerards <marco@gnu.org>, David Conrad, Jordi Ortiz <nenjordi@gmail.com>
  27  */
  28
  29 #include "avcodec.h"
  30 #include "get_bits.h"
  31 #include "bytestream.h"
  32 #include "internal.h"
  33 #include "golomb.h"
  34 #include "dirac_arith.h"
  35 #include "mpeg12data.h"
  36 #include "libavcodec/mpegvideo.h"
  37 #include "mpegvideoencdsp.h"
  38 #include "dirac_dwt.h"
  39 #include "dirac.h"
  40 #include "diracdsp.h"
  41 #include "videodsp.h"
  42
/**
 * The spec limits the number of wavelet decompositions to 4 for both
 * level 1 (VC-2) and 128 (long-gop default).
 * 5 decompositions is the maximum before >16-bit buffers are needed.
 * Schroedinger allows this for DD 9,7 and 13,7 wavelets only, limiting
 * the others to 4 decompositions (or 3 for the fidelity filter).
 *
 * We use this instead of MAX_DECOMPOSITIONS to save some memory.
 */
#define MAX_DWT_LEVELS 5

/**
 * The spec limits this to 3 for frame coding, but in practice can be as high as 6
 */
#define MAX_REFERENCE_FRAMES 8
#define MAX_DELAY 5         /* limit for main profile for frame coding (TODO: field coding) */
/* Size of DiracContext.all_frames: one slot per reference frame, one per
 * delayed frame, plus one extra (presumably the picture being decoded --
 * TODO confirm). */
#define MAX_FRAMES (MAX_REFERENCE_FRAMES + MAX_DELAY + 1)
/* The quantiser lookup tables in this file are declared with MAX_QUANT+1 entries. */
#define MAX_QUANT 68        /* max quant for VC-2 */
#define MAX_BLOCKSIZE 32    /* maximum xblen/yblen we support */

/**
 * DiracBlock->ref flags, if set then the block does MC from the given ref
 */
#define DIRAC_REF_MASK_REF1   1
#define DIRAC_REF_MASK_REF2   2
#define DIRAC_REF_MASK_GLOBAL 4

/**
 * Value of Picture.reference when Picture is not a reference picture, but
 * is held for delayed output.
 */
#define DELAYED_PIC_REF 4
  75
  76 #define CALC_PADDING(size, depth)                       \
  77     (((size + (1 << depth) - 1) >> depth) << depth)
  78
  79 #define DIVRNDUP(a, b) (((a) + (b) - 1) / (b))
  80
/**
 * One entry of the decoder's picture pool (DiracContext.all_frames).
 */
typedef struct {
    AVFrame *avframe;           /* allocated in dirac_decode_init(), freed in dirac_decode_end() */
    int interpolated[3];    /* 1 if hpel[] is valid */
    uint8_t *hpel[3][4];        /* per-plane half-pel planes (see interpolated[]) */
    uint8_t *hpel_base[3][4];   /* entries [plane][1..3] are released by free_sequence_buffers() */
    int reference;              /* presumably a reference-type flag, cf. DELAYED_PIC_REF -- TODO confirm */
} DiracFrame;
  88
/**
 * Per-block motion data; the union holds either motion vectors or DC values.
 */
typedef struct {
    union {
        int16_t mv[2][2];   /* presumably [reference][x/y] -- TODO confirm at use site */
        int16_t dc[3];      /* presumably one DC value per plane -- TODO confirm at use site */
    } u; /* anonymous unions aren't in C99 :( */
    uint8_t ref;            /* combination of DIRAC_REF_MASK_* flags */
} DiracBlock;
  96
/**
 * Description of one wavelet subband inside a plane's IDWT buffer.
 * The geometry fields are filled in by init_planes().
 */
typedef struct SubBand {
    int level;              /* decomposition level, 0 is the coarsest */
    int orientation;        /* one of enum dirac_subband */
    int stride;             /* distance between rows of this band, in IDWTELEM units */
    int width;
    int height;
    int quant;              /* quantiser index used to look up qscale/qoffset tables */
    IDWTELEM *ibuf;         /* first coefficient of this band inside Plane.idwt_buf */
    struct SubBand *parent; /* same orientation at level-1; only assigned for level > 0 */

    /* for low delay */
    /* NOTE(review): in the code visible here these two fields are written by
     * decode_component() and read by decode_subband_internal(), i.e. the
     * core syntax path -- confirm whether the label above is still accurate. */
    unsigned length;            /* size of the coded band data in bytes */
    const uint8_t *coeff_data;  /* start of the coded band data inside the packet */
} SubBand;
 111
/**
 * Per-component (Y, U, V) state: picture dimensions, IDWT buffers,
 * motion-compensation block geometry and the subband descriptors.
 */
typedef struct Plane {
    int width;
    int height;
    ptrdiff_t stride;

    /* dimensions padded with CALC_PADDING() for the current wavelet depth,
     * set in init_planes() */
    int idwt_width;
    int idwt_height;
    int idwt_stride;            /* idwt_width aligned to 8 */
    IDWTELEM *idwt_buf;         /* idwt_buf_base advanced past the top padding rows */
    IDWTELEM *idwt_buf_base;    /* the actual allocation, see alloc_sequence_buffers() */
    IDWTELEM *idwt_tmp;         /* scratch buffer of (padded width + 16) elements */

    /* block length */
    uint8_t xblen;
    uint8_t yblen;
    /* block separation (block n+1 starts after this many pixels in block n) */
    uint8_t xbsep;
    uint8_t ybsep;
    /* amount of overspill on each edge (half of the overlap between blocks) */
    uint8_t xoffset;
    uint8_t yoffset;

    SubBand band[MAX_DWT_LEVELS][4];    /* indexed [level][orientation] */
} Plane;
 136
/**
 * Private decoder state (AVCodecContext.priv_data).
 */
typedef struct DiracContext {
    AVCodecContext *avctx;
    MpegvideoEncDSPContext mpvencdsp;
    VideoDSPContext vdsp;
    DiracDSPContext diracdsp;
    GetBitContext gb;
    dirac_source_params source;
    int seen_sequence_header;
    int frame_number;           /* number of the next frame to display       */
    Plane plane[3];
    int chroma_x_shift;
    int chroma_y_shift;

    int zero_res;               /* zero residue flag                         */
    int is_arith;               /* whether coeffs use arith or golomb coding */
    int low_delay;              /* use the low delay syntax                  */
    int globalmc_flag;          /* use global motion compensation            */
    int num_refs;               /* number of reference pictures              */

    /* wavelet decoding */
    unsigned wavelet_depth;     /* depth of the IDWT                         */
    unsigned wavelet_idx;

    /**
     * schroedinger older than 1.0.8 doesn't store
     * quant delta if only one codebook exists in a band
     */
    unsigned old_delta_quant;
    unsigned codeblock_mode;

    /* number of codeblocks per subband; indexed by
     * level + (orientation != subband_ll) in decode_subband_internal() */
    struct {
        unsigned width;
        unsigned height;
    } codeblock[MAX_DWT_LEVELS+1];

    struct {
        unsigned num_x;         /* number of horizontal slices               */
        unsigned num_y;         /* number of vertical slices                 */
        AVRational bytes;       /* average bytes per slice                   */
        uint8_t quant[MAX_DWT_LEVELS][4]; /* [DIRAC_STD] E.1 */
    } lowdelay;

    struct {
        int pan_tilt[2];        /* pan/tilt vector                           */
        int zrs[2][2];          /* zoom/rotate/shear matrix                  */
        int perspective[2];     /* perspective vector                        */
        unsigned zrs_exp;
        unsigned perspective_exp;
    } globalmc[2];

    /* motion compensation */
    /* NOTE(review): mv_precision was tagged "[DIRAC_STD] REFS_WT_PRECISION",
     * the same tag as weight_log2denom below; by its name it is the motion
     * vector precision, so the tag looks like a copy/paste slip -- confirm
     * against the spec. */
    uint8_t mv_precision;       /* motion vector precision                   */
    int16_t weight[2];          /* [DIRAC_STD] REF1_WT and REF2_WT           */
    unsigned weight_log2denom;  /* [DIRAC_STD] REFS_WT_PRECISION             */

    int blwidth;                /* number of blocks (horizontally)           */
    int blheight;               /* number of blocks (vertically)             */
    int sbwidth;                /* number of superblocks (horizontally)      */
    int sbheight;               /* number of superblocks (vertically)        */

    /* both allocated in alloc_sequence_buffers() */
    uint8_t *sbsplit;
    DiracBlock *blmotion;

    uint8_t *edge_emu_buffer[4];
    uint8_t *edge_emu_buffer_base;

    uint16_t *mctmp;            /* buffer holding the MC data multiplied by OBMC weights */
    uint8_t *mcscratch;
    int buffer_stride;          /* padded stride the MC scratch buffers were allocated for; 0 if none */

    DECLARE_ALIGNED(16, uint8_t, obmc_weight)[3][MAX_BLOCKSIZE*MAX_BLOCKSIZE];

    void (*put_pixels_tab[4])(uint8_t *dst, const uint8_t *src[5], int stride, int h);
    void (*avg_pixels_tab[4])(uint8_t *dst, const uint8_t *src[5], int stride, int h);
    void (*add_obmc)(uint16_t *dst, const uint8_t *src, int stride, const uint8_t *obmc_weight, int yblen);
    dirac_weight_func weight_func;
    dirac_biweight_func biweight_func;

    DiracFrame *current_picture;
    DiracFrame *ref_pics[2];

    /* NULL-terminated lists (hence the +1), see add_frame()/remove_frame() */
    DiracFrame *ref_frames[MAX_REFERENCE_FRAMES+1];
    DiracFrame *delay_frames[MAX_DELAY+1];
    DiracFrame all_frames[MAX_FRAMES];  /* backing storage for every picture */
} DiracContext;
 222
/**
 * Dirac Specification ->
 * Parse code values. 9.6.1 Table 9.1
 *
 * Only the parse codes that do not carry picture data are enumerated here.
 */
enum dirac_parse_code {
    pc_seq_header         = 0x00,   /* sequence header */
    pc_eos                = 0x10,   /* end of sequence */
    pc_aux_data           = 0x20,   /* auxiliary data  */
    pc_padding            = 0x30,   /* padding data    */
};
 233
/**
 * Subband orientations. init_planes() uses bit 0 to offset a band
 * horizontally and values above 1 to offset it by half a band stride,
 * so the numeric values are significant.
 */
enum dirac_subband {
    subband_ll = 0,
    subband_hl = 1,
    subband_lh = 2,
    subband_hh = 3,
    subband_nb,     /* number of orientations */
};
 241
/**
 * Default quantisation matrices: seven 4x4 tables.
 * presumably indexed [wavelet_idx][level][orientation] -- the use site is
 * not visible here, TODO confirm.
 */
static const uint8_t default_qmat[][4][4] = {
    { { 5,  3,  3,  0}, { 0,  4,  4,  1}, { 0,  5,  5,  2}, { 0,  6,  6,  3} },
    { { 4,  2,  2,  0}, { 0,  4,  4,  2}, { 0,  5,  5,  3}, { 0,  7,  7,  5} },
    { { 5,  3,  3,  0}, { 0,  4,  4,  1}, { 0,  5,  5,  2}, { 0,  6,  6,  3} },
    { { 8,  4,  4,  0}, { 0,  4,  4,  0}, { 0,  4,  4,  0}, { 0,  4,  4,  0} },
    { { 8,  4,  4,  0}, { 0,  4,  4,  0}, { 0,  4,  4,  0}, { 0,  4,  4,  0} },
    { { 0,  4,  4,  8}, { 0,  8,  8, 12}, { 0, 13, 13, 17}, { 0, 17, 17, 21} },
    { { 3,  1,  1,  0}, { 0,  4,  4,  2}, { 0,  6,  6,  5}, { 0,  9,  9,  7} },
};
 251
/**
 * Quantisation factor by quantiser index (used as qfactor when
 * unpacking coefficients).
 *
 * NOTE(review): the array has MAX_QUANT+1 entries but only 58 initializers
 * are listed, so the remaining entries are implicitly zero.
 */
static const int qscale_tab[MAX_QUANT+1] = {
    4,     5,     6,     7,     8,    10,    11,    13,
    16,    19,    23,    27,    32,    38,    45,    54,
    64,    76,    91,   108,   128,   152,   181,   215,
    256,   304,   362,   431,   512,   609,   724,   861,
    1024,  1218,  1448,  1722,  2048,  2435,  2896,  3444,
    4096,  4871,  5793,  6889,  8192,  9742, 11585, 13777,
    16384, 19484, 23170, 27554, 32768, 38968, 46341, 55109,
    65536, 77936
};
 262
/**
 * Quantisation offset by quantiser index, selected when the picture has
 * no references (s->num_refs == 0) and for low-delay slices.
 *
 * NOTE(review): the array has MAX_QUANT+1 entries but only 58 initializers
 * are listed, so the remaining entries are implicitly zero.
 */
static const int qoffset_intra_tab[MAX_QUANT+1] = {
    1,     2,     3,     4,     4,     5,     6,     7,
    8,    10,    12,    14,    16,    19,    23,    27,
    32,    38,    46,    54,    64,    76,    91,   108,
    128,   152,   181,   216,   256,   305,   362,   431,
    512,   609,   724,   861,  1024,  1218,  1448,  1722,
    2048,  2436,  2897,  3445,  4096,  4871,  5793,  6889,
    8192,  9742, 11585, 13777, 16384, 19484, 23171, 27555,
    32768, 38968
};
 273
/**
 * Quantisation offset by quantiser index, selected when the picture has
 * at least one reference (s->num_refs != 0).
 *
 * NOTE(review): the array has MAX_QUANT+1 entries but only 58 initializers
 * are listed, so the remaining entries are implicitly zero.
 */
static const int qoffset_inter_tab[MAX_QUANT+1] = {
    1,     2,     2,     3,     3,     4,     4,     5,
    6,     7,     9,    10,    12,    14,    17,    20,
    24,    29,    34,    41,    48,    57,    68,    81,
    96,   114,   136,   162,   192,   228,   272,   323,
    384,   457,   543,   646,   768,   913,  1086,  1292,
    1536,  1827,  2172,  2583,  3072,  3653,  4344,  5166,
    6144,  7307,  8689, 10333, 12288, 14613, 17378, 20666,
    24576, 29226
};
 284
 285 /* magic number division by 3 from schroedinger */
 286 static inline int divide3(int x)
 287 {
 288     return ((x+1)*21845 + 10922) >> 16;
 289 }
 290
 291 static DiracFrame *remove_frame(DiracFrame *framelist[], int picnum)
 292 {
 293     DiracFrame *remove_pic = NULL;
 294     int i, remove_idx = -1;
 295
 296     for (i = 0; framelist[i]; i++)
 297         if (framelist[i]->avframe->display_picture_number == picnum) {
 298             remove_pic = framelist[i];
 299             remove_idx = i;
 300         }
 301
 302     if (remove_pic)
 303         for (i = remove_idx; framelist[i]; i++)
 304             framelist[i] = framelist[i+1];
 305
 306     return remove_pic;
 307 }
 308
 309 static int add_frame(DiracFrame *framelist[], int maxframes, DiracFrame *frame)
 310 {
 311     int i;
 312     for (i = 0; i < maxframes; i++)
 313         if (!framelist[i]) {
 314             framelist[i] = frame;
 315             return 0;
 316         }
 317     return -1;
 318 }
 319
 320 static int alloc_sequence_buffers(DiracContext *s)
 321 {
 322     int sbwidth  = DIVRNDUP(s->source.width,  4);
 323     int sbheight = DIVRNDUP(s->source.height, 4);
 324     int i, w, h, top_padding;
 325
 326     /* todo: think more about this / use or set Plane here */
 327     for (i = 0; i < 3; i++) {
 328         int max_xblen = MAX_BLOCKSIZE >> (i ? s->chroma_x_shift : 0);
 329         int max_yblen = MAX_BLOCKSIZE >> (i ? s->chroma_y_shift : 0);
 330         w = s->source.width  >> (i ? s->chroma_x_shift : 0);
 331         h = s->source.height >> (i ? s->chroma_y_shift : 0);
 332
 333         /* we allocate the max we support here since num decompositions can
 334          * change from frame to frame. Stride is aligned to 16 for SIMD, and
 335          * 1<<MAX_DWT_LEVELS top padding to avoid if(y>0) in arith decoding
 336          * MAX_BLOCKSIZE padding for MC: blocks can spill up to half of that
 337          * on each side */
 338         top_padding = FFMAX(1<<MAX_DWT_LEVELS, max_yblen/2);
 339         w = FFALIGN(CALC_PADDING(w, MAX_DWT_LEVELS), 8); /* FIXME: Should this be 16 for SSE??? */
 340         h = top_padding + CALC_PADDING(h, MAX_DWT_LEVELS) + max_yblen/2;
 341
 342         s->plane[i].idwt_buf_base = av_mallocz_array((w+max_xblen), h * sizeof(IDWTELEM));
 343         s->plane[i].idwt_tmp      = av_malloc_array((w+16), sizeof(IDWTELEM));
 344         s->plane[i].idwt_buf      = s->plane[i].idwt_buf_base + top_padding*w;
 345         if (!s->plane[i].idwt_buf_base || !s->plane[i].idwt_tmp)
 346             return AVERROR(ENOMEM);
 347     }
 348
 349     /* fixme: allocate using real stride here */
 350     s->sbsplit  = av_malloc_array(sbwidth, sbheight);
 351     s->blmotion = av_malloc_array(sbwidth, sbheight * 16 * sizeof(*s->blmotion));
 352
 353     if (!s->sbsplit || !s->blmotion)
 354         return AVERROR(ENOMEM);
 355     return 0;
 356 }
 357
 358 static int alloc_buffers(DiracContext *s, int stride)
 359 {
 360     int w = s->source.width;
 361     int h = s->source.height;
 362
 363     av_assert0(stride >= w);
 364     stride += 64;
 365
 366     if (s->buffer_stride >= stride)
 367         return 0;
 368     s->buffer_stride = 0;
 369
 370     av_freep(&s->edge_emu_buffer_base);
 371     memset(s->edge_emu_buffer, 0, sizeof(s->edge_emu_buffer));
 372     av_freep(&s->mctmp);
 373     av_freep(&s->mcscratch);
 374
 375     s->edge_emu_buffer_base = av_malloc_array(stride, MAX_BLOCKSIZE);
 376
 377     s->mctmp     = av_malloc_array((stride+MAX_BLOCKSIZE), (h+MAX_BLOCKSIZE) * sizeof(*s->mctmp));
 378     s->mcscratch = av_malloc_array(stride, MAX_BLOCKSIZE);
 379
 380     if (!s->edge_emu_buffer_base || !s->mctmp || !s->mcscratch)
 381         return AVERROR(ENOMEM);
 382
 383     s->buffer_stride = stride;
 384     return 0;
 385 }
 386
 387 static void free_sequence_buffers(DiracContext *s)
 388 {
 389     int i, j, k;
 390
 391     for (i = 0; i < MAX_FRAMES; i++) {
 392         if (s->all_frames[i].avframe->data[0]) {
 393             av_frame_unref(s->all_frames[i].avframe);
 394             memset(s->all_frames[i].interpolated, 0, sizeof(s->all_frames[i].interpolated));
 395         }
 396
 397         for (j = 0; j < 3; j++)
 398             for (k = 1; k < 4; k++)
 399                 av_freep(&s->all_frames[i].hpel_base[j][k]);
 400     }
 401
 402     memset(s->ref_frames, 0, sizeof(s->ref_frames));
 403     memset(s->delay_frames, 0, sizeof(s->delay_frames));
 404
 405     for (i = 0; i < 3; i++) {
 406         av_freep(&s->plane[i].idwt_buf_base);
 407         av_freep(&s->plane[i].idwt_tmp);
 408     }
 409
 410     s->buffer_stride = 0;
 411     av_freep(&s->sbsplit);
 412     av_freep(&s->blmotion);
 413     av_freep(&s->edge_emu_buffer_base);
 414
 415     av_freep(&s->mctmp);
 416     av_freep(&s->mcscratch);
 417 }
 418
 419 static av_cold int dirac_decode_init(AVCodecContext *avctx)
 420 {
 421     DiracContext *s = avctx->priv_data;
 422     int i;
 423
 424     s->avctx = avctx;
 425     s->frame_number = -1;
 426
 427     ff_diracdsp_init(&s->diracdsp);
 428     ff_mpegvideoencdsp_init(&s->mpvencdsp, avctx);
 429     ff_videodsp_init(&s->vdsp, 8);
 430
 431     for (i = 0; i < MAX_FRAMES; i++) {
 432         s->all_frames[i].avframe = av_frame_alloc();
 433         if (!s->all_frames[i].avframe) {
 434             while (i > 0)
 435                 av_frame_free(&s->all_frames[--i].avframe);
 436             return AVERROR(ENOMEM);
 437         }
 438     }
 439
 440     return 0;
 441 }
 442
 443 static void dirac_decode_flush(AVCodecContext *avctx)
 444 {
 445     DiracContext *s = avctx->priv_data;
 446     free_sequence_buffers(s);
 447     s->seen_sequence_header = 0;
 448     s->frame_number = -1;
 449 }
 450
 451 static av_cold int dirac_decode_end(AVCodecContext *avctx)
 452 {
 453     DiracContext *s = avctx->priv_data;
 454     int i;
 455
 456     dirac_decode_flush(avctx);
 457     for (i = 0; i < MAX_FRAMES; i++)
 458         av_frame_free(&s->all_frames[i].avframe);
 459
 460     return 0;
 461 }
 462
/* Pick a sign context relative to CTX_SIGN_ZERO from the sign of x:
 * CTX_SIGN_ZERO + 1 for x > 0, CTX_SIGN_ZERO - 1 for x < 0 and
 * CTX_SIGN_ZERO itself for x == 0. x is evaluated twice. */
#define SIGN_CTX(x) (CTX_SIGN_ZERO + ((x) > 0) - ((x) < 0))
 464
 465 static inline void coeff_unpack_arith(DiracArith *c, int qfactor, int qoffset,
 466                                       SubBand *b, IDWTELEM *buf, int x, int y)
 467 {
 468     int coeff, sign;
 469     int sign_pred = 0;
 470     int pred_ctx = CTX_ZPZN_F1;
 471
 472     /* Check if the parent subband has a 0 in the corresponding position */
 473     if (b->parent)
 474         pred_ctx += !!b->parent->ibuf[b->parent->stride * (y>>1) + (x>>1)] << 1;
 475
 476     if (b->orientation == subband_hl)
 477         sign_pred = buf[-b->stride];
 478
 479     /* Determine if the pixel has only zeros in its neighbourhood */
 480     if (x) {
 481         pred_ctx += !(buf[-1] | buf[-b->stride] | buf[-1-b->stride]);
 482         if (b->orientation == subband_lh)
 483             sign_pred = buf[-1];
 484     } else {
 485         pred_ctx += !buf[-b->stride];
 486     }
 487
 488     coeff = dirac_get_arith_uint(c, pred_ctx, CTX_COEFF_DATA);
 489     if (coeff) {
 490         coeff = (coeff * qfactor + qoffset + 2) >> 2;
 491         sign  = dirac_get_arith_bit(c, SIGN_CTX(sign_pred));
 492         coeff = (coeff ^ -sign) + sign;
 493     }
 494     *buf = coeff;
 495 }
 496
 497 static inline int coeff_unpack_golomb(GetBitContext *gb, int qfactor, int qoffset)
 498 {
 499     int sign, coeff;
 500
 501     coeff = svq3_get_ue_golomb(gb);
 502     if (coeff) {
 503         coeff = (coeff * qfactor + qoffset + 2) >> 2;
 504         sign  = get_bits1(gb);
 505         coeff = (coeff ^ -sign) + sign;
 506     }
 507     return coeff;
 508 }
 509
 510 /**
 511  * Decode the coeffs in the rectangle defined by left, right, top, bottom
 512  * [DIRAC_STD] 13.4.3.2 Codeblock unpacking loop. codeblock()
 513  */
 514 static inline void codeblock(DiracContext *s, SubBand *b,
 515                              GetBitContext *gb, DiracArith *c,
 516                              int left, int right, int top, int bottom,
 517                              int blockcnt_one, int is_arith)
 518 {
 519     int x, y, zero_block;
 520     int qoffset, qfactor;
 521     IDWTELEM *buf;
 522
 523     /* check for any coded coefficients in this codeblock */
 524     if (!blockcnt_one) {
 525         if (is_arith)
 526             zero_block = dirac_get_arith_bit(c, CTX_ZERO_BLOCK);
 527         else
 528             zero_block = get_bits1(gb);
 529
 530         if (zero_block)
 531             return;
 532     }
 533
 534     if (s->codeblock_mode && !(s->old_delta_quant && blockcnt_one)) {
 535         int quant = b->quant;
 536         if (is_arith)
 537             quant += dirac_get_arith_int(c, CTX_DELTA_Q_F, CTX_DELTA_Q_DATA);
 538         else
 539             quant += dirac_get_se_golomb(gb);
 540         if (quant < 0) {
 541             av_log(s->avctx, AV_LOG_ERROR, "Invalid quant\n");
 542             return;
 543         }
 544         b->quant = quant;
 545     }
 546
 547     b->quant = FFMIN(b->quant, MAX_QUANT);
 548
 549     qfactor = qscale_tab[b->quant];
 550     /* TODO: context pointer? */
 551     if (!s->num_refs)
 552         qoffset = qoffset_intra_tab[b->quant];
 553     else
 554         qoffset = qoffset_inter_tab[b->quant];
 555
 556     buf = b->ibuf + top * b->stride;
 557     for (y = top; y < bottom; y++) {
 558         for (x = left; x < right; x++) {
 559             /* [DIRAC_STD] 13.4.4 Subband coefficients. coeff_unpack() */
 560             if (is_arith)
 561                 coeff_unpack_arith(c, qfactor, qoffset, b, buf+x, x, y);
 562             else
 563                 buf[x] = coeff_unpack_golomb(gb, qfactor, qoffset);
 564         }
 565         buf += b->stride;
 566     }
 567 }
 568
 569 /**
 570  * Dirac Specification ->
 571  * 13.3 intra_dc_prediction(band)
 572  */
 573 static inline void intra_dc_prediction(SubBand *b)
 574 {
 575     IDWTELEM *buf = b->ibuf;
 576     int x, y;
 577
 578     for (x = 1; x < b->width; x++)
 579         buf[x] += buf[x-1];
 580     buf += b->stride;
 581
 582     for (y = 1; y < b->height; y++) {
 583         buf[0] += buf[-b->stride];
 584
 585         for (x = 1; x < b->width; x++) {
 586             int pred = buf[x - 1] + buf[x - b->stride] + buf[x - b->stride-1];
 587             buf[x]  += divide3(pred);
 588         }
 589         buf += b->stride;
 590     }
 591 }
 592
 593 /**
 594  * Dirac Specification ->
 595  * 13.4.2 Non-skipped subbands.  subband_coeffs()
 596  */
 597 static av_always_inline void decode_subband_internal(DiracContext *s, SubBand *b, int is_arith)
 598 {
 599     int cb_x, cb_y, left, right, top, bottom;
 600     DiracArith c;
 601     GetBitContext gb;
 602     int cb_width  = s->codeblock[b->level + (b->orientation != subband_ll)].width;
 603     int cb_height = s->codeblock[b->level + (b->orientation != subband_ll)].height;
 604     int blockcnt_one = (cb_width + cb_height) == 2;
 605
 606     if (!b->length)
 607         return;
 608
 609     init_get_bits8(&gb, b->coeff_data, b->length);
 610
 611     if (is_arith)
 612         ff_dirac_init_arith_decoder(&c, &gb, b->length);
 613
 614     top = 0;
 615     for (cb_y = 0; cb_y < cb_height; cb_y++) {
 616         bottom = (b->height * (cb_y+1LL)) / cb_height;
 617         left = 0;
 618         for (cb_x = 0; cb_x < cb_width; cb_x++) {
 619             right = (b->width * (cb_x+1LL)) / cb_width;
 620             codeblock(s, b, &gb, &c, left, right, top, bottom, blockcnt_one, is_arith);
 621             left = right;
 622         }
 623         top = bottom;
 624     }
 625
 626     if (b->orientation == subband_ll && s->num_refs == 0)
 627         intra_dc_prediction(b);
 628 }
 629
 630 static int decode_subband_arith(AVCodecContext *avctx, void *b)
 631 {
 632     DiracContext *s = avctx->priv_data;
 633     decode_subband_internal(s, b, 1);
 634     return 0;
 635 }
 636
 637 static int decode_subband_golomb(AVCodecContext *avctx, void *arg)
 638 {
 639     DiracContext *s = avctx->priv_data;
 640     SubBand **b     = arg;
 641     decode_subband_internal(s, *b, 0);
 642     return 0;
 643 }
 644
 645 /**
 646  * Dirac Specification ->
 647  * [DIRAC_STD] 13.4.1 core_transform_data()
 648  */
 649 static void decode_component(DiracContext *s, int comp)
 650 {
 651     AVCodecContext *avctx = s->avctx;
 652     SubBand *bands[3*MAX_DWT_LEVELS+1];
 653     enum dirac_subband orientation;
 654     int level, num_bands = 0;
 655
 656     /* Unpack all subbands at all levels. */
 657     for (level = 0; level < s->wavelet_depth; level++) {
 658         for (orientation = !!level; orientation < 4; orientation++) {
 659             SubBand *b = &s->plane[comp].band[level][orientation];
 660             bands[num_bands++] = b;
 661
 662             align_get_bits(&s->gb);
 663             /* [DIRAC_STD] 13.4.2 subband() */
 664             b->length = svq3_get_ue_golomb(&s->gb);
 665             if (b->length) {
 666                 b->quant = svq3_get_ue_golomb(&s->gb);
 667                 align_get_bits(&s->gb);
 668                 b->coeff_data = s->gb.buffer + get_bits_count(&s->gb)/8;
 669                 b->length = FFMIN(b->length, FFMAX(get_bits_left(&s->gb)/8, 0));
 670                 skip_bits_long(&s->gb, b->length*8);
 671             }
 672         }
 673         /* arithmetic coding has inter-level dependencies, so we can only execute one level at a time */
 674         if (s->is_arith)
 675             avctx->execute(avctx, decode_subband_arith, &s->plane[comp].band[level][!!level],
 676                            NULL, 4-!!level, sizeof(SubBand));
 677     }
 678     /* golomb coding has no inter-level dependencies, so we can execute all subbands in parallel */
 679     if (!s->is_arith)
 680         avctx->execute(avctx, decode_subband_golomb, bands, NULL, num_bands, sizeof(SubBand*));
 681 }
 682
 683 /* [DIRAC_STD] 13.5.5.2 Luma slice subband data. luma_slice_band(level,orient,sx,sy) --> if b2 == NULL */
 684 /* [DIRAC_STD] 13.5.5.3 Chroma slice subband data. chroma_slice_band(level,orient,sx,sy) --> if b2 != NULL */
 685 static void lowdelay_subband(DiracContext *s, GetBitContext *gb, int quant,
 686                              int slice_x, int slice_y, int bits_end,
 687                              SubBand *b1, SubBand *b2)
 688 {
 689     int left   = b1->width  * slice_x    / s->lowdelay.num_x;
 690     int right  = b1->width  *(slice_x+1) / s->lowdelay.num_x;
 691     int top    = b1->height * slice_y    / s->lowdelay.num_y;
 692     int bottom = b1->height *(slice_y+1) / s->lowdelay.num_y;
 693
 694     int qfactor = qscale_tab[FFMIN(quant, MAX_QUANT)];
 695     int qoffset = qoffset_intra_tab[FFMIN(quant, MAX_QUANT)];
 696
 697     IDWTELEM *buf1 =      b1->ibuf + top * b1->stride;
 698     IDWTELEM *buf2 = b2 ? b2->ibuf + top * b2->stride : NULL;
 699     int x, y;
 700     /* we have to constantly check for overread since the spec explicitly
 701        requires this, with the meaning that all remaining coeffs are set to 0 */
 702     if (get_bits_count(gb) >= bits_end)
 703         return;
 704
 705     for (y = top; y < bottom; y++) {
 706         for (x = left; x < right; x++) {
 707             buf1[x] = coeff_unpack_golomb(gb, qfactor, qoffset);
 708             if (get_bits_count(gb) >= bits_end)
 709                 return;
 710             if (buf2) {
 711                 buf2[x] = coeff_unpack_golomb(gb, qfactor, qoffset);
 712                 if (get_bits_count(gb) >= bits_end)
 713                     return;
 714             }
 715         }
 716         buf1 += b1->stride;
 717         if (buf2)
 718             buf2 += b2->stride;
 719     }
 720 }
 721
/**
 * Work item for decode_lowdelay_slice(); one is set up per slice by
 * decode_lowdelay().
 */
struct lowdelay_slice {
    GetBitContext gb;   /* bit reader positioned at the start of the slice */
    int slice_x;        /* horizontal slice index */
    int slice_y;        /* vertical slice index */
    int bytes;          /* size of the slice in bytes */
};
 728
 729
 730 /**
 731  * Dirac Specification ->
 732  * 13.5.2 Slices. slice(sx,sy)
 733  */
 734 static int decode_lowdelay_slice(AVCodecContext *avctx, void *arg)
 735 {
 736     DiracContext *s = avctx->priv_data;
 737     struct lowdelay_slice *slice = arg;
 738     GetBitContext *gb = &slice->gb;
 739     enum dirac_subband orientation;
 740     int level, quant, chroma_bits, chroma_end;
 741
 742     int quant_base  = get_bits(gb, 7); /*[DIRAC_STD] qindex */
 743     int length_bits = av_log2(8 * slice->bytes)+1;
 744     int luma_bits   = get_bits_long(gb, length_bits);
 745     int luma_end    = get_bits_count(gb) + FFMIN(luma_bits, get_bits_left(gb));
 746
 747     /* [DIRAC_STD] 13.5.5.2 luma_slice_band */
 748     for (level = 0; level < s->wavelet_depth; level++)
 749         for (orientation = !!level; orientation < 4; orientation++) {
 750             quant = FFMAX(quant_base - s->lowdelay.quant[level][orientation], 0);
 751             lowdelay_subband(s, gb, quant, slice->slice_x, slice->slice_y, luma_end,
 752                              &s->plane[0].band[level][orientation], NULL);
 753         }
 754
 755     /* consume any unused bits from luma */
 756     skip_bits_long(gb, get_bits_count(gb) - luma_end);
 757
 758     chroma_bits = 8*slice->bytes - 7 - length_bits - luma_bits;
 759     chroma_end  = get_bits_count(gb) + FFMIN(chroma_bits, get_bits_left(gb));
 760     /* [DIRAC_STD] 13.5.5.3 chroma_slice_band */
 761     for (level = 0; level < s->wavelet_depth; level++)
 762         for (orientation = !!level; orientation < 4; orientation++) {
 763             quant = FFMAX(quant_base - s->lowdelay.quant[level][orientation], 0);
 764             lowdelay_subband(s, gb, quant, slice->slice_x, slice->slice_y, chroma_end,
 765                              &s->plane[1].band[level][orientation],
 766                              &s->plane[2].band[level][orientation]);
 767         }
 768
 769     return 0;
 770 }
 771
 772 /**
 773  * Dirac Specification ->
 774  * 13.5.1 low_delay_transform_data()
 775  */
 776 static int decode_lowdelay(DiracContext *s)
 777 {
 778     AVCodecContext *avctx = s->avctx;
 779     int slice_x, slice_y, bytes, bufsize;
 780     const uint8_t *buf;
 781     struct lowdelay_slice *slices;
 782     int slice_num = 0;
 783
 784     slices = av_mallocz_array(s->lowdelay.num_x, s->lowdelay.num_y * sizeof(struct lowdelay_slice));
 785     if (!slices)
 786         return AVERROR(ENOMEM);
 787
 788     align_get_bits(&s->gb);
 789     /*[DIRAC_STD] 13.5.2 Slices. slice(sx,sy) */
 790     buf = s->gb.buffer + get_bits_count(&s->gb)/8;
 791     bufsize = get_bits_left(&s->gb);
 792
 793     for (slice_y = 0; bufsize > 0 && slice_y < s->lowdelay.num_y; slice_y++)
 794         for (slice_x = 0; bufsize > 0 && slice_x < s->lowdelay.num_x; slice_x++) {
 795             bytes = (slice_num+1) * s->lowdelay.bytes.num / s->lowdelay.bytes.den
 796                 - slice_num    * s->lowdelay.bytes.num / s->lowdelay.bytes.den;
 797
 798             slices[slice_num].bytes   = bytes;
 799             slices[slice_num].slice_x = slice_x;
 800             slices[slice_num].slice_y = slice_y;
 801             init_get_bits(&slices[slice_num].gb, buf, bufsize);
 802             slice_num++;
 803
 804             buf     += bytes;
 805             if (bufsize/8 >= bytes)
 806                 bufsize -= bytes*8;
 807             else
 808                 bufsize = 0;
 809         }
 810
 811     avctx->execute(avctx, decode_lowdelay_slice, slices, NULL, slice_num,
 812                    sizeof(struct lowdelay_slice)); /* [DIRAC_STD] 13.5.2 Slices */
 813     intra_dc_prediction(&s->plane[0].band[0][0]);  /* [DIRAC_STD] 13.3 intra_dc_prediction() */
 814     intra_dc_prediction(&s->plane[1].band[0][0]);  /* [DIRAC_STD] 13.3 intra_dc_prediction() */
 815     intra_dc_prediction(&s->plane[2].band[0][0]);  /* [DIRAC_STD] 13.3 intra_dc_prediction() */
 816     av_free(slices);
 817     return 0;
 818 }
 819
 820 static void init_planes(DiracContext *s)
 821 {
 822     int i, w, h, level, orientation;
 823
 824     for (i = 0; i < 3; i++) {
 825         Plane *p = &s->plane[i];
 826
 827         p->width       = s->source.width  >> (i ? s->chroma_x_shift : 0);
 828         p->height      = s->source.height >> (i ? s->chroma_y_shift : 0);
 829         p->idwt_width  = w = CALC_PADDING(p->width , s->wavelet_depth);
 830         p->idwt_height = h = CALC_PADDING(p->height, s->wavelet_depth);
 831         p->idwt_stride = FFALIGN(p->idwt_width, 8);
 832
 833         for (level = s->wavelet_depth-1; level >= 0; level--) {
 834             w = w>>1;
 835             h = h>>1;
 836             for (orientation = !!level; orientation < 4; orientation++) {
 837                 SubBand *b = &p->band[level][orientation];
 838
 839                 b->ibuf   = p->idwt_buf;
 840                 b->level  = level;
 841                 b->stride = p->idwt_stride << (s->wavelet_depth - level);
 842                 b->width  = w;
 843                 b->height = h;
 844                 b->orientation = orientation;
 845
 846                 if (orientation & 1)
 847                     b->ibuf += w;
 848                 if (orientation > 1)
 849                     b->ibuf += b->stride>>1;
 850
 851                 if (level)
 852                     b->parent = &p->band[level-1][orientation];
 853             }
 854         }
 855
 856         if (i > 0) {
 857             p->xblen = s->plane[0].xblen >> s->chroma_x_shift;
 858             p->yblen = s->plane[0].yblen >> s->chroma_y_shift;
 859             p->xbsep = s->plane[0].xbsep >> s->chroma_x_shift;
 860             p->ybsep = s->plane[0].ybsep >> s->chroma_y_shift;
 861         }
 862
 863         p->xoffset = (p->xblen - p->xbsep)/2;
 864         p->yoffset = (p->yblen - p->ybsep)/2;
 865     }
 866 }
 867
 868 /**
 869  * Unpack the motion compensation parameters
 870  * Dirac Specification ->
 871  * 11.2 Picture prediction data. picture_prediction()
 872  */
 873 static int dirac_unpack_prediction_parameters(DiracContext *s)
 874 {
 875     static const uint8_t default_blen[] = { 4, 12, 16, 24 };
 876
 877     GetBitContext *gb = &s->gb;
 878     unsigned idx, ref;
 879
 880     align_get_bits(gb);
 881     /* [DIRAC_STD] 11.2.2 Block parameters. block_parameters() */
 882     /* Luma and Chroma are equal. 11.2.3 */
 883     idx = svq3_get_ue_golomb(gb); /* [DIRAC_STD] index */
 884
 885     if (idx > 4) {
 886         av_log(s->avctx, AV_LOG_ERROR, "Block prediction index too high\n");
 887         return AVERROR_INVALIDDATA;
 888     }
 889
 890     if (idx == 0) {
 891         s->plane[0].xblen = svq3_get_ue_golomb(gb);
 892         s->plane[0].yblen = svq3_get_ue_golomb(gb);
 893         s->plane[0].xbsep = svq3_get_ue_golomb(gb);
 894         s->plane[0].ybsep = svq3_get_ue_golomb(gb);
 895     } else {
 896         /*[DIRAC_STD] preset_block_params(index). Table 11.1 */
 897         s->plane[0].xblen = default_blen[idx-1];
 898         s->plane[0].yblen = default_blen[idx-1];
 899         s->plane[0].xbsep = 4 * idx;
 900         s->plane[0].ybsep = 4 * idx;
 901     }
 902     /*[DIRAC_STD] 11.2.4 motion_data_dimensions()
 903       Calculated in function dirac_unpack_block_motion_data */
 904
 905     if (s->plane[0].xblen % (1 << s->chroma_x_shift) != 0 ||
 906         s->plane[0].yblen % (1 << s->chroma_y_shift) != 0 ||
 907         !s->plane[0].xblen || !s->plane[0].yblen) {
 908         av_log(s->avctx, AV_LOG_ERROR,
 909                "invalid x/y block length (%d/%d) for x/y chroma shift (%d/%d)\n",
 910                s->plane[0].xblen, s->plane[0].yblen, s->chroma_x_shift, s->chroma_y_shift);
 911         return AVERROR_INVALIDDATA;
 912     }
 913     if (!s->plane[0].xbsep || !s->plane[0].ybsep || s->plane[0].xbsep < s->plane[0].xblen/2 || s->plane[0].ybsep < s->plane[0].yblen/2) {
 914         av_log(s->avctx, AV_LOG_ERROR, "Block separation too small\n");
 915         return AVERROR_INVALIDDATA;
 916     }
 917     if (s->plane[0].xbsep > s->plane[0].xblen || s->plane[0].ybsep > s->plane[0].yblen) {
 918         av_log(s->avctx, AV_LOG_ERROR, "Block separation greater than size\n");
 919         return AVERROR_INVALIDDATA;
 920     }
 921     if (FFMAX(s->plane[0].xblen, s->plane[0].yblen) > MAX_BLOCKSIZE) {
 922         av_log(s->avctx, AV_LOG_ERROR, "Unsupported large block size\n");
 923         return AVERROR_PATCHWELCOME;
 924     }
 925
 926     /*[DIRAC_STD] 11.2.5 Motion vector precision. motion_vector_precision()
 927       Read motion vector precision */
 928     s->mv_precision = svq3_get_ue_golomb(gb);
 929     if (s->mv_precision > 3) {
 930         av_log(s->avctx, AV_LOG_ERROR, "MV precision finer than eighth-pel\n");
 931         return AVERROR_INVALIDDATA;
 932     }
 933
 934     /*[DIRAC_STD] 11.2.6 Global motion. global_motion()
 935       Read the global motion compensation parameters */
 936     s->globalmc_flag = get_bits1(gb);
 937     if (s->globalmc_flag) {
 938         memset(s->globalmc, 0, sizeof(s->globalmc));
 939         /* [DIRAC_STD] pan_tilt(gparams) */
 940         for (ref = 0; ref < s->num_refs; ref++) {
 941             if (get_bits1(gb)) {
 942                 s->globalmc[ref].pan_tilt[0] = dirac_get_se_golomb(gb);
 943                 s->globalmc[ref].pan_tilt[1] = dirac_get_se_golomb(gb);
 944             }
 945             /* [DIRAC_STD] zoom_rotate_shear(gparams)
 946                zoom/rotation/shear parameters */
 947             if (get_bits1(gb)) {
 948                 s->globalmc[ref].zrs_exp   = svq3_get_ue_golomb(gb);
 949                 s->globalmc[ref].zrs[0][0] = dirac_get_se_golomb(gb);
 950                 s->globalmc[ref].zrs[0][1] = dirac_get_se_golomb(gb);
 951                 s->globalmc[ref].zrs[1][0] = dirac_get_se_golomb(gb);
 952                 s->globalmc[ref].zrs[1][1] = dirac_get_se_golomb(gb);
 953             } else {
 954                 s->globalmc[ref].zrs[0][0] = 1;
 955                 s->globalmc[ref].zrs[1][1] = 1;
 956             }
 957             /* [DIRAC_STD] perspective(gparams) */
 958             if (get_bits1(gb)) {
 959                 s->globalmc[ref].perspective_exp = svq3_get_ue_golomb(gb);
 960                 s->globalmc[ref].perspective[0]  = dirac_get_se_golomb(gb);
 961                 s->globalmc[ref].perspective[1]  = dirac_get_se_golomb(gb);
 962             }
 963         }
 964     }
 965
 966     /*[DIRAC_STD] 11.2.7 Picture prediction mode. prediction_mode()
 967       Picture prediction mode, not currently used. */
 968     if (svq3_get_ue_golomb(gb)) {
 969         av_log(s->avctx, AV_LOG_ERROR, "Unknown picture prediction mode\n");
 970         return AVERROR_INVALIDDATA;
 971     }
 972
 973     /* [DIRAC_STD] 11.2.8 Reference picture weight. reference_picture_weights()
 974        just data read, weight calculation will be done later on. */
 975     s->weight_log2denom = 1;
 976     s->weight[0]        = 1;
 977     s->weight[1]        = 1;
 978
 979     if (get_bits1(gb)) {
 980         s->weight_log2denom = svq3_get_ue_golomb(gb);
 981         s->weight[0] = dirac_get_se_golomb(gb);
 982         if (s->num_refs == 2)
 983             s->weight[1] = dirac_get_se_golomb(gb);
 984     }
 985     return 0;
 986 }
 987
 988 /**
 989  * Dirac Specification ->
 990  * 11.3 Wavelet transform data. wavelet_transform()
 991  */
 992 static int dirac_unpack_idwt_params(DiracContext *s)
 993 {
 994     GetBitContext *gb = &s->gb;
 995     int i, level;
 996     unsigned tmp;
 997
 998 #define CHECKEDREAD(dst, cond, errmsg) \
 999     tmp = svq3_get_ue_golomb(gb); \
1000     if (cond) { \
1001         av_log(s->avctx, AV_LOG_ERROR, errmsg); \
1002         return AVERROR_INVALIDDATA; \
1003     }\
1004     dst = tmp;
1005
1006     align_get_bits(gb);
1007
1008     s->zero_res = s->num_refs ? get_bits1(gb) : 0;
1009     if (s->zero_res)
1010         return 0;
1011
1012     /*[DIRAC_STD] 11.3.1 Transform parameters. transform_parameters() */
1013     CHECKEDREAD(s->wavelet_idx, tmp > 6, "wavelet_idx is too big\n")
1014
1015     CHECKEDREAD(s->wavelet_depth, tmp > MAX_DWT_LEVELS || tmp < 1, "invalid number of DWT decompositions\n")
1016
1017     if (!s->low_delay) {
1018         /* Codeblock parameters (core syntax only) */
1019         if (get_bits1(gb)) {
1020             for (i = 0; i <= s->wavelet_depth; i++) {
1021                 CHECKEDREAD(s->codeblock[i].width , tmp < 1 || tmp > (s->avctx->width >>s->wavelet_depth-i), "codeblock width invalid\n")
1022                 CHECKEDREAD(s->codeblock[i].height, tmp < 1 || tmp > (s->avctx->height>>s->wavelet_depth-i), "codeblock height invalid\n")
1023             }
1024
1025             CHECKEDREAD(s->codeblock_mode, tmp > 1, "unknown codeblock mode\n")
1026         } else
1027             for (i = 0; i <= s->wavelet_depth; i++)
1028                 s->codeblock[i].width = s->codeblock[i].height = 1;
1029     } else {
1030         /* Slice parameters + quantization matrix*/
1031         /*[DIRAC_STD] 11.3.4 Slice coding Parameters (low delay syntax only). slice_parameters() */
1032         s->lowdelay.num_x     = svq3_get_ue_golomb(gb);
1033         s->lowdelay.num_y     = svq3_get_ue_golomb(gb);
1034         s->lowdelay.bytes.num = svq3_get_ue_golomb(gb);
1035         s->lowdelay.bytes.den = svq3_get_ue_golomb(gb);
1036
1037         if (s->lowdelay.bytes.den <= 0) {
1038             av_log(s->avctx,AV_LOG_ERROR,"Invalid lowdelay.bytes.den\n");
1039             return AVERROR_INVALIDDATA;
1040         }
1041
1042         /* [DIRAC_STD] 11.3.5 Quantisation matrices (low-delay syntax). quant_matrix() */
1043         if (get_bits1(gb)) {
1044             av_log(s->avctx,AV_LOG_DEBUG,"Low Delay: Has Custom Quantization Matrix!\n");
1045             /* custom quantization matrix */
1046             s->lowdelay.quant[0][0] = svq3_get_ue_golomb(gb);
1047             for (level = 0; level < s->wavelet_depth; level++) {
1048                 s->lowdelay.quant[level][1] = svq3_get_ue_golomb(gb);
1049                 s->lowdelay.quant[level][2] = svq3_get_ue_golomb(gb);
1050                 s->lowdelay.quant[level][3] = svq3_get_ue_golomb(gb);
1051             }
1052         } else {
1053             if (s->wavelet_depth > 4) {
1054                 av_log(s->avctx,AV_LOG_ERROR,"Mandatory custom low delay matrix missing for depth %d\n", s->wavelet_depth);
1055                 return AVERROR_INVALIDDATA;
1056             }
1057             /* default quantization matrix */
1058             for (level = 0; level < s->wavelet_depth; level++)
1059                 for (i = 0; i < 4; i++) {
1060                     s->lowdelay.quant[level][i] = default_qmat[s->wavelet_idx][level][i];
1061                     /* haar with no shift differs for different depths */
1062                     if (s->wavelet_idx == 3)
1063                         s->lowdelay.quant[level][i] += 4*(s->wavelet_depth-1 - level);
1064                 }
1065         }
1066     }
1067     return 0;
1068 }
1069
/**
 * Predict the split mode of the superblock at (x, y) from the neighbours that
 * were already decoded.
 *
 * @param sbsplit pointer to the current entry of the split-mode map
 * @param stride  number of entries per row of the map
 * @return 0 for the top-left superblock, the left (first row) or upper
 *         (first column) neighbour's mode on the borders, otherwise a
 *         table-mapped average of the left, upper and upper-left modes
 */
static inline int pred_sbsplit(uint8_t *sbsplit, int stride, int x, int y)
{
    /* maps the sum of three modes (each 0..2) to their rounded mean */
    static const uint8_t mean_of_sum[7] = { 0, 0, 1, 1, 1, 2, 2 };
    int left, up, upleft;

    if (x == 0 && y == 0)
        return 0;
    if (y == 0)
        return sbsplit[-1];
    if (x == 0)
        return sbsplit[-stride];

    left   = sbsplit[-1];
    up     = sbsplit[-stride];
    upleft = sbsplit[-stride-1];

    return mean_of_sum[left + up + upleft];
}
1083
1084 static inline int pred_block_mode(DiracBlock *block, int stride, int x, int y, int refmask)
1085 {
1086     int pred;
1087
1088     if (!(x|y))
1089         return 0;
1090     else if (!y)
1091         return block[-1].ref & refmask;
1092     else if (!x)
1093         return block[-stride].ref & refmask;
1094
1095     /* return the majority */
1096     pred = (block[-1].ref & refmask) + (block[-stride].ref & refmask) + (block[-stride-1].ref & refmask);
1097     return (pred >> 1) & refmask;
1098 }
1099
1100 static inline void pred_block_dc(DiracBlock *block, int stride, int x, int y)
1101 {
1102     int i, n = 0;
1103
1104     memset(block->u.dc, 0, sizeof(block->u.dc));
1105
1106     if (x && !(block[-1].ref & 3)) {
1107         for (i = 0; i < 3; i++)
1108             block->u.dc[i] += block[-1].u.dc[i];
1109         n++;
1110     }
1111
1112     if (y && !(block[-stride].ref & 3)) {
1113         for (i = 0; i < 3; i++)
1114             block->u.dc[i] += block[-stride].u.dc[i];
1115         n++;
1116     }
1117
1118     if (x && y && !(block[-1-stride].ref & 3)) {
1119         for (i = 0; i < 3; i++)
1120             block->u.dc[i] += block[-1-stride].u.dc[i];
1121         n++;
1122     }
1123
1124     if (n == 2) {
1125         for (i = 0; i < 3; i++)
1126             block->u.dc[i] = (block->u.dc[i]+1)>>1;
1127     } else if (n == 3) {
1128         for (i = 0; i < 3; i++)
1129             block->u.dc[i] = divide3(block->u.dc[i]);
1130     }
1131 }
1132
1133 static inline void pred_mv(DiracBlock *block, int stride, int x, int y, int ref)
1134 {
1135     int16_t *pred[3];
1136     int refmask = ref+1;
1137     int mask = refmask | DIRAC_REF_MASK_GLOBAL; /*  exclude gmc blocks */
1138     int n = 0;
1139
1140     if (x && (block[-1].ref & mask) == refmask)
1141         pred[n++] = block[-1].u.mv[ref];
1142
1143     if (y && (block[-stride].ref & mask) == refmask)
1144         pred[n++] = block[-stride].u.mv[ref];
1145
1146     if (x && y && (block[-stride-1].ref & mask) == refmask)
1147         pred[n++] = block[-stride-1].u.mv[ref];
1148
1149     switch (n) {
1150     case 0:
1151         block->u.mv[ref][0] = 0;
1152         block->u.mv[ref][1] = 0;
1153         break;
1154     case 1:
1155         block->u.mv[ref][0] = pred[0][0];
1156         block->u.mv[ref][1] = pred[0][1];
1157         break;
1158     case 2:
1159         block->u.mv[ref][0] = (pred[0][0] + pred[1][0] + 1) >> 1;
1160         block->u.mv[ref][1] = (pred[0][1] + pred[1][1] + 1) >> 1;
1161         break;
1162     case 3:
1163         block->u.mv[ref][0] = mid_pred(pred[0][0], pred[1][0], pred[2][0]);
1164         block->u.mv[ref][1] = mid_pred(pred[0][1], pred[1][1], pred[2][1]);
1165         break;
1166     }
1167 }
1168
1169 static void global_mv(DiracContext *s, DiracBlock *block, int x, int y, int ref)
1170 {
1171     int ez      = s->globalmc[ref].zrs_exp;
1172     int ep      = s->globalmc[ref].perspective_exp;
1173     int (*A)[2] = s->globalmc[ref].zrs;
1174     int *b      = s->globalmc[ref].pan_tilt;
1175     int *c      = s->globalmc[ref].perspective;
1176
1177     int m       = (1<<ep) - (c[0]*x + c[1]*y);
1178     int mx      = m * ((A[0][0] * x + A[0][1]*y) + (1<<ez) * b[0]);
1179     int my      = m * ((A[1][0] * x + A[1][1]*y) + (1<<ez) * b[1]);
1180
1181     block->u.mv[ref][0] = (mx + (1<<(ez+ep))) >> (ez+ep);
1182     block->u.mv[ref][1] = (my + (1<<(ez+ep))) >> (ez+ep);
1183 }
1184
1185 static void decode_block_params(DiracContext *s, DiracArith arith[8], DiracBlock *block,
1186                                 int stride, int x, int y)
1187 {
1188     int i;
1189
1190     block->ref  = pred_block_mode(block, stride, x, y, DIRAC_REF_MASK_REF1);
1191     block->ref ^= dirac_get_arith_bit(arith, CTX_PMODE_REF1);
1192
1193     if (s->num_refs == 2) {
1194         block->ref |= pred_block_mode(block, stride, x, y, DIRAC_REF_MASK_REF2);
1195         block->ref ^= dirac_get_arith_bit(arith, CTX_PMODE_REF2) << 1;
1196     }
1197
1198     if (!block->ref) {
1199         pred_block_dc(block, stride, x, y);
1200         for (i = 0; i < 3; i++)
1201             block->u.dc[i] += dirac_get_arith_int(arith+1+i, CTX_DC_F1, CTX_DC_DATA);
1202         return;
1203     }
1204
1205     if (s->globalmc_flag) {
1206         block->ref |= pred_block_mode(block, stride, x, y, DIRAC_REF_MASK_GLOBAL);
1207         block->ref ^= dirac_get_arith_bit(arith, CTX_GLOBAL_BLOCK) << 2;
1208     }
1209
1210     for (i = 0; i < s->num_refs; i++)
1211         if (block->ref & (i+1)) {
1212             if (block->ref & DIRAC_REF_MASK_GLOBAL) {
1213                 global_mv(s, block, x, y, i);
1214             } else {
1215                 pred_mv(block, stride, x, y, i);
1216                 block->u.mv[i][0] += dirac_get_arith_int(arith + 4 + 2 * i, CTX_MV_F1, CTX_MV_DATA);
1217                 block->u.mv[i][1] += dirac_get_arith_int(arith + 5 + 2 * i, CTX_MV_F1, CTX_MV_DATA);
1218             }
1219         }
1220 }
1221
1222 /**
1223  * Copies the current block to the other blocks covered by the current superblock split mode
1224  */
1225 static void propagate_block_data(DiracBlock *block, int stride, int size)
1226 {
1227     int x, y;
1228     DiracBlock *dst = block;
1229
1230     for (x = 1; x < size; x++)
1231         dst[x] = *block;
1232
1233     for (y = 1; y < size; y++) {
1234         dst += stride;
1235         for (x = 0; x < size; x++)
1236             dst[x] = *block;
1237     }
1238 }
1239
1240 /**
1241  * Dirac Specification ->
1242  * 12. Block motion data syntax
1243  */
1244 static int dirac_unpack_block_motion_data(DiracContext *s)
1245 {
1246     GetBitContext *gb = &s->gb;
1247     uint8_t *sbsplit = s->sbsplit;
1248     int i, x, y, q, p;
1249     DiracArith arith[8];
1250
1251     align_get_bits(gb);
1252
1253     /* [DIRAC_STD] 11.2.4 and 12.2.1 Number of blocks and superblocks */
1254     s->sbwidth  = DIVRNDUP(s->source.width,  4*s->plane[0].xbsep);
1255     s->sbheight = DIVRNDUP(s->source.height, 4*s->plane[0].ybsep);
1256     s->blwidth  = 4 * s->sbwidth;
1257     s->blheight = 4 * s->sbheight;
1258
1259     /* [DIRAC_STD] 12.3.1 Superblock splitting modes. superblock_split_modes()
1260        decode superblock split modes */
1261     ff_dirac_init_arith_decoder(arith, gb, svq3_get_ue_golomb(gb));     /* svq3_get_ue_golomb(gb) is the length */
1262     for (y = 0; y < s->sbheight; y++) {
1263         for (x = 0; x < s->sbwidth; x++) {
1264             unsigned int split  = dirac_get_arith_uint(arith, CTX_SB_F1, CTX_SB_DATA);
1265             if (split > 2)
1266                 return AVERROR_INVALIDDATA;
1267             sbsplit[x] = (split + pred_sbsplit(sbsplit+x, s->sbwidth, x, y)) % 3;
1268         }
1269         sbsplit += s->sbwidth;
1270     }
1271
1272     /* setup arith decoding */
1273     ff_dirac_init_arith_decoder(arith, gb, svq3_get_ue_golomb(gb));
1274     for (i = 0; i < s->num_refs; i++) {
1275         ff_dirac_init_arith_decoder(arith + 4 + 2 * i, gb, svq3_get_ue_golomb(gb));
1276         ff_dirac_init_arith_decoder(arith + 5 + 2 * i, gb, svq3_get_ue_golomb(gb));
1277     }
1278     for (i = 0; i < 3; i++)
1279         ff_dirac_init_arith_decoder(arith+1+i, gb, svq3_get_ue_golomb(gb));
1280
1281     for (y = 0; y < s->sbheight; y++)
1282         for (x = 0; x < s->sbwidth; x++) {
1283             int blkcnt = 1 << s->sbsplit[y * s->sbwidth + x];
1284             int step   = 4 >> s->sbsplit[y * s->sbwidth + x];
1285
1286             for (q = 0; q < blkcnt; q++)
1287                 for (p = 0; p < blkcnt; p++) {
1288                     int bx = 4 * x + p*step;
1289                     int by = 4 * y + q*step;
1290                     DiracBlock *block = &s->blmotion[by*s->blwidth + bx];
1291                     decode_block_params(s, arith, block, s->blwidth, bx, by);
1292                     propagate_block_data(block, s->blwidth, step);
1293                 }
1294         }
1295
1296     return 0;
1297 }
1298
/**
 * One-dimensional overlapped-block weight of sample i in a block of length
 * blen whose overlap offset is `offset`.
 *
 * The first and last 2*offset samples get a ramp value, all others 8.
 */
static int weight(int i, int blen, int offset)
{
    int pos; /* position inside the ramp, counted from the block edge */

    if (i < 2*offset)
        pos = i;
    else if (i > blen-1 - 2*offset)
        pos = blen-1 - i;
    else
        return 8;

    /* a ramp of two samples uses fixed values */
    if (offset == 1)
        return pos ? 5 : 3;

    return 1 + (6*pos + offset - 1) / (2*offset - 1);
}
1310
1311 static void init_obmc_weight_row(Plane *p, uint8_t *obmc_weight, int stride,
1312                                  int left, int right, int wy)
1313 {
1314     int x;
1315     for (x = 0; left && x < p->xblen >> 1; x++)
1316         obmc_weight[x] = wy*8;
1317     for (; x < p->xblen >> right; x++)
1318         obmc_weight[x] = wy*weight(x, p->xblen, p->xoffset);
1319     for (; x < p->xblen; x++)
1320         obmc_weight[x] = wy*8;
1321     for (; x < stride; x++)
1322         obmc_weight[x] = 0;
1323 }
1324
1325 static void init_obmc_weight(Plane *p, uint8_t *obmc_weight, int stride,
1326                              int left, int right, int top, int bottom)
1327 {
1328     int y;
1329     for (y = 0; top && y < p->yblen >> 1; y++) {
1330         init_obmc_weight_row(p, obmc_weight, stride, left, right, 8);
1331         obmc_weight += stride;
1332     }
1333     for (; y < p->yblen >> bottom; y++) {
1334         int wy = weight(y, p->yblen, p->yoffset);
1335         init_obmc_weight_row(p, obmc_weight, stride, left, right, wy);
1336         obmc_weight += stride;
1337     }
1338     for (; y < p->yblen; y++) {
1339         init_obmc_weight_row(p, obmc_weight, stride, left, right, 8);
1340         obmc_weight += stride;
1341     }
1342 }
1343
1344 static void init_obmc_weights(DiracContext *s, Plane *p, int by)
1345 {
1346     int top = !by;
1347     int bottom = by == s->blheight-1;
1348
1349     /* don't bother re-initing for rows 2 to blheight-2, the weights don't change */
1350     if (top || bottom || by == 1) {
1351         init_obmc_weight(p, s->obmc_weight[0], MAX_BLOCKSIZE, 1, 0, top, bottom);
1352         init_obmc_weight(p, s->obmc_weight[1], MAX_BLOCKSIZE, 0, 0, top, bottom);
1353         init_obmc_weight(p, s->obmc_weight[2], MAX_BLOCKSIZE, 0, 1, top, bottom);
1354     }
1355 }
1356
/**
 * Blend weights for eighth-pel interpolation. mc_subpel() selects an entry
 * as epel_weights[my & 3][mx & 3]; each entry holds one weight per source
 * plane and the four weights always sum to 16.
 */
static const uint8_t epel_weights[4][4][4] = {
    { /* first index 0 */
        { 16,  0,  0,  0 },
        { 12,  4,  0,  0 },
        {  8,  8,  0,  0 },
        {  4, 12,  0,  0 },
    },
    { /* first index 1 */
        { 12,  0,  4,  0 },
        {  9,  3,  3,  1 },
        {  6,  6,  2,  2 },
        {  3,  9,  1,  3 },
    },
    { /* first index 2 */
        {  8,  0,  8,  0 },
        {  6,  2,  6,  2 },
        {  4,  4,  4,  4 },
        {  2,  6,  2,  6 },
    },
    { /* first index 3 */
        {  4,  0, 12,  0 },
        {  3,  1,  9,  3 },
        {  2,  2,  6,  6 },
        {  1,  3,  3,  9 },
    },
};
1375
1376 /**
1377  * For block x,y, determine which of the hpel planes to do bilinear
1378  * interpolation from and set src[] to the location in each hpel plane
1379  * to MC from.
1380  *
1381  * @return the index of the put_dirac_pixels_tab function to use
1382  *  0 for 1 plane (fpel,hpel), 1 for 2 planes (qpel), 2 for 4 planes (qpel), and 3 for epel
1383  */
1384 static int mc_subpel(DiracContext *s, DiracBlock *block, const uint8_t *src[5],
1385                      int x, int y, int ref, int plane)
1386 {
1387     Plane *p = &s->plane[plane];
1388     uint8_t **ref_hpel = s->ref_pics[ref]->hpel[plane];
1389     int motion_x = block->u.mv[ref][0];
1390     int motion_y = block->u.mv[ref][1];
1391     int mx, my, i, epel, nplanes = 0;
1392
1393     if (plane) {
1394         motion_x >>= s->chroma_x_shift;
1395         motion_y >>= s->chroma_y_shift;
1396     }
1397
1398     mx         = motion_x & ~(-1U << s->mv_precision);
1399     my         = motion_y & ~(-1U << s->mv_precision);
1400     motion_x >>= s->mv_precision;
1401     motion_y >>= s->mv_precision;
1402     /* normalize subpel coordinates to epel */
1403     /* TODO: template this function? */
1404     mx      <<= 3 - s->mv_precision;
1405     my      <<= 3 - s->mv_precision;
1406
1407     x += motion_x;
1408     y += motion_y;
1409     epel = (mx|my)&1;
1410
1411     /* hpel position */
1412     if (!((mx|my)&3)) {
1413         nplanes = 1;
1414         src[0] = ref_hpel[(my>>1)+(mx>>2)] + y*p->stride + x;
1415     } else {
1416         /* qpel or epel */
1417         nplanes = 4;
1418         for (i = 0; i < 4; i++)
1419             src[i] = ref_hpel[i] + y*p->stride + x;
1420
1421         /* if we're interpolating in the right/bottom halves, adjust the planes as needed
1422            we increment x/y because the edge changes for half of the pixels */
1423         if (mx > 4) {
1424             src[0] += 1;
1425             src[2] += 1;
1426             x++;
1427         }
1428         if (my > 4) {
1429             src[0] += p->stride;
1430             src[1] += p->stride;
1431             y++;
1432         }
1433
1434         /* hpel planes are:
1435            [0]: F  [1]: H
1436            [2]: V  [3]: C */
1437         if (!epel) {
1438             /* check if we really only need 2 planes since either mx or my is
1439                a hpel position. (epel weights of 0 handle this there) */
1440             if (!(mx&3)) {
1441                 /* mx == 0: average [0] and [2]
1442                    mx == 4: average [1] and [3] */
1443                 src[!mx] = src[2 + !!mx];
1444                 nplanes = 2;
1445             } else if (!(my&3)) {
1446                 src[0] = src[(my>>1)  ];
1447                 src[1] = src[(my>>1)+1];
1448                 nplanes = 2;
1449             }
1450         } else {
1451             /* adjust the ordering if needed so the weights work */
1452             if (mx > 4) {
1453                 FFSWAP(const uint8_t *, src[0], src[1]);
1454                 FFSWAP(const uint8_t *, src[2], src[3]);
1455             }
1456             if (my > 4) {
1457                 FFSWAP(const uint8_t *, src[0], src[2]);
1458                 FFSWAP(const uint8_t *, src[1], src[3]);
1459             }
1460             src[4] = epel_weights[my&3][mx&3];
1461         }
1462     }
1463
1464     /* fixme: v/h _edge_pos */
1465     if (x + p->xblen > p->width +EDGE_WIDTH/2 ||
1466         y + p->yblen > p->height+EDGE_WIDTH/2 ||
1467         x < 0 || y < 0) {
1468         for (i = 0; i < nplanes; i++) {
1469             s->vdsp.emulated_edge_mc(s->edge_emu_buffer[i], src[i],
1470                                      p->stride, p->stride,
1471                                      p->xblen, p->yblen, x, y,
1472                                      p->width+EDGE_WIDTH/2, p->height+EDGE_WIDTH/2);
1473             src[i] = s->edge_emu_buffer[i];
1474         }
1475     }
1476     return (nplanes>>1) + epel;
1477 }
1478
1479 static void add_dc(uint16_t *dst, int dc, int stride,
1480                    uint8_t *obmc_weight, int xblen, int yblen)
1481 {
1482     int x, y;
1483     dc += 128;
1484
1485     for (y = 0; y < yblen; y++) {
1486         for (x = 0; x < xblen; x += 2) {
1487             dst[x  ] += dc * obmc_weight[x  ];
1488             dst[x+1] += dc * obmc_weight[x+1];
1489         }
1490         dst          += stride;
1491         obmc_weight  += MAX_BLOCKSIZE;
1492     }
1493 }
1494
1495 static void block_mc(DiracContext *s, DiracBlock *block,
1496                      uint16_t *mctmp, uint8_t *obmc_weight,
1497                      int plane, int dstx, int dsty)
1498 {
1499     Plane *p = &s->plane[plane];
1500     const uint8_t *src[5];
1501     int idx;
1502
1503     switch (block->ref&3) {
1504     case 0: /* DC */
1505         add_dc(mctmp, block->u.dc[plane], p->stride, obmc_weight, p->xblen, p->yblen);
1506         return;
1507     case 1:
1508     case 2:
1509         idx = mc_subpel(s, block, src, dstx, dsty, (block->ref&3)-1, plane);
1510         s->put_pixels_tab[idx](s->mcscratch, src, p->stride, p->yblen);
1511         if (s->weight_func)
1512             s->weight_func(s->mcscratch, p->stride, s->weight_log2denom,
1513                            s->weight[0] + s->weight[1], p->yblen);
1514         break;
1515     case 3:
1516         idx = mc_subpel(s, block, src, dstx, dsty, 0, plane);
1517         s->put_pixels_tab[idx](s->mcscratch, src, p->stride, p->yblen);
1518         idx = mc_subpel(s, block, src, dstx, dsty, 1, plane);
1519         if (s->biweight_func) {
1520             /* fixme: +32 is a quick hack */
1521             s->put_pixels_tab[idx](s->mcscratch + 32, src, p->stride, p->yblen);
1522             s->biweight_func(s->mcscratch, s->mcscratch+32, p->stride, s->weight_log2denom,
1523                              s->weight[0], s->weight[1], p->yblen);
1524         } else
1525             s->avg_pixels_tab[idx](s->mcscratch, src, p->stride, p->yblen);
1526         break;
1527     }
1528     s->add_obmc(mctmp, s->mcscratch, p->stride, obmc_weight, p->yblen);
1529 }
1530
1531 static void mc_row(DiracContext *s, DiracBlock *block, uint16_t *mctmp, int plane, int dsty)
1532 {
1533     Plane *p = &s->plane[plane];
1534     int x, dstx = p->xbsep - p->xoffset;
1535
1536     block_mc(s, block, mctmp, s->obmc_weight[0], plane, -p->xoffset, dsty);
1537     mctmp += p->xbsep;
1538
1539     for (x = 1; x < s->blwidth-1; x++) {
1540         block_mc(s, block+x, mctmp, s->obmc_weight[1], plane, dstx, dsty);
1541         dstx  += p->xbsep;
1542         mctmp += p->xbsep;
1543     }
1544     block_mc(s, block+x, mctmp, s->obmc_weight[2], plane, dstx, dsty);
1545 }
1546
1547 static void select_dsp_funcs(DiracContext *s, int width, int height, int xblen, int yblen)
1548 {
1549     int idx = 0;
1550     if (xblen > 8)
1551         idx = 1;
1552     if (xblen > 16)
1553         idx = 2;
1554
1555     memcpy(s->put_pixels_tab, s->diracdsp.put_dirac_pixels_tab[idx], sizeof(s->put_pixels_tab));
1556     memcpy(s->avg_pixels_tab, s->diracdsp.avg_dirac_pixels_tab[idx], sizeof(s->avg_pixels_tab));
1557     s->add_obmc = s->diracdsp.add_dirac_obmc[idx];
1558     if (s->weight_log2denom > 1 || s->weight[0] != 1 || s->weight[1] != 1) {
1559         s->weight_func   = s->diracdsp.weight_dirac_pixels_tab[idx];
1560         s->biweight_func = s->diracdsp.biweight_dirac_pixels_tab[idx];
1561     } else {
1562         s->weight_func   = NULL;
1563         s->biweight_func = NULL;
1564     }
1565 }
1566
1567 static int interpolate_refplane(DiracContext *s, DiracFrame *ref, int plane, int width, int height)
1568 {
1569     /* chroma allocates an edge of 8 when subsampled
1570        which for 4:2:2 means an h edge of 16 and v edge of 8
1571        just use 8 for everything for the moment */
1572     int i, edge = EDGE_WIDTH/2;
1573
1574     ref->hpel[plane][0] = ref->avframe->data[plane];
1575     s->mpvencdsp.draw_edges(ref->hpel[plane][0], ref->avframe->linesize[plane], width, height, edge, edge, EDGE_TOP | EDGE_BOTTOM); /* EDGE_TOP | EDGE_BOTTOM values just copied to make it build, this needs to be ensured */
1576
1577     /* no need for hpel if we only have fpel vectors */
1578     if (!s->mv_precision)
1579         return 0;
1580
1581     for (i = 1; i < 4; i++) {
1582         if (!ref->hpel_base[plane][i])
1583             ref->hpel_base[plane][i] = av_malloc((height+2*edge) * ref->avframe->linesize[plane] + 32);
1584         if (!ref->hpel_base[plane][i]) {
1585             return AVERROR(ENOMEM);
1586         }
1587         /* we need to be 16-byte aligned even for chroma */
1588         ref->hpel[plane][i] = ref->hpel_base[plane][i] + edge*ref->avframe->linesize[plane] + 16;
1589     }
1590
1591     if (!ref->interpolated[plane]) {
1592         s->diracdsp.dirac_hpel_filter(ref->hpel[plane][1], ref->hpel[plane][2],
1593                                       ref->hpel[plane][3], ref->hpel[plane][0],
1594                                       ref->avframe->linesize[plane], width, height);
1595         s->mpvencdsp.draw_edges(ref->hpel[plane][1], ref->avframe->linesize[plane], width, height, edge, edge, EDGE_TOP | EDGE_BOTTOM);
1596         s->mpvencdsp.draw_edges(ref->hpel[plane][2], ref->avframe->linesize[plane], width, height, edge, edge, EDGE_TOP | EDGE_BOTTOM);
1597         s->mpvencdsp.draw_edges(ref->hpel[plane][3], ref->avframe->linesize[plane], width, height, edge, edge, EDGE_TOP | EDGE_BOTTOM);
1598     }
1599     ref->interpolated[plane] = 1;
1600
1601     return 0;
1602 }
1603
1604 /**
1605  * Dirac Specification ->
1606  * 13.0 Transform data syntax. transform_data()
1607  */
1608 static int dirac_decode_frame_internal(DiracContext *s)
1609 {
1610     DWTContext d;
1611     int y, i, comp, dsty;
1612     int ret;
1613
1614     if (s->low_delay) {
1615         /* [DIRAC_STD] 13.5.1 low_delay_transform_data() */
1616         for (comp = 0; comp < 3; comp++) {
1617             Plane *p = &s->plane[comp];
1618             memset(p->idwt_buf, 0, p->idwt_stride * p->idwt_height * sizeof(IDWTELEM));
1619         }
1620         if (!s->zero_res) {
1621             if ((ret = decode_lowdelay(s)) < 0)
1622                 return ret;
1623         }
1624     }
1625
1626     for (comp = 0; comp < 3; comp++) {
1627         Plane *p       = &s->plane[comp];
1628         uint8_t *frame = s->current_picture->avframe->data[comp];
1629
1630         /* FIXME: small resolutions */
1631         for (i = 0; i < 4; i++)
1632             s->edge_emu_buffer[i] = s->edge_emu_buffer_base + i*FFALIGN(p->width, 16);
1633
1634         if (!s->zero_res && !s->low_delay)
1635         {
1636             memset(p->idwt_buf, 0, p->idwt_stride * p->idwt_height * sizeof(IDWTELEM));
1637             decode_component(s, comp); /* [DIRAC_STD] 13.4.1 core_transform_data() */
1638         }
1639         ret = ff_spatial_idwt_init2(&d, p->idwt_buf, p->idwt_width, p->idwt_height, p->idwt_stride,
1640                                     s->wavelet_idx+2, s->wavelet_depth, p->idwt_tmp);
1641         if (ret < 0)
1642             return ret;
1643
1644         if (!s->num_refs) { /* intra */
1645             for (y = 0; y < p->height; y += 16) {
1646                 ff_spatial_idwt_slice2(&d, y+16); /* decode */
1647                 s->diracdsp.put_signed_rect_clamped(frame + y*p->stride, p->stride,
1648                                                     p->idwt_buf + y*p->idwt_stride, p->idwt_stride, p->width, 16);
1649             }
1650         } else { /* inter */
1651             int rowheight = p->ybsep*p->stride;
1652
1653             select_dsp_funcs(s, p->width, p->height, p->xblen, p->yblen);
1654
1655             for (i = 0; i < s->num_refs; i++) {
1656                 int ret = interpolate_refplane(s, s->ref_pics[i], comp, p->width, p->height);
1657                 if (ret < 0)
1658                     return ret;
1659             }
1660
1661             memset(s->mctmp, 0, 4*p->yoffset*p->stride);
1662
1663             dsty = -p->yoffset;
1664             for (y = 0; y < s->blheight; y++) {
1665                 int h     = 0,
1666                     start = FFMAX(dsty, 0);
1667                 uint16_t *mctmp    = s->mctmp + y*rowheight;
1668                 DiracBlock *blocks = s->blmotion + y*s->blwidth;
1669
1670                 init_obmc_weights(s, p, y);
1671
1672                 if (y == s->blheight-1 || start+p->ybsep > p->height)
1673                     h = p->height - start;
1674                 else
1675                     h = p->ybsep - (start - dsty);
1676                 if (h < 0)
1677                     break;
1678
1679                 memset(mctmp+2*p->yoffset*p->stride, 0, 2*rowheight);
1680                 mc_row(s, blocks, mctmp, comp, dsty);
1681
1682                 mctmp += (start - dsty)*p->stride + p->xoffset;
1683                 ff_spatial_idwt_slice2(&d, start + h); /* decode */
1684                 s->diracdsp.add_rect_clamped(frame + start*p->stride, mctmp, p->stride,
1685                                              p->idwt_buf + start*p->idwt_stride, p->idwt_stride, p->width, h);
1686
1687                 dsty += p->ybsep;
1688             }
1689         }
1690     }
1691
1692
1693     return 0;
1694 }
1695
1696 static int get_buffer_with_edge(AVCodecContext *avctx, AVFrame *f, int flags)
1697 {
1698     int ret, i;
1699     int chroma_x_shift, chroma_y_shift;
1700     avcodec_get_chroma_sub_sample(avctx->pix_fmt, &chroma_x_shift, &chroma_y_shift);
1701
1702     f->width  = avctx->width  + 2 * EDGE_WIDTH;
1703     f->height = avctx->height + 2 * EDGE_WIDTH + 2;
1704     ret = ff_get_buffer(avctx, f, flags);
1705     if (ret < 0)
1706         return ret;
1707
1708     for (i = 0; f->data[i]; i++) {
1709         int offset = (EDGE_WIDTH >> (i && i<3 ? chroma_y_shift : 0)) *
1710                      f->linesize[i] + 32;
1711         f->data[i] += offset;
1712     }
1713     f->width  = avctx->width;
1714     f->height = avctx->height;
1715
1716     return 0;
1717 }
1718
1719 /**
1720  * Dirac Specification ->
1721  * 11.1.1 Picture Header. picture_header()
1722  */
1723 static int dirac_decode_picture_header(DiracContext *s)
1724 {
1725     unsigned retire, picnum;
1726     int i, j, ret;
1727     int64_t refdist, refnum;
1728     GetBitContext *gb = &s->gb;
1729
1730     /* [DIRAC_STD] 11.1.1 Picture Header. picture_header() PICTURE_NUM */
1731     picnum = s->current_picture->avframe->display_picture_number = get_bits_long(gb, 32);
1732
1733
1734     av_log(s->avctx,AV_LOG_DEBUG,"PICTURE_NUM: %d\n",picnum);
1735
1736     /* if this is the first keyframe after a sequence header, start our
1737        reordering from here */
1738     if (s->frame_number < 0)
1739         s->frame_number = picnum;
1740
1741     s->ref_pics[0] = s->ref_pics[1] = NULL;
1742     for (i = 0; i < s->num_refs; i++) {
1743         refnum = (picnum + dirac_get_se_golomb(gb)) & 0xFFFFFFFF;
1744         refdist = INT64_MAX;
1745
1746         /* find the closest reference to the one we want */
1747         /* Jordi: this is needed if the referenced picture hasn't yet arrived */
1748         for (j = 0; j < MAX_REFERENCE_FRAMES && refdist; j++)
1749             if (s->ref_frames[j]
1750                 && FFABS(s->ref_frames[j]->avframe->display_picture_number - refnum) < refdist) {
1751                 s->ref_pics[i] = s->ref_frames[j];
1752                 refdist = FFABS(s->ref_frames[j]->avframe->display_picture_number - refnum);
1753             }
1754
1755         if (!s->ref_pics[i] || refdist)
1756             av_log(s->avctx, AV_LOG_DEBUG, "Reference not found\n");
1757
1758         /* if there were no references at all, allocate one */
1759         if (!s->ref_pics[i])
1760             for (j = 0; j < MAX_FRAMES; j++)
1761                 if (!s->all_frames[j].avframe->data[0]) {
1762                     s->ref_pics[i] = &s->all_frames[j];
1763                     get_buffer_with_edge(s->avctx, s->ref_pics[i]->avframe, AV_GET_BUFFER_FLAG_REF);
1764                     break;
1765                 }
1766
1767         if (!s->ref_pics[i]) {
1768             av_log(s->avctx, AV_LOG_ERROR, "Reference could not be allocated\n");
1769             return AVERROR_INVALIDDATA;
1770         }
1771
1772     }
1773
1774     /* retire the reference frames that are not used anymore */
1775     if (s->current_picture->reference) {
1776         retire = (picnum + dirac_get_se_golomb(gb)) & 0xFFFFFFFF;
1777         if (retire != picnum) {
1778             DiracFrame *retire_pic = remove_frame(s->ref_frames, retire);
1779
1780             if (retire_pic)
1781                 retire_pic->reference &= DELAYED_PIC_REF;
1782             else
1783                 av_log(s->avctx, AV_LOG_DEBUG, "Frame to retire not found\n");
1784         }
1785
1786         /* if reference array is full, remove the oldest as per the spec */
1787         while (add_frame(s->ref_frames, MAX_REFERENCE_FRAMES, s->current_picture)) {
1788             av_log(s->avctx, AV_LOG_ERROR, "Reference frame overflow\n");
1789             remove_frame(s->ref_frames, s->ref_frames[0]->avframe->display_picture_number)->reference &= DELAYED_PIC_REF;
1790         }
1791     }
1792
1793     if (s->num_refs) {
1794         ret = dirac_unpack_prediction_parameters(s);  /* [DIRAC_STD] 11.2 Picture Prediction Data. picture_prediction() */
1795         if (ret < 0)
1796             return ret;
1797         ret = dirac_unpack_block_motion_data(s);      /* [DIRAC_STD] 12. Block motion data syntax                       */
1798         if (ret < 0)
1799             return ret;
1800     }
1801     ret = dirac_unpack_idwt_params(s);                /* [DIRAC_STD] 11.3 Wavelet transform data                        */
1802     if (ret < 0)
1803         return ret;
1804
1805     init_planes(s);
1806     return 0;
1807 }
1808
1809 static int get_delayed_pic(DiracContext *s, AVFrame *picture, int *got_frame)
1810 {
1811     DiracFrame *out = s->delay_frames[0];
1812     int i, out_idx  = 0;
1813     int ret;
1814
1815     /* find frame with lowest picture number */
1816     for (i = 1; s->delay_frames[i]; i++)
1817         if (s->delay_frames[i]->avframe->display_picture_number < out->avframe->display_picture_number) {
1818             out     = s->delay_frames[i];
1819             out_idx = i;
1820         }
1821
1822     for (i = out_idx; s->delay_frames[i]; i++)
1823         s->delay_frames[i] = s->delay_frames[i+1];
1824
1825     if (out) {
1826         out->reference ^= DELAYED_PIC_REF;
1827         *got_frame = 1;
1828         if((ret = av_frame_ref(picture, out->avframe)) < 0)
1829             return ret;
1830     }
1831
1832     return 0;
1833 }
1834
1835 /**
1836  * Dirac Specification ->
1837  * 9.6 Parse Info Header Syntax. parse_info()
1838  * 4 byte start code + byte parse code + 4 byte size + 4 byte previous size
1839  */
1840 #define DATA_UNIT_HEADER_SIZE 13
1841
1842 /* [DIRAC_STD] dirac_decode_data_unit makes reference to the while defined in 9.3
1843    inside the function parse_sequence() */
1844 static int dirac_decode_data_unit(AVCodecContext *avctx, const uint8_t *buf, int size)
1845 {
1846     DiracContext *s   = avctx->priv_data;
1847     DiracFrame *pic   = NULL;
1848     int ret, i, parse_code;
1849     unsigned tmp;
1850
1851     if (size < DATA_UNIT_HEADER_SIZE)
1852         return AVERROR_INVALIDDATA;
1853
1854     parse_code = buf[4];
1855
1856     init_get_bits(&s->gb, &buf[13], 8*(size - DATA_UNIT_HEADER_SIZE));
1857
1858     if (parse_code == pc_seq_header) {
1859         if (s->seen_sequence_header)
1860             return 0;
1861
1862         /* [DIRAC_STD] 10. Sequence header */
1863         ret = avpriv_dirac_parse_sequence_header(avctx, &s->gb, &s->source);
1864         if (ret < 0)
1865             return ret;
1866
1867         avcodec_get_chroma_sub_sample(avctx->pix_fmt, &s->chroma_x_shift, &s->chroma_y_shift);
1868
1869         ret = alloc_sequence_buffers(s);
1870         if (ret < 0)
1871             return ret;
1872
1873         s->seen_sequence_header = 1;
1874     } else if (parse_code == pc_eos) { /* [DIRAC_STD] End of Sequence */
1875         free_sequence_buffers(s);
1876         s->seen_sequence_header = 0;
1877     } else if (parse_code == pc_aux_data) {
1878         if (buf[13] == 1) {     /* encoder implementation/version */
1879             int ver[3];
1880             /* versions older than 1.0.8 don't store quant delta for
1881                subbands with only one codeblock */
1882             if (sscanf(buf+14, "Schroedinger %d.%d.%d", ver, ver+1, ver+2) == 3)
1883                 if (ver[0] == 1 && ver[1] == 0 && ver[2] <= 7)
1884                     s->old_delta_quant = 1;
1885         }
1886     } else if (parse_code & 0x8) {  /* picture data unit */
1887         if (!s->seen_sequence_header) {
1888             av_log(avctx, AV_LOG_DEBUG, "Dropping frame without sequence header\n");
1889             return AVERROR_INVALIDDATA;
1890         }
1891
1892         /* find an unused frame */
1893         for (i = 0; i < MAX_FRAMES; i++)
1894             if (s->all_frames[i].avframe->data[0] == NULL)
1895                 pic = &s->all_frames[i];
1896         if (!pic) {
1897             av_log(avctx, AV_LOG_ERROR, "framelist full\n");
1898             return AVERROR_INVALIDDATA;
1899         }
1900
1901         av_frame_unref(pic->avframe);
1902
1903         /* [DIRAC_STD] Defined in 9.6.1 ... */
1904         tmp            =  parse_code & 0x03;                   /* [DIRAC_STD] num_refs()      */
1905         if (tmp > 2) {
1906             av_log(avctx, AV_LOG_ERROR, "num_refs of 3\n");
1907             return AVERROR_INVALIDDATA;
1908         }
1909         s->num_refs    = tmp;
1910         s->is_arith    = (parse_code & 0x48) == 0x08;          /* [DIRAC_STD] using_ac()      */
1911         s->low_delay   = (parse_code & 0x88) == 0x88;          /* [DIRAC_STD] is_low_delay()  */
1912         pic->reference = (parse_code & 0x0C) == 0x0C;  /* [DIRAC_STD]  is_reference() */
1913         pic->avframe->key_frame = s->num_refs == 0;             /* [DIRAC_STD] is_intra()      */
1914         pic->avframe->pict_type = s->num_refs + 1;              /* Definition of AVPictureType in avutil.h */
1915
1916         if ((ret = get_buffer_with_edge(avctx, pic->avframe, (parse_code & 0x0C) == 0x0C ? AV_GET_BUFFER_FLAG_REF : 0)) < 0)
1917             return ret;
1918         s->current_picture = pic;
1919         s->plane[0].stride = pic->avframe->linesize[0];
1920         s->plane[1].stride = pic->avframe->linesize[1];
1921         s->plane[2].stride = pic->avframe->linesize[2];
1922
1923         if (alloc_buffers(s, FFMAX3(FFABS(s->plane[0].stride), FFABS(s->plane[1].stride), FFABS(s->plane[2].stride))) < 0)
1924             return AVERROR(ENOMEM);
1925
1926         /* [DIRAC_STD] 11.1 Picture parse. picture_parse() */
1927         ret = dirac_decode_picture_header(s);
1928         if (ret < 0)
1929             return ret;
1930
1931         /* [DIRAC_STD] 13.0 Transform data syntax. transform_data() */
1932         ret = dirac_decode_frame_internal(s);
1933         if (ret < 0)
1934             return ret;
1935     }
1936     return 0;
1937 }
1938
1939 static int dirac_decode_frame(AVCodecContext *avctx, void *data, int *got_frame, AVPacket *pkt)
1940 {
1941     DiracContext *s     = avctx->priv_data;
1942     AVFrame *picture    = data;
1943     uint8_t *buf        = pkt->data;
1944     int buf_size        = pkt->size;
1945     int i, buf_idx      = 0;
1946     int ret;
1947     unsigned data_unit_size;
1948
1949     /* release unused frames */
1950     for (i = 0; i < MAX_FRAMES; i++)
1951         if (s->all_frames[i].avframe->data[0] && !s->all_frames[i].reference) {
1952             av_frame_unref(s->all_frames[i].avframe);
1953             memset(s->all_frames[i].interpolated, 0, sizeof(s->all_frames[i].interpolated));
1954         }
1955
1956     s->current_picture = NULL;
1957     *got_frame = 0;
1958
1959     /* end of stream, so flush delayed pics */
1960     if (buf_size == 0)
1961         return get_delayed_pic(s, (AVFrame *)data, got_frame);
1962
1963     for (;;) {
1964         /*[DIRAC_STD] Here starts the code from parse_info() defined in 9.6
1965           [DIRAC_STD] PARSE_INFO_PREFIX = "BBCD" as defined in ISO/IEC 646
1966           BBCD start code search */
1967         for (; buf_idx + DATA_UNIT_HEADER_SIZE < buf_size; buf_idx++) {
1968             if (buf[buf_idx  ] == 'B' && buf[buf_idx+1] == 'B' &&
1969                 buf[buf_idx+2] == 'C' && buf[buf_idx+3] == 'D')
1970                 break;
1971         }
1972         /* BBCD found or end of data */
1973         if (buf_idx + DATA_UNIT_HEADER_SIZE >= buf_size)
1974             break;
1975
1976         data_unit_size = AV_RB32(buf+buf_idx+5);
1977         if (data_unit_size > buf_size - buf_idx || !data_unit_size) {
1978             if(data_unit_size > buf_size - buf_idx)
1979             av_log(s->avctx, AV_LOG_ERROR,
1980                    "Data unit with size %d is larger than input buffer, discarding\n",
1981                    data_unit_size);
1982             buf_idx += 4;
1983             continue;
1984         }
1985         /* [DIRAC_STD] dirac_decode_data_unit makes reference to the while defined in 9.3 inside the function parse_sequence() */
1986         ret = dirac_decode_data_unit(avctx, buf+buf_idx, data_unit_size);
1987         if (ret < 0)
1988         {
1989             av_log(s->avctx, AV_LOG_ERROR,"Error in dirac_decode_data_unit\n");
1990             return ret;
1991         }
1992         buf_idx += data_unit_size;
1993     }
1994
1995     if (!s->current_picture)
1996         return buf_size;
1997
1998     if (s->current_picture->avframe->display_picture_number > s->frame_number) {
1999         DiracFrame *delayed_frame = remove_frame(s->delay_frames, s->frame_number);
2000
2001         s->current_picture->reference |= DELAYED_PIC_REF;
2002
2003         if (add_frame(s->delay_frames, MAX_DELAY, s->current_picture)) {
2004             int min_num = s->delay_frames[0]->avframe->display_picture_number;
2005             /* Too many delayed frames, so we display the frame with the lowest pts */
2006             av_log(avctx, AV_LOG_ERROR, "Delay frame overflow\n");
2007
2008             for (i = 1; s->delay_frames[i]; i++)
2009                 if (s->delay_frames[i]->avframe->display_picture_number < min_num)
2010                     min_num = s->delay_frames[i]->avframe->display_picture_number;
2011
2012             delayed_frame = remove_frame(s->delay_frames, min_num);
2013             add_frame(s->delay_frames, MAX_DELAY, s->current_picture);
2014         }
2015
2016         if (delayed_frame) {
2017             delayed_frame->reference ^= DELAYED_PIC_REF;
2018             if((ret=av_frame_ref(data, delayed_frame->avframe)) < 0)
2019                 return ret;
2020             *got_frame = 1;
2021         }
2022     } else if (s->current_picture->avframe->display_picture_number == s->frame_number) {
2023         /* The right frame at the right time :-) */
2024         if((ret=av_frame_ref(data, s->current_picture->avframe)) < 0)
2025             return ret;
2026         *got_frame = 1;
2027     }
2028
2029     if (*got_frame)
2030         s->frame_number = picture->display_picture_number + 1;
2031
2032     return buf_idx;
2033 }
2034
2035 AVCodec ff_dirac_decoder = {
2036     .name           = "dirac",
2037     .long_name      = NULL_IF_CONFIG_SMALL("BBC Dirac VC-2"),
2038     .type           = AVMEDIA_TYPE_VIDEO,
2039     .id             = AV_CODEC_ID_DIRAC,
2040     .priv_data_size = sizeof(DiracContext),
2041     .init           = dirac_decode_init,
2042     .close          = dirac_decode_end,
2043     .decode         = dirac_decode_frame,
2044     .capabilities   = AV_CODEC_CAP_DELAY,
2045     .flush          = dirac_decode_flush,
2046 };