git.sesse.net Git - ffmpeg/blob - libavcodec/diracdec.c

   1 /*
   2  * Copyright (C) 2007 Marco Gerards <marco@gnu.org>
   3  * Copyright (C) 2009 David Conrad
   4  * Copyright (C) 2011 Jordi Ortiz
   5  *
   6  * This file is part of FFmpeg.
   7  *
   8  * FFmpeg is free software; you can redistribute it and/or
   9  * modify it under the terms of the GNU Lesser General Public
  10  * License as published by the Free Software Foundation; either
  11  * version 2.1 of the License, or (at your option) any later version.
  12  *
  13  * FFmpeg is distributed in the hope that it will be useful,
  14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  16  * Lesser General Public License for more details.
  17  *
  18  * You should have received a copy of the GNU Lesser General Public
  19  * License along with FFmpeg; if not, write to the Free Software
  20  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  21  */
  22
  23 /**
  24  * @file
  25  * Dirac Decoder
  26  * @author Marco Gerards <marco@gnu.org>, David Conrad, Jordi Ortiz <nenjordi@gmail.com>
  27  */
  28
  29 #include "avcodec.h"
  30 #include "get_bits.h"
  31 #include "bytestream.h"
  32 #include "internal.h"
  33 #include "golomb.h"
  34 #include "dirac_arith.h"
  35 #include "mpeg12data.h"
  36 #include "libavcodec/mpegvideo.h"
  37 #include "mpegvideoencdsp.h"
  38 #include "dirac_dwt.h"
  39 #include "dirac.h"
  40 #include "diracdsp.h"
  41 #include "videodsp.h"
  42
  43 /**
  44  * The spec limits the number of wavelet decompositions to 4 for both
  45  * level 1 (VC-2) and 128 (long-gop default).
  46  * 5 decompositions is the maximum before >16-bit buffers are needed.
  47  * Schroedinger allows this for DD 9,7 and 13,7 wavelets only, limiting
  48  * the others to 4 decompositions (or 3 for the fidelity filter).
  49  *
  50  * We use this instead of MAX_DECOMPOSITIONS to save some memory.
  51  */
  52 #define MAX_DWT_LEVELS 5
  53
  54 /**
  55  * The spec limits this to 3 for frame coding, but in practice can be as high as 6
  56  */
  57 #define MAX_REFERENCE_FRAMES 8
  58 #define MAX_DELAY 5         /* limit for main profile for frame coding (TODO: field coding) */
  59 #define MAX_FRAMES (MAX_REFERENCE_FRAMES + MAX_DELAY + 1)
  60 #define MAX_QUANT 68        /* max quant for VC-2 */
  61 #define MAX_BLOCKSIZE 32    /* maximum xblen/yblen we support */
  62
  63 /**
  64  * DiracBlock->ref flags, if set then the block does MC from the given ref
  65  */
  66 #define DIRAC_REF_MASK_REF1   1
  67 #define DIRAC_REF_MASK_REF2   2
  68 #define DIRAC_REF_MASK_GLOBAL 4
  69
  70 /**
  71  * Value of Picture.reference when Picture is not a reference picture, but
  72  * is held for delayed output.
  73  */
  74 #define DELAYED_PIC_REF 4
  75
  76 #define CALC_PADDING(size, depth)                       \
  77     (((size + (1 << depth) - 1) >> depth) << depth)
  78
  79 #define DIVRNDUP(a, b) (((a) + (b) - 1) / (b))
  80
  81 typedef struct {
  82     AVFrame *avframe;
  83     int interpolated[3];    /* 1 if hpel[] is valid */
  84     uint8_t *hpel[3][4];
  85     uint8_t *hpel_base[3][4];
  86 } DiracFrame;
  87
  88 typedef struct {
  89     union {
  90         int16_t mv[2][2];
  91         int16_t dc[3];
  92     } u; /* anonymous unions aren't in C99 :( */
  93     uint8_t ref;
  94 } DiracBlock;
  95
  96 typedef struct SubBand {
  97     int level;
  98     int orientation;
  99     int stride;
 100     int width;
 101     int height;
 102     int quant;
 103     IDWTELEM *ibuf;
 104     struct SubBand *parent;
 105
 106     /* for low delay */
 107     unsigned length;
 108     const uint8_t *coeff_data;
 109 } SubBand;
 110
 111 typedef struct Plane {
 112     int width;
 113     int height;
 114     ptrdiff_t stride;
 115
 116     int idwt_width;
 117     int idwt_height;
 118     int idwt_stride;
 119     IDWTELEM *idwt_buf;
 120     IDWTELEM *idwt_buf_base;
 121     IDWTELEM *idwt_tmp;
 122
 123     /* block length */
 124     uint8_t xblen;
 125     uint8_t yblen;
 126     /* block separation (block n+1 starts after this many pixels in block n) */
 127     uint8_t xbsep;
 128     uint8_t ybsep;
 129     /* amount of overspill on each edge (half of the overlap between blocks) */
 130     uint8_t xoffset;
 131     uint8_t yoffset;
 132
 133     SubBand band[MAX_DWT_LEVELS][4];
 134 } Plane;
 135
 136 typedef struct DiracContext {
 137     AVCodecContext *avctx;
 138     MpegvideoEncDSPContext mpvencdsp;
 139     VideoDSPContext vdsp;
 140     DiracDSPContext diracdsp;
 141     GetBitContext gb;
 142     dirac_source_params source;
 143     int seen_sequence_header;
 144     int frame_number;           /* number of the next frame to display       */
 145     Plane plane[3];
 146     int chroma_x_shift;
 147     int chroma_y_shift;
 148
 149     int zero_res;               /* zero residue flag                         */
 150     int is_arith;               /* whether coeffs use arith or golomb coding */
 151     int low_delay;              /* use the low delay syntax                  */
 152     int globalmc_flag;          /* use global motion compensation            */
 153     int num_refs;               /* number of reference pictures              */
 154
 155     /* wavelet decoding */
 156     unsigned wavelet_depth;     /* depth of the IDWT                         */
 157     unsigned wavelet_idx;
 158
 159     /**
 160      * schroedinger older than 1.0.8 doesn't store
 161      * quant delta if only one codebook exists in a band
 162      */
 163     unsigned old_delta_quant;
 164     unsigned codeblock_mode;
 165
 166     struct {
 167         unsigned width;
 168         unsigned height;
 169     } codeblock[MAX_DWT_LEVELS+1];
 170
 171     struct {
 172         unsigned num_x;         /* number of horizontal slices               */
 173         unsigned num_y;         /* number of vertical slices                 */
 174         AVRational bytes;       /* average bytes per slice                   */
 175         uint8_t quant[MAX_DWT_LEVELS][4]; /* [DIRAC_STD] E.1 */
 176     } lowdelay;
 177
 178     struct {
 179         int pan_tilt[2];        /* pan/tilt vector                           */
 180         int zrs[2][2];          /* zoom/rotate/shear matrix                  */
 181         int perspective[2];     /* perspective vector                        */
 182         unsigned zrs_exp;
 183         unsigned perspective_exp;
 184     } globalmc[2];
 185
 186     /* motion compensation */
 187     uint8_t mv_precision;       /* [DIRAC_STD] REFS_WT_PRECISION             */
 188     int16_t weight[2];          /* [DIRAC_STD] REF1_WT and REF2_WT           */
 189     unsigned weight_log2denom;  /* [DIRAC_STD] REFS_WT_PRECISION             */
 190
 191     int blwidth;                /* number of blocks (horizontally)           */
 192     int blheight;               /* number of blocks (vertically)             */
 193     int sbwidth;                /* number of superblocks (horizontally)      */
 194     int sbheight;               /* number of superblocks (vertically)        */
 195
 196     uint8_t *sbsplit;
 197     DiracBlock *blmotion;
 198
 199     uint8_t *edge_emu_buffer[4];
 200     uint8_t *edge_emu_buffer_base;
 201
 202     uint16_t *mctmp;            /* buffer holding the MC data multiplied by OBMC weights */
 203     uint8_t *mcscratch;
 204     int buffer_stride;
 205
 206     DECLARE_ALIGNED(16, uint8_t, obmc_weight)[3][MAX_BLOCKSIZE*MAX_BLOCKSIZE];
 207
 208     void (*put_pixels_tab[4])(uint8_t *dst, const uint8_t *src[5], int stride, int h);
 209     void (*avg_pixels_tab[4])(uint8_t *dst, const uint8_t *src[5], int stride, int h);
 210     void (*add_obmc)(uint16_t *dst, const uint8_t *src, int stride, const uint8_t *obmc_weight, int yblen);
 211     dirac_weight_func weight_func;
 212     dirac_biweight_func biweight_func;
 213
 214     DiracFrame *current_picture;
 215     DiracFrame *ref_pics[2];
 216
 217     DiracFrame *ref_frames[MAX_REFERENCE_FRAMES+1];
 218     DiracFrame *delay_frames[MAX_DELAY+1];
 219     DiracFrame all_frames[MAX_FRAMES];
 220 } DiracContext;
 221
 222 /**
 223  * Dirac Specification ->
 224  * Parse code values. 9.6.1 Table 9.1
 225  */
 226 enum dirac_parse_code {
 227     pc_seq_header         = 0x00,
 228     pc_eos                = 0x10,
 229     pc_aux_data           = 0x20,
 230     pc_padding            = 0x30,
 231 };
 232
 233 enum dirac_subband {
 234     subband_ll = 0,
 235     subband_hl = 1,
 236     subband_lh = 2,
 237     subband_hh = 3
 238 };
 239
 240 static const uint8_t default_qmat[][4][4] = {
 241     { { 5,  3,  3,  0}, { 0,  4,  4,  1}, { 0,  5,  5,  2}, { 0,  6,  6,  3} },
 242     { { 4,  2,  2,  0}, { 0,  4,  4,  2}, { 0,  5,  5,  3}, { 0,  7,  7,  5} },
 243     { { 5,  3,  3,  0}, { 0,  4,  4,  1}, { 0,  5,  5,  2}, { 0,  6,  6,  3} },
 244     { { 8,  4,  4,  0}, { 0,  4,  4,  0}, { 0,  4,  4,  0}, { 0,  4,  4,  0} },
 245     { { 8,  4,  4,  0}, { 0,  4,  4,  0}, { 0,  4,  4,  0}, { 0,  4,  4,  0} },
 246     { { 0,  4,  4,  8}, { 0,  8,  8, 12}, { 0, 13, 13, 17}, { 0, 17, 17, 21} },
 247     { { 3,  1,  1,  0}, { 0,  4,  4,  2}, { 0,  6,  6,  5}, { 0,  9,  9,  7} },
 248 };
 249
 250 static const int qscale_tab[MAX_QUANT+1] = {
 251     4,     5,     6,     7,     8,    10,    11,    13,
 252     16,    19,    23,    27,    32,    38,    45,    54,
 253     64,    76,    91,   108,   128,   152,   181,   215,
 254     256,   304,   362,   431,   512,   609,   724,   861,
 255     1024,  1218,  1448,  1722,  2048,  2435,  2896,  3444,
 256     4096,  4871,  5793,  6889,  8192,  9742, 11585, 13777,
 257     16384, 19484, 23170, 27554, 32768, 38968, 46341, 55109,
 258     65536, 77936
 259 };
 260
 261 static const int qoffset_intra_tab[MAX_QUANT+1] = {
 262     1,     2,     3,     4,     4,     5,     6,     7,
 263     8,    10,    12,    14,    16,    19,    23,    27,
 264     32,    38,    46,    54,    64,    76,    91,   108,
 265     128,   152,   181,   216,   256,   305,   362,   431,
 266     512,   609,   724,   861,  1024,  1218,  1448,  1722,
 267     2048,  2436,  2897,  3445,  4096,  4871,  5793,  6889,
 268     8192,  9742, 11585, 13777, 16384, 19484, 23171, 27555,
 269     32768, 38968
 270 };
 271
 272 static const int qoffset_inter_tab[MAX_QUANT+1] = {
 273     1,     2,     2,     3,     3,     4,     4,     5,
 274     6,     7,     9,    10,    12,    14,    17,    20,
 275     24,    29,    34,    41,    48,    57,    68,    81,
 276     96,   114,   136,   162,   192,   228,   272,   323,
 277     384,   457,   543,   646,   768,   913,  1086,  1292,
 278     1536,  1827,  2172,  2583,  3072,  3653,  4344,  5166,
 279     6144,  7307,  8689, 10333, 12288, 14613, 17378, 20666,
 280     24576, 29226
 281 };
 282
 283 /* magic number division by 3 from schroedinger */
 284 static inline int divide3(int x)
 285 {
 286     return ((x+1)*21845 + 10922) >> 16;
 287 }
 288
 289 static DiracFrame *remove_frame(DiracFrame *framelist[], int picnum)
 290 {
 291     DiracFrame *remove_pic = NULL;
 292     int i, remove_idx = -1;
 293
 294     for (i = 0; framelist[i]; i++)
 295         if (framelist[i]->avframe->display_picture_number == picnum) {
 296             remove_pic = framelist[i];
 297             remove_idx = i;
 298         }
 299
 300     if (remove_pic)
 301         for (i = remove_idx; framelist[i]; i++)
 302             framelist[i] = framelist[i+1];
 303
 304     return remove_pic;
 305 }
 306
 307 static int add_frame(DiracFrame *framelist[], int maxframes, DiracFrame *frame)
 308 {
 309     int i;
 310     for (i = 0; i < maxframes; i++)
 311         if (!framelist[i]) {
 312             framelist[i] = frame;
 313             return 0;
 314         }
 315     return -1;
 316 }
 317
 318 static int alloc_sequence_buffers(DiracContext *s)
 319 {
 320     int sbwidth  = DIVRNDUP(s->source.width,  4);
 321     int sbheight = DIVRNDUP(s->source.height, 4);
 322     int i, w, h, top_padding;
 323
 324     /* todo: think more about this / use or set Plane here */
 325     for (i = 0; i < 3; i++) {
 326         int max_xblen = MAX_BLOCKSIZE >> (i ? s->chroma_x_shift : 0);
 327         int max_yblen = MAX_BLOCKSIZE >> (i ? s->chroma_y_shift : 0);
 328         w = s->source.width  >> (i ? s->chroma_x_shift : 0);
 329         h = s->source.height >> (i ? s->chroma_y_shift : 0);
 330
 331         /* we allocate the max we support here since num decompositions can
 332          * change from frame to frame. Stride is aligned to 16 for SIMD, and
 333          * 1<<MAX_DWT_LEVELS top padding to avoid if(y>0) in arith decoding
 334          * MAX_BLOCKSIZE padding for MC: blocks can spill up to half of that
 335          * on each side */
 336         top_padding = FFMAX(1<<MAX_DWT_LEVELS, max_yblen/2);
 337         w = FFALIGN(CALC_PADDING(w, MAX_DWT_LEVELS), 8); /* FIXME: Should this be 16 for SSE??? */
 338         h = top_padding + CALC_PADDING(h, MAX_DWT_LEVELS) + max_yblen/2;
 339
 340         s->plane[i].idwt_buf_base = av_mallocz_array((w+max_xblen), h * sizeof(IDWTELEM));
 341         s->plane[i].idwt_tmp      = av_malloc_array((w+16), sizeof(IDWTELEM));
 342         s->plane[i].idwt_buf      = s->plane[i].idwt_buf_base + top_padding*w;
 343         if (!s->plane[i].idwt_buf_base || !s->plane[i].idwt_tmp)
 344             return AVERROR(ENOMEM);
 345     }
 346
 347     /* fixme: allocate using real stride here */
 348     s->sbsplit  = av_malloc_array(sbwidth, sbheight);
 349     s->blmotion = av_malloc_array(sbwidth, sbheight * 16 * sizeof(*s->blmotion));
 350
 351     if (!s->sbsplit || !s->blmotion)
 352         return AVERROR(ENOMEM);
 353     return 0;
 354 }
 355
 356 static int alloc_buffers(DiracContext *s, int stride)
 357 {
 358     int w = s->source.width;
 359     int h = s->source.height;
 360
 361     av_assert0(stride >= w);
 362     stride += 64;
 363
 364     if (s->buffer_stride >= stride)
 365         return 0;
 366     s->buffer_stride = 0;
 367
 368     av_freep(&s->edge_emu_buffer_base);
 369     memset(s->edge_emu_buffer, 0, sizeof(s->edge_emu_buffer));
 370     av_freep(&s->mctmp);
 371     av_freep(&s->mcscratch);
 372
 373     s->edge_emu_buffer_base = av_malloc_array(stride, MAX_BLOCKSIZE);
 374
 375     s->mctmp     = av_malloc_array((stride+MAX_BLOCKSIZE), (h+MAX_BLOCKSIZE) * sizeof(*s->mctmp));
 376     s->mcscratch = av_malloc_array(stride, MAX_BLOCKSIZE);
 377
 378     if (!s->edge_emu_buffer_base || !s->mctmp || !s->mcscratch)
 379         return AVERROR(ENOMEM);
 380
 381     s->buffer_stride = stride;
 382     return 0;
 383 }
 384
 385 static void free_sequence_buffers(DiracContext *s)
 386 {
 387     int i, j, k;
 388
 389     for (i = 0; i < MAX_FRAMES; i++) {
 390         if (s->all_frames[i].avframe->data[0]) {
 391             av_frame_unref(s->all_frames[i].avframe);
 392             memset(s->all_frames[i].interpolated, 0, sizeof(s->all_frames[i].interpolated));
 393         }
 394
 395         for (j = 0; j < 3; j++)
 396             for (k = 1; k < 4; k++)
 397                 av_freep(&s->all_frames[i].hpel_base[j][k]);
 398     }
 399
 400     memset(s->ref_frames, 0, sizeof(s->ref_frames));
 401     memset(s->delay_frames, 0, sizeof(s->delay_frames));
 402
 403     for (i = 0; i < 3; i++) {
 404         av_freep(&s->plane[i].idwt_buf_base);
 405         av_freep(&s->plane[i].idwt_tmp);
 406     }
 407
 408     s->buffer_stride = 0;
 409     av_freep(&s->sbsplit);
 410     av_freep(&s->blmotion);
 411     av_freep(&s->edge_emu_buffer_base);
 412
 413     av_freep(&s->mctmp);
 414     av_freep(&s->mcscratch);
 415 }
 416
 417 static av_cold int dirac_decode_init(AVCodecContext *avctx)
 418 {
 419     DiracContext *s = avctx->priv_data;
 420     int i;
 421
 422     s->avctx = avctx;
 423     s->frame_number = -1;
 424
 425     ff_diracdsp_init(&s->diracdsp);
 426     ff_mpegvideoencdsp_init(&s->mpvencdsp, avctx);
 427     ff_videodsp_init(&s->vdsp, 8);
 428
 429     for (i = 0; i < MAX_FRAMES; i++) {
 430         s->all_frames[i].avframe = av_frame_alloc();
 431         if (!s->all_frames[i].avframe) {
 432             while (i > 0)
 433                 av_frame_free(&s->all_frames[--i].avframe);
 434             return AVERROR(ENOMEM);
 435         }
 436     }
 437
 438     return 0;
 439 }
 440
 441 static void dirac_decode_flush(AVCodecContext *avctx)
 442 {
 443     DiracContext *s = avctx->priv_data;
 444     free_sequence_buffers(s);
 445     s->seen_sequence_header = 0;
 446     s->frame_number = -1;
 447 }
 448
 449 static av_cold int dirac_decode_end(AVCodecContext *avctx)
 450 {
 451     DiracContext *s = avctx->priv_data;
 452     int i;
 453
 454     dirac_decode_flush(avctx);
 455     for (i = 0; i < MAX_FRAMES; i++)
 456         av_frame_free(&s->all_frames[i].avframe);
 457
 458     return 0;
 459 }
 460
 461 #define SIGN_CTX(x) (CTX_SIGN_ZERO + ((x) > 0) - ((x) < 0))
 462
 463 static inline void coeff_unpack_arith(DiracArith *c, int qfactor, int qoffset,
 464                                       SubBand *b, IDWTELEM *buf, int x, int y)
 465 {
 466     int coeff, sign;
 467     int sign_pred = 0;
 468     int pred_ctx = CTX_ZPZN_F1;
 469
 470     /* Check if the parent subband has a 0 in the corresponding position */
 471     if (b->parent)
 472         pred_ctx += !!b->parent->ibuf[b->parent->stride * (y>>1) + (x>>1)] << 1;
 473
 474     if (b->orientation == subband_hl)
 475         sign_pred = buf[-b->stride];
 476
 477     /* Determine if the pixel has only zeros in its neighbourhood */
 478     if (x) {
 479         pred_ctx += !(buf[-1] | buf[-b->stride] | buf[-1-b->stride]);
 480         if (b->orientation == subband_lh)
 481             sign_pred = buf[-1];
 482     } else {
 483         pred_ctx += !buf[-b->stride];
 484     }
 485
 486     coeff = dirac_get_arith_uint(c, pred_ctx, CTX_COEFF_DATA);
 487     if (coeff) {
 488         coeff = (coeff * qfactor + qoffset + 2) >> 2;
 489         sign  = dirac_get_arith_bit(c, SIGN_CTX(sign_pred));
 490         coeff = (coeff ^ -sign) + sign;
 491     }
 492     *buf = coeff;
 493 }
 494
 495 static inline int coeff_unpack_golomb(GetBitContext *gb, int qfactor, int qoffset)
 496 {
 497     int sign, coeff;
 498
 499     coeff = svq3_get_ue_golomb(gb);
 500     if (coeff) {
 501         coeff = (coeff * qfactor + qoffset + 2) >> 2;
 502         sign  = get_bits1(gb);
 503         coeff = (coeff ^ -sign) + sign;
 504     }
 505     return coeff;
 506 }
 507
 508 /**
 509  * Decode the coeffs in the rectangle defined by left, right, top, bottom
 510  * [DIRAC_STD] 13.4.3.2 Codeblock unpacking loop. codeblock()
 511  */
 512 static inline void codeblock(DiracContext *s, SubBand *b,
 513                              GetBitContext *gb, DiracArith *c,
 514                              int left, int right, int top, int bottom,
 515                              int blockcnt_one, int is_arith)
 516 {
 517     int x, y, zero_block;
 518     int qoffset, qfactor;
 519     IDWTELEM *buf;
 520
 521     /* check for any coded coefficients in this codeblock */
 522     if (!blockcnt_one) {
 523         if (is_arith)
 524             zero_block = dirac_get_arith_bit(c, CTX_ZERO_BLOCK);
 525         else
 526             zero_block = get_bits1(gb);
 527
 528         if (zero_block)
 529             return;
 530     }
 531
 532     if (s->codeblock_mode && !(s->old_delta_quant && blockcnt_one)) {
 533         int quant = b->quant;
 534         if (is_arith)
 535             quant += dirac_get_arith_int(c, CTX_DELTA_Q_F, CTX_DELTA_Q_DATA);
 536         else
 537             quant += dirac_get_se_golomb(gb);
 538         if (quant < 0) {
 539             av_log(s->avctx, AV_LOG_ERROR, "Invalid quant\n");
 540             return;
 541         }
 542         b->quant = quant;
 543     }
 544
 545     b->quant = FFMIN(b->quant, MAX_QUANT);
 546
 547     qfactor = qscale_tab[b->quant];
 548     /* TODO: context pointer? */
 549     if (!s->num_refs)
 550         qoffset = qoffset_intra_tab[b->quant];
 551     else
 552         qoffset = qoffset_inter_tab[b->quant];
 553
 554     buf = b->ibuf + top * b->stride;
 555     for (y = top; y < bottom; y++) {
 556         for (x = left; x < right; x++) {
 557             /* [DIRAC_STD] 13.4.4 Subband coefficients. coeff_unpack() */
 558             if (is_arith)
 559                 coeff_unpack_arith(c, qfactor, qoffset, b, buf+x, x, y);
 560             else
 561                 buf[x] = coeff_unpack_golomb(gb, qfactor, qoffset);
 562         }
 563         buf += b->stride;
 564     }
 565 }
 566
 567 /**
 568  * Dirac Specification ->
 569  * 13.3 intra_dc_prediction(band)
 570  */
 571 static inline void intra_dc_prediction(SubBand *b)
 572 {
 573     IDWTELEM *buf = b->ibuf;
 574     int x, y;
 575
 576     for (x = 1; x < b->width; x++)
 577         buf[x] += buf[x-1];
 578     buf += b->stride;
 579
 580     for (y = 1; y < b->height; y++) {
 581         buf[0] += buf[-b->stride];
 582
 583         for (x = 1; x < b->width; x++) {
 584             int pred = buf[x - 1] + buf[x - b->stride] + buf[x - b->stride-1];
 585             buf[x]  += divide3(pred);
 586         }
 587         buf += b->stride;
 588     }
 589 }
 590
 591 /**
 592  * Dirac Specification ->
 593  * 13.4.2 Non-skipped subbands.  subband_coeffs()
 594  */
 595 static av_always_inline void decode_subband_internal(DiracContext *s, SubBand *b, int is_arith)
 596 {
 597     int cb_x, cb_y, left, right, top, bottom;
 598     DiracArith c;
 599     GetBitContext gb;
 600     int cb_width  = s->codeblock[b->level + (b->orientation != subband_ll)].width;
 601     int cb_height = s->codeblock[b->level + (b->orientation != subband_ll)].height;
 602     int blockcnt_one = (cb_width + cb_height) == 2;
 603
 604     if (!b->length)
 605         return;
 606
 607     init_get_bits8(&gb, b->coeff_data, b->length);
 608
 609     if (is_arith)
 610         ff_dirac_init_arith_decoder(&c, &gb, b->length);
 611
 612     top = 0;
 613     for (cb_y = 0; cb_y < cb_height; cb_y++) {
 614         bottom = (b->height * (cb_y+1)) / cb_height;
 615         left = 0;
 616         for (cb_x = 0; cb_x < cb_width; cb_x++) {
 617             right = (b->width * (cb_x+1)) / cb_width;
 618             codeblock(s, b, &gb, &c, left, right, top, bottom, blockcnt_one, is_arith);
 619             left = right;
 620         }
 621         top = bottom;
 622     }
 623
 624     if (b->orientation == subband_ll && s->num_refs == 0)
 625         intra_dc_prediction(b);
 626 }
 627
 628 static int decode_subband_arith(AVCodecContext *avctx, void *b)
 629 {
 630     DiracContext *s = avctx->priv_data;
 631     decode_subband_internal(s, b, 1);
 632     return 0;
 633 }
 634
 635 static int decode_subband_golomb(AVCodecContext *avctx, void *arg)
 636 {
 637     DiracContext *s = avctx->priv_data;
 638     SubBand **b     = arg;
 639     decode_subband_internal(s, *b, 0);
 640     return 0;
 641 }
 642
 643 /**
 644  * Dirac Specification ->
 645  * [DIRAC_STD] 13.4.1 core_transform_data()
 646  */
 647 static void decode_component(DiracContext *s, int comp)
 648 {
 649     AVCodecContext *avctx = s->avctx;
 650     SubBand *bands[3*MAX_DWT_LEVELS+1];
 651     enum dirac_subband orientation;
 652     int level, num_bands = 0;
 653
 654     /* Unpack all subbands at all levels. */
 655     for (level = 0; level < s->wavelet_depth; level++) {
 656         for (orientation = !!level; orientation < 4; orientation++) {
 657             SubBand *b = &s->plane[comp].band[level][orientation];
 658             bands[num_bands++] = b;
 659
 660             align_get_bits(&s->gb);
 661             /* [DIRAC_STD] 13.4.2 subband() */
 662             b->length = svq3_get_ue_golomb(&s->gb);
 663             if (b->length) {
 664                 b->quant = svq3_get_ue_golomb(&s->gb);
 665                 align_get_bits(&s->gb);
 666                 b->coeff_data = s->gb.buffer + get_bits_count(&s->gb)/8;
 667                 b->length = FFMIN(b->length, FFMAX(get_bits_left(&s->gb)/8, 0));
 668                 skip_bits_long(&s->gb, b->length*8);
 669             }
 670         }
 671         /* arithmetic coding has inter-level dependencies, so we can only execute one level at a time */
 672         if (s->is_arith)
 673             avctx->execute(avctx, decode_subband_arith, &s->plane[comp].band[level][!!level],
 674                            NULL, 4-!!level, sizeof(SubBand));
 675     }
 676     /* golomb coding has no inter-level dependencies, so we can execute all subbands in parallel */
 677     if (!s->is_arith)
 678         avctx->execute(avctx, decode_subband_golomb, bands, NULL, num_bands, sizeof(SubBand*));
 679 }
 680
 681 /* [DIRAC_STD] 13.5.5.2 Luma slice subband data. luma_slice_band(level,orient,sx,sy) --> if b2 == NULL */
 682 /* [DIRAC_STD] 13.5.5.3 Chroma slice subband data. chroma_slice_band(level,orient,sx,sy) --> if b2 != NULL */
 683 static void lowdelay_subband(DiracContext *s, GetBitContext *gb, int quant,
 684                              int slice_x, int slice_y, int bits_end,
 685                              SubBand *b1, SubBand *b2)
 686 {
 687     int left   = b1->width  * slice_x    / s->lowdelay.num_x;
 688     int right  = b1->width  *(slice_x+1) / s->lowdelay.num_x;
 689     int top    = b1->height * slice_y    / s->lowdelay.num_y;
 690     int bottom = b1->height *(slice_y+1) / s->lowdelay.num_y;
 691
 692     int qfactor = qscale_tab[FFMIN(quant, MAX_QUANT)];
 693     int qoffset = qoffset_intra_tab[FFMIN(quant, MAX_QUANT)];
 694
 695     IDWTELEM *buf1 =      b1->ibuf + top * b1->stride;
 696     IDWTELEM *buf2 = b2 ? b2->ibuf + top * b2->stride : NULL;
 697     int x, y;
 698     /* we have to constantly check for overread since the spec explicitly
 699        requires this, with the meaning that all remaining coeffs are set to 0 */
 700     if (get_bits_count(gb) >= bits_end)
 701         return;
 702
 703     for (y = top; y < bottom; y++) {
 704         for (x = left; x < right; x++) {
 705             buf1[x] = coeff_unpack_golomb(gb, qfactor, qoffset);
 706             if (get_bits_count(gb) >= bits_end)
 707                 return;
 708             if (buf2) {
 709                 buf2[x] = coeff_unpack_golomb(gb, qfactor, qoffset);
 710                 if (get_bits_count(gb) >= bits_end)
 711                     return;
 712             }
 713         }
 714         buf1 += b1->stride;
 715         if (buf2)
 716             buf2 += b2->stride;
 717     }
 718 }
 719
 720 struct lowdelay_slice {
 721     GetBitContext gb;
 722     int slice_x;
 723     int slice_y;
 724     int bytes;
 725 };
 726
 727
 728 /**
 729  * Dirac Specification ->
 730  * 13.5.2 Slices. slice(sx,sy)
 731  */
 732 static int decode_lowdelay_slice(AVCodecContext *avctx, void *arg)
 733 {
 734     DiracContext *s = avctx->priv_data;
 735     struct lowdelay_slice *slice = arg;
 736     GetBitContext *gb = &slice->gb;
 737     enum dirac_subband orientation;
 738     int level, quant, chroma_bits, chroma_end;
 739
 740     int quant_base  = get_bits(gb, 7); /*[DIRAC_STD] qindex */
 741     int length_bits = av_log2(8 * slice->bytes)+1;
 742     int luma_bits   = get_bits_long(gb, length_bits);
 743     int luma_end    = get_bits_count(gb) + FFMIN(luma_bits, get_bits_left(gb));
 744
 745     /* [DIRAC_STD] 13.5.5.2 luma_slice_band */
 746     for (level = 0; level < s->wavelet_depth; level++)
 747         for (orientation = !!level; orientation < 4; orientation++) {
 748             quant = FFMAX(quant_base - s->lowdelay.quant[level][orientation], 0);
 749             lowdelay_subband(s, gb, quant, slice->slice_x, slice->slice_y, luma_end,
 750                              &s->plane[0].band[level][orientation], NULL);
 751         }
 752
 753     /* consume any unused bits from luma */
 754     skip_bits_long(gb, get_bits_count(gb) - luma_end);
 755
 756     chroma_bits = 8*slice->bytes - 7 - length_bits - luma_bits;
 757     chroma_end  = get_bits_count(gb) + FFMIN(chroma_bits, get_bits_left(gb));
 758     /* [DIRAC_STD] 13.5.5.3 chroma_slice_band */
 759     for (level = 0; level < s->wavelet_depth; level++)
 760         for (orientation = !!level; orientation < 4; orientation++) {
 761             quant = FFMAX(quant_base - s->lowdelay.quant[level][orientation], 0);
 762             lowdelay_subband(s, gb, quant, slice->slice_x, slice->slice_y, chroma_end,
 763                              &s->plane[1].band[level][orientation],
 764                              &s->plane[2].band[level][orientation]);
 765         }
 766
 767     return 0;
 768 }
 769
 770 /**
 771  * Dirac Specification ->
 772  * 13.5.1 low_delay_transform_data()
 773  */
 774 static void decode_lowdelay(DiracContext *s)
 775 {
 776     AVCodecContext *avctx = s->avctx;
 777     int slice_x, slice_y, bytes, bufsize;
 778     const uint8_t *buf;
 779     struct lowdelay_slice *slices;
 780     int slice_num = 0;
 781
 782     slices = av_mallocz_array(s->lowdelay.num_x, s->lowdelay.num_y * sizeof(struct lowdelay_slice));
 783
 784     align_get_bits(&s->gb);
 785     /*[DIRAC_STD] 13.5.2 Slices. slice(sx,sy) */
 786     buf = s->gb.buffer + get_bits_count(&s->gb)/8;
 787     bufsize = get_bits_left(&s->gb);
 788
 789     for (slice_y = 0; bufsize > 0 && slice_y < s->lowdelay.num_y; slice_y++)
 790         for (slice_x = 0; bufsize > 0 && slice_x < s->lowdelay.num_x; slice_x++) {
 791             bytes = (slice_num+1) * s->lowdelay.bytes.num / s->lowdelay.bytes.den
 792                 - slice_num    * s->lowdelay.bytes.num / s->lowdelay.bytes.den;
 793
 794             slices[slice_num].bytes   = bytes;
 795             slices[slice_num].slice_x = slice_x;
 796             slices[slice_num].slice_y = slice_y;
 797             init_get_bits(&slices[slice_num].gb, buf, bufsize);
 798             slice_num++;
 799
 800             buf     += bytes;
 801             bufsize -= bytes*8;
 802         }
 803
 804     avctx->execute(avctx, decode_lowdelay_slice, slices, NULL, slice_num,
 805                    sizeof(struct lowdelay_slice)); /* [DIRAC_STD] 13.5.2 Slices */
 806     intra_dc_prediction(&s->plane[0].band[0][0]);  /* [DIRAC_STD] 13.3 intra_dc_prediction() */
 807     intra_dc_prediction(&s->plane[1].band[0][0]);  /* [DIRAC_STD] 13.3 intra_dc_prediction() */
 808     intra_dc_prediction(&s->plane[2].band[0][0]);  /* [DIRAC_STD] 13.3 intra_dc_prediction() */
 809     av_free(slices);
 810 }
 811
 812 static void init_planes(DiracContext *s)
 813 {
 814     int i, w, h, level, orientation;
 815
 816     for (i = 0; i < 3; i++) {
 817         Plane *p = &s->plane[i];
 818
 819         p->width       = s->source.width  >> (i ? s->chroma_x_shift : 0);
 820         p->height      = s->source.height >> (i ? s->chroma_y_shift : 0);
 821         p->idwt_width  = w = CALC_PADDING(p->width , s->wavelet_depth);
 822         p->idwt_height = h = CALC_PADDING(p->height, s->wavelet_depth);
 823         p->idwt_stride = FFALIGN(p->idwt_width, 8);
 824
 825         for (level = s->wavelet_depth-1; level >= 0; level--) {
 826             w = w>>1;
 827             h = h>>1;
 828             for (orientation = !!level; orientation < 4; orientation++) {
 829                 SubBand *b = &p->band[level][orientation];
 830
 831                 b->ibuf   = p->idwt_buf;
 832                 b->level  = level;
 833                 b->stride = p->idwt_stride << (s->wavelet_depth - level);
 834                 b->width  = w;
 835                 b->height = h;
 836                 b->orientation = orientation;
 837
 838                 if (orientation & 1)
 839                     b->ibuf += w;
 840                 if (orientation > 1)
 841                     b->ibuf += b->stride>>1;
 842
 843                 if (level)
 844                     b->parent = &p->band[level-1][orientation];
 845             }
 846         }
 847
 848         if (i > 0) {
 849             p->xblen = s->plane[0].xblen >> s->chroma_x_shift;
 850             p->yblen = s->plane[0].yblen >> s->chroma_y_shift;
 851             p->xbsep = s->plane[0].xbsep >> s->chroma_x_shift;
 852             p->ybsep = s->plane[0].ybsep >> s->chroma_y_shift;
 853         }
 854
 855         p->xoffset = (p->xblen - p->xbsep)/2;
 856         p->yoffset = (p->yblen - p->ybsep)/2;
 857     }
 858 }
 859
 860 /**
 861  * Unpack the motion compensation parameters
 862  * Dirac Specification ->
 863  * 11.2 Picture prediction data. picture_prediction()
 864  */
 865 static int dirac_unpack_prediction_parameters(DiracContext *s)
 866 {
 867     static const uint8_t default_blen[] = { 4, 12, 16, 24 };
 868     static const uint8_t default_bsep[] = { 4,  8, 12, 16 };
 869
 870     GetBitContext *gb = &s->gb;
 871     unsigned idx, ref;
 872
 873     align_get_bits(gb);
 874     /* [DIRAC_STD] 11.2.2 Block parameters. block_parameters() */
 875     /* Luma and Chroma are equal. 11.2.3 */
 876     idx = svq3_get_ue_golomb(gb); /* [DIRAC_STD] index */
 877
 878     if (idx > 4) {
 879         av_log(s->avctx, AV_LOG_ERROR, "Block prediction index too high\n");
 880         return -1;
 881     }
 882
 883     if (idx == 0) {
 884         s->plane[0].xblen = svq3_get_ue_golomb(gb);
 885         s->plane[0].yblen = svq3_get_ue_golomb(gb);
 886         s->plane[0].xbsep = svq3_get_ue_golomb(gb);
 887         s->plane[0].ybsep = svq3_get_ue_golomb(gb);
 888     } else {
 889         /*[DIRAC_STD] preset_block_params(index). Table 11.1 */
 890         s->plane[0].xblen = default_blen[idx-1];
 891         s->plane[0].yblen = default_blen[idx-1];
 892         s->plane[0].xbsep = default_bsep[idx-1];
 893         s->plane[0].ybsep = default_bsep[idx-1];
 894     }
 895     /*[DIRAC_STD] 11.2.4 motion_data_dimensions()
 896       Calculated in function dirac_unpack_block_motion_data */
 897
 898     if (!s->plane[0].xbsep || !s->plane[0].ybsep || s->plane[0].xbsep < s->plane[0].xblen/2 || s->plane[0].ybsep < s->plane[0].yblen/2) {
 899         av_log(s->avctx, AV_LOG_ERROR, "Block separation too small\n");
 900         return -1;
 901     }
 902     if (s->plane[0].xbsep > s->plane[0].xblen || s->plane[0].ybsep > s->plane[0].yblen) {
 903         av_log(s->avctx, AV_LOG_ERROR, "Block separation greater than size\n");
 904         return -1;
 905     }
 906     if (FFMAX(s->plane[0].xblen, s->plane[0].yblen) > MAX_BLOCKSIZE) {
 907         av_log(s->avctx, AV_LOG_ERROR, "Unsupported large block size\n");
 908         return -1;
 909     }
 910
 911     /*[DIRAC_STD] 11.2.5 Motion vector precision. motion_vector_precision()
 912       Read motion vector precision */
 913     s->mv_precision = svq3_get_ue_golomb(gb);
 914     if (s->mv_precision > 3) {
 915         av_log(s->avctx, AV_LOG_ERROR, "MV precision finer than eighth-pel\n");
 916         return -1;
 917     }
 918
 919     /*[DIRAC_STD] 11.2.6 Global motion. global_motion()
 920       Read the global motion compensation parameters */
 921     s->globalmc_flag = get_bits1(gb);
 922     if (s->globalmc_flag) {
 923         memset(s->globalmc, 0, sizeof(s->globalmc));
 924         /* [DIRAC_STD] pan_tilt(gparams) */
 925         for (ref = 0; ref < s->num_refs; ref++) {
 926             if (get_bits1(gb)) {
 927                 s->globalmc[ref].pan_tilt[0] = dirac_get_se_golomb(gb);
 928                 s->globalmc[ref].pan_tilt[1] = dirac_get_se_golomb(gb);
 929             }
 930             /* [DIRAC_STD] zoom_rotate_shear(gparams)
 931                zoom/rotation/shear parameters */
 932             if (get_bits1(gb)) {
 933                 s->globalmc[ref].zrs_exp   = svq3_get_ue_golomb(gb);
 934                 s->globalmc[ref].zrs[0][0] = dirac_get_se_golomb(gb);
 935                 s->globalmc[ref].zrs[0][1] = dirac_get_se_golomb(gb);
 936                 s->globalmc[ref].zrs[1][0] = dirac_get_se_golomb(gb);
 937                 s->globalmc[ref].zrs[1][1] = dirac_get_se_golomb(gb);
 938             } else {
 939                 s->globalmc[ref].zrs[0][0] = 1;
 940                 s->globalmc[ref].zrs[1][1] = 1;
 941             }
 942             /* [DIRAC_STD] perspective(gparams) */
 943             if (get_bits1(gb)) {
 944                 s->globalmc[ref].perspective_exp = svq3_get_ue_golomb(gb);
 945                 s->globalmc[ref].perspective[0]  = dirac_get_se_golomb(gb);
 946                 s->globalmc[ref].perspective[1]  = dirac_get_se_golomb(gb);
 947             }
 948         }
 949     }
 950
 951     /*[DIRAC_STD] 11.2.7 Picture prediction mode. prediction_mode()
 952       Picture prediction mode, not currently used. */
 953     if (svq3_get_ue_golomb(gb)) {
 954         av_log(s->avctx, AV_LOG_ERROR, "Unknown picture prediction mode\n");
 955         return -1;
 956     }
 957
 958     /* [DIRAC_STD] 11.2.8 Reference picture weight. reference_picture_weights()
 959        just data read, weight calculation will be done later on. */
 960     s->weight_log2denom = 1;
 961     s->weight[0]        = 1;
 962     s->weight[1]        = 1;
 963
 964     if (get_bits1(gb)) {
 965         s->weight_log2denom = svq3_get_ue_golomb(gb);
 966         s->weight[0] = dirac_get_se_golomb(gb);
 967         if (s->num_refs == 2)
 968             s->weight[1] = dirac_get_se_golomb(gb);
 969     }
 970     return 0;
 971 }
 972
 973 /**
 974  * Dirac Specification ->
 975  * 11.3 Wavelet transform data. wavelet_transform()
 976  */
 977 static int dirac_unpack_idwt_params(DiracContext *s)
 978 {
 979     GetBitContext *gb = &s->gb;
 980     int i, level;
 981     unsigned tmp;
 982
 983 #define CHECKEDREAD(dst, cond, errmsg) \
 984     tmp = svq3_get_ue_golomb(gb); \
 985     if (cond) { \
 986         av_log(s->avctx, AV_LOG_ERROR, errmsg); \
 987         return -1; \
 988     }\
 989     dst = tmp;
 990
 991     align_get_bits(gb);
 992
 993     s->zero_res = s->num_refs ? get_bits1(gb) : 0;
 994     if (s->zero_res)
 995         return 0;
 996
 997     /*[DIRAC_STD] 11.3.1 Transform parameters. transform_parameters() */
 998     CHECKEDREAD(s->wavelet_idx, tmp > 6, "wavelet_idx is too big\n")
 999
1000     CHECKEDREAD(s->wavelet_depth, tmp > MAX_DWT_LEVELS || tmp < 1, "invalid number of DWT decompositions\n")
1001
1002     if (!s->low_delay) {
1003         /* Codeblock parameters (core syntax only) */
1004         if (get_bits1(gb)) {
1005             for (i = 0; i <= s->wavelet_depth; i++) {
1006                 CHECKEDREAD(s->codeblock[i].width , tmp < 1, "codeblock width invalid\n")
1007                 CHECKEDREAD(s->codeblock[i].height, tmp < 1, "codeblock height invalid\n")
1008             }
1009
1010             CHECKEDREAD(s->codeblock_mode, tmp > 1, "unknown codeblock mode\n")
1011         } else
1012             for (i = 0; i <= s->wavelet_depth; i++)
1013                 s->codeblock[i].width = s->codeblock[i].height = 1;
1014     } else {
1015         /* Slice parameters + quantization matrix*/
1016         /*[DIRAC_STD] 11.3.4 Slice coding Parameters (low delay syntax only). slice_parameters() */
1017         s->lowdelay.num_x     = svq3_get_ue_golomb(gb);
1018         s->lowdelay.num_y     = svq3_get_ue_golomb(gb);
1019         s->lowdelay.bytes.num = svq3_get_ue_golomb(gb);
1020         s->lowdelay.bytes.den = svq3_get_ue_golomb(gb);
1021
1022         if (s->lowdelay.bytes.den <= 0) {
1023             av_log(s->avctx,AV_LOG_ERROR,"Invalid lowdelay.bytes.den\n");
1024             return AVERROR_INVALIDDATA;
1025         }
1026
1027         /* [DIRAC_STD] 11.3.5 Quantisation matrices (low-delay syntax). quant_matrix() */
1028         if (get_bits1(gb)) {
1029             av_log(s->avctx,AV_LOG_DEBUG,"Low Delay: Has Custom Quantization Matrix!\n");
1030             /* custom quantization matrix */
1031             s->lowdelay.quant[0][0] = svq3_get_ue_golomb(gb);
1032             for (level = 0; level < s->wavelet_depth; level++) {
1033                 s->lowdelay.quant[level][1] = svq3_get_ue_golomb(gb);
1034                 s->lowdelay.quant[level][2] = svq3_get_ue_golomb(gb);
1035                 s->lowdelay.quant[level][3] = svq3_get_ue_golomb(gb);
1036             }
1037         } else {
1038             if (s->wavelet_depth > 4) {
1039                 av_log(s->avctx,AV_LOG_ERROR,"Mandatory custom low delay matrix missing for depth %d\n", s->wavelet_depth);
1040                 return AVERROR_INVALIDDATA;
1041             }
1042             /* default quantization matrix */
1043             for (level = 0; level < s->wavelet_depth; level++)
1044                 for (i = 0; i < 4; i++) {
1045                     s->lowdelay.quant[level][i] = default_qmat[s->wavelet_idx][level][i];
1046                     /* haar with no shift differs for different depths */
1047                     if (s->wavelet_idx == 3)
1048                         s->lowdelay.quant[level][i] += 4*(s->wavelet_depth-1 - level);
1049                 }
1050         }
1051     }
1052     return 0;
1053 }
1054
/**
 * Predict the split mode of the superblock at (x, y) from its already
 * decoded neighbours.
 *
 * @param sbsplit pointer to the entry of the current superblock
 * @param stride  number of entries per row
 * @return 0 for the top-left superblock, the left neighbour's mode on the
 *         first row, the top neighbour's mode in the first column, and
 *         otherwise the table-rounded mean of the left, top and top-left
 *         modes
 */
static inline int pred_sbsplit(uint8_t *sbsplit, int stride, int x, int y)
{
    /* mean of three split modes, indexed by their sum */
    static const uint8_t avgsplit[7] = { 0, 0, 1, 1, 1, 2, 2 };
    int left, top, topleft;

    if (!y)
        return x ? sbsplit[-1] : 0;
    if (!x)
        return sbsplit[-stride];

    left    = sbsplit[-1];
    top     = sbsplit[-stride];
    topleft = sbsplit[-stride - 1];

    return avgsplit[left + top + topleft];
}
1068
1069 static inline int pred_block_mode(DiracBlock *block, int stride, int x, int y, int refmask)
1070 {
1071     int pred;
1072
1073     if (!(x|y))
1074         return 0;
1075     else if (!y)
1076         return block[-1].ref & refmask;
1077     else if (!x)
1078         return block[-stride].ref & refmask;
1079
1080     /* return the majority */
1081     pred = (block[-1].ref & refmask) + (block[-stride].ref & refmask) + (block[-stride-1].ref & refmask);
1082     return (pred >> 1) & refmask;
1083 }
1084
1085 static inline void pred_block_dc(DiracBlock *block, int stride, int x, int y)
1086 {
1087     int i, n = 0;
1088
1089     memset(block->u.dc, 0, sizeof(block->u.dc));
1090
1091     if (x && !(block[-1].ref & 3)) {
1092         for (i = 0; i < 3; i++)
1093             block->u.dc[i] += block[-1].u.dc[i];
1094         n++;
1095     }
1096
1097     if (y && !(block[-stride].ref & 3)) {
1098         for (i = 0; i < 3; i++)
1099             block->u.dc[i] += block[-stride].u.dc[i];
1100         n++;
1101     }
1102
1103     if (x && y && !(block[-1-stride].ref & 3)) {
1104         for (i = 0; i < 3; i++)
1105             block->u.dc[i] += block[-1-stride].u.dc[i];
1106         n++;
1107     }
1108
1109     if (n == 2) {
1110         for (i = 0; i < 3; i++)
1111             block->u.dc[i] = (block->u.dc[i]+1)>>1;
1112     } else if (n == 3) {
1113         for (i = 0; i < 3; i++)
1114             block->u.dc[i] = divide3(block->u.dc[i]);
1115     }
1116 }
1117
1118 static inline void pred_mv(DiracBlock *block, int stride, int x, int y, int ref)
1119 {
1120     int16_t *pred[3];
1121     int refmask = ref+1;
1122     int mask = refmask | DIRAC_REF_MASK_GLOBAL; /*  exclude gmc blocks */
1123     int n = 0;
1124
1125     if (x && (block[-1].ref & mask) == refmask)
1126         pred[n++] = block[-1].u.mv[ref];
1127
1128     if (y && (block[-stride].ref & mask) == refmask)
1129         pred[n++] = block[-stride].u.mv[ref];
1130
1131     if (x && y && (block[-stride-1].ref & mask) == refmask)
1132         pred[n++] = block[-stride-1].u.mv[ref];
1133
1134     switch (n) {
1135     case 0:
1136         block->u.mv[ref][0] = 0;
1137         block->u.mv[ref][1] = 0;
1138         break;
1139     case 1:
1140         block->u.mv[ref][0] = pred[0][0];
1141         block->u.mv[ref][1] = pred[0][1];
1142         break;
1143     case 2:
1144         block->u.mv[ref][0] = (pred[0][0] + pred[1][0] + 1) >> 1;
1145         block->u.mv[ref][1] = (pred[0][1] + pred[1][1] + 1) >> 1;
1146         break;
1147     case 3:
1148         block->u.mv[ref][0] = mid_pred(pred[0][0], pred[1][0], pred[2][0]);
1149         block->u.mv[ref][1] = mid_pred(pred[0][1], pred[1][1], pred[2][1]);
1150         break;
1151     }
1152 }
1153
1154 static void global_mv(DiracContext *s, DiracBlock *block, int x, int y, int ref)
1155 {
1156     int ez      = s->globalmc[ref].zrs_exp;
1157     int ep      = s->globalmc[ref].perspective_exp;
1158     int (*A)[2] = s->globalmc[ref].zrs;
1159     int *b      = s->globalmc[ref].pan_tilt;
1160     int *c      = s->globalmc[ref].perspective;
1161
1162     int m       = (1<<ep) - (c[0]*x + c[1]*y);
1163     int mx      = m * ((A[0][0] * x + A[0][1]*y) + (1<<ez) * b[0]);
1164     int my      = m * ((A[1][0] * x + A[1][1]*y) + (1<<ez) * b[1]);
1165
1166     block->u.mv[ref][0] = (mx + (1<<(ez+ep))) >> (ez+ep);
1167     block->u.mv[ref][1] = (my + (1<<(ez+ep))) >> (ez+ep);
1168 }
1169
1170 static void decode_block_params(DiracContext *s, DiracArith arith[8], DiracBlock *block,
1171                                 int stride, int x, int y)
1172 {
1173     int i;
1174
1175     block->ref  = pred_block_mode(block, stride, x, y, DIRAC_REF_MASK_REF1);
1176     block->ref ^= dirac_get_arith_bit(arith, CTX_PMODE_REF1);
1177
1178     if (s->num_refs == 2) {
1179         block->ref |= pred_block_mode(block, stride, x, y, DIRAC_REF_MASK_REF2);
1180         block->ref ^= dirac_get_arith_bit(arith, CTX_PMODE_REF2) << 1;
1181     }
1182
1183     if (!block->ref) {
1184         pred_block_dc(block, stride, x, y);
1185         for (i = 0; i < 3; i++)
1186             block->u.dc[i] += dirac_get_arith_int(arith+1+i, CTX_DC_F1, CTX_DC_DATA);
1187         return;
1188     }
1189
1190     if (s->globalmc_flag) {
1191         block->ref |= pred_block_mode(block, stride, x, y, DIRAC_REF_MASK_GLOBAL);
1192         block->ref ^= dirac_get_arith_bit(arith, CTX_GLOBAL_BLOCK) << 2;
1193     }
1194
1195     for (i = 0; i < s->num_refs; i++)
1196         if (block->ref & (i+1)) {
1197             if (block->ref & DIRAC_REF_MASK_GLOBAL) {
1198                 global_mv(s, block, x, y, i);
1199             } else {
1200                 pred_mv(block, stride, x, y, i);
1201                 block->u.mv[i][0] += dirac_get_arith_int(arith + 4 + 2 * i, CTX_MV_F1, CTX_MV_DATA);
1202                 block->u.mv[i][1] += dirac_get_arith_int(arith + 5 + 2 * i, CTX_MV_F1, CTX_MV_DATA);
1203             }
1204         }
1205 }
1206
1207 /**
1208  * Copies the current block to the other blocks covered by the current superblock split mode
1209  */
1210 static void propagate_block_data(DiracBlock *block, int stride, int size)
1211 {
1212     int x, y;
1213     DiracBlock *dst = block;
1214
1215     for (x = 1; x < size; x++)
1216         dst[x] = *block;
1217
1218     for (y = 1; y < size; y++) {
1219         dst += stride;
1220         for (x = 0; x < size; x++)
1221             dst[x] = *block;
1222     }
1223 }
1224
1225 /**
1226  * Dirac Specification ->
1227  * 12. Block motion data syntax
1228  */
1229 static int dirac_unpack_block_motion_data(DiracContext *s)
1230 {
1231     GetBitContext *gb = &s->gb;
1232     uint8_t *sbsplit = s->sbsplit;
1233     int i, x, y, q, p;
1234     DiracArith arith[8];
1235
1236     align_get_bits(gb);
1237
1238     /* [DIRAC_STD] 11.2.4 and 12.2.1 Number of blocks and superblocks */
1239     s->sbwidth  = DIVRNDUP(s->source.width,  4*s->plane[0].xbsep);
1240     s->sbheight = DIVRNDUP(s->source.height, 4*s->plane[0].ybsep);
1241     s->blwidth  = 4 * s->sbwidth;
1242     s->blheight = 4 * s->sbheight;
1243
1244     /* [DIRAC_STD] 12.3.1 Superblock splitting modes. superblock_split_modes()
1245        decode superblock split modes */
1246     ff_dirac_init_arith_decoder(arith, gb, svq3_get_ue_golomb(gb));     /* svq3_get_ue_golomb(gb) is the length */
1247     for (y = 0; y < s->sbheight; y++) {
1248         for (x = 0; x < s->sbwidth; x++) {
1249             unsigned int split  = dirac_get_arith_uint(arith, CTX_SB_F1, CTX_SB_DATA);
1250             if (split > 2)
1251                 return -1;
1252             sbsplit[x] = (split + pred_sbsplit(sbsplit+x, s->sbwidth, x, y)) % 3;
1253         }
1254         sbsplit += s->sbwidth;
1255     }
1256
1257     /* setup arith decoding */
1258     ff_dirac_init_arith_decoder(arith, gb, svq3_get_ue_golomb(gb));
1259     for (i = 0; i < s->num_refs; i++) {
1260         ff_dirac_init_arith_decoder(arith + 4 + 2 * i, gb, svq3_get_ue_golomb(gb));
1261         ff_dirac_init_arith_decoder(arith + 5 + 2 * i, gb, svq3_get_ue_golomb(gb));
1262     }
1263     for (i = 0; i < 3; i++)
1264         ff_dirac_init_arith_decoder(arith+1+i, gb, svq3_get_ue_golomb(gb));
1265
1266     for (y = 0; y < s->sbheight; y++)
1267         for (x = 0; x < s->sbwidth; x++) {
1268             int blkcnt = 1 << s->sbsplit[y * s->sbwidth + x];
1269             int step   = 4 >> s->sbsplit[y * s->sbwidth + x];
1270
1271             for (q = 0; q < blkcnt; q++)
1272                 for (p = 0; p < blkcnt; p++) {
1273                     int bx = 4 * x + p*step;
1274                     int by = 4 * y + q*step;
1275                     DiracBlock *block = &s->blmotion[by*s->blwidth + bx];
1276                     decode_block_params(s, arith, block, s->blwidth, bx, by);
1277                     propagate_block_data(block, s->blwidth, step);
1278                 }
1279         }
1280
1281     return 0;
1282 }
1283
/**
 * One-dimensional overlapped-block weight of sample i in a block of length
 * blen whose overlap on each side is 2*offset samples.
 *
 * @return the ramp value within 2*offset samples of either end of the
 *         block (mirrored at the far end), 8 everywhere else
 */
static int weight(int i, int blen, int offset)
{
#define ROLLOFF(i) offset == 1 ? ((i) ? 5 : 3) :        \
    (1 + (6*(i) + offset - 1) / (2*offset - 1))

    int edge_dist;

    if (i < 2*offset)
        edge_dist = i;                /* rising ramp at the start */
    else if (i > blen-1 - 2*offset)
        edge_dist = blen-1 - i;       /* same ramp, mirrored, at the end */
    else
        return 8;

    return ROLLOFF(edge_dist);
}
1295
1296 static void init_obmc_weight_row(Plane *p, uint8_t *obmc_weight, int stride,
1297                                  int left, int right, int wy)
1298 {
1299     int x;
1300     for (x = 0; left && x < p->xblen >> 1; x++)
1301         obmc_weight[x] = wy*8;
1302     for (; x < p->xblen >> right; x++)
1303         obmc_weight[x] = wy*weight(x, p->xblen, p->xoffset);
1304     for (; x < p->xblen; x++)
1305         obmc_weight[x] = wy*8;
1306     for (; x < stride; x++)
1307         obmc_weight[x] = 0;
1308 }
1309
1310 static void init_obmc_weight(Plane *p, uint8_t *obmc_weight, int stride,
1311                              int left, int right, int top, int bottom)
1312 {
1313     int y;
1314     for (y = 0; top && y < p->yblen >> 1; y++) {
1315         init_obmc_weight_row(p, obmc_weight, stride, left, right, 8);
1316         obmc_weight += stride;
1317     }
1318     for (; y < p->yblen >> bottom; y++) {
1319         int wy = weight(y, p->yblen, p->yoffset);
1320         init_obmc_weight_row(p, obmc_weight, stride, left, right, wy);
1321         obmc_weight += stride;
1322     }
1323     for (; y < p->yblen; y++) {
1324         init_obmc_weight_row(p, obmc_weight, stride, left, right, 8);
1325         obmc_weight += stride;
1326     }
1327 }
1328
1329 static void init_obmc_weights(DiracContext *s, Plane *p, int by)
1330 {
1331     int top = !by;
1332     int bottom = by == s->blheight-1;
1333
1334     /* don't bother re-initing for rows 2 to blheight-2, the weights don't change */
1335     if (top || bottom || by == 1) {
1336         init_obmc_weight(p, s->obmc_weight[0], MAX_BLOCKSIZE, 1, 0, top, bottom);
1337         init_obmc_weight(p, s->obmc_weight[1], MAX_BLOCKSIZE, 0, 0, top, bottom);
1338         init_obmc_weight(p, s->obmc_weight[2], MAX_BLOCKSIZE, 0, 1, top, bottom);
1339     }
1340 }
1341
/**
 * Bilinear weights for eighth-pel interpolation between four planes.
 *
 * Indexed as epel_weights[my & 3][mx & 3]; each entry holds one weight per
 * source plane and the four weights of an entry always sum to 16.
 */
static const uint8_t epel_weights[4][4][4] = {
    { /* my & 3 == 0 */
        { 16,  0,  0,  0 }, { 12,  4,  0,  0 }, {  8,  8,  0,  0 }, {  4, 12,  0,  0 },
    },
    { /* my & 3 == 1 */
        { 12,  0,  4,  0 }, {  9,  3,  3,  1 }, {  6,  6,  2,  2 }, {  3,  9,  1,  3 },
    },
    { /* my & 3 == 2 */
        {  8,  0,  8,  0 }, {  6,  2,  6,  2 }, {  4,  4,  4,  4 }, {  2,  6,  2,  6 },
    },
    { /* my & 3 == 3 */
        {  4,  0, 12,  0 }, {  3,  1,  9,  3 }, {  2,  2,  6,  6 }, {  1,  3,  3,  9 },
    },
};
1360
1361 /**
1362  * For block x,y, determine which of the hpel planes to do bilinear
1363  * interpolation from and set src[] to the location in each hpel plane
1364  * to MC from.
1365  *
1366  * @return the index of the put_dirac_pixels_tab function to use
1367  *  0 for 1 plane (fpel,hpel), 1 for 2 planes (qpel), 2 for 4 planes (qpel), and 3 for epel
1368  */
1369 static int mc_subpel(DiracContext *s, DiracBlock *block, const uint8_t *src[5],
1370                      int x, int y, int ref, int plane)
1371 {
1372     Plane *p = &s->plane[plane];
1373     uint8_t **ref_hpel = s->ref_pics[ref]->hpel[plane];
1374     int motion_x = block->u.mv[ref][0];
1375     int motion_y = block->u.mv[ref][1];
1376     int mx, my, i, epel, nplanes = 0;
1377
1378     if (plane) {
1379         motion_x >>= s->chroma_x_shift;
1380         motion_y >>= s->chroma_y_shift;
1381     }
1382
1383     mx         = motion_x & ~(-1U << s->mv_precision);
1384     my         = motion_y & ~(-1U << s->mv_precision);
1385     motion_x >>= s->mv_precision;
1386     motion_y >>= s->mv_precision;
1387     /* normalize subpel coordinates to epel */
1388     /* TODO: template this function? */
1389     mx      <<= 3 - s->mv_precision;
1390     my      <<= 3 - s->mv_precision;
1391
1392     x += motion_x;
1393     y += motion_y;
1394     epel = (mx|my)&1;
1395
1396     /* hpel position */
1397     if (!((mx|my)&3)) {
1398         nplanes = 1;
1399         src[0] = ref_hpel[(my>>1)+(mx>>2)] + y*p->stride + x;
1400     } else {
1401         /* qpel or epel */
1402         nplanes = 4;
1403         for (i = 0; i < 4; i++)
1404             src[i] = ref_hpel[i] + y*p->stride + x;
1405
1406         /* if we're interpolating in the right/bottom halves, adjust the planes as needed
1407            we increment x/y because the edge changes for half of the pixels */
1408         if (mx > 4) {
1409             src[0] += 1;
1410             src[2] += 1;
1411             x++;
1412         }
1413         if (my > 4) {
1414             src[0] += p->stride;
1415             src[1] += p->stride;
1416             y++;
1417         }
1418
1419         /* hpel planes are:
1420            [0]: F  [1]: H
1421            [2]: V  [3]: C */
1422         if (!epel) {
1423             /* check if we really only need 2 planes since either mx or my is
1424                a hpel position. (epel weights of 0 handle this there) */
1425             if (!(mx&3)) {
1426                 /* mx == 0: average [0] and [2]
1427                    mx == 4: average [1] and [3] */
1428                 src[!mx] = src[2 + !!mx];
1429                 nplanes = 2;
1430             } else if (!(my&3)) {
1431                 src[0] = src[(my>>1)  ];
1432                 src[1] = src[(my>>1)+1];
1433                 nplanes = 2;
1434             }
1435         } else {
1436             /* adjust the ordering if needed so the weights work */
1437             if (mx > 4) {
1438                 FFSWAP(const uint8_t *, src[0], src[1]);
1439                 FFSWAP(const uint8_t *, src[2], src[3]);
1440             }
1441             if (my > 4) {
1442                 FFSWAP(const uint8_t *, src[0], src[2]);
1443                 FFSWAP(const uint8_t *, src[1], src[3]);
1444             }
1445             src[4] = epel_weights[my&3][mx&3];
1446         }
1447     }
1448
1449     /* fixme: v/h _edge_pos */
1450     if (x + p->xblen > p->width +EDGE_WIDTH/2 ||
1451         y + p->yblen > p->height+EDGE_WIDTH/2 ||
1452         x < 0 || y < 0) {
1453         for (i = 0; i < nplanes; i++) {
1454             s->vdsp.emulated_edge_mc(s->edge_emu_buffer[i], src[i],
1455                                      p->stride, p->stride,
1456                                      p->xblen, p->yblen, x, y,
1457                                      p->width+EDGE_WIDTH/2, p->height+EDGE_WIDTH/2);
1458             src[i] = s->edge_emu_buffer[i];
1459         }
1460     }
1461     return (nplanes>>1) + epel;
1462 }
1463
1464 static void add_dc(uint16_t *dst, int dc, int stride,
1465                    uint8_t *obmc_weight, int xblen, int yblen)
1466 {
1467     int x, y;
1468     dc += 128;
1469
1470     for (y = 0; y < yblen; y++) {
1471         for (x = 0; x < xblen; x += 2) {
1472             dst[x  ] += dc * obmc_weight[x  ];
1473             dst[x+1] += dc * obmc_weight[x+1];
1474         }
1475         dst          += stride;
1476         obmc_weight  += MAX_BLOCKSIZE;
1477     }
1478 }
1479
1480 static void block_mc(DiracContext *s, DiracBlock *block,
1481                      uint16_t *mctmp, uint8_t *obmc_weight,
1482                      int plane, int dstx, int dsty)
1483 {
1484     Plane *p = &s->plane[plane];
1485     const uint8_t *src[5];
1486     int idx;
1487
1488     switch (block->ref&3) {
1489     case 0: /* DC */
1490         add_dc(mctmp, block->u.dc[plane], p->stride, obmc_weight, p->xblen, p->yblen);
1491         return;
1492     case 1:
1493     case 2:
1494         idx = mc_subpel(s, block, src, dstx, dsty, (block->ref&3)-1, plane);
1495         s->put_pixels_tab[idx](s->mcscratch, src, p->stride, p->yblen);
1496         if (s->weight_func)
1497             s->weight_func(s->mcscratch, p->stride, s->weight_log2denom,
1498                            s->weight[0] + s->weight[1], p->yblen);
1499         break;
1500     case 3:
1501         idx = mc_subpel(s, block, src, dstx, dsty, 0, plane);
1502         s->put_pixels_tab[idx](s->mcscratch, src, p->stride, p->yblen);
1503         idx = mc_subpel(s, block, src, dstx, dsty, 1, plane);
1504         if (s->biweight_func) {
1505             /* fixme: +32 is a quick hack */
1506             s->put_pixels_tab[idx](s->mcscratch + 32, src, p->stride, p->yblen);
1507             s->biweight_func(s->mcscratch, s->mcscratch+32, p->stride, s->weight_log2denom,
1508                              s->weight[0], s->weight[1], p->yblen);
1509         } else
1510             s->avg_pixels_tab[idx](s->mcscratch, src, p->stride, p->yblen);
1511         break;
1512     }
1513     s->add_obmc(mctmp, s->mcscratch, p->stride, obmc_weight, p->yblen);
1514 }
1515
1516 static void mc_row(DiracContext *s, DiracBlock *block, uint16_t *mctmp, int plane, int dsty)
1517 {
1518     Plane *p = &s->plane[plane];
1519     int x, dstx = p->xbsep - p->xoffset;
1520
1521     block_mc(s, block, mctmp, s->obmc_weight[0], plane, -p->xoffset, dsty);
1522     mctmp += p->xbsep;
1523
1524     for (x = 1; x < s->blwidth-1; x++) {
1525         block_mc(s, block+x, mctmp, s->obmc_weight[1], plane, dstx, dsty);
1526         dstx  += p->xbsep;
1527         mctmp += p->xbsep;
1528     }
1529     block_mc(s, block+x, mctmp, s->obmc_weight[2], plane, dstx, dsty);
1530 }
1531
1532 static void select_dsp_funcs(DiracContext *s, int width, int height, int xblen, int yblen)
1533 {
1534     int idx = 0;
1535     if (xblen > 8)
1536         idx = 1;
1537     if (xblen > 16)
1538         idx = 2;
1539
1540     memcpy(s->put_pixels_tab, s->diracdsp.put_dirac_pixels_tab[idx], sizeof(s->put_pixels_tab));
1541     memcpy(s->avg_pixels_tab, s->diracdsp.avg_dirac_pixels_tab[idx], sizeof(s->avg_pixels_tab));
1542     s->add_obmc = s->diracdsp.add_dirac_obmc[idx];
1543     if (s->weight_log2denom > 1 || s->weight[0] != 1 || s->weight[1] != 1) {
1544         s->weight_func   = s->diracdsp.weight_dirac_pixels_tab[idx];
1545         s->biweight_func = s->diracdsp.biweight_dirac_pixels_tab[idx];
1546     } else {
1547         s->weight_func   = NULL;
1548         s->biweight_func = NULL;
1549     }
1550 }
1551
1552 static void interpolate_refplane(DiracContext *s, DiracFrame *ref, int plane, int width, int height)
1553 {
1554     /* chroma allocates an edge of 8 when subsampled
1555        which for 4:2:2 means an h edge of 16 and v edge of 8
1556        just use 8 for everything for the moment */
1557     int i, edge = EDGE_WIDTH/2;
1558
1559     ref->hpel[plane][0] = ref->avframe->data[plane];
1560     s->mpvencdsp.draw_edges(ref->hpel[plane][0], ref->avframe->linesize[plane], width, height, edge, edge, EDGE_TOP | EDGE_BOTTOM); /* EDGE_TOP | EDGE_BOTTOM values just copied to make it build, this needs to be ensured */
1561
1562     /* no need for hpel if we only have fpel vectors */
1563     if (!s->mv_precision)
1564         return;
1565
1566     for (i = 1; i < 4; i++) {
1567         if (!ref->hpel_base[plane][i])
1568             ref->hpel_base[plane][i] = av_malloc((height+2*edge) * ref->avframe->linesize[plane] + 32);
1569         /* we need to be 16-byte aligned even for chroma */
1570         ref->hpel[plane][i] = ref->hpel_base[plane][i] + edge*ref->avframe->linesize[plane] + 16;
1571     }
1572
1573     if (!ref->interpolated[plane]) {
1574         s->diracdsp.dirac_hpel_filter(ref->hpel[plane][1], ref->hpel[plane][2],
1575                                       ref->hpel[plane][3], ref->hpel[plane][0],
1576                                       ref->avframe->linesize[plane], width, height);
1577         s->mpvencdsp.draw_edges(ref->hpel[plane][1], ref->avframe->linesize[plane], width, height, edge, edge, EDGE_TOP | EDGE_BOTTOM);
1578         s->mpvencdsp.draw_edges(ref->hpel[plane][2], ref->avframe->linesize[plane], width, height, edge, edge, EDGE_TOP | EDGE_BOTTOM);
1579         s->mpvencdsp.draw_edges(ref->hpel[plane][3], ref->avframe->linesize[plane], width, height, edge, edge, EDGE_TOP | EDGE_BOTTOM);
1580     }
1581     ref->interpolated[plane] = 1;
1582 }
1583
1584 /**
1585  * Dirac Specification ->
1586  * 13.0 Transform data syntax. transform_data()
1587  */
1588 static int dirac_decode_frame_internal(DiracContext *s)
1589 {
1590     DWTContext d;
1591     int y, i, comp, dsty;
1592
1593     if (s->low_delay) {
1594         /* [DIRAC_STD] 13.5.1 low_delay_transform_data() */
1595         for (comp = 0; comp < 3; comp++) {
1596             Plane *p = &s->plane[comp];
1597             memset(p->idwt_buf, 0, p->idwt_stride * p->idwt_height * sizeof(IDWTELEM));
1598         }
1599         if (!s->zero_res)
1600             decode_lowdelay(s);
1601     }
1602
1603     for (comp = 0; comp < 3; comp++) {
1604         Plane *p       = &s->plane[comp];
1605         uint8_t *frame = s->current_picture->avframe->data[comp];
1606
1607         /* FIXME: small resolutions */
1608         for (i = 0; i < 4; i++)
1609             s->edge_emu_buffer[i] = s->edge_emu_buffer_base + i*FFALIGN(p->width, 16);
1610
1611         if (!s->zero_res && !s->low_delay)
1612         {
1613             memset(p->idwt_buf, 0, p->idwt_stride * p->idwt_height * sizeof(IDWTELEM));
1614             decode_component(s, comp); /* [DIRAC_STD] 13.4.1 core_transform_data() */
1615         }
1616         if (ff_spatial_idwt_init2(&d, p->idwt_buf, p->idwt_width, p->idwt_height, p->idwt_stride,
1617                                   s->wavelet_idx+2, s->wavelet_depth, p->idwt_tmp))
1618             return -1;
1619
1620         if (!s->num_refs) { /* intra */
1621             for (y = 0; y < p->height; y += 16) {
1622                 ff_spatial_idwt_slice2(&d, y+16); /* decode */
1623                 s->diracdsp.put_signed_rect_clamped(frame + y*p->stride, p->stride,
1624                                                     p->idwt_buf + y*p->idwt_stride, p->idwt_stride, p->width, 16);
1625             }
1626         } else { /* inter */
1627             int rowheight = p->ybsep*p->stride;
1628
1629             select_dsp_funcs(s, p->width, p->height, p->xblen, p->yblen);
1630
1631             for (i = 0; i < s->num_refs; i++)
1632                 interpolate_refplane(s, s->ref_pics[i], comp, p->width, p->height);
1633
1634             memset(s->mctmp, 0, 4*p->yoffset*p->stride);
1635
1636             dsty = -p->yoffset;
1637             for (y = 0; y < s->blheight; y++) {
1638                 int h     = 0,
1639                     start = FFMAX(dsty, 0);
1640                 uint16_t *mctmp    = s->mctmp + y*rowheight;
1641                 DiracBlock *blocks = s->blmotion + y*s->blwidth;
1642
1643                 init_obmc_weights(s, p, y);
1644
1645                 if (y == s->blheight-1 || start+p->ybsep > p->height)
1646                     h = p->height - start;
1647                 else
1648                     h = p->ybsep - (start - dsty);
1649                 if (h < 0)
1650                     break;
1651
1652                 memset(mctmp+2*p->yoffset*p->stride, 0, 2*rowheight);
1653                 mc_row(s, blocks, mctmp, comp, dsty);
1654
1655                 mctmp += (start - dsty)*p->stride + p->xoffset;
1656                 ff_spatial_idwt_slice2(&d, start + h); /* decode */
1657                 s->diracdsp.add_rect_clamped(frame + start*p->stride, mctmp, p->stride,
1658                                              p->idwt_buf + start*p->idwt_stride, p->idwt_stride, p->width, h);
1659
1660                 dsty += p->ybsep;
1661             }
1662         }
1663     }
1664
1665
1666     return 0;
1667 }
1668
1669 static int get_buffer_with_edge(AVCodecContext *avctx, AVFrame *f, int flags)
1670 {
1671     int ret, i;
1672     int chroma_x_shift, chroma_y_shift;
1673     avcodec_get_chroma_sub_sample(avctx->pix_fmt, &chroma_x_shift, &chroma_y_shift);
1674
1675     f->width  = avctx->width  + 2 * EDGE_WIDTH;
1676     f->height = avctx->height + 2 * EDGE_WIDTH + 2;
1677     ret = ff_get_buffer(avctx, f, flags);
1678     if (ret < 0)
1679         return ret;
1680
1681     for (i = 0; f->data[i]; i++) {
1682         int offset = (EDGE_WIDTH >> (i && i<3 ? chroma_y_shift : 0)) *
1683                      f->linesize[i] + 32;
1684         f->data[i] += offset;
1685     }
1686     f->width  = avctx->width;
1687     f->height = avctx->height;
1688
1689     return 0;
1690 }
1691
1692 /**
1693  * Dirac Specification ->
1694  * 11.1.1 Picture Header. picture_header()
1695  */
1696 static int dirac_decode_picture_header(DiracContext *s)
1697 {
1698     int retire, picnum;
1699     int i, j, refnum, refdist;
1700     GetBitContext *gb = &s->gb;
1701
1702     /* [DIRAC_STD] 11.1.1 Picture Header. picture_header() PICTURE_NUM */
1703     picnum = s->current_picture->avframe->display_picture_number = get_bits_long(gb, 32);
1704
1705
1706     av_log(s->avctx,AV_LOG_DEBUG,"PICTURE_NUM: %d\n",picnum);
1707
1708     /* if this is the first keyframe after a sequence header, start our
1709        reordering from here */
1710     if (s->frame_number < 0)
1711         s->frame_number = picnum;
1712
1713     s->ref_pics[0] = s->ref_pics[1] = NULL;
1714     for (i = 0; i < s->num_refs; i++) {
1715         refnum = picnum + dirac_get_se_golomb(gb);
1716         refdist = INT_MAX;
1717
1718         /* find the closest reference to the one we want */
1719         /* Jordi: this is needed if the referenced picture hasn't yet arrived */
1720         for (j = 0; j < MAX_REFERENCE_FRAMES && refdist; j++)
1721             if (s->ref_frames[j]
1722                 && FFABS(s->ref_frames[j]->avframe->display_picture_number - refnum) < refdist) {
1723                 s->ref_pics[i] = s->ref_frames[j];
1724                 refdist = FFABS(s->ref_frames[j]->avframe->display_picture_number - refnum);
1725             }
1726
1727         if (!s->ref_pics[i] || refdist)
1728             av_log(s->avctx, AV_LOG_DEBUG, "Reference not found\n");
1729
1730         /* if there were no references at all, allocate one */
1731         if (!s->ref_pics[i])
1732             for (j = 0; j < MAX_FRAMES; j++)
1733                 if (!s->all_frames[j].avframe->data[0]) {
1734                     s->ref_pics[i] = &s->all_frames[j];
1735                     get_buffer_with_edge(s->avctx, s->ref_pics[i]->avframe, AV_GET_BUFFER_FLAG_REF);
1736                     break;
1737                 }
1738     }
1739
1740     /* retire the reference frames that are not used anymore */
1741     if (s->current_picture->avframe->reference) {
1742         retire = picnum + dirac_get_se_golomb(gb);
1743         if (retire != picnum) {
1744             DiracFrame *retire_pic = remove_frame(s->ref_frames, retire);
1745
1746             if (retire_pic)
1747                 retire_pic->avframe->reference &= DELAYED_PIC_REF;
1748             else
1749                 av_log(s->avctx, AV_LOG_DEBUG, "Frame to retire not found\n");
1750         }
1751
1752         /* if reference array is full, remove the oldest as per the spec */
1753         while (add_frame(s->ref_frames, MAX_REFERENCE_FRAMES, s->current_picture)) {
1754             av_log(s->avctx, AV_LOG_ERROR, "Reference frame overflow\n");
1755             remove_frame(s->ref_frames, s->ref_frames[0]->avframe->display_picture_number)->avframe->reference &= DELAYED_PIC_REF;
1756         }
1757     }
1758
1759     if (s->num_refs) {
1760         if (dirac_unpack_prediction_parameters(s))  /* [DIRAC_STD] 11.2 Picture Prediction Data. picture_prediction() */
1761             return -1;
1762         if (dirac_unpack_block_motion_data(s))      /* [DIRAC_STD] 12. Block motion data syntax                       */
1763             return -1;
1764     }
1765     if (dirac_unpack_idwt_params(s))                /* [DIRAC_STD] 11.3 Wavelet transform data                        */
1766         return -1;
1767
1768     init_planes(s);
1769     return 0;
1770 }
1771
1772 static int get_delayed_pic(DiracContext *s, AVFrame *picture, int *got_frame)
1773 {
1774     DiracFrame *out = s->delay_frames[0];
1775     int i, out_idx  = 0;
1776     int ret;
1777
1778     /* find frame with lowest picture number */
1779     for (i = 1; s->delay_frames[i]; i++)
1780         if (s->delay_frames[i]->avframe->display_picture_number < out->avframe->display_picture_number) {
1781             out     = s->delay_frames[i];
1782             out_idx = i;
1783         }
1784
1785     for (i = out_idx; s->delay_frames[i]; i++)
1786         s->delay_frames[i] = s->delay_frames[i+1];
1787
1788     if (out) {
1789         out->avframe->reference ^= DELAYED_PIC_REF;
1790         *got_frame = 1;
1791         if((ret = av_frame_ref(picture, out->avframe)) < 0)
1792             return ret;
1793     }
1794
1795     return 0;
1796 }
1797
/**
 * Dirac Specification ->
 * 9.6 Parse Info Header Syntax. parse_info()
 *
 * Size in bytes of the parse info header that starts every data unit:
 * 4 byte start code + 1 byte parse code + 4 byte size + 4 byte previous size
 */
#define DATA_UNIT_HEADER_SIZE (4 + 1 + 4 + 4)
1804
1805 /* [DIRAC_STD] dirac_decode_data_unit makes reference to the while defined in 9.3
1806    inside the function parse_sequence() */
1807 static int dirac_decode_data_unit(AVCodecContext *avctx, const uint8_t *buf, int size)
1808 {
1809     DiracContext *s   = avctx->priv_data;
1810     DiracFrame *pic   = NULL;
1811     int ret, i, parse_code = buf[4];
1812     unsigned tmp;
1813
1814     if (size < DATA_UNIT_HEADER_SIZE)
1815         return -1;
1816
1817     init_get_bits(&s->gb, &buf[13], 8*(size - DATA_UNIT_HEADER_SIZE));
1818
1819     if (parse_code == pc_seq_header) {
1820         if (s->seen_sequence_header)
1821             return 0;
1822
1823         /* [DIRAC_STD] 10. Sequence header */
1824         if (avpriv_dirac_parse_sequence_header(avctx, &s->gb, &s->source))
1825             return -1;
1826
1827         avcodec_get_chroma_sub_sample(avctx->pix_fmt, &s->chroma_x_shift, &s->chroma_y_shift);
1828
1829         if (alloc_sequence_buffers(s))
1830             return -1;
1831
1832         s->seen_sequence_header = 1;
1833     } else if (parse_code == pc_eos) { /* [DIRAC_STD] End of Sequence */
1834         free_sequence_buffers(s);
1835         s->seen_sequence_header = 0;
1836     } else if (parse_code == pc_aux_data) {
1837         if (buf[13] == 1) {     /* encoder implementation/version */
1838             int ver[3];
1839             /* versions older than 1.0.8 don't store quant delta for
1840                subbands with only one codeblock */
1841             if (sscanf(buf+14, "Schroedinger %d.%d.%d", ver, ver+1, ver+2) == 3)
1842                 if (ver[0] == 1 && ver[1] == 0 && ver[2] <= 7)
1843                     s->old_delta_quant = 1;
1844         }
1845     } else if (parse_code & 0x8) {  /* picture data unit */
1846         if (!s->seen_sequence_header) {
1847             av_log(avctx, AV_LOG_DEBUG, "Dropping frame without sequence header\n");
1848             return -1;
1849         }
1850
1851         /* find an unused frame */
1852         for (i = 0; i < MAX_FRAMES; i++)
1853             if (s->all_frames[i].avframe->data[0] == NULL)
1854                 pic = &s->all_frames[i];
1855         if (!pic) {
1856             av_log(avctx, AV_LOG_ERROR, "framelist full\n");
1857             return -1;
1858         }
1859
1860         av_frame_unref(pic->avframe);
1861
1862         /* [DIRAC_STD] Defined in 9.6.1 ... */
1863         tmp            =  parse_code & 0x03;                   /* [DIRAC_STD] num_refs()      */
1864         if (tmp > 2) {
1865             av_log(avctx, AV_LOG_ERROR, "num_refs of 3\n");
1866             return -1;
1867         }
1868         s->num_refs    = tmp;
1869         s->is_arith    = (parse_code & 0x48) == 0x08;          /* [DIRAC_STD] using_ac()      */
1870         s->low_delay   = (parse_code & 0x88) == 0x88;          /* [DIRAC_STD] is_low_delay()  */
1871         pic->avframe->reference = (parse_code & 0x0C) == 0x0C;  /* [DIRAC_STD]  is_reference() */
1872         pic->avframe->key_frame = s->num_refs == 0;             /* [DIRAC_STD] is_intra()      */
1873         pic->avframe->pict_type = s->num_refs + 1;              /* Definition of AVPictureType in avutil.h */
1874
1875         if ((ret = get_buffer_with_edge(avctx, pic->avframe, (parse_code & 0x0C) == 0x0C ? AV_GET_BUFFER_FLAG_REF : 0)) < 0)
1876             return ret;
1877         s->current_picture = pic;
1878         s->plane[0].stride = pic->avframe->linesize[0];
1879         s->plane[1].stride = pic->avframe->linesize[1];
1880         s->plane[2].stride = pic->avframe->linesize[2];
1881
1882         if (alloc_buffers(s, FFMAX3(FFABS(s->plane[0].stride), FFABS(s->plane[1].stride), FFABS(s->plane[2].stride))) < 0)
1883             return AVERROR(ENOMEM);
1884
1885         /* [DIRAC_STD] 11.1 Picture parse. picture_parse() */
1886         if (dirac_decode_picture_header(s))
1887             return -1;
1888
1889         /* [DIRAC_STD] 13.0 Transform data syntax. transform_data() */
1890         if (dirac_decode_frame_internal(s))
1891             return -1;
1892     }
1893     return 0;
1894 }
1895
1896 static int dirac_decode_frame(AVCodecContext *avctx, void *data, int *got_frame, AVPacket *pkt)
1897 {
1898     DiracContext *s     = avctx->priv_data;
1899     AVFrame *picture    = data;
1900     uint8_t *buf        = pkt->data;
1901     int buf_size        = pkt->size;
1902     int i, data_unit_size, buf_idx = 0;
1903     int ret;
1904
1905     /* release unused frames */
1906     for (i = 0; i < MAX_FRAMES; i++)
1907         if (s->all_frames[i].avframe->data[0] && !s->all_frames[i].avframe->reference) {
1908             av_frame_unref(s->all_frames[i].avframe);
1909             memset(s->all_frames[i].interpolated, 0, sizeof(s->all_frames[i].interpolated));
1910         }
1911
1912     s->current_picture = NULL;
1913     *got_frame = 0;
1914
1915     /* end of stream, so flush delayed pics */
1916     if (buf_size == 0)
1917         return get_delayed_pic(s, (AVFrame *)data, got_frame);
1918
1919     for (;;) {
1920         /*[DIRAC_STD] Here starts the code from parse_info() defined in 9.6
1921           [DIRAC_STD] PARSE_INFO_PREFIX = "BBCD" as defined in ISO/IEC 646
1922           BBCD start code search */
1923         for (; buf_idx + DATA_UNIT_HEADER_SIZE < buf_size; buf_idx++) {
1924             if (buf[buf_idx  ] == 'B' && buf[buf_idx+1] == 'B' &&
1925                 buf[buf_idx+2] == 'C' && buf[buf_idx+3] == 'D')
1926                 break;
1927         }
1928         /* BBCD found or end of data */
1929         if (buf_idx + DATA_UNIT_HEADER_SIZE >= buf_size)
1930             break;
1931
1932         data_unit_size = AV_RB32(buf+buf_idx+5);
1933         if (buf_idx + data_unit_size > buf_size || !data_unit_size) {
1934             if(buf_idx + data_unit_size > buf_size)
1935             av_log(s->avctx, AV_LOG_ERROR,
1936                    "Data unit with size %d is larger than input buffer, discarding\n",
1937                    data_unit_size);
1938             buf_idx += 4;
1939             continue;
1940         }
1941         /* [DIRAC_STD] dirac_decode_data_unit makes reference to the while defined in 9.3 inside the function parse_sequence() */
1942         if (dirac_decode_data_unit(avctx, buf+buf_idx, data_unit_size))
1943         {
1944             av_log(s->avctx, AV_LOG_ERROR,"Error in dirac_decode_data_unit\n");
1945             return -1;
1946         }
1947         buf_idx += data_unit_size;
1948     }
1949
1950     if (!s->current_picture)
1951         return buf_size;
1952
1953     if (s->current_picture->avframe->display_picture_number > s->frame_number) {
1954         DiracFrame *delayed_frame = remove_frame(s->delay_frames, s->frame_number);
1955
1956         s->current_picture->avframe->reference |= DELAYED_PIC_REF;
1957
1958         if (add_frame(s->delay_frames, MAX_DELAY, s->current_picture)) {
1959             int min_num = s->delay_frames[0]->avframe->display_picture_number;
1960             /* Too many delayed frames, so we display the frame with the lowest pts */
1961             av_log(avctx, AV_LOG_ERROR, "Delay frame overflow\n");
1962             delayed_frame = s->delay_frames[0];
1963
1964             for (i = 1; s->delay_frames[i]; i++)
1965                 if (s->delay_frames[i]->avframe->display_picture_number < min_num)
1966                     min_num = s->delay_frames[i]->avframe->display_picture_number;
1967
1968             delayed_frame = remove_frame(s->delay_frames, min_num);
1969             add_frame(s->delay_frames, MAX_DELAY, s->current_picture);
1970         }
1971
1972         if (delayed_frame) {
1973             delayed_frame->avframe->reference ^= DELAYED_PIC_REF;
1974             if((ret=av_frame_ref(data, delayed_frame->avframe)) < 0)
1975                 return ret;
1976             *got_frame = 1;
1977         }
1978     } else if (s->current_picture->avframe->display_picture_number == s->frame_number) {
1979         /* The right frame at the right time :-) */
1980         if((ret=av_frame_ref(data, s->current_picture->avframe)) < 0)
1981             return ret;
1982         *got_frame = 1;
1983     }
1984
1985     if (*got_frame)
1986         s->frame_number = picture->display_picture_number + 1;
1987
1988     return buf_idx;
1989 }
1990
1991 AVCodec ff_dirac_decoder = {
1992     .name           = "dirac",
1993     .long_name      = NULL_IF_CONFIG_SMALL("BBC Dirac VC-2"),
1994     .type           = AVMEDIA_TYPE_VIDEO,
1995     .id             = AV_CODEC_ID_DIRAC,
1996     .priv_data_size = sizeof(DiracContext),
1997     .init           = dirac_decode_init,
1998     .close          = dirac_decode_end,
1999     .decode         = dirac_decode_frame,
2000     .capabilities   = CODEC_CAP_DELAY,
2001     .flush          = dirac_decode_flush,
2002 };