git.sesse.net Git - ffmpeg/blob - libavcodec/diracdec.c

   1 /*
   2  * Copyright (C) 2007 Marco Gerards <marco@gnu.org>
   3  * Copyright (C) 2009 David Conrad
   4  * Copyright (C) 2011 Jordi Ortiz
   5  *
   6  * This file is part of FFmpeg.
   7  *
   8  * FFmpeg is free software; you can redistribute it and/or
   9  * modify it under the terms of the GNU Lesser General Public
  10  * License as published by the Free Software Foundation; either
  11  * version 2.1 of the License, or (at your option) any later version.
  12  *
  13  * FFmpeg is distributed in the hope that it will be useful,
  14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  16  * Lesser General Public License for more details.
  17  *
  18  * You should have received a copy of the GNU Lesser General Public
  19  * License along with FFmpeg; if not, write to the Free Software
  20  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  21  */
  22
  23 /**
  24  * @file
  25  * Dirac Decoder
  26  * @author Marco Gerards <marco@gnu.org>, David Conrad, Jordi Ortiz <nenjordi@gmail.com>
  27  */
  28
  29 #include "avcodec.h"
  30 #include "dsputil.h"
  31 #include "get_bits.h"
  32 #include "bytestream.h"
  33 #include "internal.h"
  34 #include "golomb.h"
  35 #include "dirac_arith.h"
  36 #include "mpeg12data.h"
  37 #include "libavcodec/mpegvideo.h"
  38 #include "mpegvideoencdsp.h"
  39 #include "dirac_dwt.h"
  40 #include "dirac.h"
  41 #include "diracdsp.h"
  42 #include "videodsp.h" // for ff_emulated_edge_mc_8
  43
/**
 * The spec limits the number of wavelet decompositions to 4 for both
 * level 1 (VC-2) and 128 (long-gop default).
 * 5 decompositions is the maximum before >16-bit buffers are needed.
 * Schroedinger allows this for DD 9,7 and 13,7 wavelets only, limiting
 * the others to 4 decompositions (or 3 for the fidelity filter).
 *
 * We use this instead of MAX_DECOMPOSITIONS to save some memory.
 */
#define MAX_DWT_LEVELS 5

/**
 * The spec limits this to 3 for frame coding, but in practice it can be
 * as high as 6.
 */
#define MAX_REFERENCE_FRAMES 8
#define MAX_DELAY 5         /* limit for main profile for frame coding (TODO: field coding) */
/* number of entries in the DiracContext.all_frames pool */
#define MAX_FRAMES (MAX_REFERENCE_FRAMES + MAX_DELAY + 1)
/* highest index accepted by qscale_tab / qoffset_*_tab (which have MAX_QUANT+1 entries) */
#define MAX_QUANT 68        /* max quant for VC-2 */
#define MAX_BLOCKSIZE 32    /* maximum xblen/yblen we support */

/**
 * DiracBlock->ref flags, if set then the block does MC from the given ref
 */
#define DIRAC_REF_MASK_REF1   1
#define DIRAC_REF_MASK_REF2   2
#define DIRAC_REF_MASK_GLOBAL 4

/**
 * Value of Picture.reference when Picture is not a reference picture, but
 * is held for delayed output.
 */
#define DELAYED_PIC_REF 4

/* Only the 8-bit edge emulation routine is used by this decoder. */
#define ff_emulated_edge_mc ff_emulated_edge_mc_8 /* Fix: change the calls to this function regarding bit depth */
  78
  79 #define CALC_PADDING(size, depth)                       \
  80     (((size + (1 << depth) - 1) >> depth) << depth)
  81
  82 #define DIVRNDUP(a, b) (((a) + (b) - 1) / (b))
  83
/**
 * One entry of the decoder's frame pool: the decoded picture plus the
 * per-plane hpel[] buffers (presumably half-pel interpolated copies used
 * for motion compensation -- the code filling them is not in this part
 * of the file).
 */
typedef struct {
    AVFrame *avframe;           /* allocated in dirac_decode_init(), freed in dirac_decode_end() */
    int interpolated[3];    /* 1 if hpel[] is valid */
    uint8_t *hpel[3][4];
    uint8_t *hpel_base[3][4];   /* entries [plane][1..3] are released with av_freep() in free_sequence_buffers() */
} DiracFrame;
  90
/**
 * Per-block motion data. A block carries either motion vectors or DC
 * values, hence the union; ref holds the DIRAC_REF_MASK_* flags.
 */
typedef struct {
    union {
        int16_t mv[2][2];   /* NOTE(review): presumably [reference][x/y] -- confirm against the MV decoder */
        int16_t dc[3];      /* NOTE(review): presumably one DC value per plane -- confirm */
    } u; /* anonymous unions aren't in C99 :( */
    uint8_t ref;            /* combination of DIRAC_REF_MASK_* flags */
} DiracBlock;
  98
/**
 * One wavelet subband of a plane. The geometry fields are filled in by
 * init_planes(); length/quant/coeff_data are set per picture while the
 * bitstream is parsed.
 */
typedef struct SubBand {
    int level;                  /* decomposition level, 0 .. wavelet_depth-1 */
    int orientation;            /* enum dirac_subband value */
    int stride;                 /* distance, in IDWTELEMs, between two rows of this band inside the plane buffer */
    int width;
    int height;
    int quant;                  /* quantiser index, used to index qscale_tab / qoffset_*_tab */
    IDWTELEM *ibuf;             /* first coefficient of the band; points into Plane.idwt_buf */
    struct SubBand *parent;     /* band of the same orientation at level-1; not set for level 0 */

    /* for low delay */
    unsigned length;            /* number of bytes of coded coefficient data */
    const uint8_t *coeff_data;  /* start of the coded coefficient data inside the packet */
} SubBand;
 113
/**
 * Per-component (Y, U, V) state: picture dimensions, the IDWT coefficient
 * buffers and the motion-compensation block geometry.
 */
typedef struct Plane {
    int width;                  /* source dimensions, shifted by the chroma shift for planes 1 and 2 */
    int height;
    ptrdiff_t stride;

    int idwt_width;             /* width/height rounded up to a multiple of 1 << wavelet_depth */
    int idwt_height;
    int idwt_stride;            /* idwt_width aligned to 8 */
    IDWTELEM *idwt_buf;         /* idwt_buf_base advanced past the top padding rows */
    IDWTELEM *idwt_buf_base;    /* start of the allocation, owned by this struct */
    IDWTELEM *idwt_tmp;

    /* block length */
    uint8_t xblen;
    uint8_t yblen;
    /* block separation (block n+1 starts after this many pixels in block n) */
    uint8_t xbsep;
    uint8_t ybsep;
    /* amount of overspill on each edge (half of the overlap between blocks) */
    uint8_t xoffset;
    uint8_t yoffset;

    SubBand band[MAX_DWT_LEVELS][4];    /* indexed [level][orientation] */
} Plane;
 138
/**
 * Decoder private context: DSP function tables, bitstream reader,
 * per-sequence buffers and the picture pool.
 */
typedef struct DiracContext {
    AVCodecContext *avctx;
    DSPContext dsp;
    MpegvideoEncDSPContext mpvencdsp;
    DiracDSPContext diracdsp;
    GetBitContext gb;
    dirac_source_params source;
    int seen_sequence_header;   /* cleared again by dirac_decode_flush()         */
    int frame_number;           /* number of the next frame to display       */
    Plane plane[3];
    int chroma_x_shift;
    int chroma_y_shift;

    int zero_res;               /* zero residue flag                         */
    int is_arith;               /* whether coeffs use arith or golomb coding */
    int low_delay;              /* use the low delay syntax                  */
    int globalmc_flag;          /* use global motion compensation            */
    int num_refs;               /* number of reference pictures              */

    /* wavelet decoding */
    unsigned wavelet_depth;     /* depth of the IDWT                         */
    unsigned wavelet_idx;

    /**
     * schroedinger older than 1.0.8 doesn't store
     * quant delta if only one codebook exists in a band
     */
    unsigned old_delta_quant;
    unsigned codeblock_mode;    /* non-zero: codeblocks may carry a quantiser delta */

    /* number of codeblocks per subband, horizontally and vertically;
     * indexed by level + (orientation != subband_ll) */
    struct {
        unsigned width;
        unsigned height;
    } codeblock[MAX_DWT_LEVELS+1];

    struct {
        unsigned num_x;         /* number of horizontal slices               */
        unsigned num_y;         /* number of vertical slices                 */
        AVRational bytes;       /* average bytes per slice                   */
        uint8_t quant[MAX_DWT_LEVELS][4]; /* [DIRAC_STD] E.1 */
    } lowdelay;

    struct {
        int pan_tilt[2];        /* pan/tilt vector                           */
        int zrs[2][2];          /* zoom/rotate/shear matrix                  */
        int perspective[2];     /* perspective vector                        */
        unsigned zrs_exp;
        unsigned perspective_exp;
    } globalmc[2];

    /* motion compensation */
    /* NOTE(review): the original comment on mv_precision cites REFS_WT_PRECISION,
     * the same name as weight_log2denom below; presumably the motion vector
     * precision field of the spec was meant -- confirm against [DIRAC_STD] */
    uint8_t mv_precision;       /* [DIRAC_STD] REFS_WT_PRECISION             */
    int16_t weight[2];          /* [DIRAC_STD] REF1_WT and REF2_WT           */
    unsigned weight_log2denom;  /* [DIRAC_STD] REFS_WT_PRECISION             */

    int blwidth;                /* number of blocks (horizontally)           */
    int blheight;               /* number of blocks (vertically)             */
    int sbwidth;                /* number of superblocks (horizontally)      */
    int sbheight;               /* number of superblocks (vertically)        */

    uint8_t *sbsplit;           /* allocated in alloc_sequence_buffers()     */
    DiracBlock *blmotion;       /* allocated in alloc_sequence_buffers()     */

    uint8_t *edge_emu_buffer[4];
    uint8_t *edge_emu_buffer_base;  /* allocated in alloc_buffers()          */

    uint16_t *mctmp;            /* buffer holding the MC data multiplied by OBMC weights */
    uint8_t *mcscratch;
    int buffer_stride;          /* stride the MC buffers above were sized for, 0 if unallocated */

    DECLARE_ALIGNED(16, uint8_t, obmc_weight)[3][MAX_BLOCKSIZE*MAX_BLOCKSIZE];

    void (*put_pixels_tab[4])(uint8_t *dst, const uint8_t *src[5], int stride, int h);
    void (*avg_pixels_tab[4])(uint8_t *dst, const uint8_t *src[5], int stride, int h);
    void (*add_obmc)(uint16_t *dst, const uint8_t *src, int stride, const uint8_t *obmc_weight, int yblen);
    dirac_weight_func weight_func;
    dirac_biweight_func biweight_func;

    DiracFrame *current_picture;
    DiracFrame *ref_pics[2];

    /* NULL-terminated lists of pointers into all_frames[] */
    DiracFrame *ref_frames[MAX_REFERENCE_FRAMES+1];
    DiracFrame *delay_frames[MAX_DELAY+1];
    DiracFrame all_frames[MAX_FRAMES];
} DiracContext;
 224
/**
 * Dirac Specification ->
 * Parse code values. 9.6.1 Table 9.1
 *
 * Only the non-picture parse codes are listed here.
 */
enum dirac_parse_code {
    pc_seq_header         = 0x00,
    pc_eos                = 0x10,
    pc_aux_data           = 0x20,
    pc_padding            = 0x30,
};
 235
/**
 * Subband orientation, used as the second index of Plane.band[][].
 * init_planes() places bands with bit 0 set in the right half and bands
 * with a value > 1 in the lower half of the interleaved coefficient grid.
 */
enum dirac_subband {
    subband_ll = 0,
    subband_hl = 1,
    subband_lh = 2,
    subband_hh = 3
};
 242
/*
 * Default quantisation matrices: 7 rows of 4x4 values.
 * NOTE(review): presumably indexed [wavelet_idx][level][orientation] and
 * used to fill lowdelay.quant[][] -- the consumer is outside this part of
 * the file, confirm there. Only 4 levels are provided although
 * MAX_DWT_LEVELS is 5.
 */
static const uint8_t default_qmat[][4][4] = {
    { { 5,  3,  3,  0}, { 0,  4,  4,  1}, { 0,  5,  5,  2}, { 0,  6,  6,  3} },
    { { 4,  2,  2,  0}, { 0,  4,  4,  2}, { 0,  5,  5,  3}, { 0,  7,  7,  5} },
    { { 5,  3,  3,  0}, { 0,  4,  4,  1}, { 0,  5,  5,  2}, { 0,  6,  6,  3} },
    { { 8,  4,  4,  0}, { 0,  4,  4,  0}, { 0,  4,  4,  0}, { 0,  4,  4,  0} },
    { { 8,  4,  4,  0}, { 0,  4,  4,  0}, { 0,  4,  4,  0}, { 0,  4,  4,  0} },
    { { 0,  4,  4,  8}, { 0,  8,  8, 12}, { 0, 13, 13, 17}, { 0, 17, 17, 21} },
    { { 3,  1,  1,  0}, { 0,  4,  4,  2}, { 0,  6,  6,  5}, { 0,  9,  9,  7} },
};
 252
 253 static const int qscale_tab[MAX_QUANT+1] = {
 254     4,     5,     6,     7,     8,    10,    11,    13,
 255     16,    19,    23,    27,    32,    38,    45,    54,
 256     64,    76,    91,   108,   128,   152,   181,   215,
 257     256,   304,   362,   431,   512,   609,   724,   861,
 258     1024,  1218,  1448,  1722,  2048,  2435,  2896,  3444,
 259     4096,  4871,  5793,  6889,  8192,  9742, 11585, 13777,
 260     16384, 19484, 23170, 27554, 32768, 38968, 46341, 55109,
 261     65536, 77936
 262 };
 263
 264 static const int qoffset_intra_tab[MAX_QUANT+1] = {
 265     1,     2,     3,     4,     4,     5,     6,     7,
 266     8,    10,    12,    14,    16,    19,    23,    27,
 267     32,    38,    46,    54,    64,    76,    91,   108,
 268     128,   152,   181,   216,   256,   305,   362,   431,
 269     512,   609,   724,   861,  1024,  1218,  1448,  1722,
 270     2048,  2436,  2897,  3445,  4096,  4871,  5793,  6889,
 271     8192,  9742, 11585, 13777, 16384, 19484, 23171, 27555,
 272     32768, 38968
 273 };
 274
 275 static const int qoffset_inter_tab[MAX_QUANT+1] = {
 276     1,     2,     2,     3,     3,     4,     4,     5,
 277     6,     7,     9,    10,    12,    14,    17,    20,
 278     24,    29,    34,    41,    48,    57,    68,    81,
 279     96,   114,   136,   162,   192,   228,   272,   323,
 280     384,   457,   543,   646,   768,   913,  1086,  1292,
 281     1536,  1827,  2172,  2583,  3072,  3653,  4344,  5166,
 282     6144,  7307,  8689, 10333, 12288, 14613, 17378, 20666,
 283     24576, 29226
 284 };
 285
 286 /* magic number division by 3 from schroedinger */
 287 static inline int divide3(int x)
 288 {
 289     return ((x+1)*21845 + 10922) >> 16;
 290 }
 291
 292 static DiracFrame *remove_frame(DiracFrame *framelist[], int picnum)
 293 {
 294     DiracFrame *remove_pic = NULL;
 295     int i, remove_idx = -1;
 296
 297     for (i = 0; framelist[i]; i++)
 298         if (framelist[i]->avframe->display_picture_number == picnum) {
 299             remove_pic = framelist[i];
 300             remove_idx = i;
 301         }
 302
 303     if (remove_pic)
 304         for (i = remove_idx; framelist[i]; i++)
 305             framelist[i] = framelist[i+1];
 306
 307     return remove_pic;
 308 }
 309
 310 static int add_frame(DiracFrame *framelist[], int maxframes, DiracFrame *frame)
 311 {
 312     int i;
 313     for (i = 0; i < maxframes; i++)
 314         if (!framelist[i]) {
 315             framelist[i] = frame;
 316             return 0;
 317         }
 318     return -1;
 319 }
 320
 321 static int alloc_sequence_buffers(DiracContext *s)
 322 {
 323     int sbwidth  = DIVRNDUP(s->source.width,  4);
 324     int sbheight = DIVRNDUP(s->source.height, 4);
 325     int i, w, h, top_padding;
 326
 327     /* todo: think more about this / use or set Plane here */
 328     for (i = 0; i < 3; i++) {
 329         int max_xblen = MAX_BLOCKSIZE >> (i ? s->chroma_x_shift : 0);
 330         int max_yblen = MAX_BLOCKSIZE >> (i ? s->chroma_y_shift : 0);
 331         w = s->source.width  >> (i ? s->chroma_x_shift : 0);
 332         h = s->source.height >> (i ? s->chroma_y_shift : 0);
 333
 334         /* we allocate the max we support here since num decompositions can
 335          * change from frame to frame. Stride is aligned to 16 for SIMD, and
 336          * 1<<MAX_DWT_LEVELS top padding to avoid if(y>0) in arith decoding
 337          * MAX_BLOCKSIZE padding for MC: blocks can spill up to half of that
 338          * on each side */
 339         top_padding = FFMAX(1<<MAX_DWT_LEVELS, max_yblen/2);
 340         w = FFALIGN(CALC_PADDING(w, MAX_DWT_LEVELS), 8); /* FIXME: Should this be 16 for SSE??? */
 341         h = top_padding + CALC_PADDING(h, MAX_DWT_LEVELS) + max_yblen/2;
 342
 343         s->plane[i].idwt_buf_base = av_mallocz_array((w+max_xblen), h * sizeof(IDWTELEM));
 344         s->plane[i].idwt_tmp      = av_malloc_array((w+16), sizeof(IDWTELEM));
 345         s->plane[i].idwt_buf      = s->plane[i].idwt_buf_base + top_padding*w;
 346         if (!s->plane[i].idwt_buf_base || !s->plane[i].idwt_tmp)
 347             return AVERROR(ENOMEM);
 348     }
 349
 350     /* fixme: allocate using real stride here */
 351     s->sbsplit  = av_malloc_array(sbwidth, sbheight);
 352     s->blmotion = av_malloc_array(sbwidth, sbheight * 16 * sizeof(*s->blmotion));
 353
 354     if (!s->sbsplit || !s->blmotion)
 355         return AVERROR(ENOMEM);
 356     return 0;
 357 }
 358
 359 static int alloc_buffers(DiracContext *s, int stride)
 360 {
 361     int w = s->source.width;
 362     int h = s->source.height;
 363
 364     av_assert0(stride >= w);
 365     stride += 64;
 366
 367     if (s->buffer_stride >= stride)
 368         return 0;
 369     s->buffer_stride = 0;
 370
 371     av_freep(&s->edge_emu_buffer_base);
 372     memset(s->edge_emu_buffer, 0, sizeof(s->edge_emu_buffer));
 373     av_freep(&s->mctmp);
 374     av_freep(&s->mcscratch);
 375
 376     s->edge_emu_buffer_base = av_malloc_array(stride, MAX_BLOCKSIZE);
 377
 378     s->mctmp     = av_malloc_array((stride+MAX_BLOCKSIZE), (h+MAX_BLOCKSIZE) * sizeof(*s->mctmp));
 379     s->mcscratch = av_malloc_array(stride, MAX_BLOCKSIZE);
 380
 381     if (!s->edge_emu_buffer_base || !s->mctmp || !s->mcscratch)
 382         return AVERROR(ENOMEM);
 383
 384     s->buffer_stride = stride;
 385     return 0;
 386 }
 387
 388 static void free_sequence_buffers(DiracContext *s)
 389 {
 390     int i, j, k;
 391
 392     for (i = 0; i < MAX_FRAMES; i++) {
 393         if (s->all_frames[i].avframe->data[0]) {
 394             av_frame_unref(s->all_frames[i].avframe);
 395             memset(s->all_frames[i].interpolated, 0, sizeof(s->all_frames[i].interpolated));
 396         }
 397
 398         for (j = 0; j < 3; j++)
 399             for (k = 1; k < 4; k++)
 400                 av_freep(&s->all_frames[i].hpel_base[j][k]);
 401     }
 402
 403     memset(s->ref_frames, 0, sizeof(s->ref_frames));
 404     memset(s->delay_frames, 0, sizeof(s->delay_frames));
 405
 406     for (i = 0; i < 3; i++) {
 407         av_freep(&s->plane[i].idwt_buf_base);
 408         av_freep(&s->plane[i].idwt_tmp);
 409     }
 410
 411     s->buffer_stride = 0;
 412     av_freep(&s->sbsplit);
 413     av_freep(&s->blmotion);
 414     av_freep(&s->edge_emu_buffer_base);
 415
 416     av_freep(&s->mctmp);
 417     av_freep(&s->mcscratch);
 418 }
 419
 420 static av_cold int dirac_decode_init(AVCodecContext *avctx)
 421 {
 422     DiracContext *s = avctx->priv_data;
 423     int i;
 424
 425     s->avctx = avctx;
 426     s->frame_number = -1;
 427
 428     ff_dsputil_init(&s->dsp, avctx);
 429     ff_diracdsp_init(&s->diracdsp);
 430     ff_mpegvideoencdsp_init(&s->mpvencdsp, avctx);
 431
 432     for (i = 0; i < MAX_FRAMES; i++) {
 433         s->all_frames[i].avframe = av_frame_alloc();
 434         if (!s->all_frames[i].avframe) {
 435             while (i > 0)
 436                 av_frame_free(&s->all_frames[--i].avframe);
 437             return AVERROR(ENOMEM);
 438         }
 439     }
 440
 441     return 0;
 442 }
 443
 444 static void dirac_decode_flush(AVCodecContext *avctx)
 445 {
 446     DiracContext *s = avctx->priv_data;
 447     free_sequence_buffers(s);
 448     s->seen_sequence_header = 0;
 449     s->frame_number = -1;
 450 }
 451
 452 static av_cold int dirac_decode_end(AVCodecContext *avctx)
 453 {
 454     DiracContext *s = avctx->priv_data;
 455     int i;
 456
 457     dirac_decode_flush(avctx);
 458     for (i = 0; i < MAX_FRAMES; i++)
 459         av_frame_free(&s->all_frames[i].avframe);
 460
 461     return 0;
 462 }
 463
/* Select the sign context from the sign of a neighbouring coefficient:
 * CTX_SIGN_ZERO for 0, one above it for positive, one below for negative. */
#define SIGN_CTX(x) (CTX_SIGN_ZERO + ((x) > 0) - ((x) < 0))

/**
 * Decode one arithmetic-coded coefficient, dequantise it and store it.
 *
 * @param c       arithmetic decoder state
 * @param qfactor quantisation factor (in units of 1/4)
 * @param qoffset dequantisation offset
 * @param b       subband being decoded
 * @param buf     destination, i.e. the coefficient at (x, y) inside b
 * @param x       column of the coefficient within the subband
 * @param y       row of the coefficient within the subband
 *
 * The row above buf is read unconditionally (also for y == 0), so the
 * buffer must have readable rows above the band.
 */
static inline void coeff_unpack_arith(DiracArith *c, int qfactor, int qoffset,
                                      SubBand *b, IDWTELEM *buf, int x, int y)
{
    int coeff, sign;
    int sign_pred = 0;
    int pred_ctx = CTX_ZPZN_F1;

    /* Check if the parent subband has a 0 in the corresponding position */
    if (b->parent)
        pred_ctx += !!b->parent->ibuf[b->parent->stride * (y>>1) + (x>>1)] << 1;

    /* the sign is predicted from the coefficient above for HL bands ... */
    if (b->orientation == subband_hl)
        sign_pred = buf[-b->stride];

    /* Determine if the pixel has only zeros in its neighbourhood */
    if (x) {
        pred_ctx += !(buf[-1] | buf[-b->stride] | buf[-1-b->stride]);
        /* ... and from the coefficient to the left for LH bands */
        if (b->orientation == subband_lh)
            sign_pred = buf[-1];
    } else {
        /* first column: only the coefficient above is available */
        pred_ctx += !buf[-b->stride];
    }

    coeff = dirac_get_arith_uint(c, pred_ctx, CTX_COEFF_DATA);
    if (coeff) {
        /* dequantise the magnitude, rounding the 2 fractional bits */
        coeff = (coeff * qfactor + qoffset + 2) >> 2;
        sign  = dirac_get_arith_bit(c, SIGN_CTX(sign_pred));
        /* negate coeff when sign is 1, leave it unchanged when sign is 0 */
        coeff = (coeff ^ -sign) + sign;
    }
    *buf = coeff;
}
 497
 498 static inline int coeff_unpack_golomb(GetBitContext *gb, int qfactor, int qoffset)
 499 {
 500     int sign, coeff;
 501
 502     coeff = svq3_get_ue_golomb(gb);
 503     if (coeff) {
 504         coeff = (coeff * qfactor + qoffset + 2) >> 2;
 505         sign  = get_bits1(gb);
 506         coeff = (coeff ^ -sign) + sign;
 507     }
 508     return coeff;
 509 }
 510
 511 /**
 512  * Decode the coeffs in the rectangle defined by left, right, top, bottom
 513  * [DIRAC_STD] 13.4.3.2 Codeblock unpacking loop. codeblock()
 514  */
 515 static inline void codeblock(DiracContext *s, SubBand *b,
 516                              GetBitContext *gb, DiracArith *c,
 517                              int left, int right, int top, int bottom,
 518                              int blockcnt_one, int is_arith)
 519 {
 520     int x, y, zero_block;
 521     int qoffset, qfactor;
 522     IDWTELEM *buf;
 523
 524     /* check for any coded coefficients in this codeblock */
 525     if (!blockcnt_one) {
 526         if (is_arith)
 527             zero_block = dirac_get_arith_bit(c, CTX_ZERO_BLOCK);
 528         else
 529             zero_block = get_bits1(gb);
 530
 531         if (zero_block)
 532             return;
 533     }
 534
 535     if (s->codeblock_mode && !(s->old_delta_quant && blockcnt_one)) {
 536         int quant = b->quant;
 537         if (is_arith)
 538             quant += dirac_get_arith_int(c, CTX_DELTA_Q_F, CTX_DELTA_Q_DATA);
 539         else
 540             quant += dirac_get_se_golomb(gb);
 541         if (quant < 0) {
 542             av_log(s->avctx, AV_LOG_ERROR, "Invalid quant\n");
 543             return;
 544         }
 545         b->quant = quant;
 546     }
 547
 548     b->quant = FFMIN(b->quant, MAX_QUANT);
 549
 550     qfactor = qscale_tab[b->quant];
 551     /* TODO: context pointer? */
 552     if (!s->num_refs)
 553         qoffset = qoffset_intra_tab[b->quant];
 554     else
 555         qoffset = qoffset_inter_tab[b->quant];
 556
 557     buf = b->ibuf + top * b->stride;
 558     for (y = top; y < bottom; y++) {
 559         for (x = left; x < right; x++) {
 560             /* [DIRAC_STD] 13.4.4 Subband coefficients. coeff_unpack() */
 561             if (is_arith)
 562                 coeff_unpack_arith(c, qfactor, qoffset, b, buf+x, x, y);
 563             else
 564                 buf[x] = coeff_unpack_golomb(gb, qfactor, qoffset);
 565         }
 566         buf += b->stride;
 567     }
 568 }
 569
 570 /**
 571  * Dirac Specification ->
 572  * 13.3 intra_dc_prediction(band)
 573  */
 574 static inline void intra_dc_prediction(SubBand *b)
 575 {
 576     IDWTELEM *buf = b->ibuf;
 577     int x, y;
 578
 579     for (x = 1; x < b->width; x++)
 580         buf[x] += buf[x-1];
 581     buf += b->stride;
 582
 583     for (y = 1; y < b->height; y++) {
 584         buf[0] += buf[-b->stride];
 585
 586         for (x = 1; x < b->width; x++) {
 587             int pred = buf[x - 1] + buf[x - b->stride] + buf[x - b->stride-1];
 588             buf[x]  += divide3(pred);
 589         }
 590         buf += b->stride;
 591     }
 592 }
 593
 594 /**
 595  * Dirac Specification ->
 596  * 13.4.2 Non-skipped subbands.  subband_coeffs()
 597  */
 598 static av_always_inline void decode_subband_internal(DiracContext *s, SubBand *b, int is_arith)
 599 {
 600     int cb_x, cb_y, left, right, top, bottom;
 601     DiracArith c;
 602     GetBitContext gb;
 603     int cb_width  = s->codeblock[b->level + (b->orientation != subband_ll)].width;
 604     int cb_height = s->codeblock[b->level + (b->orientation != subband_ll)].height;
 605     int blockcnt_one = (cb_width + cb_height) == 2;
 606
 607     if (!b->length)
 608         return;
 609
 610     init_get_bits8(&gb, b->coeff_data, b->length);
 611
 612     if (is_arith)
 613         ff_dirac_init_arith_decoder(&c, &gb, b->length);
 614
 615     top = 0;
 616     for (cb_y = 0; cb_y < cb_height; cb_y++) {
 617         bottom = (b->height * (cb_y+1)) / cb_height;
 618         left = 0;
 619         for (cb_x = 0; cb_x < cb_width; cb_x++) {
 620             right = (b->width * (cb_x+1)) / cb_width;
 621             codeblock(s, b, &gb, &c, left, right, top, bottom, blockcnt_one, is_arith);
 622             left = right;
 623         }
 624         top = bottom;
 625     }
 626
 627     if (b->orientation == subband_ll && s->num_refs == 0)
 628         intra_dc_prediction(b);
 629 }
 630
 631 static int decode_subband_arith(AVCodecContext *avctx, void *b)
 632 {
 633     DiracContext *s = avctx->priv_data;
 634     decode_subband_internal(s, b, 1);
 635     return 0;
 636 }
 637
 638 static int decode_subband_golomb(AVCodecContext *avctx, void *arg)
 639 {
 640     DiracContext *s = avctx->priv_data;
 641     SubBand **b     = arg;
 642     decode_subband_internal(s, *b, 0);
 643     return 0;
 644 }
 645
 646 /**
 647  * Dirac Specification ->
 648  * [DIRAC_STD] 13.4.1 core_transform_data()
 649  */
 650 static void decode_component(DiracContext *s, int comp)
 651 {
 652     AVCodecContext *avctx = s->avctx;
 653     SubBand *bands[3*MAX_DWT_LEVELS+1];
 654     enum dirac_subband orientation;
 655     int level, num_bands = 0;
 656
 657     /* Unpack all subbands at all levels. */
 658     for (level = 0; level < s->wavelet_depth; level++) {
 659         for (orientation = !!level; orientation < 4; orientation++) {
 660             SubBand *b = &s->plane[comp].band[level][orientation];
 661             bands[num_bands++] = b;
 662
 663             align_get_bits(&s->gb);
 664             /* [DIRAC_STD] 13.4.2 subband() */
 665             b->length = svq3_get_ue_golomb(&s->gb);
 666             if (b->length) {
 667                 b->quant = svq3_get_ue_golomb(&s->gb);
 668                 align_get_bits(&s->gb);
 669                 b->coeff_data = s->gb.buffer + get_bits_count(&s->gb)/8;
 670                 b->length = FFMIN(b->length, FFMAX(get_bits_left(&s->gb)/8, 0));
 671                 skip_bits_long(&s->gb, b->length*8);
 672             }
 673         }
 674         /* arithmetic coding has inter-level dependencies, so we can only execute one level at a time */
 675         if (s->is_arith)
 676             avctx->execute(avctx, decode_subband_arith, &s->plane[comp].band[level][!!level],
 677                            NULL, 4-!!level, sizeof(SubBand));
 678     }
 679     /* golomb coding has no inter-level dependencies, so we can execute all subbands in parallel */
 680     if (!s->is_arith)
 681         avctx->execute(avctx, decode_subband_golomb, bands, NULL, num_bands, sizeof(SubBand*));
 682 }
 683
 684 /* [DIRAC_STD] 13.5.5.2 Luma slice subband data. luma_slice_band(level,orient,sx,sy) --> if b2 == NULL */
 685 /* [DIRAC_STD] 13.5.5.3 Chroma slice subband data. chroma_slice_band(level,orient,sx,sy) --> if b2 != NULL */
 686 static void lowdelay_subband(DiracContext *s, GetBitContext *gb, int quant,
 687                              int slice_x, int slice_y, int bits_end,
 688                              SubBand *b1, SubBand *b2)
 689 {
 690     int left   = b1->width  * slice_x    / s->lowdelay.num_x;
 691     int right  = b1->width  *(slice_x+1) / s->lowdelay.num_x;
 692     int top    = b1->height * slice_y    / s->lowdelay.num_y;
 693     int bottom = b1->height *(slice_y+1) / s->lowdelay.num_y;
 694
 695     int qfactor = qscale_tab[FFMIN(quant, MAX_QUANT)];
 696     int qoffset = qoffset_intra_tab[FFMIN(quant, MAX_QUANT)];
 697
 698     IDWTELEM *buf1 =      b1->ibuf + top * b1->stride;
 699     IDWTELEM *buf2 = b2 ? b2->ibuf + top * b2->stride : NULL;
 700     int x, y;
 701     /* we have to constantly check for overread since the spec explicitly
 702        requires this, with the meaning that all remaining coeffs are set to 0 */
 703     if (get_bits_count(gb) >= bits_end)
 704         return;
 705
 706     for (y = top; y < bottom; y++) {
 707         for (x = left; x < right; x++) {
 708             buf1[x] = coeff_unpack_golomb(gb, qfactor, qoffset);
 709             if (get_bits_count(gb) >= bits_end)
 710                 return;
 711             if (buf2) {
 712                 buf2[x] = coeff_unpack_golomb(gb, qfactor, qoffset);
 713                 if (get_bits_count(gb) >= bits_end)
 714                     return;
 715             }
 716         }
 717         buf1 += b1->stride;
 718         if (buf2)
 719             buf2 += b2->stride;
 720     }
 721 }
 722
/* Work item for decode_lowdelay_slice(); one per slice, filled in by
 * decode_lowdelay(). */
struct lowdelay_slice {
    GetBitContext gb;   /* reader positioned at the first byte of the slice */
    int slice_x;        /* horizontal slice index */
    int slice_y;        /* vertical slice index */
    int bytes;          /* size of the slice in bytes */
};
 729
 730
 731 /**
 732  * Dirac Specification ->
 733  * 13.5.2 Slices. slice(sx,sy)
 734  */
 735 static int decode_lowdelay_slice(AVCodecContext *avctx, void *arg)
 736 {
 737     DiracContext *s = avctx->priv_data;
 738     struct lowdelay_slice *slice = arg;
 739     GetBitContext *gb = &slice->gb;
 740     enum dirac_subband orientation;
 741     int level, quant, chroma_bits, chroma_end;
 742
 743     int quant_base  = get_bits(gb, 7); /*[DIRAC_STD] qindex */
 744     int length_bits = av_log2(8 * slice->bytes)+1;
 745     int luma_bits   = get_bits_long(gb, length_bits);
 746     int luma_end    = get_bits_count(gb) + FFMIN(luma_bits, get_bits_left(gb));
 747
 748     /* [DIRAC_STD] 13.5.5.2 luma_slice_band */
 749     for (level = 0; level < s->wavelet_depth; level++)
 750         for (orientation = !!level; orientation < 4; orientation++) {
 751             quant = FFMAX(quant_base - s->lowdelay.quant[level][orientation], 0);
 752             lowdelay_subband(s, gb, quant, slice->slice_x, slice->slice_y, luma_end,
 753                              &s->plane[0].band[level][orientation], NULL);
 754         }
 755
 756     /* consume any unused bits from luma */
 757     skip_bits_long(gb, get_bits_count(gb) - luma_end);
 758
 759     chroma_bits = 8*slice->bytes - 7 - length_bits - luma_bits;
 760     chroma_end  = get_bits_count(gb) + FFMIN(chroma_bits, get_bits_left(gb));
 761     /* [DIRAC_STD] 13.5.5.3 chroma_slice_band */
 762     for (level = 0; level < s->wavelet_depth; level++)
 763         for (orientation = !!level; orientation < 4; orientation++) {
 764             quant = FFMAX(quant_base - s->lowdelay.quant[level][orientation], 0);
 765             lowdelay_subband(s, gb, quant, slice->slice_x, slice->slice_y, chroma_end,
 766                              &s->plane[1].band[level][orientation],
 767                              &s->plane[2].band[level][orientation]);
 768         }
 769
 770     return 0;
 771 }
 772
 773 /**
 774  * Dirac Specification ->
 775  * 13.5.1 low_delay_transform_data()
 776  */
 777 static void decode_lowdelay(DiracContext *s)
 778 {
 779     AVCodecContext *avctx = s->avctx;
 780     int slice_x, slice_y, bytes, bufsize;
 781     const uint8_t *buf;
 782     struct lowdelay_slice *slices;
 783     int slice_num = 0;
 784
 785     slices = av_mallocz_array(s->lowdelay.num_x, s->lowdelay.num_y * sizeof(struct lowdelay_slice));
 786
 787     align_get_bits(&s->gb);
 788     /*[DIRAC_STD] 13.5.2 Slices. slice(sx,sy) */
 789     buf = s->gb.buffer + get_bits_count(&s->gb)/8;
 790     bufsize = get_bits_left(&s->gb);
 791
 792     for (slice_y = 0; bufsize > 0 && slice_y < s->lowdelay.num_y; slice_y++)
 793         for (slice_x = 0; bufsize > 0 && slice_x < s->lowdelay.num_x; slice_x++) {
 794             bytes = (slice_num+1) * s->lowdelay.bytes.num / s->lowdelay.bytes.den
 795                 - slice_num    * s->lowdelay.bytes.num / s->lowdelay.bytes.den;
 796
 797             slices[slice_num].bytes   = bytes;
 798             slices[slice_num].slice_x = slice_x;
 799             slices[slice_num].slice_y = slice_y;
 800             init_get_bits(&slices[slice_num].gb, buf, bufsize);
 801             slice_num++;
 802
 803             buf     += bytes;
 804             bufsize -= bytes*8;
 805         }
 806
 807     avctx->execute(avctx, decode_lowdelay_slice, slices, NULL, slice_num,
 808                    sizeof(struct lowdelay_slice)); /* [DIRAC_STD] 13.5.2 Slices */
 809     intra_dc_prediction(&s->plane[0].band[0][0]);  /* [DIRAC_STD] 13.3 intra_dc_prediction() */
 810     intra_dc_prediction(&s->plane[1].band[0][0]);  /* [DIRAC_STD] 13.3 intra_dc_prediction() */
 811     intra_dc_prediction(&s->plane[2].band[0][0]);  /* [DIRAC_STD] 13.3 intra_dc_prediction() */
 812     av_free(slices);
 813 }
 814
 815 static void init_planes(DiracContext *s)
 816 {
 817     int i, w, h, level, orientation;
 818
 819     for (i = 0; i < 3; i++) {
 820         Plane *p = &s->plane[i];
 821
 822         p->width       = s->source.width  >> (i ? s->chroma_x_shift : 0);
 823         p->height      = s->source.height >> (i ? s->chroma_y_shift : 0);
 824         p->idwt_width  = w = CALC_PADDING(p->width , s->wavelet_depth);
 825         p->idwt_height = h = CALC_PADDING(p->height, s->wavelet_depth);
 826         p->idwt_stride = FFALIGN(p->idwt_width, 8);
 827
 828         for (level = s->wavelet_depth-1; level >= 0; level--) {
 829             w = w>>1;
 830             h = h>>1;
 831             for (orientation = !!level; orientation < 4; orientation++) {
 832                 SubBand *b = &p->band[level][orientation];
 833
 834                 b->ibuf   = p->idwt_buf;
 835                 b->level  = level;
 836                 b->stride = p->idwt_stride << (s->wavelet_depth - level);
 837                 b->width  = w;
 838                 b->height = h;
 839                 b->orientation = orientation;
 840
 841                 if (orientation & 1)
 842                     b->ibuf += w;
 843                 if (orientation > 1)
 844                     b->ibuf += b->stride>>1;
 845
 846                 if (level)
 847                     b->parent = &p->band[level-1][orientation];
 848             }
 849         }
 850
 851         if (i > 0) {
 852             p->xblen = s->plane[0].xblen >> s->chroma_x_shift;
 853             p->yblen = s->plane[0].yblen >> s->chroma_y_shift;
 854             p->xbsep = s->plane[0].xbsep >> s->chroma_x_shift;
 855             p->ybsep = s->plane[0].ybsep >> s->chroma_y_shift;
 856         }
 857
 858         p->xoffset = (p->xblen - p->xbsep)/2;
 859         p->yoffset = (p->yblen - p->ybsep)/2;
 860     }
 861 }
 862
 863 /**
 864  * Unpack the motion compensation parameters
 865  * Dirac Specification ->
 866  * 11.2 Picture prediction data. picture_prediction()
 867  */
 868 static int dirac_unpack_prediction_parameters(DiracContext *s)
 869 {
 870     static const uint8_t default_blen[] = { 4, 12, 16, 24 };
 871     static const uint8_t default_bsep[] = { 4,  8, 12, 16 };
 872
 873     GetBitContext *gb = &s->gb;
 874     unsigned idx, ref;
 875
 876     align_get_bits(gb);
 877     /* [DIRAC_STD] 11.2.2 Block parameters. block_parameters() */
 878     /* Luma and Chroma are equal. 11.2.3 */
 879     idx = svq3_get_ue_golomb(gb); /* [DIRAC_STD] index */
 880
 881     if (idx > 4) {
 882         av_log(s->avctx, AV_LOG_ERROR, "Block prediction index too high\n");
 883         return -1;
 884     }
 885
 886     if (idx == 0) {
 887         s->plane[0].xblen = svq3_get_ue_golomb(gb);
 888         s->plane[0].yblen = svq3_get_ue_golomb(gb);
 889         s->plane[0].xbsep = svq3_get_ue_golomb(gb);
 890         s->plane[0].ybsep = svq3_get_ue_golomb(gb);
 891     } else {
 892         /*[DIRAC_STD] preset_block_params(index). Table 11.1 */
 893         s->plane[0].xblen = default_blen[idx-1];
 894         s->plane[0].yblen = default_blen[idx-1];
 895         s->plane[0].xbsep = default_bsep[idx-1];
 896         s->plane[0].ybsep = default_bsep[idx-1];
 897     }
 898     /*[DIRAC_STD] 11.2.4 motion_data_dimensions()
 899       Calculated in function dirac_unpack_block_motion_data */
 900
 901     if (!s->plane[0].xbsep || !s->plane[0].ybsep || s->plane[0].xbsep < s->plane[0].xblen/2 || s->plane[0].ybsep < s->plane[0].yblen/2) {
 902         av_log(s->avctx, AV_LOG_ERROR, "Block separation too small\n");
 903         return -1;
 904     }
 905     if (s->plane[0].xbsep > s->plane[0].xblen || s->plane[0].ybsep > s->plane[0].yblen) {
 906         av_log(s->avctx, AV_LOG_ERROR, "Block separation greater than size\n");
 907         return -1;
 908     }
 909     if (FFMAX(s->plane[0].xblen, s->plane[0].yblen) > MAX_BLOCKSIZE) {
 910         av_log(s->avctx, AV_LOG_ERROR, "Unsupported large block size\n");
 911         return -1;
 912     }
 913
 914     /*[DIRAC_STD] 11.2.5 Motion vector precision. motion_vector_precision()
 915       Read motion vector precision */
 916     s->mv_precision = svq3_get_ue_golomb(gb);
 917     if (s->mv_precision > 3) {
 918         av_log(s->avctx, AV_LOG_ERROR, "MV precision finer than eighth-pel\n");
 919         return -1;
 920     }
 921
 922     /*[DIRAC_STD] 11.2.6 Global motion. global_motion()
 923       Read the global motion compensation parameters */
 924     s->globalmc_flag = get_bits1(gb);
 925     if (s->globalmc_flag) {
 926         memset(s->globalmc, 0, sizeof(s->globalmc));
 927         /* [DIRAC_STD] pan_tilt(gparams) */
 928         for (ref = 0; ref < s->num_refs; ref++) {
 929             if (get_bits1(gb)) {
 930                 s->globalmc[ref].pan_tilt[0] = dirac_get_se_golomb(gb);
 931                 s->globalmc[ref].pan_tilt[1] = dirac_get_se_golomb(gb);
 932             }
 933             /* [DIRAC_STD] zoom_rotate_shear(gparams)
 934                zoom/rotation/shear parameters */
 935             if (get_bits1(gb)) {
 936                 s->globalmc[ref].zrs_exp   = svq3_get_ue_golomb(gb);
 937                 s->globalmc[ref].zrs[0][0] = dirac_get_se_golomb(gb);
 938                 s->globalmc[ref].zrs[0][1] = dirac_get_se_golomb(gb);
 939                 s->globalmc[ref].zrs[1][0] = dirac_get_se_golomb(gb);
 940                 s->globalmc[ref].zrs[1][1] = dirac_get_se_golomb(gb);
 941             } else {
 942                 s->globalmc[ref].zrs[0][0] = 1;
 943                 s->globalmc[ref].zrs[1][1] = 1;
 944             }
 945             /* [DIRAC_STD] perspective(gparams) */
 946             if (get_bits1(gb)) {
 947                 s->globalmc[ref].perspective_exp = svq3_get_ue_golomb(gb);
 948                 s->globalmc[ref].perspective[0]  = dirac_get_se_golomb(gb);
 949                 s->globalmc[ref].perspective[1]  = dirac_get_se_golomb(gb);
 950             }
 951         }
 952     }
 953
 954     /*[DIRAC_STD] 11.2.7 Picture prediction mode. prediction_mode()
 955       Picture prediction mode, not currently used. */
 956     if (svq3_get_ue_golomb(gb)) {
 957         av_log(s->avctx, AV_LOG_ERROR, "Unknown picture prediction mode\n");
 958         return -1;
 959     }
 960
 961     /* [DIRAC_STD] 11.2.8 Reference picture weight. reference_picture_weights()
 962        just data read, weight calculation will be done later on. */
 963     s->weight_log2denom = 1;
 964     s->weight[0]        = 1;
 965     s->weight[1]        = 1;
 966
 967     if (get_bits1(gb)) {
 968         s->weight_log2denom = svq3_get_ue_golomb(gb);
 969         s->weight[0] = dirac_get_se_golomb(gb);
 970         if (s->num_refs == 2)
 971             s->weight[1] = dirac_get_se_golomb(gb);
 972     }
 973     return 0;
 974 }
 975
 976 /**
 977  * Dirac Specification ->
 978  * 11.3 Wavelet transform data. wavelet_transform()
 979  */
 980 static int dirac_unpack_idwt_params(DiracContext *s)
 981 {
 982     GetBitContext *gb = &s->gb;
 983     int i, level;
 984     unsigned tmp;
 985
 986 #define CHECKEDREAD(dst, cond, errmsg) \
 987     tmp = svq3_get_ue_golomb(gb); \
 988     if (cond) { \
 989         av_log(s->avctx, AV_LOG_ERROR, errmsg); \
 990         return -1; \
 991     }\
 992     dst = tmp;
 993
 994     align_get_bits(gb);
 995
 996     s->zero_res = s->num_refs ? get_bits1(gb) : 0;
 997     if (s->zero_res)
 998         return 0;
 999
1000     /*[DIRAC_STD] 11.3.1 Transform parameters. transform_parameters() */
1001     CHECKEDREAD(s->wavelet_idx, tmp > 6, "wavelet_idx is too big\n")
1002
1003     CHECKEDREAD(s->wavelet_depth, tmp > MAX_DWT_LEVELS || tmp < 1, "invalid number of DWT decompositions\n")
1004
1005     if (!s->low_delay) {
1006         /* Codeblock parameters (core syntax only) */
1007         if (get_bits1(gb)) {
1008             for (i = 0; i <= s->wavelet_depth; i++) {
1009                 CHECKEDREAD(s->codeblock[i].width , tmp < 1, "codeblock width invalid\n")
1010                 CHECKEDREAD(s->codeblock[i].height, tmp < 1, "codeblock height invalid\n")
1011             }
1012
1013             CHECKEDREAD(s->codeblock_mode, tmp > 1, "unknown codeblock mode\n")
1014         } else
1015             for (i = 0; i <= s->wavelet_depth; i++)
1016                 s->codeblock[i].width = s->codeblock[i].height = 1;
1017     } else {
1018         /* Slice parameters + quantization matrix*/
1019         /*[DIRAC_STD] 11.3.4 Slice coding Parameters (low delay syntax only). slice_parameters() */
1020         s->lowdelay.num_x     = svq3_get_ue_golomb(gb);
1021         s->lowdelay.num_y     = svq3_get_ue_golomb(gb);
1022         s->lowdelay.bytes.num = svq3_get_ue_golomb(gb);
1023         s->lowdelay.bytes.den = svq3_get_ue_golomb(gb);
1024
1025         if (s->lowdelay.bytes.den <= 0) {
1026             av_log(s->avctx,AV_LOG_ERROR,"Invalid lowdelay.bytes.den\n");
1027             return AVERROR_INVALIDDATA;
1028         }
1029
1030         /* [DIRAC_STD] 11.3.5 Quantisation matrices (low-delay syntax). quant_matrix() */
1031         if (get_bits1(gb)) {
1032             av_log(s->avctx,AV_LOG_DEBUG,"Low Delay: Has Custom Quantization Matrix!\n");
1033             /* custom quantization matrix */
1034             s->lowdelay.quant[0][0] = svq3_get_ue_golomb(gb);
1035             for (level = 0; level < s->wavelet_depth; level++) {
1036                 s->lowdelay.quant[level][1] = svq3_get_ue_golomb(gb);
1037                 s->lowdelay.quant[level][2] = svq3_get_ue_golomb(gb);
1038                 s->lowdelay.quant[level][3] = svq3_get_ue_golomb(gb);
1039             }
1040         } else {
1041             if (s->wavelet_depth > 4) {
1042                 av_log(s->avctx,AV_LOG_ERROR,"Mandatory custom low delay matrix missing for depth %d\n", s->wavelet_depth);
1043                 return AVERROR_INVALIDDATA;
1044             }
1045             /* default quantization matrix */
1046             for (level = 0; level < s->wavelet_depth; level++)
1047                 for (i = 0; i < 4; i++) {
1048                     s->lowdelay.quant[level][i] = default_qmat[s->wavelet_idx][level][i];
1049                     /* haar with no shift differs for different depths */
1050                     if (s->wavelet_idx == 3)
1051                         s->lowdelay.quant[level][i] += 4*(s->wavelet_depth-1 - level);
1052                 }
1053         }
1054     }
1055     return 0;
1056 }
1057
/**
 * Predict the split mode of a superblock from its already decoded
 * neighbours.
 *
 * @param sbsplit pointer to the entry being predicted
 * @param stride  number of entries per row
 * @param x       column of the entry (0 means there is no left neighbour)
 * @param y       row of the entry (0 means there is no row above)
 * @return 0 for the first entry, the single available neighbour on the first
 *         row or column, otherwise a table lookup on the sum of the left,
 *         top and top-left neighbours
 */
static inline int pred_sbsplit(uint8_t *sbsplit, int stride, int x, int y)
{
    /* indexed by the sum of three neighbours, each in 0..2 */
    static const uint8_t avgsplit[7] = { 0, 0, 1, 1, 1, 2, 2 };
    int left, top, topleft;

    if (x == 0 && y == 0)
        return 0;
    if (y == 0)
        return sbsplit[-1];
    if (x == 0)
        return sbsplit[-stride];

    left    = sbsplit[-1];
    top     = sbsplit[-stride];
    topleft = sbsplit[-stride - 1];

    return avgsplit[left + top + topleft];
}
1071
1072 static inline int pred_block_mode(DiracBlock *block, int stride, int x, int y, int refmask)
1073 {
1074     int pred;
1075
1076     if (!(x|y))
1077         return 0;
1078     else if (!y)
1079         return block[-1].ref & refmask;
1080     else if (!x)
1081         return block[-stride].ref & refmask;
1082
1083     /* return the majority */
1084     pred = (block[-1].ref & refmask) + (block[-stride].ref & refmask) + (block[-stride-1].ref & refmask);
1085     return (pred >> 1) & refmask;
1086 }
1087
1088 static inline void pred_block_dc(DiracBlock *block, int stride, int x, int y)
1089 {
1090     int i, n = 0;
1091
1092     memset(block->u.dc, 0, sizeof(block->u.dc));
1093
1094     if (x && !(block[-1].ref & 3)) {
1095         for (i = 0; i < 3; i++)
1096             block->u.dc[i] += block[-1].u.dc[i];
1097         n++;
1098     }
1099
1100     if (y && !(block[-stride].ref & 3)) {
1101         for (i = 0; i < 3; i++)
1102             block->u.dc[i] += block[-stride].u.dc[i];
1103         n++;
1104     }
1105
1106     if (x && y && !(block[-1-stride].ref & 3)) {
1107         for (i = 0; i < 3; i++)
1108             block->u.dc[i] += block[-1-stride].u.dc[i];
1109         n++;
1110     }
1111
1112     if (n == 2) {
1113         for (i = 0; i < 3; i++)
1114             block->u.dc[i] = (block->u.dc[i]+1)>>1;
1115     } else if (n == 3) {
1116         for (i = 0; i < 3; i++)
1117             block->u.dc[i] = divide3(block->u.dc[i]);
1118     }
1119 }
1120
1121 static inline void pred_mv(DiracBlock *block, int stride, int x, int y, int ref)
1122 {
1123     int16_t *pred[3];
1124     int refmask = ref+1;
1125     int mask = refmask | DIRAC_REF_MASK_GLOBAL; /*  exclude gmc blocks */
1126     int n = 0;
1127
1128     if (x && (block[-1].ref & mask) == refmask)
1129         pred[n++] = block[-1].u.mv[ref];
1130
1131     if (y && (block[-stride].ref & mask) == refmask)
1132         pred[n++] = block[-stride].u.mv[ref];
1133
1134     if (x && y && (block[-stride-1].ref & mask) == refmask)
1135         pred[n++] = block[-stride-1].u.mv[ref];
1136
1137     switch (n) {
1138     case 0:
1139         block->u.mv[ref][0] = 0;
1140         block->u.mv[ref][1] = 0;
1141         break;
1142     case 1:
1143         block->u.mv[ref][0] = pred[0][0];
1144         block->u.mv[ref][1] = pred[0][1];
1145         break;
1146     case 2:
1147         block->u.mv[ref][0] = (pred[0][0] + pred[1][0] + 1) >> 1;
1148         block->u.mv[ref][1] = (pred[0][1] + pred[1][1] + 1) >> 1;
1149         break;
1150     case 3:
1151         block->u.mv[ref][0] = mid_pred(pred[0][0], pred[1][0], pred[2][0]);
1152         block->u.mv[ref][1] = mid_pred(pred[0][1], pred[1][1], pred[2][1]);
1153         break;
1154     }
1155 }
1156
1157 static void global_mv(DiracContext *s, DiracBlock *block, int x, int y, int ref)
1158 {
1159     int ez      = s->globalmc[ref].zrs_exp;
1160     int ep      = s->globalmc[ref].perspective_exp;
1161     int (*A)[2] = s->globalmc[ref].zrs;
1162     int *b      = s->globalmc[ref].pan_tilt;
1163     int *c      = s->globalmc[ref].perspective;
1164
1165     int m       = (1<<ep) - (c[0]*x + c[1]*y);
1166     int mx      = m * ((A[0][0] * x + A[0][1]*y) + (1<<ez) * b[0]);
1167     int my      = m * ((A[1][0] * x + A[1][1]*y) + (1<<ez) * b[1]);
1168
1169     block->u.mv[ref][0] = (mx + (1<<(ez+ep))) >> (ez+ep);
1170     block->u.mv[ref][1] = (my + (1<<(ez+ep))) >> (ez+ep);
1171 }
1172
1173 static void decode_block_params(DiracContext *s, DiracArith arith[8], DiracBlock *block,
1174                                 int stride, int x, int y)
1175 {
1176     int i;
1177
1178     block->ref  = pred_block_mode(block, stride, x, y, DIRAC_REF_MASK_REF1);
1179     block->ref ^= dirac_get_arith_bit(arith, CTX_PMODE_REF1);
1180
1181     if (s->num_refs == 2) {
1182         block->ref |= pred_block_mode(block, stride, x, y, DIRAC_REF_MASK_REF2);
1183         block->ref ^= dirac_get_arith_bit(arith, CTX_PMODE_REF2) << 1;
1184     }
1185
1186     if (!block->ref) {
1187         pred_block_dc(block, stride, x, y);
1188         for (i = 0; i < 3; i++)
1189             block->u.dc[i] += dirac_get_arith_int(arith+1+i, CTX_DC_F1, CTX_DC_DATA);
1190         return;
1191     }
1192
1193     if (s->globalmc_flag) {
1194         block->ref |= pred_block_mode(block, stride, x, y, DIRAC_REF_MASK_GLOBAL);
1195         block->ref ^= dirac_get_arith_bit(arith, CTX_GLOBAL_BLOCK) << 2;
1196     }
1197
1198     for (i = 0; i < s->num_refs; i++)
1199         if (block->ref & (i+1)) {
1200             if (block->ref & DIRAC_REF_MASK_GLOBAL) {
1201                 global_mv(s, block, x, y, i);
1202             } else {
1203                 pred_mv(block, stride, x, y, i);
1204                 block->u.mv[i][0] += dirac_get_arith_int(arith + 4 + 2 * i, CTX_MV_F1, CTX_MV_DATA);
1205                 block->u.mv[i][1] += dirac_get_arith_int(arith + 5 + 2 * i, CTX_MV_F1, CTX_MV_DATA);
1206             }
1207         }
1208 }
1209
1210 /**
1211  * Copies the current block to the other blocks covered by the current superblock split mode
1212  */
1213 static void propagate_block_data(DiracBlock *block, int stride, int size)
1214 {
1215     int x, y;
1216     DiracBlock *dst = block;
1217
1218     for (x = 1; x < size; x++)
1219         dst[x] = *block;
1220
1221     for (y = 1; y < size; y++) {
1222         dst += stride;
1223         for (x = 0; x < size; x++)
1224             dst[x] = *block;
1225     }
1226 }
1227
1228 /**
1229  * Dirac Specification ->
1230  * 12. Block motion data syntax
1231  */
1232 static int dirac_unpack_block_motion_data(DiracContext *s)
1233 {
1234     GetBitContext *gb = &s->gb;
1235     uint8_t *sbsplit = s->sbsplit;
1236     int i, x, y, q, p;
1237     DiracArith arith[8];
1238
1239     align_get_bits(gb);
1240
1241     /* [DIRAC_STD] 11.2.4 and 12.2.1 Number of blocks and superblocks */
1242     s->sbwidth  = DIVRNDUP(s->source.width,  4*s->plane[0].xbsep);
1243     s->sbheight = DIVRNDUP(s->source.height, 4*s->plane[0].ybsep);
1244     s->blwidth  = 4 * s->sbwidth;
1245     s->blheight = 4 * s->sbheight;
1246
1247     /* [DIRAC_STD] 12.3.1 Superblock splitting modes. superblock_split_modes()
1248        decode superblock split modes */
1249     ff_dirac_init_arith_decoder(arith, gb, svq3_get_ue_golomb(gb));     /* svq3_get_ue_golomb(gb) is the length */
1250     for (y = 0; y < s->sbheight; y++) {
1251         for (x = 0; x < s->sbwidth; x++) {
1252             unsigned int split  = dirac_get_arith_uint(arith, CTX_SB_F1, CTX_SB_DATA);
1253             if (split > 2)
1254                 return -1;
1255             sbsplit[x] = (split + pred_sbsplit(sbsplit+x, s->sbwidth, x, y)) % 3;
1256         }
1257         sbsplit += s->sbwidth;
1258     }
1259
1260     /* setup arith decoding */
1261     ff_dirac_init_arith_decoder(arith, gb, svq3_get_ue_golomb(gb));
1262     for (i = 0; i < s->num_refs; i++) {
1263         ff_dirac_init_arith_decoder(arith + 4 + 2 * i, gb, svq3_get_ue_golomb(gb));
1264         ff_dirac_init_arith_decoder(arith + 5 + 2 * i, gb, svq3_get_ue_golomb(gb));
1265     }
1266     for (i = 0; i < 3; i++)
1267         ff_dirac_init_arith_decoder(arith+1+i, gb, svq3_get_ue_golomb(gb));
1268
1269     for (y = 0; y < s->sbheight; y++)
1270         for (x = 0; x < s->sbwidth; x++) {
1271             int blkcnt = 1 << s->sbsplit[y * s->sbwidth + x];
1272             int step   = 4 >> s->sbsplit[y * s->sbwidth + x];
1273
1274             for (q = 0; q < blkcnt; q++)
1275                 for (p = 0; p < blkcnt; p++) {
1276                     int bx = 4 * x + p*step;
1277                     int by = 4 * y + q*step;
1278                     DiracBlock *block = &s->blmotion[by*s->blwidth + bx];
1279                     decode_block_params(s, arith, block, s->blwidth, bx, by);
1280                     propagate_block_data(block, s->blwidth, step);
1281                 }
1282         }
1283
1284     return 0;
1285 }
1286
/**
 * One-dimensional overlapped-block weight at position i of a block of
 * length blen whose overlap with each neighbour is 2*offset samples.
 *
 * @return 8 outside the two overlap regions, otherwise the roll-off value
 *         for the distance to the nearer block edge
 */
static int weight(int i, int blen, int offset)
{
#define ROLLOFF(i) offset == 1 ? ((i) ? 5 : 3) :        \
    (1 + (6*(i) + offset - 1) / (2*offset - 1))

    const int ramp = 2 * offset;
    const int last = blen - 1;

    /* flat middle part of the window */
    if (i >= ramp && i <= last - ramp)
        return 8;

    /* leading edge */
    if (i < ramp)
        return ROLLOFF(i);

    /* trailing edge: mirror the position */
    return ROLLOFF(last - i);
}
1298
1299 static void init_obmc_weight_row(Plane *p, uint8_t *obmc_weight, int stride,
1300                                  int left, int right, int wy)
1301 {
1302     int x;
1303     for (x = 0; left && x < p->xblen >> 1; x++)
1304         obmc_weight[x] = wy*8;
1305     for (; x < p->xblen >> right; x++)
1306         obmc_weight[x] = wy*weight(x, p->xblen, p->xoffset);
1307     for (; x < p->xblen; x++)
1308         obmc_weight[x] = wy*8;
1309     for (; x < stride; x++)
1310         obmc_weight[x] = 0;
1311 }
1312
1313 static void init_obmc_weight(Plane *p, uint8_t *obmc_weight, int stride,
1314                              int left, int right, int top, int bottom)
1315 {
1316     int y;
1317     for (y = 0; top && y < p->yblen >> 1; y++) {
1318         init_obmc_weight_row(p, obmc_weight, stride, left, right, 8);
1319         obmc_weight += stride;
1320     }
1321     for (; y < p->yblen >> bottom; y++) {
1322         int wy = weight(y, p->yblen, p->yoffset);
1323         init_obmc_weight_row(p, obmc_weight, stride, left, right, wy);
1324         obmc_weight += stride;
1325     }
1326     for (; y < p->yblen; y++) {
1327         init_obmc_weight_row(p, obmc_weight, stride, left, right, 8);
1328         obmc_weight += stride;
1329     }
1330 }
1331
1332 static void init_obmc_weights(DiracContext *s, Plane *p, int by)
1333 {
1334     int top = !by;
1335     int bottom = by == s->blheight-1;
1336
1337     /* don't bother re-initing for rows 2 to blheight-2, the weights don't change */
1338     if (top || bottom || by == 1) {
1339         init_obmc_weight(p, s->obmc_weight[0], MAX_BLOCKSIZE, 1, 0, top, bottom);
1340         init_obmc_weight(p, s->obmc_weight[1], MAX_BLOCKSIZE, 0, 0, top, bottom);
1341         init_obmc_weight(p, s->obmc_weight[2], MAX_BLOCKSIZE, 0, 1, top, bottom);
1342     }
1343 }
1344
/**
 * Bilinear weights for eighth-pel interpolation between four planes.
 * mc_subpel() indexes the table as [my & 3][mx & 3] and passes the selected
 * four weights on as src[4]. Every set of four weights sums to 16.
 */
static const uint8_t epel_weights[4][4][4] = {
    { /* first index 0 */
        { 16,  0,  0,  0 },
        { 12,  4,  0,  0 },
        {  8,  8,  0,  0 },
        {  4, 12,  0,  0 },
    },
    { /* first index 1 */
        { 12,  0,  4,  0 },
        {  9,  3,  3,  1 },
        {  6,  6,  2,  2 },
        {  3,  9,  1,  3 },
    },
    { /* first index 2 */
        {  8,  0,  8,  0 },
        {  6,  2,  6,  2 },
        {  4,  4,  4,  4 },
        {  2,  6,  2,  6 },
    },
    { /* first index 3 */
        {  4,  0, 12,  0 },
        {  3,  1,  9,  3 },
        {  2,  2,  6,  6 },
        {  1,  3,  3,  9 },
    },
};
1363
1364 /**
1365  * For block x,y, determine which of the hpel planes to do bilinear
1366  * interpolation from and set src[] to the location in each hpel plane
1367  * to MC from.
1368  *
1369  * @return the index of the put_dirac_pixels_tab function to use
1370  *  0 for 1 plane (fpel,hpel), 1 for 2 planes (qpel), 2 for 4 planes (qpel), and 3 for epel
1371  */
1372 static int mc_subpel(DiracContext *s, DiracBlock *block, const uint8_t *src[5],
1373                      int x, int y, int ref, int plane)
1374 {
1375     Plane *p = &s->plane[plane];
1376     uint8_t **ref_hpel = s->ref_pics[ref]->hpel[plane];
1377     int motion_x = block->u.mv[ref][0];
1378     int motion_y = block->u.mv[ref][1];
1379     int mx, my, i, epel, nplanes = 0;
1380
1381     if (plane) {
1382         motion_x >>= s->chroma_x_shift;
1383         motion_y >>= s->chroma_y_shift;
1384     }
1385
1386     mx         = motion_x & ~(-1U << s->mv_precision);
1387     my         = motion_y & ~(-1U << s->mv_precision);
1388     motion_x >>= s->mv_precision;
1389     motion_y >>= s->mv_precision;
1390     /* normalize subpel coordinates to epel */
1391     /* TODO: template this function? */
1392     mx      <<= 3 - s->mv_precision;
1393     my      <<= 3 - s->mv_precision;
1394
1395     x += motion_x;
1396     y += motion_y;
1397     epel = (mx|my)&1;
1398
1399     /* hpel position */
1400     if (!((mx|my)&3)) {
1401         nplanes = 1;
1402         src[0] = ref_hpel[(my>>1)+(mx>>2)] + y*p->stride + x;
1403     } else {
1404         /* qpel or epel */
1405         nplanes = 4;
1406         for (i = 0; i < 4; i++)
1407             src[i] = ref_hpel[i] + y*p->stride + x;
1408
1409         /* if we're interpolating in the right/bottom halves, adjust the planes as needed
1410            we increment x/y because the edge changes for half of the pixels */
1411         if (mx > 4) {
1412             src[0] += 1;
1413             src[2] += 1;
1414             x++;
1415         }
1416         if (my > 4) {
1417             src[0] += p->stride;
1418             src[1] += p->stride;
1419             y++;
1420         }
1421
1422         /* hpel planes are:
1423            [0]: F  [1]: H
1424            [2]: V  [3]: C */
1425         if (!epel) {
1426             /* check if we really only need 2 planes since either mx or my is
1427                a hpel position. (epel weights of 0 handle this there) */
1428             if (!(mx&3)) {
1429                 /* mx == 0: average [0] and [2]
1430                    mx == 4: average [1] and [3] */
1431                 src[!mx] = src[2 + !!mx];
1432                 nplanes = 2;
1433             } else if (!(my&3)) {
1434                 src[0] = src[(my>>1)  ];
1435                 src[1] = src[(my>>1)+1];
1436                 nplanes = 2;
1437             }
1438         } else {
1439             /* adjust the ordering if needed so the weights work */
1440             if (mx > 4) {
1441                 FFSWAP(const uint8_t *, src[0], src[1]);
1442                 FFSWAP(const uint8_t *, src[2], src[3]);
1443             }
1444             if (my > 4) {
1445                 FFSWAP(const uint8_t *, src[0], src[2]);
1446                 FFSWAP(const uint8_t *, src[1], src[3]);
1447             }
1448             src[4] = epel_weights[my&3][mx&3];
1449         }
1450     }
1451
1452     /* fixme: v/h _edge_pos */
1453     if (x + p->xblen > p->width +EDGE_WIDTH/2 ||
1454         y + p->yblen > p->height+EDGE_WIDTH/2 ||
1455         x < 0 || y < 0) {
1456         for (i = 0; i < nplanes; i++) {
1457             ff_emulated_edge_mc(s->edge_emu_buffer[i], src[i],
1458                                 p->stride, p->stride,
1459                                 p->xblen, p->yblen, x, y,
1460                                 p->width+EDGE_WIDTH/2, p->height+EDGE_WIDTH/2);
1461             src[i] = s->edge_emu_buffer[i];
1462         }
1463     }
1464     return (nplanes>>1) + epel;
1465 }
1466
1467 static void add_dc(uint16_t *dst, int dc, int stride,
1468                    uint8_t *obmc_weight, int xblen, int yblen)
1469 {
1470     int x, y;
1471     dc += 128;
1472
1473     for (y = 0; y < yblen; y++) {
1474         for (x = 0; x < xblen; x += 2) {
1475             dst[x  ] += dc * obmc_weight[x  ];
1476             dst[x+1] += dc * obmc_weight[x+1];
1477         }
1478         dst          += stride;
1479         obmc_weight  += MAX_BLOCKSIZE;
1480     }
1481 }
1482
1483 static void block_mc(DiracContext *s, DiracBlock *block,
1484                      uint16_t *mctmp, uint8_t *obmc_weight,
1485                      int plane, int dstx, int dsty)
1486 {
1487     Plane *p = &s->plane[plane];
1488     const uint8_t *src[5];
1489     int idx;
1490
1491     switch (block->ref&3) {
1492     case 0: /* DC */
1493         add_dc(mctmp, block->u.dc[plane], p->stride, obmc_weight, p->xblen, p->yblen);
1494         return;
1495     case 1:
1496     case 2:
1497         idx = mc_subpel(s, block, src, dstx, dsty, (block->ref&3)-1, plane);
1498         s->put_pixels_tab[idx](s->mcscratch, src, p->stride, p->yblen);
1499         if (s->weight_func)
1500             s->weight_func(s->mcscratch, p->stride, s->weight_log2denom,
1501                            s->weight[0] + s->weight[1], p->yblen);
1502         break;
1503     case 3:
1504         idx = mc_subpel(s, block, src, dstx, dsty, 0, plane);
1505         s->put_pixels_tab[idx](s->mcscratch, src, p->stride, p->yblen);
1506         idx = mc_subpel(s, block, src, dstx, dsty, 1, plane);
1507         if (s->biweight_func) {
1508             /* fixme: +32 is a quick hack */
1509             s->put_pixels_tab[idx](s->mcscratch + 32, src, p->stride, p->yblen);
1510             s->biweight_func(s->mcscratch, s->mcscratch+32, p->stride, s->weight_log2denom,
1511                              s->weight[0], s->weight[1], p->yblen);
1512         } else
1513             s->avg_pixels_tab[idx](s->mcscratch, src, p->stride, p->yblen);
1514         break;
1515     }
1516     s->add_obmc(mctmp, s->mcscratch, p->stride, obmc_weight, p->yblen);
1517 }
1518
1519 static void mc_row(DiracContext *s, DiracBlock *block, uint16_t *mctmp, int plane, int dsty)
1520 {
1521     Plane *p = &s->plane[plane];
1522     int x, dstx = p->xbsep - p->xoffset;
1523
1524     block_mc(s, block, mctmp, s->obmc_weight[0], plane, -p->xoffset, dsty);
1525     mctmp += p->xbsep;
1526
1527     for (x = 1; x < s->blwidth-1; x++) {
1528         block_mc(s, block+x, mctmp, s->obmc_weight[1], plane, dstx, dsty);
1529         dstx  += p->xbsep;
1530         mctmp += p->xbsep;
1531     }
1532     block_mc(s, block+x, mctmp, s->obmc_weight[2], plane, dstx, dsty);
1533 }
1534
1535 static void select_dsp_funcs(DiracContext *s, int width, int height, int xblen, int yblen)
1536 {
1537     int idx = 0;
1538     if (xblen > 8)
1539         idx = 1;
1540     if (xblen > 16)
1541         idx = 2;
1542
1543     memcpy(s->put_pixels_tab, s->diracdsp.put_dirac_pixels_tab[idx], sizeof(s->put_pixels_tab));
1544     memcpy(s->avg_pixels_tab, s->diracdsp.avg_dirac_pixels_tab[idx], sizeof(s->avg_pixels_tab));
1545     s->add_obmc = s->diracdsp.add_dirac_obmc[idx];
1546     if (s->weight_log2denom > 1 || s->weight[0] != 1 || s->weight[1] != 1) {
1547         s->weight_func   = s->diracdsp.weight_dirac_pixels_tab[idx];
1548         s->biweight_func = s->diracdsp.biweight_dirac_pixels_tab[idx];
1549     } else {
1550         s->weight_func   = NULL;
1551         s->biweight_func = NULL;
1552     }
1553 }
1554
1555 static void interpolate_refplane(DiracContext *s, DiracFrame *ref, int plane, int width, int height)
1556 {
1557     /* chroma allocates an edge of 8 when subsampled
1558        which for 4:2:2 means an h edge of 16 and v edge of 8
1559        just use 8 for everything for the moment */
1560     int i, edge = EDGE_WIDTH/2;
1561
1562     ref->hpel[plane][0] = ref->avframe->data[plane];
1563     s->mpvencdsp.draw_edges(ref->hpel[plane][0], ref->avframe->linesize[plane], width, height, edge, edge, EDGE_TOP | EDGE_BOTTOM); /* EDGE_TOP | EDGE_BOTTOM values just copied to make it build, this needs to be ensured */
1564
1565     /* no need for hpel if we only have fpel vectors */
1566     if (!s->mv_precision)
1567         return;
1568
1569     for (i = 1; i < 4; i++) {
1570         if (!ref->hpel_base[plane][i])
1571             ref->hpel_base[plane][i] = av_malloc((height+2*edge) * ref->avframe->linesize[plane] + 32);
1572         /* we need to be 16-byte aligned even for chroma */
1573         ref->hpel[plane][i] = ref->hpel_base[plane][i] + edge*ref->avframe->linesize[plane] + 16;
1574     }
1575
1576     if (!ref->interpolated[plane]) {
1577         s->diracdsp.dirac_hpel_filter(ref->hpel[plane][1], ref->hpel[plane][2],
1578                                       ref->hpel[plane][3], ref->hpel[plane][0],
1579                                       ref->avframe->linesize[plane], width, height);
1580         s->mpvencdsp.draw_edges(ref->hpel[plane][1], ref->avframe->linesize[plane], width, height, edge, edge, EDGE_TOP | EDGE_BOTTOM);
1581         s->mpvencdsp.draw_edges(ref->hpel[plane][2], ref->avframe->linesize[plane], width, height, edge, edge, EDGE_TOP | EDGE_BOTTOM);
1582         s->mpvencdsp.draw_edges(ref->hpel[plane][3], ref->avframe->linesize[plane], width, height, edge, edge, EDGE_TOP | EDGE_BOTTOM);
1583     }
1584     ref->interpolated[plane] = 1;
1585 }
1586
1587 /**
1588  * Dirac Specification ->
1589  * 13.0 Transform data syntax. transform_data()
1590  */
1591 static int dirac_decode_frame_internal(DiracContext *s)
1592 {
1593     DWTContext d;
1594     int y, i, comp, dsty;
1595
1596     if (s->low_delay) {
1597         /* [DIRAC_STD] 13.5.1 low_delay_transform_data() */
1598         for (comp = 0; comp < 3; comp++) {
1599             Plane *p = &s->plane[comp];
1600             memset(p->idwt_buf, 0, p->idwt_stride * p->idwt_height * sizeof(IDWTELEM));
1601         }
1602         if (!s->zero_res)
1603             decode_lowdelay(s);
1604     }
1605
1606     for (comp = 0; comp < 3; comp++) {
1607         Plane *p       = &s->plane[comp];
1608         uint8_t *frame = s->current_picture->avframe->data[comp];
1609
1610         /* FIXME: small resolutions */
1611         for (i = 0; i < 4; i++)
1612             s->edge_emu_buffer[i] = s->edge_emu_buffer_base + i*FFALIGN(p->width, 16);
1613
1614         if (!s->zero_res && !s->low_delay)
1615         {
1616             memset(p->idwt_buf, 0, p->idwt_stride * p->idwt_height * sizeof(IDWTELEM));
1617             decode_component(s, comp); /* [DIRAC_STD] 13.4.1 core_transform_data() */
1618         }
1619         if (ff_spatial_idwt_init2(&d, p->idwt_buf, p->idwt_width, p->idwt_height, p->idwt_stride,
1620                                   s->wavelet_idx+2, s->wavelet_depth, p->idwt_tmp))
1621             return -1;
1622
1623         if (!s->num_refs) { /* intra */
1624             for (y = 0; y < p->height; y += 16) {
1625                 ff_spatial_idwt_slice2(&d, y+16); /* decode */
1626                 s->diracdsp.put_signed_rect_clamped(frame + y*p->stride, p->stride,
1627                                                     p->idwt_buf + y*p->idwt_stride, p->idwt_stride, p->width, 16);
1628             }
1629         } else { /* inter */
1630             int rowheight = p->ybsep*p->stride;
1631
1632             select_dsp_funcs(s, p->width, p->height, p->xblen, p->yblen);
1633
1634             for (i = 0; i < s->num_refs; i++)
1635                 interpolate_refplane(s, s->ref_pics[i], comp, p->width, p->height);
1636
1637             memset(s->mctmp, 0, 4*p->yoffset*p->stride);
1638
1639             dsty = -p->yoffset;
1640             for (y = 0; y < s->blheight; y++) {
1641                 int h     = 0,
1642                     start = FFMAX(dsty, 0);
1643                 uint16_t *mctmp    = s->mctmp + y*rowheight;
1644                 DiracBlock *blocks = s->blmotion + y*s->blwidth;
1645
1646                 init_obmc_weights(s, p, y);
1647
1648                 if (y == s->blheight-1 || start+p->ybsep > p->height)
1649                     h = p->height - start;
1650                 else
1651                     h = p->ybsep - (start - dsty);
1652                 if (h < 0)
1653                     break;
1654
1655                 memset(mctmp+2*p->yoffset*p->stride, 0, 2*rowheight);
1656                 mc_row(s, blocks, mctmp, comp, dsty);
1657
1658                 mctmp += (start - dsty)*p->stride + p->xoffset;
1659                 ff_spatial_idwt_slice2(&d, start + h); /* decode */
1660                 s->diracdsp.add_rect_clamped(frame + start*p->stride, mctmp, p->stride,
1661                                              p->idwt_buf + start*p->idwt_stride, p->idwt_stride, p->width, h);
1662
1663                 dsty += p->ybsep;
1664             }
1665         }
1666     }
1667
1668
1669     return 0;
1670 }
1671
1672 static int get_buffer_with_edge(AVCodecContext *avctx, AVFrame *f, int flags)
1673 {
1674     int ret, i;
1675     int chroma_x_shift, chroma_y_shift;
1676     avcodec_get_chroma_sub_sample(avctx->pix_fmt, &chroma_x_shift, &chroma_y_shift);
1677
1678     f->width  = avctx->width  + 2 * EDGE_WIDTH;
1679     f->height = avctx->height + 2 * EDGE_WIDTH + 2;
1680     ret = ff_get_buffer(avctx, f, flags);
1681     if (ret < 0)
1682         return ret;
1683
1684     for (i = 0; f->data[i]; i++) {
1685         int offset = (EDGE_WIDTH >> (i && i<3 ? chroma_y_shift : 0)) *
1686                      f->linesize[i] + 32;
1687         f->data[i] += offset;
1688     }
1689     f->width  = avctx->width;
1690     f->height = avctx->height;
1691
1692     return 0;
1693 }
1694
1695 /**
1696  * Dirac Specification ->
1697  * 11.1.1 Picture Header. picture_header()
1698  */
1699 static int dirac_decode_picture_header(DiracContext *s)
1700 {
1701     int retire, picnum;
1702     int i, j, refnum, refdist;
1703     GetBitContext *gb = &s->gb;
1704
1705     /* [DIRAC_STD] 11.1.1 Picture Header. picture_header() PICTURE_NUM */
1706     picnum = s->current_picture->avframe->display_picture_number = get_bits_long(gb, 32);
1707
1708
1709     av_log(s->avctx,AV_LOG_DEBUG,"PICTURE_NUM: %d\n",picnum);
1710
1711     /* if this is the first keyframe after a sequence header, start our
1712        reordering from here */
1713     if (s->frame_number < 0)
1714         s->frame_number = picnum;
1715
1716     s->ref_pics[0] = s->ref_pics[1] = NULL;
1717     for (i = 0; i < s->num_refs; i++) {
1718         refnum = picnum + dirac_get_se_golomb(gb);
1719         refdist = INT_MAX;
1720
1721         /* find the closest reference to the one we want */
1722         /* Jordi: this is needed if the referenced picture hasn't yet arrived */
1723         for (j = 0; j < MAX_REFERENCE_FRAMES && refdist; j++)
1724             if (s->ref_frames[j]
1725                 && FFABS(s->ref_frames[j]->avframe->display_picture_number - refnum) < refdist) {
1726                 s->ref_pics[i] = s->ref_frames[j];
1727                 refdist = FFABS(s->ref_frames[j]->avframe->display_picture_number - refnum);
1728             }
1729
1730         if (!s->ref_pics[i] || refdist)
1731             av_log(s->avctx, AV_LOG_DEBUG, "Reference not found\n");
1732
1733         /* if there were no references at all, allocate one */
1734         if (!s->ref_pics[i])
1735             for (j = 0; j < MAX_FRAMES; j++)
1736                 if (!s->all_frames[j].avframe->data[0]) {
1737                     s->ref_pics[i] = &s->all_frames[j];
1738                     get_buffer_with_edge(s->avctx, s->ref_pics[i]->avframe, AV_GET_BUFFER_FLAG_REF);
1739                     break;
1740                 }
1741     }
1742
1743     /* retire the reference frames that are not used anymore */
1744     if (s->current_picture->avframe->reference) {
1745         retire = picnum + dirac_get_se_golomb(gb);
1746         if (retire != picnum) {
1747             DiracFrame *retire_pic = remove_frame(s->ref_frames, retire);
1748
1749             if (retire_pic)
1750                 retire_pic->avframe->reference &= DELAYED_PIC_REF;
1751             else
1752                 av_log(s->avctx, AV_LOG_DEBUG, "Frame to retire not found\n");
1753         }
1754
1755         /* if reference array is full, remove the oldest as per the spec */
1756         while (add_frame(s->ref_frames, MAX_REFERENCE_FRAMES, s->current_picture)) {
1757             av_log(s->avctx, AV_LOG_ERROR, "Reference frame overflow\n");
1758             remove_frame(s->ref_frames, s->ref_frames[0]->avframe->display_picture_number)->avframe->reference &= DELAYED_PIC_REF;
1759         }
1760     }
1761
1762     if (s->num_refs) {
1763         if (dirac_unpack_prediction_parameters(s))  /* [DIRAC_STD] 11.2 Picture Prediction Data. picture_prediction() */
1764             return -1;
1765         if (dirac_unpack_block_motion_data(s))      /* [DIRAC_STD] 12. Block motion data syntax                       */
1766             return -1;
1767     }
1768     if (dirac_unpack_idwt_params(s))                /* [DIRAC_STD] 11.3 Wavelet transform data                        */
1769         return -1;
1770
1771     init_planes(s);
1772     return 0;
1773 }
1774
1775 static int get_delayed_pic(DiracContext *s, AVFrame *picture, int *got_frame)
1776 {
1777     DiracFrame *out = s->delay_frames[0];
1778     int i, out_idx  = 0;
1779     int ret;
1780
1781     /* find frame with lowest picture number */
1782     for (i = 1; s->delay_frames[i]; i++)
1783         if (s->delay_frames[i]->avframe->display_picture_number < out->avframe->display_picture_number) {
1784             out     = s->delay_frames[i];
1785             out_idx = i;
1786         }
1787
1788     for (i = out_idx; s->delay_frames[i]; i++)
1789         s->delay_frames[i] = s->delay_frames[i+1];
1790
1791     if (out) {
1792         out->avframe->reference ^= DELAYED_PIC_REF;
1793         *got_frame = 1;
1794         if((ret = av_frame_ref(picture, out->avframe)) < 0)
1795             return ret;
1796     }
1797
1798     return 0;
1799 }
1800
/**
 * Dirac Specification ->
 * 9.6 Parse Info Header Syntax. parse_info()
 * Header size in bytes, spelled out field by field:
 * 4 byte start code + 1 byte parse code + 4 byte size + 4 byte previous size
 */
#define DATA_UNIT_HEADER_SIZE (4 + 1 + 4 + 4)
1807
1808 /* [DIRAC_STD] dirac_decode_data_unit makes reference to the while defined in 9.3
1809    inside the function parse_sequence() */
1810 static int dirac_decode_data_unit(AVCodecContext *avctx, const uint8_t *buf, int size)
1811 {
1812     DiracContext *s   = avctx->priv_data;
1813     DiracFrame *pic   = NULL;
1814     int ret, i, parse_code = buf[4];
1815     unsigned tmp;
1816
1817     if (size < DATA_UNIT_HEADER_SIZE)
1818         return -1;
1819
1820     init_get_bits(&s->gb, &buf[13], 8*(size - DATA_UNIT_HEADER_SIZE));
1821
1822     if (parse_code == pc_seq_header) {
1823         if (s->seen_sequence_header)
1824             return 0;
1825
1826         /* [DIRAC_STD] 10. Sequence header */
1827         if (avpriv_dirac_parse_sequence_header(avctx, &s->gb, &s->source))
1828             return -1;
1829
1830         avcodec_get_chroma_sub_sample(avctx->pix_fmt, &s->chroma_x_shift, &s->chroma_y_shift);
1831
1832         if (alloc_sequence_buffers(s))
1833             return -1;
1834
1835         s->seen_sequence_header = 1;
1836     } else if (parse_code == pc_eos) { /* [DIRAC_STD] End of Sequence */
1837         free_sequence_buffers(s);
1838         s->seen_sequence_header = 0;
1839     } else if (parse_code == pc_aux_data) {
1840         if (buf[13] == 1) {     /* encoder implementation/version */
1841             int ver[3];
1842             /* versions older than 1.0.8 don't store quant delta for
1843                subbands with only one codeblock */
1844             if (sscanf(buf+14, "Schroedinger %d.%d.%d", ver, ver+1, ver+2) == 3)
1845                 if (ver[0] == 1 && ver[1] == 0 && ver[2] <= 7)
1846                     s->old_delta_quant = 1;
1847         }
1848     } else if (parse_code & 0x8) {  /* picture data unit */
1849         if (!s->seen_sequence_header) {
1850             av_log(avctx, AV_LOG_DEBUG, "Dropping frame without sequence header\n");
1851             return -1;
1852         }
1853
1854         /* find an unused frame */
1855         for (i = 0; i < MAX_FRAMES; i++)
1856             if (s->all_frames[i].avframe->data[0] == NULL)
1857                 pic = &s->all_frames[i];
1858         if (!pic) {
1859             av_log(avctx, AV_LOG_ERROR, "framelist full\n");
1860             return -1;
1861         }
1862
1863         av_frame_unref(pic->avframe);
1864
1865         /* [DIRAC_STD] Defined in 9.6.1 ... */
1866         tmp            =  parse_code & 0x03;                   /* [DIRAC_STD] num_refs()      */
1867         if (tmp > 2) {
1868             av_log(avctx, AV_LOG_ERROR, "num_refs of 3\n");
1869             return -1;
1870         }
1871         s->num_refs    = tmp;
1872         s->is_arith    = (parse_code & 0x48) == 0x08;          /* [DIRAC_STD] using_ac()      */
1873         s->low_delay   = (parse_code & 0x88) == 0x88;          /* [DIRAC_STD] is_low_delay()  */
1874         pic->avframe->reference = (parse_code & 0x0C) == 0x0C;  /* [DIRAC_STD]  is_reference() */
1875         pic->avframe->key_frame = s->num_refs == 0;             /* [DIRAC_STD] is_intra()      */
1876         pic->avframe->pict_type = s->num_refs + 1;              /* Definition of AVPictureType in avutil.h */
1877
1878         if ((ret = get_buffer_with_edge(avctx, pic->avframe, (parse_code & 0x0C) == 0x0C ? AV_GET_BUFFER_FLAG_REF : 0)) < 0)
1879             return ret;
1880         s->current_picture = pic;
1881         s->plane[0].stride = pic->avframe->linesize[0];
1882         s->plane[1].stride = pic->avframe->linesize[1];
1883         s->plane[2].stride = pic->avframe->linesize[2];
1884
1885         if (alloc_buffers(s, FFMAX3(FFABS(s->plane[0].stride), FFABS(s->plane[1].stride), FFABS(s->plane[2].stride))) < 0)
1886             return AVERROR(ENOMEM);
1887
1888         /* [DIRAC_STD] 11.1 Picture parse. picture_parse() */
1889         if (dirac_decode_picture_header(s))
1890             return -1;
1891
1892         /* [DIRAC_STD] 13.0 Transform data syntax. transform_data() */
1893         if (dirac_decode_frame_internal(s))
1894             return -1;
1895     }
1896     return 0;
1897 }
1898
1899 static int dirac_decode_frame(AVCodecContext *avctx, void *data, int *got_frame, AVPacket *pkt)
1900 {
1901     DiracContext *s     = avctx->priv_data;
1902     AVFrame *picture    = data;
1903     uint8_t *buf        = pkt->data;
1904     int buf_size        = pkt->size;
1905     int i, data_unit_size, buf_idx = 0;
1906     int ret;
1907
1908     /* release unused frames */
1909     for (i = 0; i < MAX_FRAMES; i++)
1910         if (s->all_frames[i].avframe->data[0] && !s->all_frames[i].avframe->reference) {
1911             av_frame_unref(s->all_frames[i].avframe);
1912             memset(s->all_frames[i].interpolated, 0, sizeof(s->all_frames[i].interpolated));
1913         }
1914
1915     s->current_picture = NULL;
1916     *got_frame = 0;
1917
1918     /* end of stream, so flush delayed pics */
1919     if (buf_size == 0)
1920         return get_delayed_pic(s, (AVFrame *)data, got_frame);
1921
1922     for (;;) {
1923         /*[DIRAC_STD] Here starts the code from parse_info() defined in 9.6
1924           [DIRAC_STD] PARSE_INFO_PREFIX = "BBCD" as defined in ISO/IEC 646
1925           BBCD start code search */
1926         for (; buf_idx + DATA_UNIT_HEADER_SIZE < buf_size; buf_idx++) {
1927             if (buf[buf_idx  ] == 'B' && buf[buf_idx+1] == 'B' &&
1928                 buf[buf_idx+2] == 'C' && buf[buf_idx+3] == 'D')
1929                 break;
1930         }
1931         /* BBCD found or end of data */
1932         if (buf_idx + DATA_UNIT_HEADER_SIZE >= buf_size)
1933             break;
1934
1935         data_unit_size = AV_RB32(buf+buf_idx+5);
1936         if (buf_idx + data_unit_size > buf_size || !data_unit_size) {
1937             if(buf_idx + data_unit_size > buf_size)
1938             av_log(s->avctx, AV_LOG_ERROR,
1939                    "Data unit with size %d is larger than input buffer, discarding\n",
1940                    data_unit_size);
1941             buf_idx += 4;
1942             continue;
1943         }
1944         /* [DIRAC_STD] dirac_decode_data_unit makes reference to the while defined in 9.3 inside the function parse_sequence() */
1945         if (dirac_decode_data_unit(avctx, buf+buf_idx, data_unit_size))
1946         {
1947             av_log(s->avctx, AV_LOG_ERROR,"Error in dirac_decode_data_unit\n");
1948             return -1;
1949         }
1950         buf_idx += data_unit_size;
1951     }
1952
1953     if (!s->current_picture)
1954         return buf_size;
1955
1956     if (s->current_picture->avframe->display_picture_number > s->frame_number) {
1957         DiracFrame *delayed_frame = remove_frame(s->delay_frames, s->frame_number);
1958
1959         s->current_picture->avframe->reference |= DELAYED_PIC_REF;
1960
1961         if (add_frame(s->delay_frames, MAX_DELAY, s->current_picture)) {
1962             int min_num = s->delay_frames[0]->avframe->display_picture_number;
1963             /* Too many delayed frames, so we display the frame with the lowest pts */
1964             av_log(avctx, AV_LOG_ERROR, "Delay frame overflow\n");
1965             delayed_frame = s->delay_frames[0];
1966
1967             for (i = 1; s->delay_frames[i]; i++)
1968                 if (s->delay_frames[i]->avframe->display_picture_number < min_num)
1969                     min_num = s->delay_frames[i]->avframe->display_picture_number;
1970
1971             delayed_frame = remove_frame(s->delay_frames, min_num);
1972             add_frame(s->delay_frames, MAX_DELAY, s->current_picture);
1973         }
1974
1975         if (delayed_frame) {
1976             delayed_frame->avframe->reference ^= DELAYED_PIC_REF;
1977             if((ret=av_frame_ref(data, delayed_frame->avframe)) < 0)
1978                 return ret;
1979             *got_frame = 1;
1980         }
1981     } else if (s->current_picture->avframe->display_picture_number == s->frame_number) {
1982         /* The right frame at the right time :-) */
1983         if((ret=av_frame_ref(data, s->current_picture->avframe)) < 0)
1984             return ret;
1985         *got_frame = 1;
1986     }
1987
1988     if (*got_frame)
1989         s->frame_number = picture->display_picture_number + 1;
1990
1991     return buf_idx;
1992 }
1993
1994 AVCodec ff_dirac_decoder = {
1995     .name           = "dirac",
1996     .long_name      = NULL_IF_CONFIG_SMALL("BBC Dirac VC-2"),
1997     .type           = AVMEDIA_TYPE_VIDEO,
1998     .id             = AV_CODEC_ID_DIRAC,
1999     .priv_data_size = sizeof(DiracContext),
2000     .init           = dirac_decode_init,
2001     .close          = dirac_decode_end,
2002     .decode         = dirac_decode_frame,
2003     .capabilities   = CODEC_CAP_DELAY,
2004     .flush          = dirac_decode_flush,
2005 };