git.sesse.net Git - ffmpeg/blob - libavcodec/diracdec.c

   1 /*
   2  * Copyright (C) 2007 Marco Gerards <marco@gnu.org>
   3  * Copyright (C) 2009 David Conrad
   4  * Copyright (C) 2011 Jordi Ortiz
   5  *
   6  * This file is part of FFmpeg.
   7  *
   8  * FFmpeg is free software; you can redistribute it and/or
   9  * modify it under the terms of the GNU Lesser General Public
  10  * License as published by the Free Software Foundation; either
  11  * version 2.1 of the License, or (at your option) any later version.
  12  *
  13  * FFmpeg is distributed in the hope that it will be useful,
  14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  16  * Lesser General Public License for more details.
  17  *
  18  * You should have received a copy of the GNU Lesser General Public
  19  * License along with FFmpeg; if not, write to the Free Software
  20  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  21  */
  22
  23 /**
  24  * @file libavcodec/diracdec.c
  25  * Dirac Decoder
  26  * @author Marco Gerards <marco@gnu.org>, David Conrad, Jordi Ortiz <nenjordi@gmail.com>
  27  */
  28
  29 #include "avcodec.h"
  30 #include "dsputil.h"
  31 #include "get_bits.h"
  32 #include "bytestream.h"
  33 #include "golomb.h"
  34 #include "dirac_arith.h"
  35 #include "mpeg12data.h"
  36 #include "dwt.h"
  37 #include "dirac.h"
  38 #include "diracdsp.h"
  39
  40 /**
  41  * The spec limits the number of wavelet decompositions to 4 for both
  42  * level 1 (VC-2) and 128 (long-gop default).
  43  * 5 decompositions is the maximum before >16-bit buffers are needed.
  44  * Schroedinger allows this for DD 9,7 and 13,7 wavelets only, limiting
  45  * the others to 4 decompositions (or 3 for the fidelity filter).
  46  *
  47  * We use this instead of MAX_DECOMPOSITIONS to save some memory.
  48  */
  49 #define MAX_DWT_LEVELS 5
  50
  51 /**
  52  * The spec limits this to 3 for frame coding, but in practice can be as high as 6
  53  */
  54 #define MAX_REFERENCE_FRAMES 8
  55 #define MAX_DELAY 5         /* limit for main profile for frame coding (TODO: field coding) */
  56 #define MAX_FRAMES (MAX_REFERENCE_FRAMES + MAX_DELAY + 1)
  57 #define MAX_QUANT 68        /* max quant for VC-2 */
  58 #define MAX_BLOCKSIZE 32    /* maximum xblen/yblen we support */
  59
  60 /**
  61  * DiracBlock->ref flags, if set then the block does MC from the given ref
  62  */
  63 #define DIRAC_REF_MASK_REF1   1
  64 #define DIRAC_REF_MASK_REF2   2
  65 #define DIRAC_REF_MASK_GLOBAL 4
  66
  67 /**
  68  * Value of Picture.reference when Picture is not a reference picture, but
  69  * is held for delayed output.
  70  */
  71 #define DELAYED_PIC_REF 4
  72
  73 #define ff_emulated_edge_mc ff_emulated_edge_mc_8 /* Fix: change the calls to this function regarding bit depth */
  74
  75 #define CALC_PADDING(size, depth)                       \
  76     (((size + (1 << depth) - 1) >> depth) << depth)
  77
  78 #define DIVRNDUP(a, b) (((a) + (b) - 1) / (b))
  79
  80 typedef struct {
  81     AVFrame avframe;
  82     int interpolated[3];    /* 1 if hpel[] is valid */
  83     uint8_t *hpel[3][4];
  84     uint8_t *hpel_base[3][4];
  85 } DiracFrame;
  86
  87 typedef struct {
  88     union {
  89         int16_t mv[2][2];
  90         int16_t dc[3];
  91     } u; /* anonymous unions aren't in C99 :( */
  92     uint8_t ref;
  93 } DiracBlock;
  94
  95 typedef struct SubBand {
  96     int level;
  97     int orientation;
  98     int stride;
  99     int width;
 100     int height;
 101     int quant;
 102     IDWTELEM *ibuf;
 103     struct SubBand *parent;
 104
 105     /* for low delay */
 106     unsigned length;
 107     const uint8_t *coeff_data;
 108 } SubBand;
 109
 110 typedef struct Plane {
 111     int width;
 112     int height;
 113     int stride;
 114
 115     int idwt_width;
 116     int idwt_height;
 117     int idwt_stride;
 118     IDWTELEM *idwt_buf;
 119     IDWTELEM *idwt_buf_base;
 120     IDWTELEM *idwt_tmp;
 121
 122     /* block length */
 123     uint8_t xblen;
 124     uint8_t yblen;
 125     /* block separation (block n+1 starts after this many pixels in block n) */
 126     uint8_t xbsep;
 127     uint8_t ybsep;
 128     /* amount of overspill on each edge (half of the overlap between blocks) */
 129     uint8_t xoffset;
 130     uint8_t yoffset;
 131
 132     SubBand band[MAX_DWT_LEVELS][4];
 133 } Plane;
 134
 135 typedef struct DiracContext {
 136     AVCodecContext *avctx;
 137     DSPContext dsp;
 138     DiracDSPContext diracdsp;
 139     GetBitContext gb;
 140     dirac_source_params source;
 141     int seen_sequence_header;
 142     int frame_number;           /* number of the next frame to display       */
 143     Plane plane[3];
 144     int chroma_x_shift;
 145     int chroma_y_shift;
 146
 147     int zero_res;               /* zero residue flag                         */
 148     int is_arith;               /* whether coeffs use arith or golomb coding */
 149     int low_delay;              /* use the low delay syntax                  */
 150     int globalmc_flag;          /* use global motion compensation            */
 151     int num_refs;               /* number of reference pictures              */
 152
 153     /* wavelet decoding */
 154     unsigned wavelet_depth;     /* depth of the IDWT                         */
 155     unsigned wavelet_idx;
 156
 157     /**
 158      * schroedinger older than 1.0.8 doesn't store
 159      * quant delta if only one codebook exists in a band
 160      */
 161     unsigned old_delta_quant;
 162     unsigned codeblock_mode;
 163
 164     struct {
 165         unsigned width;
 166         unsigned height;
 167     } codeblock[MAX_DWT_LEVELS+1];
 168
 169     struct {
 170         unsigned num_x;         /* number of horizontal slices               */
 171         unsigned num_y;         /* number of vertical slices                 */
 172         AVRational bytes;       /* average bytes per slice                   */
 173         uint8_t quant[MAX_DWT_LEVELS][4]; /* [DIRAC_STD] E.1 */
 174     } lowdelay;
 175
 176     struct {
 177         int pan_tilt[2];        /* pan/tilt vector                           */
 178         int zrs[2][2];          /* zoom/rotate/shear matrix                  */
 179         int perspective[2];     /* perspective vector                        */
 180         unsigned zrs_exp;
 181         unsigned perspective_exp;
 182     } globalmc[2];
 183
 184     /* motion compensation */
 185     uint8_t mv_precision;       /* [DIRAC_STD] REFS_WT_PRECISION             */
 186     int16_t weight[2];          /* [DIRAC_STD] REF1_WT and REF2_WT           */
 187     unsigned weight_log2denom;  /* [DIRAC_STD] REFS_WT_PRECISION             */
 188
 189     int blwidth;                /* number of blocks (horizontally)           */
 190     int blheight;               /* number of blocks (vertically)             */
 191     int sbwidth;                /* number of superblocks (horizontally)      */
 192     int sbheight;               /* number of superblocks (vertically)        */
 193
 194     uint8_t *sbsplit;
 195     DiracBlock *blmotion;
 196
 197     uint8_t *edge_emu_buffer[4];
 198     uint8_t *edge_emu_buffer_base;
 199
 200     uint16_t *mctmp;            /* buffer holding the MC data multipled by OBMC weights */
 201     uint8_t *mcscratch;
 202
 203     DECLARE_ALIGNED(16, uint8_t, obmc_weight)[3][MAX_BLOCKSIZE*MAX_BLOCKSIZE];
 204
 205     void (*put_pixels_tab[4])(uint8_t *dst, const uint8_t *src[5], int stride, int h);
 206     void (*avg_pixels_tab[4])(uint8_t *dst, const uint8_t *src[5], int stride, int h);
 207     void (*add_obmc)(uint16_t *dst, const uint8_t *src, int stride, const uint8_t *obmc_weight, int yblen);
 208     dirac_weight_func weight_func;
 209     dirac_biweight_func biweight_func;
 210
 211     DiracFrame *current_picture;
 212     DiracFrame *ref_pics[2];
 213
 214     DiracFrame *ref_frames[MAX_REFERENCE_FRAMES+1];
 215     DiracFrame *delay_frames[MAX_DELAY+1];
 216     DiracFrame all_frames[MAX_FRAMES];
 217 } DiracContext;
 218
 219 /**
 220  * Dirac Specification ->
 221  * Parse code values. 9.6.1 Table 9.1
 222  */
 223 enum dirac_parse_code {
 224     pc_seq_header         = 0x00,
 225     pc_eos                = 0x10,
 226     pc_aux_data           = 0x20,
 227     pc_padding            = 0x30,
 228 };
 229
 230 enum dirac_subband {
 231     subband_ll = 0,
 232     subband_hl = 1,
 233     subband_lh = 2,
 234     subband_hh = 3
 235 };
 236
 237 static const uint8_t default_qmat[][4][4] = {
 238     { { 5,  3,  3,  0}, { 0,  4,  4,  1}, { 0,  5,  5,  2}, { 0,  6,  6,  3} },
 239     { { 4,  2,  2,  0}, { 0,  4,  4,  2}, { 0,  5,  5,  3}, { 0,  7,  7,  5} },
 240     { { 5,  3,  3,  0}, { 0,  4,  4,  1}, { 0,  5,  5,  2}, { 0,  6,  6,  3} },
 241     { { 8,  4,  4,  0}, { 0,  4,  4,  0}, { 0,  4,  4,  0}, { 0,  4,  4,  0} },
 242     { { 8,  4,  4,  0}, { 0,  4,  4,  0}, { 0,  4,  4,  0}, { 0,  4,  4,  0} },
 243     { { 0,  4,  4,  8}, { 0,  8,  8, 12}, { 0, 13, 13, 17}, { 0, 17, 17, 21} },
 244     { { 3,  1,  1,  0}, { 0,  4,  4,  2}, { 0,  6,  6,  5}, { 0,  9,  9,  7} },
 245 };
 246
 247 static const int qscale_tab[MAX_QUANT+1] = {
 248     4,     5,     6,     7,     8,    10,    11,    13,
 249     16,    19,    23,    27,    32,    38,    45,    54,
 250     64,    76,    91,   108,   128,   152,   181,   215,
 251     256,   304,   362,   431,   512,   609,   724,   861,
 252     1024,  1218,  1448,  1722,  2048,  2435,  2896,  3444,
 253     4096,  4871,  5793,  6889,  8192,  9742, 11585, 13777,
 254     16384, 19484, 23170, 27554, 32768, 38968, 46341, 55109,
 255     65536, 77936
 256 };
 257
 258 static const int qoffset_intra_tab[MAX_QUANT+1] = {
 259     1,     2,     3,     4,     4,     5,     6,     7,
 260     8,    10,    12,    14,    16,    19,    23,    27,
 261     32,    38,    46,    54,    64,    76,    91,   108,
 262     128,   152,   181,   216,   256,   305,   362,   431,
 263     512,   609,   724,   861,  1024,  1218,  1448,  1722,
 264     2048,  2436,  2897,  3445,  4096,  4871,  5793,  6889,
 265     8192,  9742, 11585, 13777, 16384, 19484, 23171, 27555,
 266     32768, 38968
 267 };
 268
 269 static const int qoffset_inter_tab[MAX_QUANT+1] = {
 270     1,     2,     2,     3,     3,     4,     4,     5,
 271     6,     7,     9,    10,    12,    14,    17,    20,
 272     24,    29,    34,    41,    48,    57,    68,    81,
 273     96,   114,   136,   162,   192,   228,   272,   323,
 274     384,   457,   543,   646,   768,   913,  1086,  1292,
 275     1536,  1827,  2172,  2583,  3072,  3653,  4344,  5166,
 276     6144,  7307,  8689, 10333, 12288, 14613, 17378, 20666,
 277     24576, 29226
 278 };
 279
 280 /* magic number division by 3 from schroedinger */
 281 static inline int divide3(int x)
 282 {
 283     return ((x+1)*21845 + 10922) >> 16;
 284 }
 285
 286 static DiracFrame *remove_frame(DiracFrame *framelist[], int picnum)
 287 {
 288     DiracFrame *remove_pic = NULL;
 289     int i, remove_idx = -1;
 290
 291     for (i = 0; framelist[i]; i++)
 292         if (framelist[i]->avframe.display_picture_number == picnum) {
 293             remove_pic = framelist[i];
 294             remove_idx = i;
 295         }
 296
 297     if (remove_pic)
 298         for (i = remove_idx; framelist[i]; i++)
 299             framelist[i] = framelist[i+1];
 300
 301     return remove_pic;
 302 }
 303
 304 static int add_frame(DiracFrame *framelist[], int maxframes, DiracFrame *frame)
 305 {
 306     int i;
 307     for (i = 0; i < maxframes; i++)
 308         if (!framelist[i]) {
 309             framelist[i] = frame;
 310             return 0;
 311         }
 312     return -1;
 313 }
 314
 315 static int alloc_sequence_buffers(DiracContext *s)
 316 {
 317     int sbwidth  = DIVRNDUP(s->source.width,  4);
 318     int sbheight = DIVRNDUP(s->source.height, 4);
 319     int i, w, h, top_padding;
 320
 321     /* todo: think more about this / use or set Plane here */
 322     for (i = 0; i < 3; i++) {
 323         int max_xblen = MAX_BLOCKSIZE >> (i ? s->chroma_x_shift : 0);
 324         int max_yblen = MAX_BLOCKSIZE >> (i ? s->chroma_y_shift : 0);
 325         w = s->source.width  >> (i ? s->chroma_x_shift : 0);
 326         h = s->source.height >> (i ? s->chroma_y_shift : 0);
 327
 328         /* we allocate the max we support here since num decompositions can
 329          * change from frame to frame. Stride is aligned to 16 for SIMD, and
 330          * 1<<MAX_DWT_LEVELS top padding to avoid if(y>0) in arith decoding
 331          * MAX_BLOCKSIZE padding for MC: blocks can spill up to half of that
 332          * on each side */
 333         top_padding = FFMAX(1<<MAX_DWT_LEVELS, max_yblen/2);
 334         w = FFALIGN(CALC_PADDING(w, MAX_DWT_LEVELS), 8); /* FIXME: Should this be 16 for SSE??? */
 335         h = top_padding + CALC_PADDING(h, MAX_DWT_LEVELS) + max_yblen/2;
 336
 337         s->plane[i].idwt_buf_base = av_mallocz((w+max_xblen)*h * sizeof(IDWTELEM));
 338         s->plane[i].idwt_tmp      = av_malloc((w+16) * sizeof(IDWTELEM));
 339         s->plane[i].idwt_buf      = s->plane[i].idwt_buf_base + top_padding*w;
 340         if (!s->plane[i].idwt_buf_base || !s->plane[i].idwt_tmp)
 341             return AVERROR(ENOMEM);
 342     }
 343
 344     w = s->source.width;
 345     h = s->source.height;
 346
 347     /* fixme: allocate using real stride here */
 348     s->sbsplit  = av_malloc(sbwidth * sbheight);
 349     s->blmotion = av_malloc(sbwidth * sbheight * 4 * sizeof(*s->blmotion));
 350     s->edge_emu_buffer_base = av_malloc((w+64)*MAX_BLOCKSIZE);
 351
 352     s->mctmp     = av_malloc((w+64+MAX_BLOCKSIZE) * (h*MAX_BLOCKSIZE) * sizeof(*s->mctmp));
 353     s->mcscratch = av_malloc((w+64)*MAX_BLOCKSIZE);
 354
 355     if (!s->sbsplit || !s->blmotion)
 356         return AVERROR(ENOMEM);
 357     return 0;
 358 }
 359
 360 static void free_sequence_buffers(DiracContext *s)
 361 {
 362     int i, j, k;
 363
 364     for (i = 0; i < MAX_FRAMES; i++) {
 365         if (s->all_frames[i].avframe.data[0]) {
 366             s->avctx->release_buffer(s->avctx, &s->all_frames[i].avframe);
 367             memset(s->all_frames[i].interpolated, 0, sizeof(s->all_frames[i].interpolated));
 368         }
 369
 370         for (j = 0; j < 3; j++)
 371             for (k = 1; k < 4; k++)
 372                 av_freep(&s->all_frames[i].hpel_base[j][k]);
 373     }
 374
 375     memset(s->ref_frames, 0, sizeof(s->ref_frames));
 376     memset(s->delay_frames, 0, sizeof(s->delay_frames));
 377
 378     for (i = 0; i < 3; i++) {
 379         av_freep(&s->plane[i].idwt_buf_base);
 380         av_freep(&s->plane[i].idwt_tmp);
 381     }
 382
 383     av_freep(&s->sbsplit);
 384     av_freep(&s->blmotion);
 385     av_freep(&s->edge_emu_buffer_base);
 386
 387     av_freep(&s->mctmp);
 388     av_freep(&s->mcscratch);
 389 }
 390
 391 static av_cold int dirac_decode_init(AVCodecContext *avctx)
 392 {
 393     DiracContext *s = avctx->priv_data;
 394     s->avctx = avctx;
 395     s->frame_number = -1;
 396
 397     if (avctx->flags&CODEC_FLAG_EMU_EDGE) {
 398         av_log(avctx, AV_LOG_ERROR, "Edge emulation not supported!\n");
 399         return AVERROR_PATCHWELCOME;
 400     }
 401
 402     dsputil_init(&s->dsp, avctx);
 403     ff_diracdsp_init(&s->diracdsp);
 404
 405     return 0;
 406 }
 407
 408 static void dirac_decode_flush(AVCodecContext *avctx)
 409 {
 410     DiracContext *s = avctx->priv_data;
 411     free_sequence_buffers(s);
 412     s->seen_sequence_header = 0;
 413     s->frame_number = -1;
 414 }
 415
 416 static av_cold int dirac_decode_end(AVCodecContext *avctx)
 417 {
 418     dirac_decode_flush(avctx);
 419     return 0;
 420 }
 421
 422 #define SIGN_CTX(x) (CTX_SIGN_ZERO + ((x) > 0) - ((x) < 0))
 423
 424 static inline void coeff_unpack_arith(DiracArith *c, int qfactor, int qoffset,
 425                                       SubBand *b, IDWTELEM *buf, int x, int y)
 426 {
 427     int coeff, sign;
 428     int sign_pred = 0;
 429     int pred_ctx = CTX_ZPZN_F1;
 430
 431     /* Check if the parent subband has a 0 in the corresponding position */
 432     if (b->parent)
 433         pred_ctx += !!b->parent->ibuf[b->parent->stride * (y>>1) + (x>>1)] << 1;
 434
 435     if (b->orientation == subband_hl)
 436         sign_pred = buf[-b->stride];
 437
 438     /* Determine if the pixel has only zeros in its neighbourhood */
 439     if (x) {
 440         pred_ctx += !(buf[-1] | buf[-b->stride] | buf[-1-b->stride]);
 441         if (b->orientation == subband_lh)
 442             sign_pred = buf[-1];
 443     } else {
 444         pred_ctx += !buf[-b->stride];
 445     }
 446
 447     coeff = dirac_get_arith_uint(c, pred_ctx, CTX_COEFF_DATA);
 448     if (coeff) {
 449         coeff = (coeff * qfactor + qoffset + 2) >> 2;
 450         sign  = dirac_get_arith_bit(c, SIGN_CTX(sign_pred));
 451         coeff = (coeff ^ -sign) + sign;
 452     }
 453     *buf = coeff;
 454 }
 455
 456 static inline int coeff_unpack_golomb(GetBitContext *gb, int qfactor, int qoffset)
 457 {
 458     int sign, coeff;
 459
 460     coeff = svq3_get_ue_golomb(gb);
 461     if (coeff) {
 462         coeff = (coeff * qfactor + qoffset + 2) >> 2;
 463         sign  = get_bits1(gb);
 464         coeff = (coeff ^ -sign) + sign;
 465     }
 466     return coeff;
 467 }
 468
 469 /**
 470  * Decode the coeffs in the rectangle defined by left, right, top, bottom
 471  * [DIRAC_STD] 13.4.3.2 Codeblock unpacking loop. codeblock()
 472  */
 473 static inline void codeblock(DiracContext *s, SubBand *b,
 474                              GetBitContext *gb, DiracArith *c,
 475                              int left, int right, int top, int bottom,
 476                              int blockcnt_one, int is_arith)
 477 {
 478     int x, y, zero_block;
 479     int qoffset, qfactor;
 480     IDWTELEM *buf;
 481
 482     /* check for any coded coefficients in this codeblock */
 483     if (!blockcnt_one) {
 484         if (is_arith)
 485             zero_block = dirac_get_arith_bit(c, CTX_ZERO_BLOCK);
 486         else
 487             zero_block = get_bits1(gb);
 488
 489         if (zero_block)
 490             return;
 491     }
 492
 493     if (s->codeblock_mode && !(s->old_delta_quant && blockcnt_one)) {
 494         if (is_arith)
 495             b->quant += dirac_get_arith_int(c, CTX_DELTA_Q_F, CTX_DELTA_Q_DATA);
 496         else
 497             b->quant += dirac_get_se_golomb(gb);
 498     }
 499
 500     b->quant = FFMIN(b->quant, MAX_QUANT);
 501
 502     qfactor = qscale_tab[b->quant];
 503     /* TODO: context pointer? */
 504     if (!s->num_refs)
 505         qoffset = qoffset_intra_tab[b->quant];
 506     else
 507         qoffset = qoffset_inter_tab[b->quant];
 508
 509     buf = b->ibuf + top * b->stride;
 510     for (y = top; y < bottom; y++) {
 511         for (x = left; x < right; x++) {
 512             /* [DIRAC_STD] 13.4.4 Subband coefficients. coeff_unpack() */
 513             if (is_arith)
 514                 coeff_unpack_arith(c, qfactor, qoffset, b, buf+x, x, y);
 515             else
 516                 buf[x] = coeff_unpack_golomb(gb, qfactor, qoffset);
 517         }
 518         buf += b->stride;
 519     }
 520 }
 521
 522 /**
 523  * Dirac Specification ->
 524  * 13.3 intra_dc_prediction(band)
 525  */
 526 static inline void intra_dc_prediction(SubBand *b)
 527 {
 528     IDWTELEM *buf = b->ibuf;
 529     int x, y;
 530
 531     for (x = 1; x < b->width; x++)
 532         buf[x] += buf[x-1];
 533     buf += b->stride;
 534
 535     for (y = 1; y < b->height; y++) {
 536         buf[0] += buf[-b->stride];
 537
 538         for (x = 1; x < b->width; x++) {
 539             int pred = buf[x - 1] + buf[x - b->stride] + buf[x - b->stride-1];
 540             buf[x]  += divide3(pred);
 541         }
 542         buf += b->stride;
 543     }
 544 }
 545
 546 /**
 547  * Dirac Specification ->
 548  * 13.4.2 Non-skipped subbands.  subband_coeffs()
 549  */
 550 static av_always_inline void decode_subband_internal(DiracContext *s, SubBand *b, int is_arith)
 551 {
 552     int cb_x, cb_y, left, right, top, bottom;
 553     DiracArith c;
 554     GetBitContext gb;
 555     int cb_width  = s->codeblock[b->level + (b->orientation != subband_ll)].width;
 556     int cb_height = s->codeblock[b->level + (b->orientation != subband_ll)].height;
 557     int blockcnt_one = (cb_width + cb_height) == 2;
 558
 559     if (!b->length)
 560         return;
 561
 562     init_get_bits(&gb, b->coeff_data, b->length*8);
 563
 564     if (is_arith)
 565         ff_dirac_init_arith_decoder(&c, &gb, b->length);
 566
 567     top = 0;
 568     for (cb_y = 0; cb_y < cb_height; cb_y++) {
 569         bottom = (b->height * (cb_y+1)) / cb_height;
 570         left = 0;
 571         for (cb_x = 0; cb_x < cb_width; cb_x++) {
 572             right = (b->width * (cb_x+1)) / cb_width;
 573             codeblock(s, b, &gb, &c, left, right, top, bottom, blockcnt_one, is_arith);
 574             left = right;
 575         }
 576         top = bottom;
 577     }
 578
 579     if (b->orientation == subband_ll && s->num_refs == 0)
 580         intra_dc_prediction(b);
 581 }
 582
 583 static int decode_subband_arith(AVCodecContext *avctx, void *b)
 584 {
 585     DiracContext *s = avctx->priv_data;
 586     decode_subband_internal(s, b, 1);
 587     return 0;
 588 }
 589
 590 static int decode_subband_golomb(AVCodecContext *avctx, void *arg)
 591 {
 592     DiracContext *s = avctx->priv_data;
 593     SubBand **b     = arg;
 594     decode_subband_internal(s, *b, 0);
 595     return 0;
 596 }
 597
 598 /**
 599  * Dirac Specification ->
 600  * [DIRAC_STD] 13.4.1 core_transform_data()
 601  */
 602 static void decode_component(DiracContext *s, int comp)
 603 {
 604     AVCodecContext *avctx = s->avctx;
 605     SubBand *bands[3*MAX_DWT_LEVELS+1];
 606     enum dirac_subband orientation;
 607     int level, num_bands = 0;
 608
 609     /* Unpack all subbands at all levels. */
 610     for (level = 0; level < s->wavelet_depth; level++) {
 611         for (orientation = !!level; orientation < 4; orientation++) {
 612             SubBand *b = &s->plane[comp].band[level][orientation];
 613             bands[num_bands++] = b;
 614
 615             align_get_bits(&s->gb);
 616             /* [DIRAC_STD] 13.4.2 subband() */
 617             b->length = svq3_get_ue_golomb(&s->gb);
 618             if (b->length) {
 619                 b->quant = svq3_get_ue_golomb(&s->gb);
 620                 align_get_bits(&s->gb);
 621                 b->coeff_data = s->gb.buffer + get_bits_count(&s->gb)/8;
 622                 b->length = FFMIN(b->length, get_bits_left(&s->gb)/8);
 623                 skip_bits_long(&s->gb, b->length*8);
 624             }
 625         }
 626         /* arithmetic coding has inter-level dependencies, so we can only execute one level at a time */
 627         if (s->is_arith)
 628             avctx->execute(avctx, decode_subband_arith, &s->plane[comp].band[level][!!level],
 629                            NULL, 4-!!level, sizeof(SubBand));
 630     }
 631     /* golomb coding has no inter-level dependencies, so we can execute all subbands in parallel */
 632     if (!s->is_arith)
 633         avctx->execute(avctx, decode_subband_golomb, bands, NULL, num_bands, sizeof(SubBand*));
 634 }
 635
 636 /* [DIRAC_STD] 13.5.5.2 Luma slice subband data. luma_slice_band(level,orient,sx,sy) --> if b2 == NULL */
 637 /* [DIRAC_STD] 13.5.5.3 Chroma slice subband data. chroma_slice_band(level,orient,sx,sy) --> if b2 != NULL */
 638 static void lowdelay_subband(DiracContext *s, GetBitContext *gb, int quant,
 639                              int slice_x, int slice_y, int bits_end,
 640                              SubBand *b1, SubBand *b2)
 641 {
 642     int left   = b1->width  * slice_x    / s->lowdelay.num_x;
 643     int right  = b1->width  *(slice_x+1) / s->lowdelay.num_x;
 644     int top    = b1->height * slice_y    / s->lowdelay.num_y;
 645     int bottom = b1->height *(slice_y+1) / s->lowdelay.num_y;
 646
 647     int qfactor = qscale_tab[FFMIN(quant, MAX_QUANT)];
 648     int qoffset = qoffset_intra_tab[FFMIN(quant, MAX_QUANT)];
 649
 650     IDWTELEM *buf1 =      b1->ibuf + top * b1->stride;
 651     IDWTELEM *buf2 = b2 ? b2->ibuf + top * b2->stride : NULL;
 652     int x, y;
 653     /* we have to constantly check for overread since the spec explictly
 654        requires this, with the meaning that all remaining coeffs are set to 0 */
 655     if (get_bits_count(gb) >= bits_end)
 656         return;
 657
 658     for (y = top; y < bottom; y++) {
 659         for (x = left; x < right; x++) {
 660             buf1[x] = coeff_unpack_golomb(gb, qfactor, qoffset);
 661             if (get_bits_count(gb) >= bits_end)
 662                 return;
 663             if (buf2) {
 664                 buf2[x] = coeff_unpack_golomb(gb, qfactor, qoffset);
 665                 if (get_bits_count(gb) >= bits_end)
 666                     return;
 667             }
 668         }
 669         buf1 += b1->stride;
 670         if (buf2)
 671             buf2 += b2->stride;
 672     }
 673 }
 674
 675 struct lowdelay_slice {
 676     GetBitContext gb;
 677     int slice_x;
 678     int slice_y;
 679     int bytes;
 680 };
 681
 682
 683 /**
 684  * Dirac Specification ->
 685  * 13.5.2 Slices. slice(sx,sy)
 686  */
 687 static int decode_lowdelay_slice(AVCodecContext *avctx, void *arg)
 688 {
 689     DiracContext *s = avctx->priv_data;
 690     struct lowdelay_slice *slice = arg;
 691     GetBitContext *gb = &slice->gb;
 692     enum dirac_subband orientation;
 693     int level, quant, chroma_bits, chroma_end;
 694
 695     int quant_base  = get_bits(gb, 7); /*[DIRAC_STD] qindex */
 696     int length_bits = av_log2(8 * slice->bytes)+1;
 697     int luma_bits   = get_bits_long(gb, length_bits);
 698     int luma_end    = get_bits_count(gb) + FFMIN(luma_bits, get_bits_left(gb));
 699
 700     /* [DIRAC_STD] 13.5.5.2 luma_slice_band */
 701     for (level = 0; level < s->wavelet_depth; level++)
 702         for (orientation = !!level; orientation < 4; orientation++) {
 703             quant = FFMAX(quant_base - s->lowdelay.quant[level][orientation], 0);
 704             lowdelay_subband(s, gb, quant, slice->slice_x, slice->slice_y, luma_end,
 705                              &s->plane[0].band[level][orientation], NULL);
 706         }
 707
 708     /* consume any unused bits from luma */
 709     skip_bits_long(gb, get_bits_count(gb) - luma_end);
 710
 711     chroma_bits = 8*slice->bytes - 7 - length_bits - luma_bits;
 712     chroma_end  = get_bits_count(gb) + FFMIN(chroma_bits, get_bits_left(gb));
 713     /* [DIRAC_STD] 13.5.5.3 chroma_slice_band */
 714     for (level = 0; level < s->wavelet_depth; level++)
 715         for (orientation = !!level; orientation < 4; orientation++) {
 716             quant = FFMAX(quant_base - s->lowdelay.quant[level][orientation], 0);
 717             lowdelay_subband(s, gb, quant, slice->slice_x, slice->slice_y, chroma_end,
 718                              &s->plane[1].band[level][orientation],
 719                              &s->plane[2].band[level][orientation]);
 720         }
 721
 722     return 0;
 723 }
 724
 725 /**
 726  * Dirac Specification ->
 727  * 13.5.1 low_delay_transform_data()
 728  */
 729 static void decode_lowdelay(DiracContext *s)
 730 {
 731     AVCodecContext *avctx = s->avctx;
 732     int slice_x, slice_y, bytes, bufsize;
 733     const uint8_t *buf;
 734     struct lowdelay_slice *slices;
 735     int slice_num = 0;
 736
 737     slices = av_mallocz(s->lowdelay.num_x * s->lowdelay.num_y * sizeof(struct lowdelay_slice));
 738
 739     align_get_bits(&s->gb);
 740     /*[DIRAC_STD] 13.5.2 Slices. slice(sx,sy) */
 741     buf = s->gb.buffer + get_bits_count(&s->gb)/8;
 742     bufsize = get_bits_left(&s->gb);
 743
 744     for (slice_y = 0; bufsize > 0 && slice_y < s->lowdelay.num_y; slice_y++)
 745         for (slice_x = 0; bufsize > 0 && slice_x < s->lowdelay.num_x; slice_x++) {
 746             bytes = (slice_num+1) * s->lowdelay.bytes.num / s->lowdelay.bytes.den
 747                 - slice_num    * s->lowdelay.bytes.num / s->lowdelay.bytes.den;
 748
 749             slices[slice_num].bytes   = bytes;
 750             slices[slice_num].slice_x = slice_x;
 751             slices[slice_num].slice_y = slice_y;
 752             init_get_bits(&slices[slice_num].gb, buf, bufsize);
 753             slice_num++;
 754
 755             buf     += bytes;
 756             bufsize -= bytes*8;
 757         }
 758
 759     avctx->execute(avctx, decode_lowdelay_slice, slices, NULL, slice_num,
 760                    sizeof(struct lowdelay_slice)); /* [DIRAC_STD] 13.5.2 Slices */
 761     intra_dc_prediction(&s->plane[0].band[0][0]);  /* [DIRAC_STD] 13.3 intra_dc_prediction() */
 762     intra_dc_prediction(&s->plane[1].band[0][0]);  /* [DIRAC_STD] 13.3 intra_dc_prediction() */
 763     intra_dc_prediction(&s->plane[2].band[0][0]);  /* [DIRAC_STD] 13.3 intra_dc_prediction() */
 764     av_free(slices);
 765 }
 766
 767 static void init_planes(DiracContext *s)
 768 {
 769     int i, w, h, level, orientation;
 770
 771     for (i = 0; i < 3; i++) {
 772         Plane *p = &s->plane[i];
 773
 774         p->width       = s->source.width  >> (i ? s->chroma_x_shift : 0);
 775         p->height      = s->source.height >> (i ? s->chroma_y_shift : 0);
 776         p->idwt_width  = w = CALC_PADDING(p->width , s->wavelet_depth);
 777         p->idwt_height = h = CALC_PADDING(p->height, s->wavelet_depth);
 778         p->idwt_stride = FFALIGN(p->idwt_width, 8);
 779
 780         for (level = s->wavelet_depth-1; level >= 0; level--) {
 781             w = w>>1;
 782             h = h>>1;
 783             for (orientation = !!level; orientation < 4; orientation++) {
 784                 SubBand *b = &p->band[level][orientation];
 785
 786                 b->ibuf   = p->idwt_buf;
 787                 b->level  = level;
 788                 b->stride = p->idwt_stride << (s->wavelet_depth - level);
 789                 b->width  = w;
 790                 b->height = h;
 791                 b->orientation = orientation;
 792
 793                 if (orientation & 1)
 794                     b->ibuf += w;
 795                 if (orientation > 1)
 796                     b->ibuf += b->stride>>1;
 797
 798                 if (level)
 799                     b->parent = &p->band[level-1][orientation];
 800             }
 801         }
 802
 803         if (i > 0) {
 804             p->xblen = s->plane[0].xblen >> s->chroma_x_shift;
 805             p->yblen = s->plane[0].yblen >> s->chroma_y_shift;
 806             p->xbsep = s->plane[0].xbsep >> s->chroma_x_shift;
 807             p->ybsep = s->plane[0].ybsep >> s->chroma_y_shift;
 808         }
 809
 810         p->xoffset = (p->xblen - p->xbsep)/2;
 811         p->yoffset = (p->yblen - p->ybsep)/2;
 812     }
 813 }
 814
 815 /**
 816  * Unpack the motion compensation parameters
 817  * Dirac Specification ->
 818  * 11.2 Picture prediction data. picture_prediction()
 819  */
 820 static int dirac_unpack_prediction_parameters(DiracContext *s)
 821 {
 822     static const uint8_t default_blen[] = { 4, 12, 16, 24 };
 823     static const uint8_t default_bsep[] = { 4,  8, 12, 16 };
 824
 825     GetBitContext *gb = &s->gb;
 826     unsigned idx, ref;
 827
 828     align_get_bits(gb);
 829     /* [DIRAC_STD] 11.2.2 Block parameters. block_parameters() */
 830     /* Luma and Chroma are equal. 11.2.3 */
 831     idx = svq3_get_ue_golomb(gb); /* [DIRAC_STD] index */
 832
 833     if (idx > 4) {
 834         av_log(s->avctx, AV_LOG_ERROR, "Block prediction index too high\n");
 835         return -1;
 836     }
 837
 838     if (idx == 0) {
 839         s->plane[0].xblen = svq3_get_ue_golomb(gb);
 840         s->plane[0].yblen = svq3_get_ue_golomb(gb);
 841         s->plane[0].xbsep = svq3_get_ue_golomb(gb);
 842         s->plane[0].ybsep = svq3_get_ue_golomb(gb);
 843     } else {
 844         /*[DIRAC_STD] preset_block_params(index). Table 11.1 */
 845         s->plane[0].xblen = default_blen[idx-1];
 846         s->plane[0].yblen = default_blen[idx-1];
 847         s->plane[0].xbsep = default_bsep[idx-1];
 848         s->plane[0].ybsep = default_bsep[idx-1];
 849     }
 850     /*[DIRAC_STD] 11.2.4 motion_data_dimensions()
 851       Calculated in function dirac_unpack_block_motion_data */
 852
 853     if (s->plane[0].xbsep < s->plane[0].xblen/2 || s->plane[0].ybsep < s->plane[0].yblen/2) {
 854         av_log(s->avctx, AV_LOG_ERROR, "Block separation too small\n");
 855         return -1;
 856     }
 857     if (s->plane[0].xbsep > s->plane[0].xblen || s->plane[0].ybsep > s->plane[0].yblen) {
 858         av_log(s->avctx, AV_LOG_ERROR, "Block seperation greater than size\n");
 859         return -1;
 860     }
 861     if (FFMAX(s->plane[0].xblen, s->plane[0].yblen) > MAX_BLOCKSIZE) {
 862         av_log(s->avctx, AV_LOG_ERROR, "Unsupported large block size\n");
 863         return -1;
 864     }
 865
 866     /*[DIRAC_STD] 11.2.5 Motion vector precision. motion_vector_precision()
 867       Read motion vector precision */
 868     s->mv_precision = svq3_get_ue_golomb(gb);
 869     if (s->mv_precision > 3) {
 870         av_log(s->avctx, AV_LOG_ERROR, "MV precision finer than eighth-pel\n");
 871         return -1;
 872     }
 873
 874     /*[DIRAC_STD] 11.2.6 Global motion. global_motion()
 875       Read the global motion compensation parameters */
 876     s->globalmc_flag = get_bits1(gb);
 877     if (s->globalmc_flag) {
 878         memset(s->globalmc, 0, sizeof(s->globalmc));
 879         /* [DIRAC_STD] pan_tilt(gparams) */
 880         for (ref = 0; ref < s->num_refs; ref++) {
 881             if (get_bits1(gb)) {
 882                 s->globalmc[ref].pan_tilt[0] = dirac_get_se_golomb(gb);
 883                 s->globalmc[ref].pan_tilt[1] = dirac_get_se_golomb(gb);
 884             }
 885             /* [DIRAC_STD] zoom_rotate_shear(gparams)
 886                zoom/rotation/shear parameters */
 887             if (get_bits1(gb)) {
 888                 s->globalmc[ref].zrs_exp   = svq3_get_ue_golomb(gb);
 889                 s->globalmc[ref].zrs[0][0] = dirac_get_se_golomb(gb);
 890                 s->globalmc[ref].zrs[0][1] = dirac_get_se_golomb(gb);
 891                 s->globalmc[ref].zrs[1][0] = dirac_get_se_golomb(gb);
 892                 s->globalmc[ref].zrs[1][1] = dirac_get_se_golomb(gb);
 893             } else {
 894                 s->globalmc[ref].zrs[0][0] = 1;
 895                 s->globalmc[ref].zrs[1][1] = 1;
 896             }
 897             /* [DIRAC_STD] perspective(gparams) */
 898             if (get_bits1(gb)) {
 899                 s->globalmc[ref].perspective_exp = svq3_get_ue_golomb(gb);
 900                 s->globalmc[ref].perspective[0]  = dirac_get_se_golomb(gb);
 901                 s->globalmc[ref].perspective[1]  = dirac_get_se_golomb(gb);
 902             }
 903         }
 904     }
 905
 906     /*[DIRAC_STD] 11.2.7 Picture prediction mode. prediction_mode()
 907       Picture prediction mode, not currently used. */
 908     if (svq3_get_ue_golomb(gb)) {
 909         av_log(s->avctx, AV_LOG_ERROR, "Unknown picture prediction mode\n");
 910         return -1;
 911     }
 912
 913     /* [DIRAC_STD] 11.2.8 Reference picture weight. reference_picture_weights()
 914        just data read, weight calculation will be done later on. */
 915     s->weight_log2denom = 1;
 916     s->weight[0]        = 1;
 917     s->weight[1]        = 1;
 918
 919     if (get_bits1(gb)) {
 920         s->weight_log2denom = svq3_get_ue_golomb(gb);
 921         s->weight[0] = dirac_get_se_golomb(gb);
 922         if (s->num_refs == 2)
 923             s->weight[1] = dirac_get_se_golomb(gb);
 924     }
 925     return 0;
 926 }
 927
 928 /**
 929  * Dirac Specification ->
 930  * 11.3 Wavelet transform data. wavelet_transform()
 931  */
 932 static int dirac_unpack_idwt_params(DiracContext *s)
 933 {
 934     GetBitContext *gb = &s->gb;
 935     int i, level;
 936     unsigned tmp;
 937
 938 #define CHECKEDREAD(dst, cond, errmsg) \
 939     tmp = svq3_get_ue_golomb(gb); \
 940     if (cond) { \
 941         av_log(s->avctx, AV_LOG_ERROR, errmsg); \
 942         return -1; \
 943     }\
 944     dst = tmp;
 945
 946     align_get_bits(gb);
 947
 948     s->zero_res = s->num_refs ? get_bits1(gb) : 0;
 949     if (s->zero_res)
 950         return 0;
 951
 952     /*[DIRAC_STD] 11.3.1 Transform parameters. transform_parameters() */
 953     CHECKEDREAD(s->wavelet_idx, tmp > 6, "wavelet_idx is too big\n")
 954
 955     CHECKEDREAD(s->wavelet_depth, tmp > MAX_DWT_LEVELS || tmp < 1, "invalid number of DWT decompositions\n")
 956
 957     if (!s->low_delay) {
 958         /* Codeblock paramaters (core syntax only) */
 959         if (get_bits1(gb)) {
 960             for (i = 0; i <= s->wavelet_depth; i++) {
 961                 CHECKEDREAD(s->codeblock[i].width , tmp < 1, "codeblock width invalid\n")
 962                 CHECKEDREAD(s->codeblock[i].height, tmp < 1, "codeblock height invalid\n")
 963             }
 964
 965             CHECKEDREAD(s->codeblock_mode, tmp > 1, "unknown codeblock mode\n")
 966         } else
 967             for (i = 0; i <= s->wavelet_depth; i++)
 968                 s->codeblock[i].width = s->codeblock[i].height = 1;
 969     } else {
 970         /* Slice parameters + quantization matrix*/
 971         /*[DIRAC_STD] 11.3.4 Slice coding Parameters (low delay syntax only). slice_parameters() */
 972         s->lowdelay.num_x     = svq3_get_ue_golomb(gb);
 973         s->lowdelay.num_y     = svq3_get_ue_golomb(gb);
 974         s->lowdelay.bytes.num = svq3_get_ue_golomb(gb);
 975         s->lowdelay.bytes.den = svq3_get_ue_golomb(gb);
 976
 977         /* [DIRAC_STD] 11.3.5 Quantisation matrices (low-delay syntax). quant_matrix() */
 978         if (get_bits1(gb)) {
 979             av_log(s->avctx,AV_LOG_DEBUG,"Low Delay: Has Custom Quantization Matrix!\n");
 980             /* custom quantization matrix */
 981             s->lowdelay.quant[0][0] = svq3_get_ue_golomb(gb);
 982             for (level = 0; level < s->wavelet_depth; level++) {
 983                 s->lowdelay.quant[level][1] = svq3_get_ue_golomb(gb);
 984                 s->lowdelay.quant[level][2] = svq3_get_ue_golomb(gb);
 985                 s->lowdelay.quant[level][3] = svq3_get_ue_golomb(gb);
 986             }
 987         } else {
 988             /* default quantization matrix */
 989             for (level = 0; level < s->wavelet_depth; level++)
 990                 for (i = 0; i < 4; i++) {
 991                     s->lowdelay.quant[level][i] = default_qmat[s->wavelet_idx][level][i];
 992                     /* haar with no shift differs for different depths */
 993                     if (s->wavelet_idx == 3)
 994                         s->lowdelay.quant[level][i] += 4*(s->wavelet_depth-1 - level);
 995                 }
 996         }
 997     }
 998     return 0;
 999 }
1000
/**
 * Predict the split mode of the superblock at (x, y) from the split modes
 * that were already stored for its neighbours.
 *
 * @param sbsplit pointer to the entry of the current superblock; the left,
 *                upper and upper-left neighbours are read relative to it
 * @param stride  number of entries per superblock row
 * @param x       column of the superblock (0 = no left neighbour)
 * @param y       row of the superblock (0 = no upper neighbour)
 * @return 0 for the first superblock, the single available neighbour on the
 *         first row/column, otherwise the rounded mean of the three neighbours
 */
static inline int pred_sbsplit(uint8_t *sbsplit, int stride, int x, int y)
{
    /* rounded mean of three split modes, looked up by their sum (0..6) */
    static const uint8_t mean_of_three[7] = { 0, 0, 1, 1, 1, 2, 2 };
    int left, above, above_left;

    if (x == 0 && y == 0)
        return 0;
    if (y == 0)
        return sbsplit[-1];
    if (x == 0)
        return sbsplit[-stride];

    left       = sbsplit[-1];
    above      = sbsplit[-stride];
    above_left = sbsplit[-stride-1];

    return mean_of_three[left + above + above_left];
}
1014
1015 static inline int pred_block_mode(DiracBlock *block, int stride, int x, int y, int refmask)
1016 {
1017     int pred;
1018
1019     if (!(x|y))
1020         return 0;
1021     else if (!y)
1022         return block[-1].ref & refmask;
1023     else if (!x)
1024         return block[-stride].ref & refmask;
1025
1026     /* return the majority */
1027     pred = (block[-1].ref & refmask) + (block[-stride].ref & refmask) + (block[-stride-1].ref & refmask);
1028     return (pred >> 1) & refmask;
1029 }
1030
1031 static inline void pred_block_dc(DiracBlock *block, int stride, int x, int y)
1032 {
1033     int i, n = 0;
1034
1035     memset(block->u.dc, 0, sizeof(block->u.dc));
1036
1037     if (x && !(block[-1].ref & 3)) {
1038         for (i = 0; i < 3; i++)
1039             block->u.dc[i] += block[-1].u.dc[i];
1040         n++;
1041     }
1042
1043     if (y && !(block[-stride].ref & 3)) {
1044         for (i = 0; i < 3; i++)
1045             block->u.dc[i] += block[-stride].u.dc[i];
1046         n++;
1047     }
1048
1049     if (x && y && !(block[-1-stride].ref & 3)) {
1050         for (i = 0; i < 3; i++)
1051             block->u.dc[i] += block[-1-stride].u.dc[i];
1052         n++;
1053     }
1054
1055     if (n == 2) {
1056         for (i = 0; i < 3; i++)
1057             block->u.dc[i] = (block->u.dc[i]+1)>>1;
1058     } else if (n == 3) {
1059         for (i = 0; i < 3; i++)
1060             block->u.dc[i] = divide3(block->u.dc[i]);
1061     }
1062 }
1063
1064 static inline void pred_mv(DiracBlock *block, int stride, int x, int y, int ref)
1065 {
1066     int16_t *pred[3];
1067     int refmask = ref+1;
1068     int mask = refmask | DIRAC_REF_MASK_GLOBAL; /*  exclude gmc blocks */
1069     int n = 0;
1070
1071     if (x && (block[-1].ref & mask) == refmask)
1072         pred[n++] = block[-1].u.mv[ref];
1073
1074     if (y && (block[-stride].ref & mask) == refmask)
1075         pred[n++] = block[-stride].u.mv[ref];
1076
1077     if (x && y && (block[-stride-1].ref & mask) == refmask)
1078         pred[n++] = block[-stride-1].u.mv[ref];
1079
1080     switch (n) {
1081     case 0:
1082         block->u.mv[ref][0] = 0;
1083         block->u.mv[ref][1] = 0;
1084         break;
1085     case 1:
1086         block->u.mv[ref][0] = pred[0][0];
1087         block->u.mv[ref][1] = pred[0][1];
1088         break;
1089     case 2:
1090         block->u.mv[ref][0] = (pred[0][0] + pred[1][0] + 1) >> 1;
1091         block->u.mv[ref][1] = (pred[0][1] + pred[1][1] + 1) >> 1;
1092         break;
1093     case 3:
1094         block->u.mv[ref][0] = mid_pred(pred[0][0], pred[1][0], pred[2][0]);
1095         block->u.mv[ref][1] = mid_pred(pred[0][1], pred[1][1], pred[2][1]);
1096         break;
1097     }
1098 }
1099
1100 static void global_mv(DiracContext *s, DiracBlock *block, int x, int y, int ref)
1101 {
1102     int ez      = s->globalmc[ref].zrs_exp;
1103     int ep      = s->globalmc[ref].perspective_exp;
1104     int (*A)[2] = s->globalmc[ref].zrs;
1105     int *b      = s->globalmc[ref].pan_tilt;
1106     int *c      = s->globalmc[ref].perspective;
1107
1108     int m       = (1<<ep) - (c[0]*x + c[1]*y);
1109     int mx      = m * ((A[0][0] * x + A[0][1]*y) + (1<<ez) * b[0]);
1110     int my      = m * ((A[1][0] * x + A[1][1]*y) + (1<<ez) * b[1]);
1111
1112     block->u.mv[ref][0] = (mx + (1<<(ez+ep))) >> (ez+ep);
1113     block->u.mv[ref][1] = (my + (1<<(ez+ep))) >> (ez+ep);
1114 }
1115
1116 static void decode_block_params(DiracContext *s, DiracArith arith[8], DiracBlock *block,
1117                                 int stride, int x, int y)
1118 {
1119     int i;
1120
1121     block->ref  = pred_block_mode(block, stride, x, y, DIRAC_REF_MASK_REF1);
1122     block->ref ^= dirac_get_arith_bit(arith, CTX_PMODE_REF1);
1123
1124     if (s->num_refs == 2) {
1125         block->ref |= pred_block_mode(block, stride, x, y, DIRAC_REF_MASK_REF2);
1126         block->ref ^= dirac_get_arith_bit(arith, CTX_PMODE_REF2) << 1;
1127     }
1128
1129     if (!block->ref) {
1130         pred_block_dc(block, stride, x, y);
1131         for (i = 0; i < 3; i++)
1132             block->u.dc[i] += dirac_get_arith_int(arith+1+i, CTX_DC_F1, CTX_DC_DATA);
1133         return;
1134     }
1135
1136     if (s->globalmc_flag) {
1137         block->ref |= pred_block_mode(block, stride, x, y, DIRAC_REF_MASK_GLOBAL);
1138         block->ref ^= dirac_get_arith_bit(arith, CTX_GLOBAL_BLOCK) << 2;
1139     }
1140
1141     for (i = 0; i < s->num_refs; i++)
1142         if (block->ref & (i+1)) {
1143             if (block->ref & DIRAC_REF_MASK_GLOBAL) {
1144                 global_mv(s, block, x, y, i);
1145             } else {
1146                 pred_mv(block, stride, x, y, i);
1147                 block->u.mv[i][0] += dirac_get_arith_int(arith + 4 + 2 * i, CTX_MV_F1, CTX_MV_DATA);
1148                 block->u.mv[i][1] += dirac_get_arith_int(arith + 5 + 2 * i, CTX_MV_F1, CTX_MV_DATA);
1149             }
1150         }
1151 }
1152
1153 /**
1154  * Copies the current block to the other blocks covered by the current superblock split mode
1155  */
1156 static void propagate_block_data(DiracBlock *block, int stride, int size)
1157 {
1158     int x, y;
1159     DiracBlock *dst = block;
1160
1161     for (x = 1; x < size; x++)
1162         dst[x] = *block;
1163
1164     for (y = 1; y < size; y++) {
1165         dst += stride;
1166         for (x = 0; x < size; x++)
1167             dst[x] = *block;
1168     }
1169 }
1170
1171 /**
1172  * Dirac Specification ->
1173  * 12. Block motion data syntax
1174  */
1175 static int dirac_unpack_block_motion_data(DiracContext *s)
1176 {
1177     GetBitContext *gb = &s->gb;
1178     uint8_t *sbsplit = s->sbsplit;
1179     int i, x, y, q, p;
1180     DiracArith arith[8];
1181
1182     align_get_bits(gb);
1183
1184     /* [DIRAC_STD] 11.2.4 and 12.2.1 Number of blocks and superblocks */
1185     s->sbwidth  = DIVRNDUP(s->source.width,  4*s->plane[0].xbsep);
1186     s->sbheight = DIVRNDUP(s->source.height, 4*s->plane[0].ybsep);
1187     s->blwidth  = 4 * s->sbwidth;
1188     s->blheight = 4 * s->sbheight;
1189
1190     /* [DIRAC_STD] 12.3.1 Superblock splitting modes. superblock_split_modes()
1191        decode superblock split modes */
1192     ff_dirac_init_arith_decoder(arith, gb, svq3_get_ue_golomb(gb));     /* svq3_get_ue_golomb(gb) is the length */
1193     for (y = 0; y < s->sbheight; y++) {
1194         for (x = 0; x < s->sbwidth; x++) {
1195             unsigned int split  = dirac_get_arith_uint(arith, CTX_SB_F1, CTX_SB_DATA);
1196             if (split > 2)
1197                 return -1;
1198             sbsplit[x] = (split + pred_sbsplit(sbsplit+x, s->sbwidth, x, y)) % 3;
1199         }
1200         sbsplit += s->sbwidth;
1201     }
1202
1203     /* setup arith decoding */
1204     ff_dirac_init_arith_decoder(arith, gb, svq3_get_ue_golomb(gb));
1205     for (i = 0; i < s->num_refs; i++) {
1206         ff_dirac_init_arith_decoder(arith + 4 + 2 * i, gb, svq3_get_ue_golomb(gb));
1207         ff_dirac_init_arith_decoder(arith + 5 + 2 * i, gb, svq3_get_ue_golomb(gb));
1208     }
1209     for (i = 0; i < 3; i++)
1210         ff_dirac_init_arith_decoder(arith+1+i, gb, svq3_get_ue_golomb(gb));
1211
1212     for (y = 0; y < s->sbheight; y++)
1213         for (x = 0; x < s->sbwidth; x++) {
1214             int blkcnt = 1 << s->sbsplit[y * s->sbwidth + x];
1215             int step   = 4 >> s->sbsplit[y * s->sbwidth + x];
1216
1217             for (q = 0; q < blkcnt; q++)
1218                 for (p = 0; p < blkcnt; p++) {
1219                     int bx = 4 * x + p*step;
1220                     int by = 4 * y + q*step;
1221                     DiracBlock *block = &s->blmotion[by*s->blwidth + bx];
1222                     decode_block_params(s, arith, block, s->blwidth, bx, by);
1223                     propagate_block_data(block, s->blwidth, step);
1224                 }
1225         }
1226
1227     return 0;
1228 }
1229
/**
 * One-dimensional overlapped block weight of sample i.
 *
 * Samples within 2*offset of either end of the block lie on a ramp whose
 * value depends on the distance to that end; all other samples get the
 * full weight of 8.
 *
 * @param i      sample position inside the block (0 .. blen-1)
 * @param blen   block length
 * @param offset half of the overlap between neighbouring blocks
 * @return weight in the range 1..8
 */
static int weight(int i, int blen, int offset)
{
    int dist; /* distance to the block end whose ramp contains sample i */

    if (i < 2*offset)
        dist = i;
    else if (i > blen-1 - 2*offset)
        dist = blen-1 - i;
    else
        return 8;

    /* an overlap of 2 samples uses the fixed ramp 3, 5 */
    if (offset == 1)
        return dist ? 5 : 3;

    return 1 + (6*dist + offset - 1) / (2*offset - 1);
}
1241
1242 static void init_obmc_weight_row(Plane *p, uint8_t *obmc_weight, int stride,
1243                                  int left, int right, int wy)
1244 {
1245     int x;
1246     for (x = 0; left && x < p->xblen >> 1; x++)
1247         obmc_weight[x] = wy*8;
1248     for (; x < p->xblen >> right; x++)
1249         obmc_weight[x] = wy*weight(x, p->xblen, p->xoffset);
1250     for (; x < p->xblen; x++)
1251         obmc_weight[x] = wy*8;
1252     for (; x < stride; x++)
1253         obmc_weight[x] = 0;
1254 }
1255
1256 static void init_obmc_weight(Plane *p, uint8_t *obmc_weight, int stride,
1257                              int left, int right, int top, int bottom)
1258 {
1259     int y;
1260     for (y = 0; top && y < p->yblen >> 1; y++) {
1261         init_obmc_weight_row(p, obmc_weight, stride, left, right, 8);
1262         obmc_weight += stride;
1263     }
1264     for (; y < p->yblen >> bottom; y++) {
1265         int wy = weight(y, p->yblen, p->yoffset);
1266         init_obmc_weight_row(p, obmc_weight, stride, left, right, wy);
1267         obmc_weight += stride;
1268     }
1269     for (; y < p->yblen; y++) {
1270         init_obmc_weight_row(p, obmc_weight, stride, left, right, 8);
1271         obmc_weight += stride;
1272     }
1273 }
1274
1275 static void init_obmc_weights(DiracContext *s, Plane *p, int by)
1276 {
1277     int top = !by;
1278     int bottom = by == s->blheight-1;
1279
1280     /* don't bother re-initing for rows 2 to blheight-2, the weights don't change */
1281     if (top || bottom || by == 1) {
1282         init_obmc_weight(p, s->obmc_weight[0], MAX_BLOCKSIZE, 1, 0, top, bottom);
1283         init_obmc_weight(p, s->obmc_weight[1], MAX_BLOCKSIZE, 0, 0, top, bottom);
1284         init_obmc_weight(p, s->obmc_weight[2], MAX_BLOCKSIZE, 0, 1, top, bottom);
1285     }
1286 }
1287
/**
 * Eighth-pel interpolation weights.
 * Indexed as epel_weights[my&3][mx&3] by mc_subpel(), which hands the
 * selected entry to the pixel function through src[4].
 * Every entry holds four weights that sum to 16.
 */
static const uint8_t epel_weights[4][4][4] = {
    {{ 16,  0,  0,  0 },
     { 12,  4,  0,  0 },
     {  8,  8,  0,  0 },
     {  4, 12,  0,  0 }},
    {{ 12,  0,  4,  0 },
     {  9,  3,  3,  1 },
     {  6,  6,  2,  2 },
     {  3,  9,  1,  3 }},
    {{  8,  0,  8,  0 },
     {  6,  2,  6,  2 },
     {  4,  4,  4,  4 },
     {  2,  6,  2,  6 }},
    {{  4,  0, 12,  0 },
     {  3,  1,  9,  3 },
     {  2,  2,  6,  6 },
     {  1,  3,  3,  9 }}
};
1306
1307 /**
1308  * For block x,y, determine which of the hpel planes to do bilinear
1309  * interpolation from and set src[] to the location in each hpel plane
1310  * to MC from.
1311  *
1312  * @return the index of the put_dirac_pixels_tab function to use
1313  *  0 for 1 plane (fpel,hpel), 1 for 2 planes (qpel), 2 for 4 planes (qpel), and 3 for epel
1314  */
1315 static int mc_subpel(DiracContext *s, DiracBlock *block, const uint8_t *src[5],
1316                      int x, int y, int ref, int plane)
1317 {
1318     Plane *p = &s->plane[plane];
1319     uint8_t **ref_hpel = s->ref_pics[ref]->hpel[plane];
1320     int motion_x = block->u.mv[ref][0];
1321     int motion_y = block->u.mv[ref][1];
1322     int mx, my, i, epel, nplanes = 0;
1323
1324     if (plane) {
1325         motion_x >>= s->chroma_x_shift;
1326         motion_y >>= s->chroma_y_shift;
1327     }
1328
1329     mx         = motion_x & ~(-1 << s->mv_precision);
1330     my         = motion_y & ~(-1 << s->mv_precision);
1331     motion_x >>= s->mv_precision;
1332     motion_y >>= s->mv_precision;
1333     /* normalize subpel coordinates to epel */
1334     /* TODO: template this function? */
1335     mx      <<= 3 - s->mv_precision;
1336     my      <<= 3 - s->mv_precision;
1337
1338     x += motion_x;
1339     y += motion_y;
1340     epel = (mx|my)&1;
1341
1342     /* hpel position */
1343     if (!((mx|my)&3)) {
1344         nplanes = 1;
1345         src[0] = ref_hpel[(my>>1)+(mx>>2)] + y*p->stride + x;
1346     } else {
1347         /* qpel or epel */
1348         nplanes = 4;
1349         for (i = 0; i < 4; i++)
1350             src[i] = ref_hpel[i] + y*p->stride + x;
1351
1352         /* if we're interpolating in the right/bottom halves, adjust the planes as needed
1353            we increment x/y because the edge changes for half of the pixels */
1354         if (mx > 4) {
1355             src[0] += 1;
1356             src[2] += 1;
1357             x++;
1358         }
1359         if (my > 4) {
1360             src[0] += p->stride;
1361             src[1] += p->stride;
1362             y++;
1363         }
1364
1365         /* hpel planes are:
1366            [0]: F  [1]: H
1367            [2]: V  [3]: C */
1368         if (!epel) {
1369             /* check if we really only need 2 planes since either mx or my is
1370                a hpel position. (epel weights of 0 handle this there) */
1371             if (!(mx&3)) {
1372                 /* mx == 0: average [0] and [2]
1373                    mx == 4: average [1] and [3] */
1374                 src[!mx] = src[2 + !!mx];
1375                 nplanes = 2;
1376             } else if (!(my&3)) {
1377                 src[0] = src[(my>>1)  ];
1378                 src[1] = src[(my>>1)+1];
1379                 nplanes = 2;
1380             }
1381         } else {
1382             /* adjust the ordering if needed so the weights work */
1383             if (mx > 4) {
1384                 FFSWAP(const uint8_t *, src[0], src[1]);
1385                 FFSWAP(const uint8_t *, src[2], src[3]);
1386             }
1387             if (my > 4) {
1388                 FFSWAP(const uint8_t *, src[0], src[2]);
1389                 FFSWAP(const uint8_t *, src[1], src[3]);
1390             }
1391             src[4] = epel_weights[my&3][mx&3];
1392         }
1393     }
1394
1395     /* fixme: v/h _edge_pos */
1396     if ((unsigned)x > p->width +EDGE_WIDTH/2 - p->xblen ||
1397         (unsigned)y > p->height+EDGE_WIDTH/2 - p->yblen) {
1398         for (i = 0; i < nplanes; i++) {
1399             ff_emulated_edge_mc(s->edge_emu_buffer[i], src[i], p->stride,
1400                                 p->xblen, p->yblen, x, y,
1401                                 p->width+EDGE_WIDTH/2, p->height+EDGE_WIDTH/2);
1402             src[i] = s->edge_emu_buffer[i];
1403         }
1404     }
1405     return (nplanes>>1) + epel;
1406 }
1407
1408 static void add_dc(uint16_t *dst, int dc, int stride,
1409                    uint8_t *obmc_weight, int xblen, int yblen)
1410 {
1411     int x, y;
1412     dc += 128;
1413
1414     for (y = 0; y < yblen; y++) {
1415         for (x = 0; x < xblen; x += 2) {
1416             dst[x  ] += dc * obmc_weight[x  ];
1417             dst[x+1] += dc * obmc_weight[x+1];
1418         }
1419         dst          += stride;
1420         obmc_weight  += MAX_BLOCKSIZE;
1421     }
1422 }
1423
1424 static void block_mc(DiracContext *s, DiracBlock *block,
1425                      uint16_t *mctmp, uint8_t *obmc_weight,
1426                      int plane, int dstx, int dsty)
1427 {
1428     Plane *p = &s->plane[plane];
1429     const uint8_t *src[5];
1430     int idx;
1431
1432     switch (block->ref&3) {
1433     case 0: /* DC */
1434         add_dc(mctmp, block->u.dc[plane], p->stride, obmc_weight, p->xblen, p->yblen);
1435         return;
1436     case 1:
1437     case 2:
1438         idx = mc_subpel(s, block, src, dstx, dsty, (block->ref&3)-1, plane);
1439         s->put_pixels_tab[idx](s->mcscratch, src, p->stride, p->yblen);
1440         if (s->weight_func)
1441             s->weight_func(s->mcscratch, p->stride, s->weight_log2denom,
1442                            s->weight[0] + s->weight[1], p->yblen);
1443         break;
1444     case 3:
1445         idx = mc_subpel(s, block, src, dstx, dsty, 0, plane);
1446         s->put_pixels_tab[idx](s->mcscratch, src, p->stride, p->yblen);
1447         idx = mc_subpel(s, block, src, dstx, dsty, 1, plane);
1448         if (s->biweight_func) {
1449             /* fixme: +32 is a quick hack */
1450             s->put_pixels_tab[idx](s->mcscratch + 32, src, p->stride, p->yblen);
1451             s->biweight_func(s->mcscratch, s->mcscratch+32, p->stride, s->weight_log2denom,
1452                              s->weight[0], s->weight[1], p->yblen);
1453         } else
1454             s->avg_pixels_tab[idx](s->mcscratch, src, p->stride, p->yblen);
1455         break;
1456     }
1457     s->add_obmc(mctmp, s->mcscratch, p->stride, obmc_weight, p->yblen);
1458 }
1459
1460 static void mc_row(DiracContext *s, DiracBlock *block, uint16_t *mctmp, int plane, int dsty)
1461 {
1462     Plane *p = &s->plane[plane];
1463     int x, dstx = p->xbsep - p->xoffset;
1464
1465     block_mc(s, block, mctmp, s->obmc_weight[0], plane, -p->xoffset, dsty);
1466     mctmp += p->xbsep;
1467
1468     for (x = 1; x < s->blwidth-1; x++) {
1469         block_mc(s, block+x, mctmp, s->obmc_weight[1], plane, dstx, dsty);
1470         dstx  += p->xbsep;
1471         mctmp += p->xbsep;
1472     }
1473     block_mc(s, block+x, mctmp, s->obmc_weight[2], plane, dstx, dsty);
1474 }
1475
1476 static void select_dsp_funcs(DiracContext *s, int width, int height, int xblen, int yblen)
1477 {
1478     int idx = 0;
1479     if (xblen > 8)
1480         idx = 1;
1481     if (xblen > 16)
1482         idx = 2;
1483
1484     memcpy(s->put_pixels_tab, s->diracdsp.put_dirac_pixels_tab[idx], sizeof(s->put_pixels_tab));
1485     memcpy(s->avg_pixels_tab, s->diracdsp.avg_dirac_pixels_tab[idx], sizeof(s->avg_pixels_tab));
1486     s->add_obmc = s->diracdsp.add_dirac_obmc[idx];
1487     if (s->weight_log2denom > 1 || s->weight[0] != 1 || s->weight[1] != 1) {
1488         s->weight_func   = s->diracdsp.weight_dirac_pixels_tab[idx];
1489         s->biweight_func = s->diracdsp.biweight_dirac_pixels_tab[idx];
1490     } else {
1491         s->weight_func   = NULL;
1492         s->biweight_func = NULL;
1493     }
1494 }
1495
1496 static void interpolate_refplane(DiracContext *s, DiracFrame *ref, int plane, int width, int height)
1497 {
1498     /* chroma allocates an edge of 8 when subsampled
1499        which for 4:2:2 means an h edge of 16 and v edge of 8
1500        just use 8 for everything for the moment */
1501     int i, edge = EDGE_WIDTH/2;
1502
1503     ref->hpel[plane][0] = ref->avframe.data[plane];
1504     s->dsp.draw_edges(ref->hpel[plane][0], ref->avframe.linesize[plane], width, height, edge, edge, EDGE_TOP | EDGE_BOTTOM); /* EDGE_TOP | EDGE_BOTTOM values just copied to make it build, this needs to be ensured */
1505
1506     /* no need for hpel if we only have fpel vectors */
1507     if (!s->mv_precision)
1508         return;
1509
1510     for (i = 1; i < 4; i++) {
1511         if (!ref->hpel_base[plane][i])
1512             ref->hpel_base[plane][i] = av_malloc((height+2*edge) * ref->avframe.linesize[plane] + 32);
1513         /* we need to be 16-byte aligned even for chroma */
1514         ref->hpel[plane][i] = ref->hpel_base[plane][i] + edge*ref->avframe.linesize[plane] + 16;
1515     }
1516
1517     if (!ref->interpolated[plane]) {
1518         s->diracdsp.dirac_hpel_filter(ref->hpel[plane][1], ref->hpel[plane][2],
1519                                       ref->hpel[plane][3], ref->hpel[plane][0],
1520                                       ref->avframe.linesize[plane], width, height);
1521         s->dsp.draw_edges(ref->hpel[plane][1], ref->avframe.linesize[plane], width, height, edge, edge, EDGE_TOP | EDGE_BOTTOM);
1522         s->dsp.draw_edges(ref->hpel[plane][2], ref->avframe.linesize[plane], width, height, edge, edge, EDGE_TOP | EDGE_BOTTOM);
1523         s->dsp.draw_edges(ref->hpel[plane][3], ref->avframe.linesize[plane], width, height, edge, edge, EDGE_TOP | EDGE_BOTTOM);
1524     }
1525     ref->interpolated[plane] = 1;
1526 }
1527
/**
 * Dirac Specification ->
 * 13.0 Transform data syntax. transform_data()
 *
 * Decodes the wavelet coefficients of all three components, runs the
 * inverse wavelet transform slice by slice and writes the reconstructed
 * picture: directly for intra pictures, added to the motion compensated
 * prediction for inter pictures.
 *
 * @param s decoder context
 * @return 0 on success, -1 if the inverse transform cannot be initialised
 */
static int dirac_decode_frame_internal(DiracContext *s)
{
    DWTContext d;
    int y, i, comp, dsty;

    if (s->low_delay) {
        /* [DIRAC_STD] 13.5.1 low_delay_transform_data() */
        /* low delay data covers all components at once, so clear every
           coefficient buffer before decoding */
        for (comp = 0; comp < 3; comp++) {
            Plane *p = &s->plane[comp];
            memset(p->idwt_buf, 0, p->idwt_stride * p->idwt_height * sizeof(IDWTELEM));
        }
        if (!s->zero_res)
            decode_lowdelay(s);
    }

    for (comp = 0; comp < 3; comp++) {
        Plane *p       = &s->plane[comp];
        uint8_t *frame = s->current_picture->avframe.data[comp];

        /* FIXME: small resolutions */
        /* carve the four edge emulation buffers out of one allocation */
        for (i = 0; i < 4; i++)
            s->edge_emu_buffer[i] = s->edge_emu_buffer_base + i*FFALIGN(p->width, 16);

        /* core syntax: coefficients are decoded one component at a time */
        if (!s->zero_res && !s->low_delay)
        {
            memset(p->idwt_buf, 0, p->idwt_stride * p->idwt_height * sizeof(IDWTELEM));
            decode_component(s, comp); /* [DIRAC_STD] 13.4.1 core_transform_data() */
        }
        if (ff_spatial_idwt_init2(&d, p->idwt_buf, p->idwt_width, p->idwt_height, p->idwt_stride,
                                  s->wavelet_idx+2, s->wavelet_depth, p->idwt_tmp))
            return -1;

        if (!s->num_refs) { /* intra */
            /* transform and output the picture in slices of 16 rows */
            for (y = 0; y < p->height; y += 16) {
                ff_spatial_idwt_slice2(&d, y+16); /* decode */
                s->diracdsp.put_signed_rect_clamped(frame + y*p->stride, p->stride,
                                                    p->idwt_buf + y*p->idwt_stride, p->idwt_stride, p->width, 16);
            }
        } else { /* inter */
            /* number of mctmp entries one block row advances by */
            int rowheight = p->ybsep*p->stride;

            select_dsp_funcs(s, p->width, p->height, p->xblen, p->yblen);

            for (i = 0; i < s->num_refs; i++)
                interpolate_refplane(s, s->ref_pics[i], comp, p->width, p->height);

            memset(s->mctmp, 0, 4*p->yoffset*p->stride);

            /* the first block row starts yoffset rows above the picture */
            dsty = -p->yoffset;
            for (y = 0; y < s->blheight; y++) {
                /* start: first picture row finished by this block row,
                   h: number of picture rows finished by it */
                int h     = 0,
                    start = FFMAX(dsty, 0);
                uint16_t *mctmp    = s->mctmp + y*rowheight;
                DiracBlock *blocks = s->blmotion + y*s->blwidth;

                init_obmc_weights(s, p, y);

                /* the last block row, or one reaching past the bottom,
                   covers everything down to the end of the picture */
                if (y == s->blheight-1 || start+p->ybsep > p->height)
                    h = p->height - start;
                else
                    h = p->ybsep - (start - dsty);
                if (h < 0)
                    break;

                /* clear the part of the accumulation buffer this block row
                   writes to for the first time */
                memset(mctmp+2*p->yoffset*p->stride, 0, 2*rowheight);
                mc_row(s, blocks, mctmp, comp, dsty);

                /* skip the rows above the picture and the left overlap */
                mctmp += (start - dsty)*p->stride + p->xoffset;
                ff_spatial_idwt_slice2(&d, start + h); /* decode */
                s->diracdsp.add_rect_clamped(frame + start*p->stride, mctmp, p->stride,
                                             p->idwt_buf + start*p->idwt_stride, p->idwt_stride, p->width, h);

                dsty += p->ybsep;
            }
        }
    }


    return 0;
}
1612
1613 /**
1614  * Dirac Specification ->
1615  * 11.1.1 Picture Header. picture_header()
1616  */
1617 static int dirac_decode_picture_header(DiracContext *s)
1618 {
1619     int retire, picnum;
1620     int i, j, refnum, refdist;
1621     GetBitContext *gb = &s->gb;
1622
1623     /* [DIRAC_STD] 11.1.1 Picture Header. picture_header() PICTURE_NUM */
1624     picnum = s->current_picture->avframe.display_picture_number = get_bits_long(gb, 32);
1625
1626
1627     av_log(s->avctx,AV_LOG_DEBUG,"PICTURE_NUM: %d\n",picnum);
1628
1629     /* if this is the first keyframe after a sequence header, start our
1630        reordering from here */
1631     if (s->frame_number < 0)
1632         s->frame_number = picnum;
1633
1634     s->ref_pics[0] = s->ref_pics[1] = NULL;
1635     for (i = 0; i < s->num_refs; i++) {
1636         refnum = picnum + dirac_get_se_golomb(gb);
1637         refdist = INT_MAX;
1638
1639         /* find the closest reference to the one we want */
1640         /* Jordi: this is needed if the referenced picture hasn't yet arrived */
1641         for (j = 0; j < MAX_REFERENCE_FRAMES && refdist; j++)
1642             if (s->ref_frames[j]
1643                 && FFABS(s->ref_frames[j]->avframe.display_picture_number - refnum) < refdist) {
1644                 s->ref_pics[i] = s->ref_frames[j];
1645                 refdist = FFABS(s->ref_frames[j]->avframe.display_picture_number - refnum);
1646             }
1647
1648         if (!s->ref_pics[i] || refdist)
1649             av_log(s->avctx, AV_LOG_DEBUG, "Reference not found\n");
1650
1651         /* if there were no references at all, allocate one */
1652         if (!s->ref_pics[i])
1653             for (j = 0; j < MAX_FRAMES; j++)
1654                 if (!s->all_frames[j].avframe.data[0]) {
1655                     s->ref_pics[i] = &s->all_frames[j];
1656                     s->avctx->get_buffer(s->avctx, &s->ref_pics[i]->avframe);
1657                 }
1658     }
1659
1660     /* retire the reference frames that are not used anymore */
1661     if (s->current_picture->avframe.reference) {
1662         retire = picnum + dirac_get_se_golomb(gb);
1663         if (retire != picnum) {
1664             DiracFrame *retire_pic = remove_frame(s->ref_frames, retire);
1665
1666             if (retire_pic)
1667                 retire_pic->avframe.reference &= DELAYED_PIC_REF;
1668             else
1669                 av_log(s->avctx, AV_LOG_DEBUG, "Frame to retire not found\n");
1670         }
1671
1672         /* if reference array is full, remove the oldest as per the spec */
1673         while (add_frame(s->ref_frames, MAX_REFERENCE_FRAMES, s->current_picture)) {
1674             av_log(s->avctx, AV_LOG_ERROR, "Reference frame overflow\n");
1675             remove_frame(s->ref_frames, s->ref_frames[0]->avframe.display_picture_number)->avframe.reference &= DELAYED_PIC_REF;
1676         }
1677     }
1678
1679     if (s->num_refs) {
1680         if (dirac_unpack_prediction_parameters(s))  /* [DIRAC_STD] 11.2 Picture Prediction Data. picture_prediction() */
1681             return -1;
1682         if (dirac_unpack_block_motion_data(s))      /* [DIRAC_STD] 12. Block motion data syntax                       */
1683             return -1;
1684     }
1685     if (dirac_unpack_idwt_params(s))                /* [DIRAC_STD] 11.3 Wavelet transform data                        */
1686         return -1;
1687
1688     init_planes(s);
1689     return 0;
1690 }
1691
1692 static int get_delayed_pic(DiracContext *s, AVFrame *picture, int *data_size)
1693 {
1694     DiracFrame *out = s->delay_frames[0];
1695     int i, out_idx  = 0;
1696
1697     /* find frame with lowest picture number */
1698     for (i = 1; s->delay_frames[i]; i++)
1699         if (s->delay_frames[i]->avframe.display_picture_number < out->avframe.display_picture_number) {
1700             out     = s->delay_frames[i];
1701             out_idx = i;
1702         }
1703
1704     for (i = out_idx; s->delay_frames[i]; i++)
1705         s->delay_frames[i] = s->delay_frames[i+1];
1706
1707     if (out) {
1708         out->avframe.reference ^= DELAYED_PIC_REF;
1709         *data_size = sizeof(AVFrame);
1710         *(AVFrame *)picture = out->avframe;
1711     }
1712
1713     return 0;
1714 }
1715
1716 /**
1717  * Dirac Specification ->
1718  * 9.6 Parse Info Header Syntax. parse_info()
1719  * 4 byte start code + byte parse code + 4 byte size + 4 byte previous size
1720  */
1721 #define DATA_UNIT_HEADER_SIZE 13
1722
1723 /* [DIRAC_STD] dirac_decode_data_unit makes reference to the while defined in 9.3
1724    inside the function parse_sequence() */
1725 static int dirac_decode_data_unit(AVCodecContext *avctx, const uint8_t *buf, int size)
1726 {
1727     DiracContext *s   = avctx->priv_data;
1728     DiracFrame *pic   = NULL;
1729     int i, parse_code = buf[4];
1730     unsigned tmp;
1731
1732     if (size < DATA_UNIT_HEADER_SIZE)
1733         return -1;
1734
1735     init_get_bits(&s->gb, &buf[13], 8*(size - DATA_UNIT_HEADER_SIZE));
1736
1737     if (parse_code == pc_seq_header) {
1738         if (s->seen_sequence_header)
1739             return 0;
1740
1741         /* [DIRAC_STD] 10. Sequence header */
1742         if (avpriv_dirac_parse_sequence_header(avctx, &s->gb, &s->source))
1743             return -1;
1744
1745         avcodec_get_chroma_sub_sample(avctx->pix_fmt, &s->chroma_x_shift, &s->chroma_y_shift);
1746
1747         if (alloc_sequence_buffers(s))
1748             return -1;
1749
1750         s->seen_sequence_header = 1;
1751     } else if (parse_code == pc_eos) { /* [DIRAC_STD] End of Sequence */
1752         free_sequence_buffers(s);
1753         s->seen_sequence_header = 0;
1754     } else if (parse_code == pc_aux_data) {
1755         if (buf[13] == 1) {     /* encoder implementation/version */
1756             int ver[3];
1757             /* versions older than 1.0.8 don't store quant delta for
1758                subbands with only one codeblock */
1759             if (sscanf(buf+14, "Schroedinger %d.%d.%d", ver, ver+1, ver+2) == 3)
1760                 if (ver[0] == 1 && ver[1] == 0 && ver[2] <= 7)
1761                     s->old_delta_quant = 1;
1762         }
1763     } else if (parse_code & 0x8) {  /* picture data unit */
1764         if (!s->seen_sequence_header) {
1765             av_log(avctx, AV_LOG_DEBUG, "Dropping frame without sequence header\n");
1766             return -1;
1767         }
1768
1769         /* find an unused frame */
1770         for (i = 0; i < MAX_FRAMES; i++)
1771             if (s->all_frames[i].avframe.data[0] == NULL)
1772                 pic = &s->all_frames[i];
1773         if (!pic) {
1774             av_log(avctx, AV_LOG_ERROR, "framelist full\n");
1775             return -1;
1776         }
1777
1778         avcodec_get_frame_defaults(&pic->avframe);
1779
1780         /* [DIRAC_STD] Defined in 9.6.1 ... */
1781         tmp            =  parse_code & 0x03;                   /* [DIRAC_STD] num_refs()      */
1782         if (tmp > 2) {
1783             av_log(avctx, AV_LOG_ERROR, "num_refs of 3\n");
1784             return -1;
1785         }
1786         s->num_refs    = tmp;
1787         s->is_arith    = (parse_code & 0x48) == 0x08;          /* [DIRAC_STD] using_ac()      */
1788         s->low_delay   = (parse_code & 0x88) == 0x88;          /* [DIRAC_STD] is_low_delay()  */
1789         pic->avframe.reference = (parse_code & 0x0C) == 0x0C;  /* [DIRAC_STD]  is_reference() */
1790         pic->avframe.key_frame = s->num_refs == 0;             /* [DIRAC_STD] is_intra()      */
1791         pic->avframe.pict_type = s->num_refs + 1;              /* Definition of AVPictureType in avutil.h */
1792
1793         if (avctx->get_buffer(avctx, &pic->avframe) < 0) {
1794             av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
1795             return -1;
1796         }
1797         s->current_picture = pic;
1798         s->plane[0].stride = pic->avframe.linesize[0];
1799         s->plane[1].stride = pic->avframe.linesize[1];
1800         s->plane[2].stride = pic->avframe.linesize[2];
1801
1802         /* [DIRAC_STD] 11.1 Picture parse. picture_parse() */
1803         if (dirac_decode_picture_header(s))
1804             return -1;
1805
1806         /* [DIRAC_STD] 13.0 Transform data syntax. transform_data() */
1807         if (dirac_decode_frame_internal(s))
1808             return -1;
1809     }
1810     return 0;
1811 }
1812
1813 static int dirac_decode_frame(AVCodecContext *avctx, void *data, int *data_size, AVPacket *pkt)
1814 {
1815     DiracContext *s     = avctx->priv_data;
1816     DiracFrame *picture = data;
1817     uint8_t *buf        = pkt->data;
1818     int buf_size        = pkt->size;
1819     int i, data_unit_size, buf_idx = 0;
1820
1821     /* release unused frames */
1822     for (i = 0; i < MAX_FRAMES; i++)
1823         if (s->all_frames[i].avframe.data[0] && !s->all_frames[i].avframe.reference) {
1824             avctx->release_buffer(avctx, &s->all_frames[i].avframe);
1825             memset(s->all_frames[i].interpolated, 0, sizeof(s->all_frames[i].interpolated));
1826         }
1827
1828     s->current_picture = NULL;
1829     *data_size = 0;
1830
1831     /* end of stream, so flush delayed pics */
1832     if (buf_size == 0)
1833         return get_delayed_pic(s, (AVFrame *)data, data_size);
1834
1835     for (;;) {
1836         /*[DIRAC_STD] Here starts the code from parse_info() defined in 9.6
1837           [DIRAC_STD] PARSE_INFO_PREFIX = "BBCD" as defined in ISO/IEC 646
1838           BBCD start code search */
1839         for (; buf_idx + DATA_UNIT_HEADER_SIZE < buf_size; buf_idx++) {
1840             if (buf[buf_idx  ] == 'B' && buf[buf_idx+1] == 'B' &&
1841                 buf[buf_idx+2] == 'C' && buf[buf_idx+3] == 'D')
1842                 break;
1843         }
1844         /* BBCD found or end of data */
1845         if (buf_idx + DATA_UNIT_HEADER_SIZE >= buf_size)
1846             break;
1847
1848         data_unit_size = AV_RB32(buf+buf_idx+5);
1849         if (buf_idx + data_unit_size > buf_size || !data_unit_size) {
1850             if(buf_idx + data_unit_size > buf_size)
1851             av_log(s->avctx, AV_LOG_ERROR,
1852                    "Data unit with size %d is larger than input buffer, discarding\n",
1853                    data_unit_size);
1854             buf_idx += 4;
1855             continue;
1856         }
1857         /* [DIRAC_STD] dirac_decode_data_unit makes reference to the while defined in 9.3 inside the function parse_sequence() */
1858         if (dirac_decode_data_unit(avctx, buf+buf_idx, data_unit_size))
1859         {
1860             av_log(s->avctx, AV_LOG_ERROR,"Error in dirac_decode_data_unit\n");
1861             return -1;
1862         }
1863         buf_idx += data_unit_size;
1864     }
1865
1866     if (!s->current_picture)
1867         return 0;
1868
1869     if (s->current_picture->avframe.display_picture_number > s->frame_number) {
1870         DiracFrame *delayed_frame = remove_frame(s->delay_frames, s->frame_number);
1871
1872         s->current_picture->avframe.reference |= DELAYED_PIC_REF;
1873
1874         if (add_frame(s->delay_frames, MAX_DELAY, s->current_picture)) {
1875             int min_num = s->delay_frames[0]->avframe.display_picture_number;
1876             /* Too many delayed frames, so we display the frame with the lowest pts */
1877             av_log(avctx, AV_LOG_ERROR, "Delay frame overflow\n");
1878             delayed_frame = s->delay_frames[0];
1879
1880             for (i = 1; s->delay_frames[i]; i++)
1881                 if (s->delay_frames[i]->avframe.display_picture_number < min_num)
1882                     min_num = s->delay_frames[i]->avframe.display_picture_number;
1883
1884             delayed_frame = remove_frame(s->delay_frames, min_num);
1885             add_frame(s->delay_frames, MAX_DELAY, s->current_picture);
1886         }
1887
1888         if (delayed_frame) {
1889             delayed_frame->avframe.reference ^= DELAYED_PIC_REF;
1890             *(AVFrame*)data = delayed_frame->avframe;
1891             *data_size = sizeof(AVFrame);
1892         }
1893     } else if (s->current_picture->avframe.display_picture_number == s->frame_number) {
1894         /* The right frame at the right time :-) */
1895         *(AVFrame*)data = s->current_picture->avframe;
1896         *data_size = sizeof(AVFrame);
1897     }
1898
1899     if (*data_size)
1900         s->frame_number = picture->avframe.display_picture_number + 1;
1901
1902     return buf_idx;
1903 }
1904
1905 AVCodec ff_dirac_decoder = {
1906     .name           = "dirac",
1907     .type           = AVMEDIA_TYPE_VIDEO,
1908     .id             = CODEC_ID_DIRAC,
1909     .priv_data_size = sizeof(DiracContext),
1910     .init           = dirac_decode_init,
1911     .close          = dirac_decode_end,
1912     .decode         = dirac_decode_frame,
1913     .capabilities   = CODEC_CAP_DELAY,
1914     .flush          = dirac_decode_flush,
1915     .long_name      = NULL_IF_CONFIG_SMALL("BBC Dirac VC-2"),
1916 };