2 * Copyright (C) 2007 Marco Gerards <marco@gnu.org>
3 * Copyright (C) 2009 David Conrad
4 * Copyright (C) 2011 Jordi Ortiz
6 * This file is part of FFmpeg.
8 * FFmpeg is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
13 * FFmpeg is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with FFmpeg; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24 * @file libavcodec/diracdec.c
26 * @author Marco Gerards <marco@gnu.org>, David Conrad, Jordi Ortiz <nenjordi@gmail.com>
32 #include "bytestream.h"
34 #include "dirac_arith.h"
35 #include "mpeg12data.h"
41 * The spec limits the number of wavelet decompositions to 4 for both
42 * level 1 (VC-2) and 128 (long-gop default).
43 * 5 decompositions is the maximum before >16-bit buffers are needed.
44 * Schroedinger allows this for DD 9,7 and 13,7 wavelets only, limiting
45 * the others to 4 decompositions (or 3 for the fidelity filter).
47 * We use this instead of MAX_DECOMPOSITIONS to save some memory.
49 #define MAX_DWT_LEVELS 5
52 * The spec limits this to 3 for frame coding, but in practice can be as high as 6
54 #define MAX_REFERENCE_FRAMES 8
55 #define MAX_DELAY 5 /* limit for main profile for frame coding (TODO: field coding) */
56 #define MAX_FRAMES (MAX_REFERENCE_FRAMES + MAX_DELAY + 1)
57 #define MAX_QUANT 68 /* max quant for VC-2 */
58 #define MAX_BLOCKSIZE 32 /* maximum xblen/yblen we support */
61 * DiracBlock->ref flags, if set then the block does MC from the given ref
63 #define DIRAC_REF_MASK_REF1 1
64 #define DIRAC_REF_MASK_REF2 2
65 #define DIRAC_REF_MASK_GLOBAL 4
68 * Value of Picture.reference when Picture is not a reference picture, but
69 * is held for delayed output.
71 #define DELAYED_PIC_REF 4
73 #define ff_emulated_edge_mc ff_emulated_edge_mc_8 /* Fix: change the calls to this function regarding bit depth */
/**
 * Round size up to the next multiple of (1 << depth).
 *
 * Both arguments are parenthesized inside the expansion so that compound
 * expressions (for instance a conditional) keep their intended precedence.
 * depth is expanded more than once, so it must not have side effects.
 */
#define CALC_PADDING(size, depth) \
    ((((size) + (1 << (depth)) - 1) >> (depth)) << (depth))
78 #define DIVRNDUP(a, b) (((a) + (b) - 1) / (b))
82 int interpolated[3]; /* 1 if hpel[] is valid */
84 uint8_t *hpel_base[3][4];
91 } u; /* anonymous unions aren't in C99 :( */
95 typedef struct SubBand {
103 struct SubBand *parent;
107 const uint8_t *coeff_data;
110 typedef struct Plane {
119 IDWTELEM *idwt_buf_base;
125 /* block separation (block n+1 starts after this many pixels in block n) */
128 /* amount of overspill on each edge (half of the overlap between blocks) */
132 SubBand band[MAX_DWT_LEVELS][4];
135 typedef struct DiracContext {
136 AVCodecContext *avctx;
138 DiracDSPContext diracdsp;
140 dirac_source_params source;
141 int seen_sequence_header;
142 int frame_number; /* number of the next frame to display */
147 int zero_res; /* zero residue flag */
148 int is_arith; /* whether coeffs use arith or golomb coding */
149 int low_delay; /* use the low delay syntax */
150 int globalmc_flag; /* use global motion compensation */
151 int num_refs; /* number of reference pictures */
153 /* wavelet decoding */
154 unsigned wavelet_depth; /* depth of the IDWT */
155 unsigned wavelet_idx;
158 * schroedinger older than 1.0.8 doesn't store
159 * quant delta if only one codebook exists in a band
161 unsigned old_delta_quant;
162 unsigned codeblock_mode;
167 } codeblock[MAX_DWT_LEVELS+1];
170 unsigned num_x; /* number of horizontal slices */
171 unsigned num_y; /* number of vertical slices */
172 AVRational bytes; /* average bytes per slice */
173 uint8_t quant[MAX_DWT_LEVELS][4]; /* [DIRAC_STD] E.1 */
177 int pan_tilt[2]; /* pan/tilt vector */
178 int zrs[2][2]; /* zoom/rotate/shear matrix */
179 int perspective[2]; /* perspective vector */
181 unsigned perspective_exp;
184 /* motion compensation */
185 uint8_t mv_precision; /* [DIRAC_STD] REFS_WT_PRECISION */
186 int16_t weight[2]; /* [DIRAC_STD] REF1_WT and REF2_WT */
187 unsigned weight_log2denom; /* [DIRAC_STD] REFS_WT_PRECISION */
189 int blwidth; /* number of blocks (horizontally) */
190 int blheight; /* number of blocks (vertically) */
191 int sbwidth; /* number of superblocks (horizontally) */
192 int sbheight; /* number of superblocks (vertically) */
195 DiracBlock *blmotion;
197 uint8_t *edge_emu_buffer[4];
198 uint8_t *edge_emu_buffer_base;
200 uint16_t *mctmp; /* buffer holding the MC data multipled by OBMC weights */
203 DECLARE_ALIGNED(16, uint8_t, obmc_weight)[3][MAX_BLOCKSIZE*MAX_BLOCKSIZE];
205 void (*put_pixels_tab[4])(uint8_t *dst, const uint8_t *src[5], int stride, int h);
206 void (*avg_pixels_tab[4])(uint8_t *dst, const uint8_t *src[5], int stride, int h);
207 void (*add_obmc)(uint16_t *dst, const uint8_t *src, int stride, const uint8_t *obmc_weight, int yblen);
208 dirac_weight_func weight_func;
209 dirac_biweight_func biweight_func;
211 DiracFrame *current_picture;
212 DiracFrame *ref_pics[2];
214 DiracFrame *ref_frames[MAX_REFERENCE_FRAMES+1];
215 DiracFrame *delay_frames[MAX_DELAY+1];
216 DiracFrame all_frames[MAX_FRAMES];
220 * Dirac Specification ->
221 * Parse code values. 9.6.1 Table 9.1
223 enum dirac_parse_code {
224 pc_seq_header = 0x00,
237 static const uint8_t default_qmat[][4][4] = {
238 { { 5, 3, 3, 0}, { 0, 4, 4, 1}, { 0, 5, 5, 2}, { 0, 6, 6, 3} },
239 { { 4, 2, 2, 0}, { 0, 4, 4, 2}, { 0, 5, 5, 3}, { 0, 7, 7, 5} },
240 { { 5, 3, 3, 0}, { 0, 4, 4, 1}, { 0, 5, 5, 2}, { 0, 6, 6, 3} },
241 { { 8, 4, 4, 0}, { 0, 4, 4, 0}, { 0, 4, 4, 0}, { 0, 4, 4, 0} },
242 { { 8, 4, 4, 0}, { 0, 4, 4, 0}, { 0, 4, 4, 0}, { 0, 4, 4, 0} },
243 { { 0, 4, 4, 8}, { 0, 8, 8, 12}, { 0, 13, 13, 17}, { 0, 17, 17, 21} },
244 { { 3, 1, 1, 0}, { 0, 4, 4, 2}, { 0, 6, 6, 5}, { 0, 9, 9, 7} },
247 static const int qscale_tab[MAX_QUANT+1] = {
248 4, 5, 6, 7, 8, 10, 11, 13,
249 16, 19, 23, 27, 32, 38, 45, 54,
250 64, 76, 91, 108, 128, 152, 181, 215,
251 256, 304, 362, 431, 512, 609, 724, 861,
252 1024, 1218, 1448, 1722, 2048, 2435, 2896, 3444,
253 4096, 4871, 5793, 6889, 8192, 9742, 11585, 13777,
254 16384, 19484, 23170, 27554, 32768, 38968, 46341, 55109,
258 static const int qoffset_intra_tab[MAX_QUANT+1] = {
259 1, 2, 3, 4, 4, 5, 6, 7,
260 8, 10, 12, 14, 16, 19, 23, 27,
261 32, 38, 46, 54, 64, 76, 91, 108,
262 128, 152, 181, 216, 256, 305, 362, 431,
263 512, 609, 724, 861, 1024, 1218, 1448, 1722,
264 2048, 2436, 2897, 3445, 4096, 4871, 5793, 6889,
265 8192, 9742, 11585, 13777, 16384, 19484, 23171, 27555,
269 static const int qoffset_inter_tab[MAX_QUANT+1] = {
270 1, 2, 2, 3, 3, 4, 4, 5,
271 6, 7, 9, 10, 12, 14, 17, 20,
272 24, 29, 34, 41, 48, 57, 68, 81,
273 96, 114, 136, 162, 192, 228, 272, 323,
274 384, 457, 543, 646, 768, 913, 1086, 1292,
275 1536, 1827, 2172, 2583, 3072, 3653, 4344, 5166,
276 6144, 7307, 8689, 10333, 12288, 14613, 17378, 20666,
/**
 * Divide by three with a multiply-and-shift instead of an integer division.
 * The magic constants are taken from schroedinger.
 *
 * @param x value to divide
 * @return ((x + 1) * 21845 + 10922) >> 16
 */
static inline int divide3(int x)
{
    const int scaled = (x + 1) * 21845 + 10922;

    return scaled >> 16;
}
286 static DiracFrame *remove_frame(DiracFrame *framelist[], int picnum)
288 DiracFrame *remove_pic = NULL;
289 int i, remove_idx = -1;
291 for (i = 0; framelist[i]; i++)
292 if (framelist[i]->avframe.display_picture_number == picnum) {
293 remove_pic = framelist[i];
298 for (i = remove_idx; framelist[i]; i++)
299 framelist[i] = framelist[i+1];
304 static int add_frame(DiracFrame *framelist[], int maxframes, DiracFrame *frame)
307 for (i = 0; i < maxframes; i++)
309 framelist[i] = frame;
315 static int alloc_sequence_buffers(DiracContext *s)
317 int sbwidth = DIVRNDUP(s->source.width, 4);
318 int sbheight = DIVRNDUP(s->source.height, 4);
319 int i, w, h, top_padding;
321 /* todo: think more about this / use or set Plane here */
322 for (i = 0; i < 3; i++) {
323 int max_xblen = MAX_BLOCKSIZE >> (i ? s->chroma_x_shift : 0);
324 int max_yblen = MAX_BLOCKSIZE >> (i ? s->chroma_y_shift : 0);
325 w = s->source.width >> (i ? s->chroma_x_shift : 0);
326 h = s->source.height >> (i ? s->chroma_y_shift : 0);
328 /* we allocate the max we support here since num decompositions can
329 * change from frame to frame. Stride is aligned to 16 for SIMD, and
330 * 1<<MAX_DWT_LEVELS top padding to avoid if(y>0) in arith decoding
331 * MAX_BLOCKSIZE padding for MC: blocks can spill up to half of that
333 top_padding = FFMAX(1<<MAX_DWT_LEVELS, max_yblen/2);
334 w = FFALIGN(CALC_PADDING(w, MAX_DWT_LEVELS), 8); /* FIXME: Should this be 16 for SSE??? */
335 h = top_padding + CALC_PADDING(h, MAX_DWT_LEVELS) + max_yblen/2;
337 s->plane[i].idwt_buf_base = av_mallocz((w+max_xblen)*h * sizeof(IDWTELEM));
338 s->plane[i].idwt_tmp = av_malloc((w+16) * sizeof(IDWTELEM));
339 s->plane[i].idwt_buf = s->plane[i].idwt_buf_base + top_padding*w;
340 if (!s->plane[i].idwt_buf_base || !s->plane[i].idwt_tmp)
341 return AVERROR(ENOMEM);
345 h = s->source.height;
347 /* fixme: allocate using real stride here */
348 s->sbsplit = av_malloc(sbwidth * sbheight);
349 s->blmotion = av_malloc(sbwidth * sbheight * 4 * sizeof(*s->blmotion));
350 s->edge_emu_buffer_base = av_malloc((w+64)*MAX_BLOCKSIZE);
352 s->mctmp = av_malloc((w+64+MAX_BLOCKSIZE) * (h*MAX_BLOCKSIZE) * sizeof(*s->mctmp));
353 s->mcscratch = av_malloc((w+64)*MAX_BLOCKSIZE);
355 if (!s->sbsplit || !s->blmotion)
356 return AVERROR(ENOMEM);
360 static void free_sequence_buffers(DiracContext *s)
364 for (i = 0; i < MAX_FRAMES; i++) {
365 if (s->all_frames[i].avframe.data[0]) {
366 s->avctx->release_buffer(s->avctx, &s->all_frames[i].avframe);
367 memset(s->all_frames[i].interpolated, 0, sizeof(s->all_frames[i].interpolated));
370 for (j = 0; j < 3; j++)
371 for (k = 1; k < 4; k++)
372 av_freep(&s->all_frames[i].hpel_base[j][k]);
375 memset(s->ref_frames, 0, sizeof(s->ref_frames));
376 memset(s->delay_frames, 0, sizeof(s->delay_frames));
378 for (i = 0; i < 3; i++) {
379 av_freep(&s->plane[i].idwt_buf_base);
380 av_freep(&s->plane[i].idwt_tmp);
383 av_freep(&s->sbsplit);
384 av_freep(&s->blmotion);
385 av_freep(&s->edge_emu_buffer_base);
388 av_freep(&s->mcscratch);
391 static av_cold int dirac_decode_init(AVCodecContext *avctx)
393 DiracContext *s = avctx->priv_data;
395 s->frame_number = -1;
397 if (avctx->flags&CODEC_FLAG_EMU_EDGE) {
398 av_log(avctx, AV_LOG_ERROR, "Edge emulation not supported!\n");
399 return AVERROR_PATCHWELCOME;
402 dsputil_init(&s->dsp, avctx);
403 ff_diracdsp_init(&s->diracdsp);
408 static void dirac_decode_flush(AVCodecContext *avctx)
410 DiracContext *s = avctx->priv_data;
411 free_sequence_buffers(s);
412 s->seen_sequence_header = 0;
413 s->frame_number = -1;
416 static av_cold int dirac_decode_end(AVCodecContext *avctx)
418 dirac_decode_flush(avctx);
422 #define SIGN_CTX(x) (CTX_SIGN_ZERO + ((x) > 0) - ((x) < 0))
424 static inline void coeff_unpack_arith(DiracArith *c, int qfactor, int qoffset,
425 SubBand *b, IDWTELEM *buf, int x, int y)
429 int pred_ctx = CTX_ZPZN_F1;
431 /* Check if the parent subband has a 0 in the corresponding position */
433 pred_ctx += !!b->parent->ibuf[b->parent->stride * (y>>1) + (x>>1)] << 1;
435 if (b->orientation == subband_hl)
436 sign_pred = buf[-b->stride];
438 /* Determine if the pixel has only zeros in its neighbourhood */
440 pred_ctx += !(buf[-1] | buf[-b->stride] | buf[-1-b->stride]);
441 if (b->orientation == subband_lh)
444 pred_ctx += !buf[-b->stride];
447 coeff = dirac_get_arith_uint(c, pred_ctx, CTX_COEFF_DATA);
449 coeff = (coeff * qfactor + qoffset + 2) >> 2;
450 sign = dirac_get_arith_bit(c, SIGN_CTX(sign_pred));
451 coeff = (coeff ^ -sign) + sign;
456 static inline int coeff_unpack_golomb(GetBitContext *gb, int qfactor, int qoffset)
460 coeff = svq3_get_ue_golomb(gb);
462 coeff = (coeff * qfactor + qoffset + 2) >> 2;
463 sign = get_bits1(gb);
464 coeff = (coeff ^ -sign) + sign;
470 * Decode the coeffs in the rectangle defined by left, right, top, bottom
471 * [DIRAC_STD] 13.4.3.2 Codeblock unpacking loop. codeblock()
473 static inline void codeblock(DiracContext *s, SubBand *b,
474 GetBitContext *gb, DiracArith *c,
475 int left, int right, int top, int bottom,
476 int blockcnt_one, int is_arith)
478 int x, y, zero_block;
479 int qoffset, qfactor;
482 /* check for any coded coefficients in this codeblock */
485 zero_block = dirac_get_arith_bit(c, CTX_ZERO_BLOCK);
487 zero_block = get_bits1(gb);
493 if (s->codeblock_mode && !(s->old_delta_quant && blockcnt_one)) {
495 b->quant += dirac_get_arith_int(c, CTX_DELTA_Q_F, CTX_DELTA_Q_DATA);
497 b->quant += dirac_get_se_golomb(gb);
500 b->quant = FFMIN(b->quant, MAX_QUANT);
502 qfactor = qscale_tab[b->quant];
503 /* TODO: context pointer? */
505 qoffset = qoffset_intra_tab[b->quant];
507 qoffset = qoffset_inter_tab[b->quant];
509 buf = b->ibuf + top * b->stride;
510 for (y = top; y < bottom; y++) {
511 for (x = left; x < right; x++) {
512 /* [DIRAC_STD] 13.4.4 Subband coefficients. coeff_unpack() */
514 coeff_unpack_arith(c, qfactor, qoffset, b, buf+x, x, y);
516 buf[x] = coeff_unpack_golomb(gb, qfactor, qoffset);
523 * Dirac Specification ->
524 * 13.3 intra_dc_prediction(band)
526 static inline void intra_dc_prediction(SubBand *b)
528 IDWTELEM *buf = b->ibuf;
531 for (x = 1; x < b->width; x++)
535 for (y = 1; y < b->height; y++) {
536 buf[0] += buf[-b->stride];
538 for (x = 1; x < b->width; x++) {
539 int pred = buf[x - 1] + buf[x - b->stride] + buf[x - b->stride-1];
540 buf[x] += divide3(pred);
547 * Dirac Specification ->
548 * 13.4.2 Non-skipped subbands. subband_coeffs()
550 static av_always_inline void decode_subband_internal(DiracContext *s, SubBand *b, int is_arith)
552 int cb_x, cb_y, left, right, top, bottom;
555 int cb_width = s->codeblock[b->level + (b->orientation != subband_ll)].width;
556 int cb_height = s->codeblock[b->level + (b->orientation != subband_ll)].height;
557 int blockcnt_one = (cb_width + cb_height) == 2;
562 init_get_bits(&gb, b->coeff_data, b->length*8);
565 ff_dirac_init_arith_decoder(&c, &gb, b->length);
568 for (cb_y = 0; cb_y < cb_height; cb_y++) {
569 bottom = (b->height * (cb_y+1)) / cb_height;
571 for (cb_x = 0; cb_x < cb_width; cb_x++) {
572 right = (b->width * (cb_x+1)) / cb_width;
573 codeblock(s, b, &gb, &c, left, right, top, bottom, blockcnt_one, is_arith);
579 if (b->orientation == subband_ll && s->num_refs == 0)
580 intra_dc_prediction(b);
583 static int decode_subband_arith(AVCodecContext *avctx, void *b)
585 DiracContext *s = avctx->priv_data;
586 decode_subband_internal(s, b, 1);
590 static int decode_subband_golomb(AVCodecContext *avctx, void *arg)
592 DiracContext *s = avctx->priv_data;
594 decode_subband_internal(s, *b, 0);
599 * Dirac Specification ->
600 * [DIRAC_STD] 13.4.1 core_transform_data()
602 static void decode_component(DiracContext *s, int comp)
604 AVCodecContext *avctx = s->avctx;
605 SubBand *bands[3*MAX_DWT_LEVELS+1];
606 enum dirac_subband orientation;
607 int level, num_bands = 0;
609 /* Unpack all subbands at all levels. */
610 for (level = 0; level < s->wavelet_depth; level++) {
611 for (orientation = !!level; orientation < 4; orientation++) {
612 SubBand *b = &s->plane[comp].band[level][orientation];
613 bands[num_bands++] = b;
615 align_get_bits(&s->gb);
616 /* [DIRAC_STD] 13.4.2 subband() */
617 b->length = svq3_get_ue_golomb(&s->gb);
619 b->quant = svq3_get_ue_golomb(&s->gb);
620 align_get_bits(&s->gb);
621 b->coeff_data = s->gb.buffer + get_bits_count(&s->gb)/8;
622 b->length = FFMIN(b->length, get_bits_left(&s->gb)/8);
623 skip_bits_long(&s->gb, b->length*8);
626 /* arithmetic coding has inter-level dependencies, so we can only execute one level at a time */
628 avctx->execute(avctx, decode_subband_arith, &s->plane[comp].band[level][!!level],
629 NULL, 4-!!level, sizeof(SubBand));
631 /* golomb coding has no inter-level dependencies, so we can execute all subbands in parallel */
633 avctx->execute(avctx, decode_subband_golomb, bands, NULL, num_bands, sizeof(SubBand*));
636 /* [DIRAC_STD] 13.5.5.2 Luma slice subband data. luma_slice_band(level,orient,sx,sy) --> if b2 == NULL */
637 /* [DIRAC_STD] 13.5.5.3 Chroma slice subband data. chroma_slice_band(level,orient,sx,sy) --> if b2 != NULL */
638 static void lowdelay_subband(DiracContext *s, GetBitContext *gb, int quant,
639 int slice_x, int slice_y, int bits_end,
640 SubBand *b1, SubBand *b2)
642 int left = b1->width * slice_x / s->lowdelay.num_x;
643 int right = b1->width *(slice_x+1) / s->lowdelay.num_x;
644 int top = b1->height * slice_y / s->lowdelay.num_y;
645 int bottom = b1->height *(slice_y+1) / s->lowdelay.num_y;
647 int qfactor = qscale_tab[FFMIN(quant, MAX_QUANT)];
648 int qoffset = qoffset_intra_tab[FFMIN(quant, MAX_QUANT)];
650 IDWTELEM *buf1 = b1->ibuf + top * b1->stride;
651 IDWTELEM *buf2 = b2 ? b2->ibuf + top * b2->stride : NULL;
653 /* we have to constantly check for overread since the spec explicitly
654 requires this, with the meaning that all remaining coeffs are set to 0 */
655 if (get_bits_count(gb) >= bits_end)
658 for (y = top; y < bottom; y++) {
659 for (x = left; x < right; x++) {
660 buf1[x] = coeff_unpack_golomb(gb, qfactor, qoffset);
661 if (get_bits_count(gb) >= bits_end)
664 buf2[x] = coeff_unpack_golomb(gb, qfactor, qoffset);
665 if (get_bits_count(gb) >= bits_end)
675 struct lowdelay_slice {
684 * Dirac Specification ->
685 * 13.5.2 Slices. slice(sx,sy)
687 static int decode_lowdelay_slice(AVCodecContext *avctx, void *arg)
689 DiracContext *s = avctx->priv_data;
690 struct lowdelay_slice *slice = arg;
691 GetBitContext *gb = &slice->gb;
692 enum dirac_subband orientation;
693 int level, quant, chroma_bits, chroma_end;
695 int quant_base = get_bits(gb, 7); /*[DIRAC_STD] qindex */
696 int length_bits = av_log2(8 * slice->bytes)+1;
697 int luma_bits = get_bits_long(gb, length_bits);
698 int luma_end = get_bits_count(gb) + FFMIN(luma_bits, get_bits_left(gb));
700 /* [DIRAC_STD] 13.5.5.2 luma_slice_band */
701 for (level = 0; level < s->wavelet_depth; level++)
702 for (orientation = !!level; orientation < 4; orientation++) {
703 quant = FFMAX(quant_base - s->lowdelay.quant[level][orientation], 0);
704 lowdelay_subband(s, gb, quant, slice->slice_x, slice->slice_y, luma_end,
705 &s->plane[0].band[level][orientation], NULL);
708 /* consume any unused bits from luma */
709 skip_bits_long(gb, get_bits_count(gb) - luma_end);
711 chroma_bits = 8*slice->bytes - 7 - length_bits - luma_bits;
712 chroma_end = get_bits_count(gb) + FFMIN(chroma_bits, get_bits_left(gb));
713 /* [DIRAC_STD] 13.5.5.3 chroma_slice_band */
714 for (level = 0; level < s->wavelet_depth; level++)
715 for (orientation = !!level; orientation < 4; orientation++) {
716 quant = FFMAX(quant_base - s->lowdelay.quant[level][orientation], 0);
717 lowdelay_subband(s, gb, quant, slice->slice_x, slice->slice_y, chroma_end,
718 &s->plane[1].band[level][orientation],
719 &s->plane[2].band[level][orientation]);
726 * Dirac Specification ->
727 * 13.5.1 low_delay_transform_data()
729 static void decode_lowdelay(DiracContext *s)
731 AVCodecContext *avctx = s->avctx;
732 int slice_x, slice_y, bytes, bufsize;
734 struct lowdelay_slice *slices;
737 slices = av_mallocz(s->lowdelay.num_x * s->lowdelay.num_y * sizeof(struct lowdelay_slice));
739 align_get_bits(&s->gb);
740 /*[DIRAC_STD] 13.5.2 Slices. slice(sx,sy) */
741 buf = s->gb.buffer + get_bits_count(&s->gb)/8;
742 bufsize = get_bits_left(&s->gb);
744 for (slice_y = 0; bufsize > 0 && slice_y < s->lowdelay.num_y; slice_y++)
745 for (slice_x = 0; bufsize > 0 && slice_x < s->lowdelay.num_x; slice_x++) {
746 bytes = (slice_num+1) * s->lowdelay.bytes.num / s->lowdelay.bytes.den
747 - slice_num * s->lowdelay.bytes.num / s->lowdelay.bytes.den;
749 slices[slice_num].bytes = bytes;
750 slices[slice_num].slice_x = slice_x;
751 slices[slice_num].slice_y = slice_y;
752 init_get_bits(&slices[slice_num].gb, buf, bufsize);
759 avctx->execute(avctx, decode_lowdelay_slice, slices, NULL, slice_num,
760 sizeof(struct lowdelay_slice)); /* [DIRAC_STD] 13.5.2 Slices */
761 intra_dc_prediction(&s->plane[0].band[0][0]); /* [DIRAC_STD] 13.3 intra_dc_prediction() */
762 intra_dc_prediction(&s->plane[1].band[0][0]); /* [DIRAC_STD] 13.3 intra_dc_prediction() */
763 intra_dc_prediction(&s->plane[2].band[0][0]); /* [DIRAC_STD] 13.3 intra_dc_prediction() */
767 static void init_planes(DiracContext *s)
769 int i, w, h, level, orientation;
771 for (i = 0; i < 3; i++) {
772 Plane *p = &s->plane[i];
774 p->width = s->source.width >> (i ? s->chroma_x_shift : 0);
775 p->height = s->source.height >> (i ? s->chroma_y_shift : 0);
776 p->idwt_width = w = CALC_PADDING(p->width , s->wavelet_depth);
777 p->idwt_height = h = CALC_PADDING(p->height, s->wavelet_depth);
778 p->idwt_stride = FFALIGN(p->idwt_width, 8);
780 for (level = s->wavelet_depth-1; level >= 0; level--) {
783 for (orientation = !!level; orientation < 4; orientation++) {
784 SubBand *b = &p->band[level][orientation];
786 b->ibuf = p->idwt_buf;
788 b->stride = p->idwt_stride << (s->wavelet_depth - level);
791 b->orientation = orientation;
796 b->ibuf += b->stride>>1;
799 b->parent = &p->band[level-1][orientation];
804 p->xblen = s->plane[0].xblen >> s->chroma_x_shift;
805 p->yblen = s->plane[0].yblen >> s->chroma_y_shift;
806 p->xbsep = s->plane[0].xbsep >> s->chroma_x_shift;
807 p->ybsep = s->plane[0].ybsep >> s->chroma_y_shift;
810 p->xoffset = (p->xblen - p->xbsep)/2;
811 p->yoffset = (p->yblen - p->ybsep)/2;
816 * Unpack the motion compensation parameters
817 * Dirac Specification ->
818 * 11.2 Picture prediction data. picture_prediction()
820 static int dirac_unpack_prediction_parameters(DiracContext *s)
822 static const uint8_t default_blen[] = { 4, 12, 16, 24 };
823 static const uint8_t default_bsep[] = { 4, 8, 12, 16 };
825 GetBitContext *gb = &s->gb;
829 /* [DIRAC_STD] 11.2.2 Block parameters. block_parameters() */
830 /* Luma and Chroma are equal. 11.2.3 */
831 idx = svq3_get_ue_golomb(gb); /* [DIRAC_STD] index */
834 av_log(s->avctx, AV_LOG_ERROR, "Block prediction index too high\n");
839 s->plane[0].xblen = svq3_get_ue_golomb(gb);
840 s->plane[0].yblen = svq3_get_ue_golomb(gb);
841 s->plane[0].xbsep = svq3_get_ue_golomb(gb);
842 s->plane[0].ybsep = svq3_get_ue_golomb(gb);
844 /*[DIRAC_STD] preset_block_params(index). Table 11.1 */
845 s->plane[0].xblen = default_blen[idx-1];
846 s->plane[0].yblen = default_blen[idx-1];
847 s->plane[0].xbsep = default_bsep[idx-1];
848 s->plane[0].ybsep = default_bsep[idx-1];
850 /*[DIRAC_STD] 11.2.4 motion_data_dimensions()
851 Calculated in function dirac_unpack_block_motion_data */
853 if (s->plane[0].xbsep < s->plane[0].xblen/2 || s->plane[0].ybsep < s->plane[0].yblen/2) {
854 av_log(s->avctx, AV_LOG_ERROR, "Block separation too small\n");
857 if (s->plane[0].xbsep > s->plane[0].xblen || s->plane[0].ybsep > s->plane[0].yblen) {
858 av_log(s->avctx, AV_LOG_ERROR, "Block seperation greater than size\n");
861 if (FFMAX(s->plane[0].xblen, s->plane[0].yblen) > MAX_BLOCKSIZE) {
862 av_log(s->avctx, AV_LOG_ERROR, "Unsupported large block size\n");
866 /*[DIRAC_STD] 11.2.5 Motion vector precision. motion_vector_precision()
867 Read motion vector precision */
868 s->mv_precision = svq3_get_ue_golomb(gb);
869 if (s->mv_precision > 3) {
870 av_log(s->avctx, AV_LOG_ERROR, "MV precision finer than eighth-pel\n");
874 /*[DIRAC_STD] 11.2.6 Global motion. global_motion()
875 Read the global motion compensation parameters */
876 s->globalmc_flag = get_bits1(gb);
877 if (s->globalmc_flag) {
878 memset(s->globalmc, 0, sizeof(s->globalmc));
879 /* [DIRAC_STD] pan_tilt(gparams) */
880 for (ref = 0; ref < s->num_refs; ref++) {
882 s->globalmc[ref].pan_tilt[0] = dirac_get_se_golomb(gb);
883 s->globalmc[ref].pan_tilt[1] = dirac_get_se_golomb(gb);
885 /* [DIRAC_STD] zoom_rotate_shear(gparams)
886 zoom/rotation/shear parameters */
888 s->globalmc[ref].zrs_exp = svq3_get_ue_golomb(gb);
889 s->globalmc[ref].zrs[0][0] = dirac_get_se_golomb(gb);
890 s->globalmc[ref].zrs[0][1] = dirac_get_se_golomb(gb);
891 s->globalmc[ref].zrs[1][0] = dirac_get_se_golomb(gb);
892 s->globalmc[ref].zrs[1][1] = dirac_get_se_golomb(gb);
894 s->globalmc[ref].zrs[0][0] = 1;
895 s->globalmc[ref].zrs[1][1] = 1;
897 /* [DIRAC_STD] perspective(gparams) */
899 s->globalmc[ref].perspective_exp = svq3_get_ue_golomb(gb);
900 s->globalmc[ref].perspective[0] = dirac_get_se_golomb(gb);
901 s->globalmc[ref].perspective[1] = dirac_get_se_golomb(gb);
906 /*[DIRAC_STD] 11.2.7 Picture prediction mode. prediction_mode()
907 Picture prediction mode, not currently used. */
908 if (svq3_get_ue_golomb(gb)) {
909 av_log(s->avctx, AV_LOG_ERROR, "Unknown picture prediction mode\n");
913 /* [DIRAC_STD] 11.2.8 Reference picture weight. reference_picture_weights()
914 just data read, weight calculation will be done later on. */
915 s->weight_log2denom = 1;
920 s->weight_log2denom = svq3_get_ue_golomb(gb);
921 s->weight[0] = dirac_get_se_golomb(gb);
922 if (s->num_refs == 2)
923 s->weight[1] = dirac_get_se_golomb(gb);
929 * Dirac Specification ->
930 * 11.3 Wavelet transform data. wavelet_transform()
932 static int dirac_unpack_idwt_params(DiracContext *s)
934 GetBitContext *gb = &s->gb;
938 #define CHECKEDREAD(dst, cond, errmsg) \
939 tmp = svq3_get_ue_golomb(gb); \
941 av_log(s->avctx, AV_LOG_ERROR, errmsg); \
948 s->zero_res = s->num_refs ? get_bits1(gb) : 0;
952 /*[DIRAC_STD] 11.3.1 Transform parameters. transform_parameters() */
953 CHECKEDREAD(s->wavelet_idx, tmp > 6, "wavelet_idx is too big\n")
955 CHECKEDREAD(s->wavelet_depth, tmp > MAX_DWT_LEVELS || tmp < 1, "invalid number of DWT decompositions\n")
958 /* Codeblock parameters (core syntax only) */
960 for (i = 0; i <= s->wavelet_depth; i++) {
961 CHECKEDREAD(s->codeblock[i].width , tmp < 1, "codeblock width invalid\n")
962 CHECKEDREAD(s->codeblock[i].height, tmp < 1, "codeblock height invalid\n")
965 CHECKEDREAD(s->codeblock_mode, tmp > 1, "unknown codeblock mode\n")
967 for (i = 0; i <= s->wavelet_depth; i++)
968 s->codeblock[i].width = s->codeblock[i].height = 1;
970 /* Slice parameters + quantization matrix*/
971 /*[DIRAC_STD] 11.3.4 Slice coding Parameters (low delay syntax only). slice_parameters() */
972 s->lowdelay.num_x = svq3_get_ue_golomb(gb);
973 s->lowdelay.num_y = svq3_get_ue_golomb(gb);
974 s->lowdelay.bytes.num = svq3_get_ue_golomb(gb);
975 s->lowdelay.bytes.den = svq3_get_ue_golomb(gb);
977 /* [DIRAC_STD] 11.3.5 Quantisation matrices (low-delay syntax). quant_matrix() */
979 av_log(s->avctx,AV_LOG_DEBUG,"Low Delay: Has Custom Quantization Matrix!\n");
980 /* custom quantization matrix */
981 s->lowdelay.quant[0][0] = svq3_get_ue_golomb(gb);
982 for (level = 0; level < s->wavelet_depth; level++) {
983 s->lowdelay.quant[level][1] = svq3_get_ue_golomb(gb);
984 s->lowdelay.quant[level][2] = svq3_get_ue_golomb(gb);
985 s->lowdelay.quant[level][3] = svq3_get_ue_golomb(gb);
988 /* default quantization matrix */
989 for (level = 0; level < s->wavelet_depth; level++)
990 for (i = 0; i < 4; i++) {
991 s->lowdelay.quant[level][i] = default_qmat[s->wavelet_idx][level][i];
992 /* haar with no shift differs for different depths */
993 if (s->wavelet_idx == 3)
994 s->lowdelay.quant[level][i] += 4*(s->wavelet_depth-1 - level);
1001 static inline int pred_sbsplit(uint8_t *sbsplit, int stride, int x, int y)
1003 static const uint8_t avgsplit[7] = { 0, 0, 1, 1, 1, 2, 2 };
1010 return sbsplit[-stride];
1012 return avgsplit[sbsplit[-1] + sbsplit[-stride] + sbsplit[-stride-1]];
1015 static inline int pred_block_mode(DiracBlock *block, int stride, int x, int y, int refmask)
1022 return block[-1].ref & refmask;
1024 return block[-stride].ref & refmask;
1026 /* return the majority */
1027 pred = (block[-1].ref & refmask) + (block[-stride].ref & refmask) + (block[-stride-1].ref & refmask);
1028 return (pred >> 1) & refmask;
1031 static inline void pred_block_dc(DiracBlock *block, int stride, int x, int y)
1035 memset(block->u.dc, 0, sizeof(block->u.dc));
1037 if (x && !(block[-1].ref & 3)) {
1038 for (i = 0; i < 3; i++)
1039 block->u.dc[i] += block[-1].u.dc[i];
1043 if (y && !(block[-stride].ref & 3)) {
1044 for (i = 0; i < 3; i++)
1045 block->u.dc[i] += block[-stride].u.dc[i];
1049 if (x && y && !(block[-1-stride].ref & 3)) {
1050 for (i = 0; i < 3; i++)
1051 block->u.dc[i] += block[-1-stride].u.dc[i];
1056 for (i = 0; i < 3; i++)
1057 block->u.dc[i] = (block->u.dc[i]+1)>>1;
1058 } else if (n == 3) {
1059 for (i = 0; i < 3; i++)
1060 block->u.dc[i] = divide3(block->u.dc[i]);
1064 static inline void pred_mv(DiracBlock *block, int stride, int x, int y, int ref)
1067 int refmask = ref+1;
1068 int mask = refmask | DIRAC_REF_MASK_GLOBAL; /* exclude gmc blocks */
1071 if (x && (block[-1].ref & mask) == refmask)
1072 pred[n++] = block[-1].u.mv[ref];
1074 if (y && (block[-stride].ref & mask) == refmask)
1075 pred[n++] = block[-stride].u.mv[ref];
1077 if (x && y && (block[-stride-1].ref & mask) == refmask)
1078 pred[n++] = block[-stride-1].u.mv[ref];
1082 block->u.mv[ref][0] = 0;
1083 block->u.mv[ref][1] = 0;
1086 block->u.mv[ref][0] = pred[0][0];
1087 block->u.mv[ref][1] = pred[0][1];
1090 block->u.mv[ref][0] = (pred[0][0] + pred[1][0] + 1) >> 1;
1091 block->u.mv[ref][1] = (pred[0][1] + pred[1][1] + 1) >> 1;
1094 block->u.mv[ref][0] = mid_pred(pred[0][0], pred[1][0], pred[2][0]);
1095 block->u.mv[ref][1] = mid_pred(pred[0][1], pred[1][1], pred[2][1]);
1100 static void global_mv(DiracContext *s, DiracBlock *block, int x, int y, int ref)
1102 int ez = s->globalmc[ref].zrs_exp;
1103 int ep = s->globalmc[ref].perspective_exp;
1104 int (*A)[2] = s->globalmc[ref].zrs;
1105 int *b = s->globalmc[ref].pan_tilt;
1106 int *c = s->globalmc[ref].perspective;
1108 int m = (1<<ep) - (c[0]*x + c[1]*y);
1109 int mx = m * ((A[0][0] * x + A[0][1]*y) + (1<<ez) * b[0]);
1110 int my = m * ((A[1][0] * x + A[1][1]*y) + (1<<ez) * b[1]);
1112 block->u.mv[ref][0] = (mx + (1<<(ez+ep))) >> (ez+ep);
1113 block->u.mv[ref][1] = (my + (1<<(ez+ep))) >> (ez+ep);
1116 static void decode_block_params(DiracContext *s, DiracArith arith[8], DiracBlock *block,
1117 int stride, int x, int y)
1121 block->ref = pred_block_mode(block, stride, x, y, DIRAC_REF_MASK_REF1);
1122 block->ref ^= dirac_get_arith_bit(arith, CTX_PMODE_REF1);
1124 if (s->num_refs == 2) {
1125 block->ref |= pred_block_mode(block, stride, x, y, DIRAC_REF_MASK_REF2);
1126 block->ref ^= dirac_get_arith_bit(arith, CTX_PMODE_REF2) << 1;
1130 pred_block_dc(block, stride, x, y);
1131 for (i = 0; i < 3; i++)
1132 block->u.dc[i] += dirac_get_arith_int(arith+1+i, CTX_DC_F1, CTX_DC_DATA);
1136 if (s->globalmc_flag) {
1137 block->ref |= pred_block_mode(block, stride, x, y, DIRAC_REF_MASK_GLOBAL);
1138 block->ref ^= dirac_get_arith_bit(arith, CTX_GLOBAL_BLOCK) << 2;
1141 for (i = 0; i < s->num_refs; i++)
1142 if (block->ref & (i+1)) {
1143 if (block->ref & DIRAC_REF_MASK_GLOBAL) {
1144 global_mv(s, block, x, y, i);
1146 pred_mv(block, stride, x, y, i);
1147 block->u.mv[i][0] += dirac_get_arith_int(arith + 4 + 2 * i, CTX_MV_F1, CTX_MV_DATA);
1148 block->u.mv[i][1] += dirac_get_arith_int(arith + 5 + 2 * i, CTX_MV_F1, CTX_MV_DATA);
1154 * Copies the current block to the other blocks covered by the current superblock split mode
1156 static void propagate_block_data(DiracBlock *block, int stride, int size)
1159 DiracBlock *dst = block;
1161 for (x = 1; x < size; x++)
1164 for (y = 1; y < size; y++) {
1166 for (x = 0; x < size; x++)
1172 * Dirac Specification ->
1173 * 12. Block motion data syntax
/*
 * Read the block motion data of the current picture into s->sbsplit and
 * s->blmotion.
 *
 * Steps visible here:
 *   1. derive the superblock grid (sbwidth x sbheight) from the source
 *      dimensions and the luma block separation; the block grid is four
 *      times as large in each direction;
 *   2. decode one split mode per superblock (prediction + residual, mod 3);
 *   3. initialise the arithmetic decoders used by decode_block_params(),
 *      each preceded in the bitstream by a Golomb-coded length;
 *   4. decode the blocks of every superblock and replicate each decoded
 *      block over the area it covers.
 *
 * NOTE(review): the return statement is not visible in this listing; the
 * caller treats a non-zero result as failure.
 */
1175 static int dirac_unpack_block_motion_data(DiracContext *s)
1177 GetBitContext *gb = &s->gb;
1178 uint8_t *sbsplit = s->sbsplit;
1180 DiracArith arith[8];
1184 /* [DIRAC_STD] 11.2.4 and 12.2.1 Number of blocks and superblocks */
1185 s->sbwidth = DIVRNDUP(s->source.width, 4*s->plane[0].xbsep);
1186 s->sbheight = DIVRNDUP(s->source.height, 4*s->plane[0].ybsep);
1187 s->blwidth = 4 * s->sbwidth;
1188 s->blheight = 4 * s->sbheight;
1190 /* [DIRAC_STD] 12.3.1 Superblock splitting modes. superblock_split_modes()
1191 decode superblock split modes */
1192 ff_dirac_init_arith_decoder(arith, gb, svq3_get_ue_golomb(gb)); /* svq3_get_ue_golomb(gb) is the length */
/* sbsplit walks the split-mode array one row (sbwidth entries) at a time. */
1193 for (y = 0; y < s->sbheight; y++) {
1194 for (x = 0; x < s->sbwidth; x++) {
1195 unsigned int split = dirac_get_arith_uint(arith, CTX_SB_F1, CTX_SB_DATA);
/* The stored mode is always in 0..2 because of the modulo. */
1198 sbsplit[x] = (split + pred_sbsplit(sbsplit+x, s->sbwidth, x, y)) % 3;
1200 sbsplit += s->sbwidth;
1203 /* setup arith decoding */
1204 ff_dirac_init_arith_decoder(arith, gb, svq3_get_ue_golomb(gb));
/* Two decoders per reference, at arith[4 + 2 * i] and arith[5 + 2 * i]. */
1205 for (i = 0; i < s->num_refs; i++) {
1206 ff_dirac_init_arith_decoder(arith + 4 + 2 * i, gb, svq3_get_ue_golomb(gb));
1207 ff_dirac_init_arith_decoder(arith + 5 + 2 * i, gb, svq3_get_ue_golomb(gb));
/* arith[1..3]: the decoders decode_block_params() reads DC residuals from. */
1209 for (i = 0; i < 3; i++)
1210 ff_dirac_init_arith_decoder(arith+1+i, gb, svq3_get_ue_golomb(gb));
1212 for (y = 0; y < s->sbheight; y++)
1213 for (x = 0; x < s->sbwidth; x++) {
/* Split mode 0, 1, 2 gives 1, 2, 4 decoded blocks per superblock side,
   each covering 4, 2, 1 positions of the block grid. */
1214 int blkcnt = 1 << s->sbsplit[y * s->sbwidth + x];
1215 int step = 4 >> s->sbsplit[y * s->sbwidth + x];
1217 for (q = 0; q < blkcnt; q++)
1218 for (p = 0; p < blkcnt; p++) {
1219 int bx = 4 * x + p*step;
1220 int by = 4 * y + q*step;
1221 DiracBlock *block = &s->blmotion[by*s->blwidth + bx];
1222 decode_block_params(s, arith, block, s->blwidth, bx, by);
1223 propagate_block_data(block, s->blwidth, step);
/*
 * One-dimensional weight for position i inside a block of length blen whose
 * overlap parameter is offset. init_obmc_weight_row() and init_obmc_weight()
 * multiply a horizontal and a vertical value of this function together.
 *
 * ROLLOFF(i) evaluates to:
 *   offset == 1:  3 when i == 0, otherwise 5
 *   otherwise:    1 + (6*i + offset - 1) / (2*offset - 1)   (integer division)
 *
 * NOTE(review): the branch that precedes the else-if below and the final
 * return are not visible in this listing.
 */
1230 static int weight(int i, int blen, int offset)
1232 #define ROLLOFF(i) offset == 1 ? ((i) ? 5 : 3) : \
1233 (1 + (6*(i) + offset - 1) / (2*offset - 1))
/* Positions within 2*offset of the far end use the roll-off of the index
   mirrored about the last position (blen-1). */
1237 else if (i > blen-1 - 2*offset)
1238 return ROLLOFF(blen-1 - i);
/*
 * Fill one row of an OBMC weight table.
 *
 * p           plane supplying xblen and xoffset
 * obmc_weight destination row
 * stride      row length of the table; entries from xblen up to stride are
 *             handled by the last loop
 * left/right  non-zero when the block touches the left/right picture edge
 * wy          vertical weight that every horizontal weight is multiplied by
 *
 * NOTE(review): the body of the last loop is not visible in this listing.
 */
1242 static void init_obmc_weight_row(Plane *p, uint8_t *obmc_weight, int stride,
1243 int left, int right, int wy)
/* Left edge block: the first half of the row gets the flat weight 8. */
1246 for (x = 0; left && x < p->xblen >> 1; x++)
1247 obmc_weight[x] = wy*8;
/* Shaped part; with right == 1 the shift stops it at half the block length. */
1248 for (; x < p->xblen >> right; x++)
1249 obmc_weight[x] = wy*weight(x, p->xblen, p->xoffset);
/* Only reached with entries left over, i.e. when right is non-zero. */
1250 for (; x < p->xblen; x++)
1251 obmc_weight[x] = wy*8;
1252 for (; x < stride; x++)
/*
 * Fill a complete OBMC weight table of p->yblen rows, stride entries apart.
 *
 * This is the vertical counterpart of init_obmc_weight_row(): rows in the
 * top half use the flat vertical weight 8 when top is set, rows in the
 * bottom half use 8 when bottom is set, and all remaining rows use
 * weight(y, yblen, yoffset).
 */
1256 static void init_obmc_weight(Plane *p, uint8_t *obmc_weight, int stride,
1257 int left, int right, int top, int bottom)
/* Top edge block: first half of the rows, flat vertical weight. */
1260 for (y = 0; top && y < p->yblen >> 1; y++) {
1261 init_obmc_weight_row(p, obmc_weight, stride, left, right, 8);
1262 obmc_weight += stride;
/* Shaped rows; with bottom == 1 the shift stops at half the block height. */
1264 for (; y < p->yblen >> bottom; y++) {
1265 int wy = weight(y, p->yblen, p->yoffset);
1266 init_obmc_weight_row(p, obmc_weight, stride, left, right, wy);
1267 obmc_weight += stride;
/* Only reached with rows left over, i.e. when bottom is non-zero. */
1269 for (; y < p->yblen; y++) {
1270 init_obmc_weight_row(p, obmc_weight, stride, left, right, 8);
1271 obmc_weight += stride;
/*
 * Prepare the three weight tables used for block row by of plane p:
 *   s->obmc_weight[0]  left = 1, right = 0  (first block of a row in mc_row)
 *   s->obmc_weight[1]  left = 0, right = 0  (interior blocks)
 *   s->obmc_weight[2]  left = 0, right = 1  (last block of a row)
 * All three tables use a row stride of MAX_BLOCKSIZE.
 *
 * NOTE(review): the declaration of top is not visible in this listing;
 * presumably it flags the first block row, mirroring bottom.
 */
1275 static void init_obmc_weights(DiracContext *s, Plane *p, int by)
1278 int bottom = by == s->blheight-1;
1280 /* don't bother re-initing for rows 2 to blheight-2, the weights don't change */
1281 if (top || bottom || by == 1) {
1282 init_obmc_weight(p, s->obmc_weight[0], MAX_BLOCKSIZE, 1, 0, top, bottom);
1283 init_obmc_weight(p, s->obmc_weight[1], MAX_BLOCKSIZE, 0, 0, top, bottom);
1284 init_obmc_weight(p, s->obmc_weight[2], MAX_BLOCKSIZE, 0, 1, top, bottom);
/*
 * Table of 4-entry weight sets. mc_subpel() indexes it as
 * epel_weights[my&3][mx&3] and stores the selected set in src[4].
 * NOTE(review): the table rows are not visible in this listing.
 */
1288 static const uint8_t epel_weights[4][4][4] = {
1308 * For block x,y, determine which of the hpel planes to do bilinear
1309 * interpolation from and set src[] to the location in each hpel plane
1312 * @return the index of the put_dirac_pixels_tab function to use
1313 * 0 for 1 plane (fpel,hpel), 1 for 2 planes (qpel), 2 for 4 planes (qpel), and 3 for epel
/*
 * Set up the source pointers for motion compensating one block.
 *
 * s, block  decoder context and the block whose vector for ref is used
 * src       receives up to four plane pointers in src[0..3]; src[4] is set
 *           to a weight set from epel_weights on the path that reaches it
 * x, y      position of the block in the plane
 * ref       index into s->ref_pics
 * plane     index into s->plane
 *
 * Returns (nplanes >> 1) + epel, the table index described in the comment
 * preceding this function.
 *
 * NOTE(review): many condition lines are missing from this listing (the
 * tests on plane, mx, my and the places where nplanes and epel are set),
 * so the comments below only describe statements that are visible.
 */
1315 static int mc_subpel(DiracContext *s, DiracBlock *block, const uint8_t *src[5],
1316 int x, int y, int ref, int plane)
1318 Plane *p = &s->plane[plane];
1319 uint8_t **ref_hpel = s->ref_pics[ref]->hpel[plane];
1320 int motion_x = block->u.mv[ref][0];
1321 int motion_y = block->u.mv[ref][1];
1322 int mx, my, i, epel, nplanes = 0;
/* Scale the vector by the chroma subsampling shifts; presumably only for
   chroma planes (the guarding condition is not visible here). */
1325 motion_x >>= s->chroma_x_shift;
1326 motion_y >>= s->chroma_y_shift;
/* Split each component: the low mv_precision bits go to mx/my, the
   remaining bits stay in motion_x/motion_y. */
1329 mx = motion_x & ~(-1 << s->mv_precision);
1330 my = motion_y & ~(-1 << s->mv_precision);
1331 motion_x >>= s->mv_precision;
1332 motion_y >>= s->mv_precision;
1333 /* normalize subpel coordinates to epel */
1334 /* TODO: template this function? */
1335 mx <<= 3 - s->mv_precision;
1336 my <<= 3 - s->mv_precision;
/* Single plane case: (my>>1)+(mx>>2) selects one of the hpel planes. */
1345 src[0] = ref_hpel[(my>>1)+(mx>>2)] + y*p->stride + x;
/* Otherwise start from all four hpel planes at the same position. */
1349 for (i = 0; i < 4; i++)
1350 src[i] = ref_hpel[i] + y*p->stride + x;
1352 /* if we're interpolating in the right/bottom halves, adjust the planes as needed
1353 we increment x/y because the edge changes for half of the pixels */
1360 src[0] += p->stride;
1361 src[1] += p->stride;
1369 /* check if we really only need 2 planes since either mx or my is
1370 a hpel position. (epel weights of 0 handle this there) */
1372 /* mx == 0: average [0] and [2]
1373 mx == 4: average [1] and [3] */
1374 src[!mx] = src[2 + !!mx];
1376 } else if (!(my&3)) {
1377 src[0] = src[(my>>1) ];
1378 src[1] = src[(my>>1)+1];
1382 /* adjust the ordering if needed so the weights work */
1384 FFSWAP(const uint8_t *, src[0], src[1]);
1385 FFSWAP(const uint8_t *, src[2], src[3]);
1388 FFSWAP(const uint8_t *, src[0], src[2]);
1389 FFSWAP(const uint8_t *, src[1], src[3]);
1391 src[4] = epel_weights[my&3][mx&3];
1395 /* fixme: v/h _edge_pos */
/* x and y are cast to unsigned before the comparison, so a negative
   coordinate is treated as a large positive one. When the test passes,
   every plane in use is copied through ff_emulated_edge_mc into its own
   edge_emu_buffer and src[i] is redirected to that buffer. */
1396 if ((unsigned)x > p->width +EDGE_WIDTH/2 - p->xblen ||
1397 (unsigned)y > p->height+EDGE_WIDTH/2 - p->yblen) {
1398 for (i = 0; i < nplanes; i++) {
1399 ff_emulated_edge_mc(s->edge_emu_buffer[i], src[i], p->stride,
1400 p->xblen, p->yblen, x, y,
1401 p->width+EDGE_WIDTH/2, p->height+EDGE_WIDTH/2);
1402 src[i] = s->edge_emu_buffer[i];
1405 return (nplanes>>1) + epel;
/*
 * Accumulate a constant (DC) prediction into dst: every visited entry gets
 * dc multiplied by the matching OBMC weight added to it.
 *
 * The inner loop handles two entries per iteration, so it touches index
 * x+1 as well; xblen is presumably always even (TODO confirm).
 * Weight rows are MAX_BLOCKSIZE entries apart.
 *
 * NOTE(review): the statement advancing dst by stride is not visible in
 * this listing.
 */
1408 static void add_dc(uint16_t *dst, int dc, int stride,
1409 uint8_t *obmc_weight, int xblen, int yblen)
1414 for (y = 0; y < yblen; y++) {
1415 for (x = 0; x < xblen; x += 2) {
1416 dst[x ] += dc * obmc_weight[x ];
1417 dst[x+1] += dc * obmc_weight[x+1];
1420 obmc_weight += MAX_BLOCKSIZE;
/*
 * Motion compensate one block of the given plane and accumulate the
 * weighted result into mctmp.
 *
 * The low two bits of block->ref select the kind of prediction.
 * NOTE(review): the case labels, break/return lines and the condition in
 * front of weight_func are not visible in this listing; the grouping
 * described below is inferred from the statements that remain.
 */
1424 static void block_mc(DiracContext *s, DiracBlock *block,
1425 uint16_t *mctmp, uint8_t *obmc_weight,
1426 int plane, int dstx, int dsty)
1428 Plane *p = &s->plane[plane];
1429 const uint8_t *src[5];
1432 switch (block->ref&3) {
/* DC path: no reference picture is read. */
1434 add_dc(mctmp, block->u.dc[plane], p->stride, obmc_weight, p->xblen, p->yblen);
/* Single reference path: (block->ref&3)-1 maps the values 1 and 2 to
   reference index 0 and 1. The prediction is written to s->mcscratch and
   weight_func is applied with the sum of both weights. */
1438 idx = mc_subpel(s, block, src, dstx, dsty, (block->ref&3)-1, plane);
1439 s->put_pixels_tab[idx](s->mcscratch, src, p->stride, p->yblen);
1441 s->weight_func(s->mcscratch, p->stride, s->weight_log2denom,
1442 s->weight[0] + s->weight[1], p->yblen);
/* Two reference path: reference 0 goes to s->mcscratch; reference 1 is
   either written next to it and blended by biweight_func, or averaged in
   with avg_pixels_tab when no biweight function is selected. */
1445 idx = mc_subpel(s, block, src, dstx, dsty, 0, plane);
1446 s->put_pixels_tab[idx](s->mcscratch, src, p->stride, p->yblen);
1447 idx = mc_subpel(s, block, src, dstx, dsty, 1, plane);
1448 if (s->biweight_func) {
1449 /* fixme: +32 is a quick hack */
1450 s->put_pixels_tab[idx](s->mcscratch + 32, src, p->stride, p->yblen);
1451 s->biweight_func(s->mcscratch, s->mcscratch+32, p->stride, s->weight_log2denom,
1452 s->weight[0], s->weight[1], p->yblen);
1454 s->avg_pixels_tab[idx](s->mcscratch, src, p->stride, p->yblen);
/* Accumulate the prediction held in s->mcscratch into mctmp. */
1457 s->add_obmc(mctmp, s->mcscratch, p->stride, obmc_weight, p->yblen);
/*
 * Motion compensate one row of blocks of the given plane into mctmp.
 *
 * The first block is placed at x = -xoffset with weight table 0, interior
 * blocks use weight table 1, and the block at index x after the loop (which
 * is blwidth-1) uses weight table 2.
 *
 * NOTE(review): the statements that advance mctmp and dstx between blocks
 * are not visible in this listing.
 */
1460 static void mc_row(DiracContext *s, DiracBlock *block, uint16_t *mctmp, int plane, int dsty)
1462 Plane *p = &s->plane[plane];
1463 int x, dstx = p->xbsep - p->xoffset;
1465 block_mc(s, block, mctmp, s->obmc_weight[0], plane, -p->xoffset, dsty);
1468 for (x = 1; x < s->blwidth-1; x++) {
1469 block_mc(s, block+x, mctmp, s->obmc_weight[1], plane, dstx, dsty);
1473 block_mc(s, block+x, mctmp, s->obmc_weight[2], plane, dstx, dsty);
/*
 * Copy the DSP function tables for one table index idx into the context:
 * put/avg pixel tables, the OBMC accumulation function and, when weighting
 * is in use, the weight and biweight functions.
 *
 * NOTE(review): the lines computing idx (presumably from xblen) are not
 * visible in this listing, nor is the else that separates the two pairs of
 * weight function assignments.
 */
1476 static void select_dsp_funcs(DiracContext *s, int width, int height, int xblen, int yblen)
1484 memcpy(s->put_pixels_tab, s->diracdsp.put_dirac_pixels_tab[idx], sizeof(s->put_pixels_tab));
1485 memcpy(s->avg_pixels_tab, s->diracdsp.avg_dirac_pixels_tab[idx], sizeof(s->avg_pixels_tab));
1486 s->add_obmc = s->diracdsp.add_dirac_obmc[idx];
/* Weighting functions are selected whenever the denominator exponent is
   above 1 or either weight differs from 1. */
1487 if (s->weight_log2denom > 1 || s->weight[0] != 1 || s->weight[1] != 1) {
1488 s->weight_func = s->diracdsp.weight_dirac_pixels_tab[idx];
1489 s->biweight_func = s->diracdsp.biweight_dirac_pixels_tab[idx];
/* Otherwise no weighting functions; block_mc() tests biweight_func and
   falls back to avg_pixels_tab when it is NULL. */
1491 s->weight_func = NULL;
1492 s->biweight_func = NULL;
/*
 * Prepare the half-pel planes of one plane of a reference frame.
 *
 * hpel[plane][0] is the frame data itself. hpel[plane][1..3] are allocated
 * on first use and filled by dirac_hpel_filter(); ref->interpolated[plane]
 * records that this was done so that it is not repeated for the same frame.
 * Edges are drawn on every plane with a width of EDGE_WIDTH/2.
 *
 * NOTE(review): the result of av_malloc() is used without a visible check.
 */
1496 static void interpolate_refplane(DiracContext *s, DiracFrame *ref, int plane, int width, int height)
1498 /* chroma allocates an edge of 8 when subsampled
1499 which for 4:2:2 means an h edge of 16 and v edge of 8
1500 just use 8 for everything for the moment */
1501 int i, edge = EDGE_WIDTH/2;
1503 ref->hpel[plane][0] = ref->avframe.data[plane];
1504 s->dsp.draw_edges(ref->hpel[plane][0], ref->avframe.linesize[plane], width, height, edge, edge, EDGE_TOP | EDGE_BOTTOM); /* EDGE_TOP | EDGE_BOTTOM values just copied to make it build, this needs to be ensured */
1506 /* no need for hpel if we only have fpel vectors */
1507 if (!s->mv_precision)
/* Each extra plane holds height + 2*edge lines plus 32 bytes of slack; the
   usable pointer starts edge lines and 16 bytes into the allocation. */
1510 for (i = 1; i < 4; i++) {
1511 if (!ref->hpel_base[plane][i])
1512 ref->hpel_base[plane][i] = av_malloc((height+2*edge) * ref->avframe.linesize[plane] + 32);
1513 /* we need to be 16-byte aligned even for chroma */
1514 ref->hpel[plane][i] = ref->hpel_base[plane][i] + edge*ref->avframe.linesize[plane] + 16;
/* Filter once per frame and plane, then extend the edges of the results. */
1517 if (!ref->interpolated[plane]) {
1518 s->diracdsp.dirac_hpel_filter(ref->hpel[plane][1], ref->hpel[plane][2],
1519 ref->hpel[plane][3], ref->hpel[plane][0],
1520 ref->avframe.linesize[plane], width, height);
1521 s->dsp.draw_edges(ref->hpel[plane][1], ref->avframe.linesize[plane], width, height, edge, edge, EDGE_TOP | EDGE_BOTTOM);
1522 s->dsp.draw_edges(ref->hpel[plane][2], ref->avframe.linesize[plane], width, height, edge, edge, EDGE_TOP | EDGE_BOTTOM);
1523 s->dsp.draw_edges(ref->hpel[plane][3], ref->avframe.linesize[plane], width, height, edge, edge, EDGE_TOP | EDGE_BOTTOM);
1525 ref->interpolated[plane] = 1;
1529 * Dirac Specification ->
1530 * 13.0 Transform data syntax. transform_data()
/*
 * Decode the transform data of the current picture and write the
 * reconstructed planes into s->current_picture.
 *
 * For each of the three components the inverse wavelet transform is set up
 * on p->idwt_buf. Intra pictures (num_refs == 0) are written out in strips
 * of 16 rows. Inter pictures are processed one block row at a time: the
 * motion compensated prediction is accumulated in s->mctmp and combined
 * with the decoded residual by add_rect_clamped().
 *
 * NOTE(review): several declarations, conditions and all return statements
 * are missing from this listing; the caller treats a non-zero result as
 * failure.
 */
1532 static int dirac_decode_frame_internal(DiracContext *s)
1535 int y, i, comp, dsty;
1538 /* [DIRAC_STD] 13.5.1 low_delay_transform_data() */
/* Clear the IDWT buffer of every plane (the condition guarding this loop
   is not visible in this listing). */
1539 for (comp = 0; comp < 3; comp++) {
1540 Plane *p = &s->plane[comp];
1541 memset(p->idwt_buf, 0, p->idwt_stride * p->idwt_height * sizeof(IDWTELEM));
1547 for (comp = 0; comp < 3; comp++) {
1548 Plane *p = &s->plane[comp];
1549 uint8_t *frame = s->current_picture->avframe.data[comp];
1551 /* FIXME: small resolutions */
/* Four emulation buffers carved out of one allocation, spaced by the plane
   width rounded up to a multiple of 16. */
1552 for (i = 0; i < 4; i++)
1553 s->edge_emu_buffer[i] = s->edge_emu_buffer_base + i*FFALIGN(p->width, 16);
1555 if (!s->zero_res && !s->low_delay)
1557 memset(p->idwt_buf, 0, p->idwt_stride * p->idwt_height * sizeof(IDWTELEM));
1558 decode_component(s, comp); /* [DIRAC_STD] 13.4.1 core_transform_data() */
1560 if (ff_spatial_idwt_init2(&d, p->idwt_buf, p->idwt_width, p->idwt_height, p->idwt_stride,
1561 s->wavelet_idx+2, s->wavelet_depth, p->idwt_tmp))
1564 if (!s->num_refs) { /* intra */
/* Run the IDWT up to row y+16, then hand that strip of 16 rows to
   put_signed_rect_clamped() to write it into the frame. */
1565 for (y = 0; y < p->height; y += 16) {
1566 ff_spatial_idwt_slice2(&d, y+16); /* decode */
1567 s->diracdsp.put_signed_rect_clamped(frame + y*p->stride, p->stride,
1568 p->idwt_buf + y*p->idwt_stride, p->idwt_stride, p->width, 16);
1570 } else { /* inter */
/* Number of mctmp entries between the starts of two block rows. */
1571 int rowheight = p->ybsep*p->stride;
1573 select_dsp_funcs(s, p->width, p->height, p->xblen, p->yblen);
1575 for (i = 0; i < s->num_refs; i++)
1576 interpolate_refplane(s, s->ref_pics[i], comp, p->width, p->height);
1578 memset(s->mctmp, 0, 4*p->yoffset*p->stride);
1581 for (y = 0; y < s->blheight; y++) {
1583 start = FFMAX(dsty, 0);
1584 uint16_t *mctmp = s->mctmp + y*rowheight;
1585 DiracBlock *blocks = s->blmotion + y*s->blwidth;
1587 init_obmc_weights(s, p, y);
/* h is the number of picture rows finished by this block row; the last
   block row, or one that would run past the plane, takes all that remain. */
1589 if (y == s->blheight-1 || start+p->ybsep > p->height)
1590 h = p->height - start;
1592 h = p->ybsep - (start - dsty);
/* Clear the part of mctmp that this row starts accumulating into, then
   add the prediction of every block in the row. */
1596 memset(mctmp+2*p->yoffset*p->stride, 0, 2*rowheight);
1597 mc_row(s, blocks, mctmp, comp, dsty);
/* Skip rows above the picture (start - dsty) and the left overlap margin,
   decode the residual up to row start + h and combine both into the frame. */
1599 mctmp += (start - dsty)*p->stride + p->xoffset;
1600 ff_spatial_idwt_slice2(&d, start + h); /* decode */
1601 s->diracdsp.add_rect_clamped(frame + start*p->stride, mctmp, p->stride,
1602 p->idwt_buf + start*p->idwt_stride, p->idwt_stride, p->width, h);
1614 * Dirac Specification ->
1615 * 11.1.1 Picture Header. picture_header()
/*
 * Parse the picture header of the current picture and the data that
 * follows it: picture number, reference picture numbers, the picture to
 * retire, then prediction parameters, block motion data and wavelet
 * transform parameters.
 *
 * Side effects visible here: fills s->ref_pics[], may initialise
 * s->frame_number, and updates the s->ref_frames list.
 *
 * NOTE(review): return statements and a few conditions are missing from
 * this listing; the caller treats a non-zero result as failure.
 */
1617 static int dirac_decode_picture_header(DiracContext *s)
1620 int i, j, refnum, refdist;
1621 GetBitContext *gb = &s->gb;
1623 /* [DIRAC_STD] 11.1.1 Picture Header. picture_header() PICTURE_NUM */
1624 picnum = s->current_picture->avframe.display_picture_number = get_bits_long(gb, 32);
1627 av_log(s->avctx,AV_LOG_DEBUG,"PICTURE_NUM: %d\n",picnum);
1629 /* if this is the first keyframe after a sequence header, start our
1630 reordering from here */
1631 if (s->frame_number < 0)
1632 s->frame_number = picnum;
1634 s->ref_pics[0] = s->ref_pics[1] = NULL;
/* Reference numbers are coded as signed offsets from the picture number. */
1635 for (i = 0; i < s->num_refs; i++) {
1636 refnum = picnum + dirac_get_se_golomb(gb);
1639 /* find the closest reference to the one we want */
1640 /* Jordi: this is needed if the referenced picture hasn't yet arrived */
/* The search ends early once refdist reaches 0, i.e. on an exact match.
   The initial value of refdist is not visible in this listing. */
1641 for (j = 0; j < MAX_REFERENCE_FRAMES && refdist; j++)
1642 if (s->ref_frames[j]
1643 && FFABS(s->ref_frames[j]->avframe.display_picture_number - refnum) < refdist) {
1644 s->ref_pics[i] = s->ref_frames[j];
1645 refdist = FFABS(s->ref_frames[j]->avframe.display_picture_number - refnum);
1648 if (!s->ref_pics[i] || refdist)
1649 av_log(s->avctx, AV_LOG_DEBUG, "Reference not found\n");
1651 /* if there were no references at all, allocate one */
/* NOTE(review): the result of get_buffer() is not checked in the visible
   code, and ref_pics[i] stays NULL when no free slot exists. */
1652 if (!s->ref_pics[i])
1653 for (j = 0; j < MAX_FRAMES; j++)
1654 if (!s->all_frames[j].avframe.data[0]) {
1655 s->ref_pics[i] = &s->all_frames[j];
1656 s->avctx->get_buffer(s->avctx, &s->ref_pics[i]->avframe);
1660 /* retire the reference frames that are not used anymore */
1661 if (s->current_picture->avframe.reference) {
/* The picture to retire is also coded as an offset from picnum; an offset
   of zero means nothing is retired. */
1662 retire = picnum + dirac_get_se_golomb(gb);
1663 if (retire != picnum) {
1664 DiracFrame *retire_pic = remove_frame(s->ref_frames, retire);
/* Clear every reference bit except DELAYED_PIC_REF. */
1667 retire_pic->avframe.reference &= DELAYED_PIC_REF;
1669 av_log(s->avctx, AV_LOG_DEBUG, "Frame to retire not found\n");
1672 /* if reference array is full, remove the oldest as per the spec */
/* add_frame() returning non-zero is treated as a full list: the frame in
   slot 0 is dropped and the insertion is retried. */
1673 while (add_frame(s->ref_frames, MAX_REFERENCE_FRAMES, s->current_picture)) {
1674 av_log(s->avctx, AV_LOG_ERROR, "Reference frame overflow\n");
1675 remove_frame(s->ref_frames, s->ref_frames[0]->avframe.display_picture_number)->avframe.reference &= DELAYED_PIC_REF;
1680 if (dirac_unpack_prediction_parameters(s)) /* [DIRAC_STD] 11.2 Picture Prediction Data. picture_prediction() */
1682 if (dirac_unpack_block_motion_data(s)) /* [DIRAC_STD] 12. Block motion data syntax */
1685 if (dirac_unpack_idwt_params(s)) /* [DIRAC_STD] 11.3 Wavelet transform data */
/*
 * Output the delayed frame with the lowest display picture number.
 *
 * s->delay_frames is scanned as a NULL-terminated array. The chosen entry
 * is removed by shifting the following entries down one slot, its
 * DELAYED_PIC_REF bit is toggled, and its AVFrame is copied to *picture
 * with *data_size set to sizeof(AVFrame).
 *
 * NOTE(review): a guard for an empty list, the initialisation of out_idx
 * and the return statement are not visible in this listing.
 */
1692 static int get_delayed_pic(DiracContext *s, AVFrame *picture, int *data_size)
1694 DiracFrame *out = s->delay_frames[0];
1697 /* find frame with lowest picture number */
1698 for (i = 1; s->delay_frames[i]; i++)
1699 if (s->delay_frames[i]->avframe.display_picture_number < out->avframe.display_picture_number) {
1700 out = s->delay_frames[i];
/* Close the gap; the terminating NULL is copied down as well. */
1704 for (i = out_idx; s->delay_frames[i]; i++)
1705 s->delay_frames[i] = s->delay_frames[i+1];
1708 out->avframe.reference ^= DELAYED_PIC_REF;
1709 *data_size = sizeof(AVFrame);
1710 *(AVFrame *)picture = out->avframe;
1717 * Dirac Specification ->
1718 * 9.6 Parse Info Header Syntax. parse_info()
1719 * 4 byte start code + byte parse code + 4 byte size + 4 byte previous size
/* Bytes in a parse info header (4 + 1 + 4 + 4); payload parsing starts at
   this offset within a data unit. */
1721 #define DATA_UNIT_HEADER_SIZE 13
1723 /* [DIRAC_STD] dirac_decode_data_unit refers to the while loop defined in 9.3,
1724 inside the function parse_sequence() */
/*
 * Decode one data unit. buf points at the start of the parse info header
 * and size is the length of the whole unit, header included.
 *
 * The parse code (byte 4 of the header) selects the action:
 *   pc_seq_header     parse the sequence header and allocate buffers
 *   pc_eos            free the sequence buffers
 *   pc_aux_data       look for an encoder version string
 *   bit 0x8 set       picture: pick a free frame, decode header and data
 *
 * NOTE(review): parse_code is read from buf[4] in the declaration, before
 * the size check below. Return statements are not visible in this
 * listing; the caller treats a non-zero result as failure.
 */
1725 static int dirac_decode_data_unit(AVCodecContext *avctx, const uint8_t *buf, int size)
1727 DiracContext *s = avctx->priv_data;
1728 DiracFrame *pic = NULL;
1729 int i, parse_code = buf[4];
1732 if (size < DATA_UNIT_HEADER_SIZE)
/* The bit reader covers only the payload that follows the 13 byte header. */
1735 init_get_bits(&s->gb, &buf[13], 8*(size - DATA_UNIT_HEADER_SIZE));
1737 if (parse_code == pc_seq_header) {
1738 if (s->seen_sequence_header)
1741 /* [DIRAC_STD] 10. Sequence header */
1742 if (avpriv_dirac_parse_sequence_header(avctx, &s->gb, &s->source))
1745 avcodec_get_chroma_sub_sample(avctx->pix_fmt, &s->chroma_x_shift, &s->chroma_y_shift);
1747 if (alloc_sequence_buffers(s))
1750 s->seen_sequence_header = 1;
1751 } else if (parse_code == pc_eos) { /* [DIRAC_STD] End of Sequence */
1752 free_sequence_buffers(s);
1753 s->seen_sequence_header = 0;
1754 } else if (parse_code == pc_aux_data) {
1755 if (buf[13] == 1) { /* encoder implementation/version */
1757 /* versions older than 1.0.8 don't store quant delta for
1758 subbands with only one codeblock */
/* Matches version 1.0.x with x <= 7 only. */
1759 if (sscanf(buf+14, "Schroedinger %d.%d.%d", ver, ver+1, ver+2) == 3)
1760 if (ver[0] == 1 && ver[1] == 0 && ver[2] <= 7)
1761 s->old_delta_quant = 1;
1763 } else if (parse_code & 0x8) { /* picture data unit */
1764 if (!s->seen_sequence_header) {
1765 av_log(avctx, AV_LOG_DEBUG, "Dropping frame without sequence header\n");
1769 /* find an unused frame */
/* There is no break in this loop, so the last unused slot is the one kept. */
1770 for (i = 0; i < MAX_FRAMES; i++)
1771 if (s->all_frames[i].avframe.data[0] == NULL)
1772 pic = &s->all_frames[i];
1774 av_log(avctx, AV_LOG_ERROR, "framelist full\n");
1778 avcodec_get_frame_defaults(&pic->avframe);
1780 /* [DIRAC_STD] Defined in 9.6.1 ... */
/* The low two bits of the parse code give the number of references; per
   the log message below, a value of 3 is reported as an error. */
1781 tmp = parse_code & 0x03; /* [DIRAC_STD] num_refs() */
1783 av_log(avctx, AV_LOG_ERROR, "num_refs of 3\n");
1787 s->is_arith = (parse_code & 0x48) == 0x08; /* [DIRAC_STD] using_ac() */
1788 s->low_delay = (parse_code & 0x88) == 0x88; /* [DIRAC_STD] is_low_delay() */
1789 pic->avframe.reference = (parse_code & 0x0C) == 0x0C; /* [DIRAC_STD] is_reference() */
1790 pic->avframe.key_frame = s->num_refs == 0; /* [DIRAC_STD] is_intra() */
1791 pic->avframe.pict_type = s->num_refs + 1; /* Definition of AVPictureType in avutil.h */
1793 if (avctx->get_buffer(avctx, &pic->avframe) < 0) {
1794 av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
/* From here on the frame is the current picture; plane strides follow the
   line sizes of the buffer that was just obtained. */
1797 s->current_picture = pic;
1798 s->plane[0].stride = pic->avframe.linesize[0];
1799 s->plane[1].stride = pic->avframe.linesize[1];
1800 s->plane[2].stride = pic->avframe.linesize[2];
1802 /* [DIRAC_STD] 11.1 Picture parse. picture_parse() */
1803 if (dirac_decode_picture_header(s))
1806 /* [DIRAC_STD] 13.0 Transform data syntax. transform_data() */
1807 if (dirac_decode_frame_internal(s))
/*
 * Decoder entry point: decode every data unit found in pkt and, when a
 * frame is ready for display, copy it to data and set *data_size.
 *
 * Output order follows s->frame_number, the number of the next picture to
 * display. A decoded picture with a higher number is parked in
 * s->delay_frames (marked with DELAYED_PIC_REF); a picture with exactly
 * that number is output directly.
 *
 * NOTE(review): data is accessed both through picture (a DiracFrame
 * pointer) and as an AVFrame. This presumably relies on avframe being the
 * first member of DiracFrame; TODO confirm. Loop headers, several
 * conditions and the return statements are missing from this listing.
 */
1813 static int dirac_decode_frame(AVCodecContext *avctx, void *data, int *data_size, AVPacket *pkt)
1815 DiracContext *s = avctx->priv_data;
1816 DiracFrame *picture = data;
1817 uint8_t *buf = pkt->data;
1818 int buf_size = pkt->size;
1819 int i, data_unit_size, buf_idx = 0;
1821 /* release unused frames */
/* A frame is unused when it owns a buffer but has no reference bits set;
   its cached interpolation flags are cleared together with the buffer. */
1822 for (i = 0; i < MAX_FRAMES; i++)
1823 if (s->all_frames[i].avframe.data[0] && !s->all_frames[i].avframe.reference) {
1824 avctx->release_buffer(avctx, &s->all_frames[i].avframe);
1825 memset(s->all_frames[i].interpolated, 0, sizeof(s->all_frames[i].interpolated));
1828 s->current_picture = NULL;
1831 /* end of stream, so flush delayed pics */
1833 return get_delayed_pic(s, (AVFrame *)data, data_size);
1836 /*[DIRAC_STD] Here starts the code from parse_info() defined in 9.6
1837 [DIRAC_STD] PARSE_INFO_PREFIX = "BBCD" as defined in ISO/IEC 646
1838 BBCD start code search */
1839 for (; buf_idx + DATA_UNIT_HEADER_SIZE < buf_size; buf_idx++) {
1840 if (buf[buf_idx ] == 'B' && buf[buf_idx+1] == 'B' &&
1841 buf[buf_idx+2] == 'C' && buf[buf_idx+3] == 'D')
1844 /* BBCD found or end of data */
1845 if (buf_idx + DATA_UNIT_HEADER_SIZE >= buf_size)
/* The unit size is the 32-bit field at offset 5 of the header.
   NOTE(review): the value comes straight from the stream; the visible
   checks reject zero and sizes running past the buffer, but values that
   are negative as int do not appear to be rejected here (confirm). */
1848 data_unit_size = AV_RB32(buf+buf_idx+5);
1849 if (buf_idx + data_unit_size > buf_size || !data_unit_size) {
1850 if(buf_idx + data_unit_size > buf_size)
1851 av_log(s->avctx, AV_LOG_ERROR,
1852 "Data unit with size %d is larger than input buffer, discarding\n",
1857 /* [DIRAC_STD] dirac_decode_data_unit makes reference to the while defined in 9.3 inside the function parse_sequence() */
1858 if (dirac_decode_data_unit(avctx, buf+buf_idx, data_unit_size))
1860 av_log(s->avctx, AV_LOG_ERROR,"Error in dirac_decode_data_unit\n");
1863 buf_idx += data_unit_size;
/* No picture was decoded from this packet. */
1866 if (!s->current_picture)
/* The picture arrived ahead of its display time: park it, and output the
   frame numbered frame_number if that one is already waiting. */
1869 if (s->current_picture->avframe.display_picture_number > s->frame_number) {
1870 DiracFrame *delayed_frame = remove_frame(s->delay_frames, s->frame_number);
1872 s->current_picture->avframe.reference |= DELAYED_PIC_REF;
1874 if (add_frame(s->delay_frames, MAX_DELAY, s->current_picture)) {
1875 int min_num = s->delay_frames[0]->avframe.display_picture_number;
1876 /* Too many delayed frames, so we display the frame with the lowest pts */
1877 av_log(avctx, AV_LOG_ERROR, "Delay frame overflow\n");
1878 delayed_frame = s->delay_frames[0];
1880 for (i = 1; s->delay_frames[i]; i++)
1881 if (s->delay_frames[i]->avframe.display_picture_number < min_num)
1882 min_num = s->delay_frames[i]->avframe.display_picture_number;
/* Make room by taking out the lowest numbered frame, then retry. */
1884 delayed_frame = remove_frame(s->delay_frames, min_num);
1885 add_frame(s->delay_frames, MAX_DELAY, s->current_picture);
1888 if (delayed_frame) {
1889 delayed_frame->avframe.reference ^= DELAYED_PIC_REF;
1890 *(AVFrame*)data = delayed_frame->avframe;
1891 *data_size = sizeof(AVFrame);
1893 } else if (s->current_picture->avframe.display_picture_number == s->frame_number) {
1894 /* The right frame at the right time :-) */
1895 *(AVFrame*)data = s->current_picture->avframe;
1896 *data_size = sizeof(AVFrame);
/* The next picture to display follows the one just written to data. */
1900 s->frame_number = picture->avframe.display_picture_number + 1;
/*
 * Codec registration entry for the Dirac decoder: a video codec with id
 * CODEC_ID_DIRAC whose per-instance state is a DiracContext. It wires up
 * the init, close, decode and flush callbacks and declares
 * CODEC_CAP_DELAY.
 * NOTE(review): the name field and the closing line of the initializer are
 * not visible in this listing.
 */
1905 AVCodec ff_dirac_decoder = {
1907 .type = AVMEDIA_TYPE_VIDEO,
1908 .id = CODEC_ID_DIRAC,
1909 .priv_data_size = sizeof(DiracContext),
1910 .init = dirac_decode_init,
1911 .close = dirac_decode_end,
1912 .decode = dirac_decode_frame,
1913 .capabilities = CODEC_CAP_DELAY,
1914 .flush = dirac_decode_flush,
1915 .long_name = NULL_IF_CONFIG_SMALL("BBC Dirac VC-2"),