2 * Copyright (C) 2007 Marco Gerards <marco@gnu.org>
3 * Copyright (C) 2009 David Conrad
4 * Copyright (C) 2011 Jordi Ortiz
6 * This file is part of FFmpeg.
8 * FFmpeg is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
13 * FFmpeg is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with FFmpeg; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24 * @file libavcodec/diracdec.c
26 * @author Marco Gerards <marco@gnu.org>, David Conrad, Jordi Ortiz <nenjordi@gmail.com>
32 #include "bytestream.h"
34 #include "dirac_arith.h"
35 #include "mpeg12data.h"
41 * The spec limits the number of wavelet decompositions to 4 for both
42 * level 1 (VC-2) and 128 (long-gop default).
43 * 5 decompositions is the maximum before >16-bit buffers are needed.
44 * Schroedinger allows this for DD 9,7 and 13,7 wavelets only, limiting
45 * the others to 4 decompositions (or 3 for the fidelity filter).
47 * We use this instead of MAX_DECOMPOSITIONS to save some memory.
49 #define MAX_DWT_LEVELS 5
52 * The spec limits this to 3 for frame coding, but in practice can be as high as 6
54 #define MAX_REFERENCE_FRAMES 8
55 #define MAX_DELAY 5 /* limit for main profile for frame coding (TODO: field coding) */
56 #define MAX_FRAMES (MAX_REFERENCE_FRAMES + MAX_DELAY + 1)
57 #define MAX_QUANT 68 /* max quant for VC-2 */
58 #define MAX_BLOCKSIZE 32 /* maximum xblen/yblen we support */
61 * DiracBlock->ref flags, if set then the block does MC from the given ref
63 #define DIRAC_REF_MASK_REF1 1
64 #define DIRAC_REF_MASK_REF2 2
65 #define DIRAC_REF_MASK_GLOBAL 4
68 * Value of Picture.reference when Picture is not a reference picture, but
69 * is held for delayed output.
71 #define DELAYED_PIC_REF 4
73 #define ff_emulated_edge_mc ff_emulated_edge_mc_8 /* Fix: change the calls to this function regarding bit depth */
/*
 * Round size up to the next multiple of (1 << depth).
 * Every use of an argument is parenthesized so that expression arguments
 * (for example "a | b" or a conditional) expand with the intended
 * precedence.  depth is evaluated more than once, so it must not have
 * side effects.
 */
#define CALC_PADDING(size, depth) \
    ((((size) + (1 << (depth)) - 1) >> (depth)) << (depth))
78 #define DIVRNDUP(a, b) (((a) + (b) - 1) / (b))
82 int interpolated[3]; /* 1 if hpel[] is valid */
84 uint8_t *hpel_base[3][4];
91 } u; /* anonymous unions aren't in C99 :( */
95 typedef struct SubBand {
103 struct SubBand *parent;
107 const uint8_t *coeff_data;
110 typedef struct Plane {
119 IDWTELEM *idwt_buf_base;
125 /* block separation (block n+1 starts after this many pixels in block n) */
128 /* amount of overspill on each edge (half of the overlap between blocks) */
132 SubBand band[MAX_DWT_LEVELS][4];
/* Per-stream decoder state.  The members visible here hold the parsed
 * picture parameters (wavelet, codeblock, low-delay slice, global motion and
 * weighting data), the motion-compensation scratch buffers and DSP function
 * pointers, and the reference/delay frame lists. */
135 typedef struct DiracContext {
136 AVCodecContext *avctx;
138 DiracDSPContext diracdsp;
140 dirac_source_params source;
141 int seen_sequence_header;
142 int frame_number; /* number of the next frame to display */
147 int zero_res; /* zero residue flag */
148 int is_arith; /* whether coeffs use arith or golomb coding */
149 int low_delay; /* use the low delay syntax */
150 int globalmc_flag; /* use global motion compensation */
151 int num_refs; /* number of reference pictures */
153 /* wavelet decoding */
154 unsigned wavelet_depth; /* depth of the IDWT */
155 unsigned wavelet_idx; /* wavelet filter index; also selects the default_qmat entry */
158 * schroedinger older than 1.0.8 doesn't store
159 * quant delta if only one codebook exists in a band
161 unsigned old_delta_quant;
162 unsigned codeblock_mode;
167 } codeblock[MAX_DWT_LEVELS+1];
170 unsigned num_x; /* number of horizontal slices */
171 unsigned num_y; /* number of vertical slices */
172 AVRational bytes; /* average bytes per slice */
173 uint8_t quant[MAX_DWT_LEVELS][4]; /* [DIRAC_STD] E.1 */
177 int pan_tilt[2]; /* pan/tilt vector */
178 int zrs[2][2]; /* zoom/rotate/shear matrix */
179 int perspective[2]; /* perspective vector */
181 unsigned perspective_exp;
184 /* motion compensation */
185 uint8_t mv_precision; /* motion vector precision, read per [DIRAC_STD] 11.2.5; values above 3 are rejected */
186 int16_t weight[2]; /* [DIRAC_STD] REF1_WT and REF2_WT */
187 unsigned weight_log2denom; /* [DIRAC_STD] REFS_WT_PRECISION */
189 int blwidth; /* number of blocks (horizontally) */
190 int blheight; /* number of blocks (vertically) */
191 int sbwidth; /* number of superblocks (horizontally) */
192 int sbheight; /* number of superblocks (vertically) */
195 DiracBlock *blmotion;
197 uint8_t *edge_emu_buffer[4];
198 uint8_t *edge_emu_buffer_base;
200 uint16_t *mctmp; /* buffer holding the MC data multiplied by OBMC weights */
203 DECLARE_ALIGNED(16, uint8_t, obmc_weight)[3][MAX_BLOCKSIZE*MAX_BLOCKSIZE];
205 void (*put_pixels_tab[4])(uint8_t *dst, const uint8_t *src[5], int stride, int h);
206 void (*avg_pixels_tab[4])(uint8_t *dst, const uint8_t *src[5], int stride, int h);
207 void (*add_obmc)(uint16_t *dst, const uint8_t *src, int stride, const uint8_t *obmc_weight, int yblen);
208 dirac_weight_func weight_func;
209 dirac_biweight_func biweight_func;
211 DiracFrame *current_picture;
212 DiracFrame *ref_pics[2];
/* ref_frames and delay_frames are walked until a NULL entry is found, so
 * the extra slot presumably holds the terminator -- TODO confirm. */
214 DiracFrame *ref_frames[MAX_REFERENCE_FRAMES+1];
215 DiracFrame *delay_frames[MAX_DELAY+1];
216 DiracFrame all_frames[MAX_FRAMES];
220 * Dirac Specification ->
221 * Parse code values. 9.6.1 Table 9.1
223 enum dirac_parse_code {
224 pc_seq_header = 0x00,
237 static const uint8_t default_qmat[][4][4] = {
238 { { 5, 3, 3, 0}, { 0, 4, 4, 1}, { 0, 5, 5, 2}, { 0, 6, 6, 3} },
239 { { 4, 2, 2, 0}, { 0, 4, 4, 2}, { 0, 5, 5, 3}, { 0, 7, 7, 5} },
240 { { 5, 3, 3, 0}, { 0, 4, 4, 1}, { 0, 5, 5, 2}, { 0, 6, 6, 3} },
241 { { 8, 4, 4, 0}, { 0, 4, 4, 0}, { 0, 4, 4, 0}, { 0, 4, 4, 0} },
242 { { 8, 4, 4, 0}, { 0, 4, 4, 0}, { 0, 4, 4, 0}, { 0, 4, 4, 0} },
243 { { 0, 4, 4, 8}, { 0, 8, 8, 12}, { 0, 13, 13, 17}, { 0, 17, 17, 21} },
244 { { 3, 1, 1, 0}, { 0, 4, 4, 2}, { 0, 6, 6, 5}, { 0, 9, 9, 7} },
247 static const int qscale_tab[MAX_QUANT+1] = {
248 4, 5, 6, 7, 8, 10, 11, 13,
249 16, 19, 23, 27, 32, 38, 45, 54,
250 64, 76, 91, 108, 128, 152, 181, 215,
251 256, 304, 362, 431, 512, 609, 724, 861,
252 1024, 1218, 1448, 1722, 2048, 2435, 2896, 3444,
253 4096, 4871, 5793, 6889, 8192, 9742, 11585, 13777,
254 16384, 19484, 23170, 27554, 32768, 38968, 46341, 55109,
258 static const int qoffset_intra_tab[MAX_QUANT+1] = {
259 1, 2, 3, 4, 4, 5, 6, 7,
260 8, 10, 12, 14, 16, 19, 23, 27,
261 32, 38, 46, 54, 64, 76, 91, 108,
262 128, 152, 181, 216, 256, 305, 362, 431,
263 512, 609, 724, 861, 1024, 1218, 1448, 1722,
264 2048, 2436, 2897, 3445, 4096, 4871, 5793, 6889,
265 8192, 9742, 11585, 13777, 16384, 19484, 23171, 27555,
269 static const int qoffset_inter_tab[MAX_QUANT+1] = {
270 1, 2, 2, 3, 3, 4, 4, 5,
271 6, 7, 9, 10, 12, 14, 17, 20,
272 24, 29, 34, 41, 48, 57, 68, 81,
273 96, 114, 136, 162, 192, 228, 272, 323,
274 384, 457, 543, 646, 768, 913, 1086, 1292,
275 1536, 1827, 2172, 2583, 3072, 3653, 4344, 5166,
276 6144, 7307, 8689, 10333, 12288, 14613, 17378, 20666,
280 /* magic number division by 3 from schroedinger */
/* Fixed-point form: 21845 is floor(65536 / 3) and the final shift drops the
 * 16 fractional bits.  With the +1 and +10922 terms the result is x / 3
 * rounded to the nearest integer for small non-negative x
 * (2 -> 1, 4 -> 1, 5 -> 2). */
281 static inline int divide3(int x)
283 return ((x+1)*21845 + 10922) >> 16;
/* Search the NULL-terminated framelist for the entry whose
 * display_picture_number equals picnum, then close the gap by moving the
 * following entries (terminator included) down one slot.
 * NOTE(review): the statements that record the match index, guard the
 * compaction loop and return the result are not visible in this excerpt;
 * presumably remove_pic is returned (NULL when nothing matched) -- confirm
 * against the complete function. */
286 static DiracFrame *remove_frame(DiracFrame *framelist[], int picnum)
288 DiracFrame *remove_pic = NULL;
289 int i, remove_idx = -1;
291 for (i = 0; framelist[i]; i++)
292 if (framelist[i]->avframe.display_picture_number == picnum) {
293 remove_pic = framelist[i];
/* shift the tail of the list down; the loop stops once the NULL
 * terminator has been copied into slot i */
298 for (i = remove_idx; framelist[i]; i++)
299 framelist[i] = framelist[i+1];
/* Walk the first maxframes slots of framelist and store frame into one of
 * them.
 * NOTE(review): the condition that selects the slot and the return
 * statements are not visible in this excerpt; presumably the first empty
 * slot is used and failure is reported when the list is full -- confirm. */
304 static int add_frame(DiracFrame *framelist[], int maxframes, DiracFrame *frame)
307 for (i = 0; i < maxframes; i++)
309 framelist[i] = frame;
/* Allocate the per-sequence working buffers: the IDWT buffers of the three
 * planes, then the superblock split map, the block motion array and the
 * motion-compensation scratch buffers.  Returns AVERROR(ENOMEM) when one of
 * the checked allocations fails. */
315 static int alloc_sequence_buffers(DiracContext *s)
317 int sbwidth = DIVRNDUP(s->source.width, 4);
318 int sbheight = DIVRNDUP(s->source.height, 4);
319 int i, w, h, top_padding;
321 /* todo: think more about this / use or set Plane here */
322 for (i = 0; i < 3; i++) {
323 int max_xblen = MAX_BLOCKSIZE >> (i ? s->chroma_x_shift : 0);
324 int max_yblen = MAX_BLOCKSIZE >> (i ? s->chroma_y_shift : 0);
325 w = s->source.width >> (i ? s->chroma_x_shift : 0);
326 h = s->source.height >> (i ? s->chroma_y_shift : 0);
328 /* we allocate the max we support here since num decompositions can
329 * change from frame to frame. Stride is aligned to 16 for SIMD, and
330 * 1<<MAX_DWT_LEVELS top padding to avoid if(y>0) in arith decoding
331 * MAX_BLOCKSIZE padding for MC: blocks can spill up to half of that
333 top_padding = FFMAX(1<<MAX_DWT_LEVELS, max_yblen/2);
334 w = FFALIGN(CALC_PADDING(w, MAX_DWT_LEVELS), 8); /* FIXME: Should this be 16 for SSE??? */
335 h = top_padding + CALC_PADDING(h, MAX_DWT_LEVELS) + max_yblen/2;
/* NOTE(review): the padded width is aligned to 8 above, although the
 * comment before it speaks of 16 (see the FIXME). */
337 s->plane[i].idwt_buf_base = av_mallocz((w+max_xblen)*h * sizeof(IDWTELEM));
338 s->plane[i].idwt_tmp = av_malloc((w+16) * sizeof(IDWTELEM));
/* idwt_buf points top_padding rows into the base allocation.
 * NOTE(review): it is derived before the NULL check below, so a failed
 * allocation leads to arithmetic on a NULL pointer. */
339 s->plane[i].idwt_buf = s->plane[i].idwt_buf_base + top_padding*w;
340 if (!s->plane[i].idwt_buf_base || !s->plane[i].idwt_tmp)
341 return AVERROR(ENOMEM);
345 h = s->source.height;
347 /* fixme: allocate using real stride here */
348 s->sbsplit = av_malloc(sbwidth * sbheight);
/* NOTE(review): blmotion gets 4 entries per (width/4 x height/4) cell,
 * while dirac_unpack_block_motion_data indexes a grid of
 * (4*s->sbwidth) x (4*s->sbheight) blocks derived from xbsep/ybsep.  This
 * looks too small when a block separation is below 2 -- TODO confirm. */
349 s->blmotion = av_malloc(sbwidth * sbheight * 4 * sizeof(*s->blmotion));
350 s->edge_emu_buffer_base = av_malloc((w+64)*MAX_BLOCKSIZE);
/* NOTE(review): the row count below is h multiplied by MAX_BLOCKSIZE; the
 * products are computed in int -- confirm the intended size and that they
 * cannot overflow for large pictures. */
352 s->mctmp = av_malloc((w+64+MAX_BLOCKSIZE) * (h*MAX_BLOCKSIZE) * sizeof(*s->mctmp));
353 s->mcscratch = av_malloc((w+64)*MAX_BLOCKSIZE);
/* NOTE(review): only sbsplit and blmotion are tested here; the results of
 * the edge_emu_buffer_base, mctmp and mcscratch allocations above are not
 * checked in the visible lines. */
355 if (!s->sbsplit || !s->blmotion)
356 return AVERROR(ENOMEM);
/* Release everything tied to the current sequence: buffers of all frames
 * in all_frames, their half-pel planes, the reference/delay lists (cleared,
 * not freed: they only point into all_frames) and the per-plane and
 * motion-compensation buffers. */
360 static void free_sequence_buffers(DiracContext *s)
364 for (i = 0; i < MAX_FRAMES; i++) {
/* data[0] being set is used as the "buffer is held" indicator */
365 if (s->all_frames[i].avframe.data[0]) {
366 s->avctx->release_buffer(s->avctx, &s->all_frames[i].avframe);
367 memset(s->all_frames[i].interpolated, 0, sizeof(s->all_frames[i].interpolated));
/* hpel_base[j][0] is not freed here -- presumably it is not a separate
 * allocation; TODO confirm */
370 for (j = 0; j < 3; j++)
371 for (k = 1; k < 4; k++)
372 av_freep(&s->all_frames[i].hpel_base[j][k]);
375 memset(s->ref_frames, 0, sizeof(s->ref_frames));
376 memset(s->delay_frames, 0, sizeof(s->delay_frames));
378 for (i = 0; i < 3; i++) {
379 av_freep(&s->plane[i].idwt_buf_base);
380 av_freep(&s->plane[i].idwt_tmp);
383 av_freep(&s->sbsplit);
384 av_freep(&s->blmotion);
385 av_freep(&s->edge_emu_buffer_base);
388 av_freep(&s->mcscratch);
/* Codec init callback: resets the display frame counter, rejects contexts
 * that request CODEC_FLAG_EMU_EDGE (returns AVERROR_PATCHWELCOME) and
 * initialises the generic and Dirac-specific DSP contexts. */
391 static av_cold int dirac_decode_init(AVCodecContext *avctx)
393 DiracContext *s = avctx->priv_data;
395 s->frame_number = -1;
397 if (avctx->flags&CODEC_FLAG_EMU_EDGE) {
398 av_log(avctx, AV_LOG_ERROR, "Edge emulation not supported!\n");
399 return AVERROR_PATCHWELCOME;
402 ff_dsputil_init(&s->dsp, avctx);
403 ff_diracdsp_init(&s->diracdsp);
/* Flush callback: frees all sequence buffers and returns the decoder to
 * the "no sequence header seen" state with the frame counter reset. */
408 static void dirac_decode_flush(AVCodecContext *avctx)
410 DiracContext *s = avctx->priv_data;
411 free_sequence_buffers(s);
412 s->seen_sequence_header = 0;
413 s->frame_number = -1;
/* Close callback: reuses the flush path to release all decoder buffers. */
416 static av_cold int dirac_decode_end(AVCodecContext *avctx)
418 dirac_decode_flush(avctx);
422 #define SIGN_CTX(x) (CTX_SIGN_ZERO + ((x) > 0) - ((x) < 0))
/* Decode one arithmetic-coded coefficient located at (x, y) of subband b;
 * buf points at that coefficient inside the subband buffer.  The context
 * for the magnitude is chosen from the parent subband and from the
 * already-decoded left / upper neighbours, and the sign context from a
 * neighbour that depends on the subband orientation.
 * NOTE(review): the guards around the neighbour reads and the final store
 * are not visible in this excerpt. */
424 static inline void coeff_unpack_arith(DiracArith *c, int qfactor, int qoffset,
425 SubBand *b, IDWTELEM *buf, int x, int y)
429 int pred_ctx = CTX_ZPZN_F1;
431 /* Check if the parent subband has a 0 in the corresponding position */
433 pred_ctx += !!b->parent->ibuf[b->parent->stride * (y>>1) + (x>>1)] << 1;
435 if (b->orientation == subband_hl)
436 sign_pred = buf[-b->stride];
438 /* Determine if the pixel has only zeros in its neighbourhood */
440 pred_ctx += !(buf[-1] | buf[-b->stride] | buf[-1-b->stride]);
441 if (b->orientation == subband_lh)
444 pred_ctx += !buf[-b->stride];
447 coeff = dirac_get_arith_uint(c, pred_ctx, CTX_COEFF_DATA);
/* inverse quantisation: qfactor and qoffset carry two fractional bits */
449 coeff = (coeff * qfactor + qoffset + 2) >> 2;
450 sign = dirac_get_arith_bit(c, SIGN_CTX(sign_pred));
/* branch-free conditional negate: unchanged for sign == 0, -coeff for
 * sign == 1 */
451 coeff = (coeff ^ -sign) + sign;
/* Read one Golomb-coded coefficient from gb: an unsigned magnitude, which
 * is inverse-quantised with qfactor/qoffset, followed by a sign bit.
 * NOTE(review): the condition under which the dequantise/sign steps run
 * and the return statement are not visible in this excerpt. */
456 static inline int coeff_unpack_golomb(GetBitContext *gb, int qfactor, int qoffset)
460 coeff = svq3_get_ue_golomb(gb);
/* qfactor and qoffset carry two fractional bits */
462 coeff = (coeff * qfactor + qoffset + 2) >> 2;
463 sign = get_bits1(gb);
/* conditional negate: -coeff when the sign bit is 1 */
464 coeff = (coeff ^ -sign) + sign;
470 * Decode the coeffs in the rectangle defined by left, right, top, bottom
471 * [DIRAC_STD] 13.4.3.2 Codeblock unpacking loop. codeblock()
/* Decode the coefficients of subband b inside the rectangle
 * [left, right) x [top, bottom).  is_arith selects arithmetic decoding via
 * c, otherwise Golomb codes are read from gb.  blockcnt_one tells that the
 * subband consists of a single codeblock. */
473 static inline void codeblock(DiracContext *s, SubBand *b,
474 GetBitContext *gb, DiracArith *c,
475 int left, int right, int top, int bottom,
476 int blockcnt_one, int is_arith)
478 int x, y, zero_block;
479 int qoffset, qfactor;
482 /* check for any coded coefficients in this codeblock */
485 zero_block = dirac_get_arith_bit(c, CTX_ZERO_BLOCK);
487 zero_block = get_bits1(gb);
/* a per-codeblock quantiser delta is present when codeblock_mode is set,
 * except for single-codeblock subbands of old schroedinger streams */
493 if (s->codeblock_mode && !(s->old_delta_quant && blockcnt_one)) {
494 int quant = b->quant;
496 quant += dirac_get_arith_int(c, CTX_DELTA_Q_F, CTX_DELTA_Q_DATA);
498 quant += dirac_get_se_golomb(gb);
500 av_log(s->avctx, AV_LOG_ERROR, "Invalid quant\n");
/* clamp so that the lookups into the MAX_QUANT+1 entry tables below stay
 * in range */
506 b->quant = FFMIN(b->quant, MAX_QUANT);
508 qfactor = qscale_tab[b->quant];
509 /* TODO: context pointer? */
511 qoffset = qoffset_intra_tab[b->quant];
513 qoffset = qoffset_inter_tab[b->quant];
515 buf = b->ibuf + top * b->stride;
516 for (y = top; y < bottom; y++) {
517 for (x = left; x < right; x++) {
518 /* [DIRAC_STD] 13.4.4 Subband coefficients. coeff_unpack() */
520 coeff_unpack_arith(c, qfactor, qoffset, b, buf+x, x, y);
522 buf[x] = coeff_unpack_golomb(gb, qfactor, qoffset);
529 * Dirac Specification ->
530 * 13.3 intra_dc_prediction(band)
/* Undo the spatial DC prediction of subband b in place: each coefficient
 * has a prediction formed from its already reconstructed neighbours added
 * to it.  In the first column only the sample above is used; elsewhere the
 * prediction is divide3() of the left, upper and upper-left samples.
 * NOTE(review): the first-row statement and the row pointer updates are not
 * visible in this excerpt. */
532 static inline void intra_dc_prediction(SubBand *b)
534 IDWTELEM *buf = b->ibuf;
537 for (x = 1; x < b->width; x++)
541 for (y = 1; y < b->height; y++) {
542 buf[0] += buf[-b->stride];
544 for (x = 1; x < b->width; x++) {
545 int pred = buf[x - 1] + buf[x - b->stride] + buf[x - b->stride-1];
546 buf[x] += divide3(pred);
553 * Dirac Specification ->
554 * 13.4.2 Non-skipped subbands. subband_coeffs()
/* Decode all codeblocks of subband b from its coeff_data, using arithmetic
 * decoding when is_arith is non-zero and Golomb codes otherwise.  The
 * subband is split into cb_width x cb_height codeblocks with boundaries
 * computed proportionally, so that the last block ends exactly at the
 * subband edge. */
556 static av_always_inline void decode_subband_internal(DiracContext *s, SubBand *b, int is_arith)
558 int cb_x, cb_y, left, right, top, bottom;
/* codeblock[] entry 0 is used for the LL band of level 0, entry level+1
 * for every other band */
561 int cb_width = s->codeblock[b->level + (b->orientation != subband_ll)].width;
562 int cb_height = s->codeblock[b->level + (b->orientation != subband_ll)].height;
/* both counts are at least 1 when parsed, so a sum of 2 means 1x1 */
563 int blockcnt_one = (cb_width + cb_height) == 2;
568 init_get_bits(&gb, b->coeff_data, b->length*8);
571 ff_dirac_init_arith_decoder(&c, &gb, b->length);
574 for (cb_y = 0; cb_y < cb_height; cb_y++) {
575 bottom = (b->height * (cb_y+1)) / cb_height;
577 for (cb_x = 0; cb_x < cb_width; cb_x++) {
578 right = (b->width * (cb_x+1)) / cb_width;
579 codeblock(s, b, &gb, &c, left, right, top, bottom, blockcnt_one, is_arith);
/* intra pictures (no references) carry a spatially predicted DC band */
585 if (b->orientation == subband_ll && s->num_refs == 0)
586 intra_dc_prediction(b);
/* avctx->execute() worker: b points directly at the SubBand to decode with
 * arithmetic coding. */
589 static int decode_subband_arith(AVCodecContext *avctx, void *b)
591 DiracContext *s = avctx->priv_data;
592 decode_subband_internal(s, b, 1);
/* avctx->execute() worker for Golomb-coded subbands.  Unlike the arith
 * worker, the job array holds SubBand pointers, so the argument is
 * dereferenced once more before decoding. */
596 static int decode_subband_golomb(AVCodecContext *avctx, void *arg)
598 DiracContext *s = avctx->priv_data;
600 decode_subband_internal(s, *b, 0);
605 * Dirac Specification ->
606 * [DIRAC_STD] 13.4.1 core_transform_data()
/* Parse the subband headers of plane comp from s->gb, remember where each
 * subband's coefficient data lives, and dispatch the actual coefficient
 * decoding through avctx->execute(). */
608 static void decode_component(DiracContext *s, int comp)
610 AVCodecContext *avctx = s->avctx;
/* level 0 contributes 4 bands, every further level 3, hence
 * 3*MAX_DWT_LEVELS+1 entries at most */
611 SubBand *bands[3*MAX_DWT_LEVELS+1];
612 enum dirac_subband orientation;
613 int level, num_bands = 0;
615 /* Unpack all subbands at all levels. */
616 for (level = 0; level < s->wavelet_depth; level++) {
617 for (orientation = !!level; orientation < 4; orientation++) {
618 SubBand *b = &s->plane[comp].band[level][orientation];
619 bands[num_bands++] = b;
621 align_get_bits(&s->gb);
622 /* [DIRAC_STD] 13.4.2 subband() */
623 b->length = svq3_get_ue_golomb(&s->gb);
625 b->quant = svq3_get_ue_golomb(&s->gb);
626 align_get_bits(&s->gb);
627 b->coeff_data = s->gb.buffer + get_bits_count(&s->gb)/8;
/* never let a subband claim more bytes than the packet still holds */
628 b->length = FFMIN(b->length, FFMAX(get_bits_left(&s->gb)/8, 0));
629 skip_bits_long(&s->gb, b->length*8);
632 /* arithmetic coding has inter-level dependencies, so we can only execute one level at a time */
634 avctx->execute(avctx, decode_subband_arith, &s->plane[comp].band[level][!!level],
635 NULL, 4-!!level, sizeof(SubBand));
637 /* golomb coding has no inter-level dependencies, so we can execute all subbands in parallel */
639 avctx->execute(avctx, decode_subband_golomb, bands, NULL, num_bands, sizeof(SubBand*));
642 /* [DIRAC_STD] 13.5.5.2 Luma slice subband data. luma_slice_band(level,orient,sx,sy) --> if b2 == NULL */
643 /* [DIRAC_STD] 13.5.5.3 Chroma slice subband data. chroma_slice_band(level,orient,sx,sy) --> if b2 != NULL */
/* Decode the part of subband b1 (and, for chroma, of b2 interleaved with
 * it) that belongs to slice (slice_x, slice_y).  Reading stops as soon as
 * the bit position reaches bits_end; quant is clamped to MAX_QUANT for the
 * table lookups and the intra offset table is always used. */
644 static void lowdelay_subband(DiracContext *s, GetBitContext *gb, int quant,
645 int slice_x, int slice_y, int bits_end,
646 SubBand *b1, SubBand *b2)
/* slice rectangle inside the subband, split proportionally by slice count */
648 int left = b1->width * slice_x / s->lowdelay.num_x;
649 int right = b1->width *(slice_x+1) / s->lowdelay.num_x;
650 int top = b1->height * slice_y / s->lowdelay.num_y;
651 int bottom = b1->height *(slice_y+1) / s->lowdelay.num_y;
653 int qfactor = qscale_tab[FFMIN(quant, MAX_QUANT)];
654 int qoffset = qoffset_intra_tab[FFMIN(quant, MAX_QUANT)];
656 IDWTELEM *buf1 = b1->ibuf + top * b1->stride;
657 IDWTELEM *buf2 = b2 ? b2->ibuf + top * b2->stride : NULL;
659 /* we have to constantly check for overread since the spec explicitly
660 requires this, with the meaning that all remaining coeffs are set to 0 */
661 if (get_bits_count(gb) >= bits_end)
664 for (y = top; y < bottom; y++) {
665 for (x = left; x < right; x++) {
666 buf1[x] = coeff_unpack_golomb(gb, qfactor, qoffset);
667 if (get_bits_count(gb) >= bits_end)
670 buf2[x] = coeff_unpack_golomb(gb, qfactor, qoffset);
671 if (get_bits_count(gb) >= bits_end)
681 struct lowdelay_slice {
690 * Dirac Specification ->
691 * 13.5.2 Slices. slice(sx,sy)
/* avctx->execute() worker decoding one low-delay slice.  The slice starts
 * with a 7-bit quantiser index and a luma length field whose width depends
 * on the slice size in bytes; the luma subbands follow, then the
 * interleaved chroma subbands in the remaining bits. */
693 static int decode_lowdelay_slice(AVCodecContext *avctx, void *arg)
695 DiracContext *s = avctx->priv_data;
696 struct lowdelay_slice *slice = arg;
697 GetBitContext *gb = &slice->gb;
698 enum dirac_subband orientation;
699 int level, quant, chroma_bits, chroma_end;
701 int quant_base = get_bits(gb, 7); /*[DIRAC_STD] qindex */
702 int length_bits = av_log2(8 * slice->bytes)+1;
703 int luma_bits = get_bits_long(gb, length_bits);
/* luma_end is clamped so that it never lies beyond the available data */
704 int luma_end = get_bits_count(gb) + FFMIN(luma_bits, get_bits_left(gb));
706 /* [DIRAC_STD] 13.5.5.2 luma_slice_band */
707 for (level = 0; level < s->wavelet_depth; level++)
708 for (orientation = !!level; orientation < 4; orientation++) {
709 quant = FFMAX(quant_base - s->lowdelay.quant[level][orientation], 0);
710 lowdelay_subband(s, gb, quant, slice->slice_x, slice->slice_y, luma_end,
711 &s->plane[0].band[level][orientation], NULL);
714 /* consume any unused bits from luma */
/* NOTE(review): the skip amount is (current position - luma_end), which is
 * negative when fewer than luma_bits were consumed and would then move the
 * reader backwards rather than forward to luma_end.  Confirm whether
 * luma_end - get_bits_count(gb) was intended. */
715 skip_bits_long(gb, get_bits_count(gb) - luma_end);
/* chroma gets whatever is left of the slice after the 7-bit index, the
 * length field and the luma data */
717 chroma_bits = 8*slice->bytes - 7 - length_bits - luma_bits;
718 chroma_end = get_bits_count(gb) + FFMIN(chroma_bits, get_bits_left(gb));
719 /* [DIRAC_STD] 13.5.5.3 chroma_slice_band */
720 for (level = 0; level < s->wavelet_depth; level++)
721 for (orientation = !!level; orientation < 4; orientation++) {
722 quant = FFMAX(quant_base - s->lowdelay.quant[level][orientation], 0);
723 lowdelay_subband(s, gb, quant, slice->slice_x, slice->slice_y, chroma_end,
724 &s->plane[1].band[level][orientation],
725 &s->plane[2].band[level][orientation]);
732 * Dirac Specification ->
733 * 13.5.1 low_delay_transform_data()
/* Decode the transform data of a low-delay picture: build one job per
 * slice, run the jobs through avctx->execute(), then undo the DC
 * prediction of the LL band of each plane. */
735 static void decode_lowdelay(DiracContext *s)
737 AVCodecContext *avctx = s->avctx;
738 int slice_x, slice_y, bytes, bufsize;
740 struct lowdelay_slice *slices;
/* NOTE(review): no check of this allocation is visible in the excerpt, and
 * the element count is a product of two values read from the bitstream --
 * confirm both the NULL handling and the overflow behaviour. */
743 slices = av_mallocz(s->lowdelay.num_x * s->lowdelay.num_y * sizeof(struct lowdelay_slice));
745 align_get_bits(&s->gb);
746 /*[DIRAC_STD] 13.5.2 Slices. slice(sx,sy) */
747 buf = s->gb.buffer + get_bits_count(&s->gb)/8;
748 bufsize = get_bits_left(&s->gb);
750 for (slice_y = 0; bufsize > 0 && slice_y < s->lowdelay.num_y; slice_y++)
751 for (slice_x = 0; bufsize > 0 && slice_x < s->lowdelay.num_x; slice_x++) {
/* size of this slice: difference of the running totals, so that the
 * fractional average bytes.num / bytes.den is distributed over the slices */
752 bytes = (slice_num+1) * s->lowdelay.bytes.num / s->lowdelay.bytes.den
753 - slice_num * s->lowdelay.bytes.num / s->lowdelay.bytes.den;
755 slices[slice_num].bytes = bytes;
756 slices[slice_num].slice_x = slice_x;
757 slices[slice_num].slice_y = slice_y;
758 init_get_bits(&slices[slice_num].gb, buf, bufsize);
765 avctx->execute(avctx, decode_lowdelay_slice, slices, NULL, slice_num,
766 sizeof(struct lowdelay_slice)); /* [DIRAC_STD] 13.5.2 Slices */
767 intra_dc_prediction(&s->plane[0].band[0][0]); /* [DIRAC_STD] 13.3 intra_dc_prediction() */
768 intra_dc_prediction(&s->plane[1].band[0][0]); /* [DIRAC_STD] 13.3 intra_dc_prediction() */
769 intra_dc_prediction(&s->plane[2].band[0][0]); /* [DIRAC_STD] 13.3 intra_dc_prediction() */
/* Derive the per-plane geometry for the current picture: plane and padded
 * IDWT dimensions, the layout of every subband inside the shared IDWT
 * buffer, and the block sizes used for motion compensation (chroma values
 * are the luma ones shifted by the chroma subsampling). */
773 static void init_planes(DiracContext *s)
775 int i, w, h, level, orientation;
777 for (i = 0; i < 3; i++) {
778 Plane *p = &s->plane[i];
780 p->width = s->source.width >> (i ? s->chroma_x_shift : 0);
781 p->height = s->source.height >> (i ? s->chroma_y_shift : 0);
/* pad up to a multiple of 1 << wavelet_depth */
782 p->idwt_width = w = CALC_PADDING(p->width , s->wavelet_depth);
783 p->idwt_height = h = CALC_PADDING(p->height, s->wavelet_depth);
784 p->idwt_stride = FFALIGN(p->idwt_width, 8);
786 for (level = s->wavelet_depth-1; level >= 0; level--) {
789 for (orientation = !!level; orientation < 4; orientation++) {
790 SubBand *b = &p->band[level][orientation];
/* all subbands live interleaved in the plane's single IDWT buffer; the
 * stride grows by a factor of two for each level below the deepest one */
792 b->ibuf = p->idwt_buf;
794 b->stride = p->idwt_stride << (s->wavelet_depth - level);
797 b->orientation = orientation;
802 b->ibuf += b->stride>>1;
805 b->parent = &p->band[level-1][orientation];
810 p->xblen = s->plane[0].xblen >> s->chroma_x_shift;
811 p->yblen = s->plane[0].yblen >> s->chroma_y_shift;
812 p->xbsep = s->plane[0].xbsep >> s->chroma_x_shift;
813 p->ybsep = s->plane[0].ybsep >> s->chroma_y_shift;
/* overspill on each edge: half of the overlap between adjacent blocks */
816 p->xoffset = (p->xblen - p->xbsep)/2;
817 p->yoffset = (p->yblen - p->ybsep)/2;
822 * Unpack the motion compensation parameters
823 * Dirac Specification ->
824 * 11.2 Picture prediction data. picture_prediction()
/* Parse the picture prediction parameters from s->gb: block size and
 * separation (preset or explicit), motion vector precision, optional global
 * motion parameters per reference, the prediction mode and the reference
 * picture weights.  Invalid values are reported through av_log(). */
826 static int dirac_unpack_prediction_parameters(DiracContext *s)
/* preset luma block lengths / separations for index 1..4 */
828 static const uint8_t default_blen[] = { 4, 12, 16, 24 };
829 static const uint8_t default_bsep[] = { 4, 8, 12, 16 };
831 GetBitContext *gb = &s->gb;
835 /* [DIRAC_STD] 11.2.2 Block parameters. block_parameters() */
836 /* Luma and Chroma are equal. 11.2.3 */
837 idx = svq3_get_ue_golomb(gb); /* [DIRAC_STD] index */
840 av_log(s->avctx, AV_LOG_ERROR, "Block prediction index too high\n");
845 s->plane[0].xblen = svq3_get_ue_golomb(gb);
846 s->plane[0].yblen = svq3_get_ue_golomb(gb);
847 s->plane[0].xbsep = svq3_get_ue_golomb(gb);
848 s->plane[0].ybsep = svq3_get_ue_golomb(gb);
850 /*[DIRAC_STD] preset_block_params(index). Table 11.1 */
851 s->plane[0].xblen = default_blen[idx-1];
852 s->plane[0].yblen = default_blen[idx-1];
853 s->plane[0].xbsep = default_bsep[idx-1];
854 s->plane[0].ybsep = default_bsep[idx-1];
856 /*[DIRAC_STD] 11.2.4 motion_data_dimensions()
857 Calculated in function dirac_unpack_block_motion_data */
/* NOTE(review): explicitly coded sizes of 0 satisfy all three visible
 * checks below, yet xbsep/ybsep are later used as divisors when the block
 * grid is computed -- confirm that zero is rejected somewhere. */
859 if (s->plane[0].xbsep < s->plane[0].xblen/2 || s->plane[0].ybsep < s->plane[0].yblen/2) {
860 av_log(s->avctx, AV_LOG_ERROR, "Block separation too small\n");
863 if (s->plane[0].xbsep > s->plane[0].xblen || s->plane[0].ybsep > s->plane[0].yblen) {
864 av_log(s->avctx, AV_LOG_ERROR, "Block seperation greater than size\n");
867 if (FFMAX(s->plane[0].xblen, s->plane[0].yblen) > MAX_BLOCKSIZE) {
868 av_log(s->avctx, AV_LOG_ERROR, "Unsupported large block size\n");
872 /*[DIRAC_STD] 11.2.5 Motion vector precision. motion_vector_precision()
873 Read motion vector precision */
874 s->mv_precision = svq3_get_ue_golomb(gb);
875 if (s->mv_precision > 3) {
876 av_log(s->avctx, AV_LOG_ERROR, "MV precision finer than eighth-pel\n");
880 /*[DIRAC_STD] 11.2.6 Global motion. global_motion()
881 Read the global motion compensation parameters */
882 s->globalmc_flag = get_bits1(gb);
883 if (s->globalmc_flag) {
884 memset(s->globalmc, 0, sizeof(s->globalmc));
885 /* [DIRAC_STD] pan_tilt(gparams) */
886 for (ref = 0; ref < s->num_refs; ref++) {
888 s->globalmc[ref].pan_tilt[0] = dirac_get_se_golomb(gb);
889 s->globalmc[ref].pan_tilt[1] = dirac_get_se_golomb(gb);
891 /* [DIRAC_STD] zoom_rotate_shear(gparams)
892 zoom/rotation/shear parameters */
/* NOTE(review): zrs_exp (and perspective_exp below) are stored without a
 * visible range check and are later used as shift counts in global_mv(). */
894 s->globalmc[ref].zrs_exp = svq3_get_ue_golomb(gb);
895 s->globalmc[ref].zrs[0][0] = dirac_get_se_golomb(gb);
896 s->globalmc[ref].zrs[0][1] = dirac_get_se_golomb(gb);
897 s->globalmc[ref].zrs[1][0] = dirac_get_se_golomb(gb);
898 s->globalmc[ref].zrs[1][1] = dirac_get_se_golomb(gb);
/* identity matrix when no zoom/rotate/shear data is coded */
900 s->globalmc[ref].zrs[0][0] = 1;
901 s->globalmc[ref].zrs[1][1] = 1;
903 /* [DIRAC_STD] perspective(gparams) */
905 s->globalmc[ref].perspective_exp = svq3_get_ue_golomb(gb);
906 s->globalmc[ref].perspective[0] = dirac_get_se_golomb(gb);
907 s->globalmc[ref].perspective[1] = dirac_get_se_golomb(gb);
912 /*[DIRAC_STD] 11.2.7 Picture prediction mode. prediction_mode()
913 Picture prediction mode, not currently used. */
914 if (svq3_get_ue_golomb(gb)) {
915 av_log(s->avctx, AV_LOG_ERROR, "Unknown picture prediction mode\n");
919 /* [DIRAC_STD] 11.2.8 Reference picture weight. reference_picture_weights()
920 just data read, weight calculation will be done later on. */
921 s->weight_log2denom = 1;
926 s->weight_log2denom = svq3_get_ue_golomb(gb);
927 s->weight[0] = dirac_get_se_golomb(gb);
928 if (s->num_refs == 2)
929 s->weight[1] = dirac_get_se_golomb(gb);
935 * Dirac Specification ->
936 * 11.3 Wavelet transform data. wavelet_transform()
/* Parse the wavelet transform parameters from s->gb: zero-residue flag,
 * wavelet filter index and depth, then either the codeblock layout (core
 * syntax) or the slice parameters and quantisation matrix (low-delay
 * syntax).  Returns AVERROR_INVALIDDATA for an invalid slice byte
 * denominator; other invalid values are reported via CHECKEDREAD. */
938 static int dirac_unpack_idwt_params(DiracContext *s)
940 GetBitContext *gb = &s->gb;
944 #define CHECKEDREAD(dst, cond, errmsg) \
945 tmp = svq3_get_ue_golomb(gb); \
947 av_log(s->avctx, AV_LOG_ERROR, errmsg); \
954 s->zero_res = s->num_refs ? get_bits1(gb) : 0;
958 /*[DIRAC_STD] 11.3.1 Transform parameters. transform_parameters() */
959 CHECKEDREAD(s->wavelet_idx, tmp > 6, "wavelet_idx is too big\n")
961 CHECKEDREAD(s->wavelet_depth, tmp > MAX_DWT_LEVELS || tmp < 1, "invalid number of DWT decompositions\n")
964 /* Codeblock parameters (core syntax only) */
/* one entry for the LL band plus one per level, hence the <= bound */
966 for (i = 0; i <= s->wavelet_depth; i++) {
967 CHECKEDREAD(s->codeblock[i].width , tmp < 1, "codeblock width invalid\n")
968 CHECKEDREAD(s->codeblock[i].height, tmp < 1, "codeblock height invalid\n")
971 CHECKEDREAD(s->codeblock_mode, tmp > 1, "unknown codeblock mode\n")
973 for (i = 0; i <= s->wavelet_depth; i++)
974 s->codeblock[i].width = s->codeblock[i].height = 1;
976 /* Slice parameters + quantization matrix*/
977 /*[DIRAC_STD] 11.3.4 Slice coding Parameters (low delay syntax only). slice_parameters() */
978 s->lowdelay.num_x = svq3_get_ue_golomb(gb);
979 s->lowdelay.num_y = svq3_get_ue_golomb(gb);
980 s->lowdelay.bytes.num = svq3_get_ue_golomb(gb);
981 s->lowdelay.bytes.den = svq3_get_ue_golomb(gb);
/* NOTE(review): num_x and num_y are used as divisors in lowdelay_subband()
 * but only bytes.den is validated in the visible lines. */
983 if (s->lowdelay.bytes.den <= 0) {
984 av_log(s->avctx,AV_LOG_ERROR,"Invalid lowdelay.bytes.den\n");
985 return AVERROR_INVALIDDATA;
988 /* [DIRAC_STD] 11.3.5 Quantisation matrices (low-delay syntax). quant_matrix() */
990 av_log(s->avctx,AV_LOG_DEBUG,"Low Delay: Has Custom Quantization Matrix!\n");
991 /* custom quantization matrix */
992 s->lowdelay.quant[0][0] = svq3_get_ue_golomb(gb);
993 for (level = 0; level < s->wavelet_depth; level++) {
994 s->lowdelay.quant[level][1] = svq3_get_ue_golomb(gb);
995 s->lowdelay.quant[level][2] = svq3_get_ue_golomb(gb);
996 s->lowdelay.quant[level][3] = svq3_get_ue_golomb(gb);
999 /* default quantization matrix */
/* NOTE(review): default_qmat is declared with 4 levels per wavelet, while
 * wavelet_depth values up to MAX_DWT_LEVELS (5) pass the check above; with
 * depth 5 the level index 4 reads past the end of the per-wavelet row.
 * Confirm that a depth of 5 without a custom matrix is rejected. */
1000 for (level = 0; level < s->wavelet_depth; level++)
1001 for (i = 0; i < 4; i++) {
1002 s->lowdelay.quant[level][i] = default_qmat[s->wavelet_idx][level][i];
1003 /* haar with no shift differs for different depths */
1004 if (s->wavelet_idx == 3)
1005 s->lowdelay.quant[level][i] += 4*(s->wavelet_depth-1 - level);
/* Predict the split mode of the superblock at (x, y) from its decoded
 * neighbours; sbsplit points at the current entry and stride is the row
 * length.  With all three neighbours available the prediction is their
 * mean rounded to nearest, looked up in avgsplit by the sum of the three
 * values (each 0..2, so the sum is 0..6).
 * NOTE(review): the border cases are only partly visible in this excerpt. */
1012 static inline int pred_sbsplit(uint8_t *sbsplit, int stride, int x, int y)
1014 static const uint8_t avgsplit[7] = { 0, 0, 1, 1, 1, 2, 2 };
1021 return sbsplit[-stride];
1023 return avgsplit[sbsplit[-1] + sbsplit[-stride] + sbsplit[-stride-1]];
/* Predict one reference flag (selected by the single-bit refmask) of the
 * block at (x, y) from the left, upper and upper-left neighbours.
 * NOTE(review): the conditions selecting the border cases are not visible
 * in this excerpt. */
1026 static inline int pred_block_mode(DiracBlock *block, int stride, int x, int y, int refmask)
1033 return block[-1].ref & refmask;
1035 return block[-stride].ref & refmask;
1037 /* return the majority */
/* each term is 0 or refmask, so halving the sum and masking yields refmask
 * exactly when at least two neighbours have the flag set */
1038 pred = (block[-1].ref & refmask) + (block[-stride].ref & refmask) + (block[-stride-1].ref & refmask);
1039 return (pred >> 1) & refmask;
/* Predict the three DC values of an intra block at (x, y) as the average
 * of the DC values of those neighbours (left, above, above-left) that exist
 * and use neither reference (ref & 3 == 0).  The result is left in
 * block->u.dc.
 * NOTE(review): the neighbour counter updates are not visible in this
 * excerpt; n is presumably the number of neighbours accumulated. */
1042 static inline void pred_block_dc(DiracBlock *block, int stride, int x, int y)
1046 memset(block->u.dc, 0, sizeof(block->u.dc));
1048 if (x && !(block[-1].ref & 3)) {
1049 for (i = 0; i < 3; i++)
1050 block->u.dc[i] += block[-1].u.dc[i];
1054 if (y && !(block[-stride].ref & 3)) {
1055 for (i = 0; i < 3; i++)
1056 block->u.dc[i] += block[-stride].u.dc[i];
1060 if (x && y && !(block[-1-stride].ref & 3)) {
1061 for (i = 0; i < 3; i++)
1062 block->u.dc[i] += block[-1-stride].u.dc[i];
/* rounded mean of two values */
1067 for (i = 0; i < 3; i++)
1068 block->u.dc[i] = (block->u.dc[i]+1)>>1;
1069 } else if (n == 3) {
1070 for (i = 0; i < 3; i++)
1071 block->u.dc[i] = divide3(block->u.dc[i]);
/* Predict the motion vector for reference ref of the block at (x, y) and
 * store it in block->u.mv[ref].  Only neighbours that use this reference
 * without global motion contribute; depending on how many qualify the
 * prediction is zero, the single vector, the rounded mean of two, or the
 * component-wise median of three. */
1075 static inline void pred_mv(DiracBlock *block, int stride, int x, int y, int ref)
/* ref 0 / 1 map to the DIRAC_REF_MASK_REF1 / REF2 bits */
1078 int refmask = ref+1;
1079 int mask = refmask | DIRAC_REF_MASK_GLOBAL; /* exclude gmc blocks */
1082 if (x && (block[-1].ref & mask) == refmask)
1083 pred[n++] = block[-1].u.mv[ref];
1085 if (y && (block[-stride].ref & mask) == refmask)
1086 pred[n++] = block[-stride].u.mv[ref];
1088 if (x && y && (block[-stride-1].ref & mask) == refmask)
1089 pred[n++] = block[-stride-1].u.mv[ref];
1093 block->u.mv[ref][0] = 0;
1094 block->u.mv[ref][1] = 0;
1097 block->u.mv[ref][0] = pred[0][0];
1098 block->u.mv[ref][1] = pred[0][1];
1101 block->u.mv[ref][0] = (pred[0][0] + pred[1][0] + 1) >> 1;
1102 block->u.mv[ref][1] = (pred[0][1] + pred[1][1] + 1) >> 1;
1105 block->u.mv[ref][0] = mid_pred(pred[0][0], pred[1][0], pred[2][0]);
1106 block->u.mv[ref][1] = mid_pred(pred[0][1], pred[1][1], pred[2][1]);
/* Compute the global-motion vector for reference ref at block position
 * (x, y) from the pan/tilt vector b, the zoom/rotate/shear matrix A and
 * the perspective vector c, and store it in block->u.mv[ref].
 * NOTE(review): ez and ep come from the bitstream without a visible range
 * check; 1<<ez, 1<<ep and the shifts by ez+ep are undefined once a count
 * reaches the width of int, and the int products may overflow -- confirm
 * the exponents are bounded elsewhere. */
1111 static void global_mv(DiracContext *s, DiracBlock *block, int x, int y, int ref)
1113 int ez = s->globalmc[ref].zrs_exp;
1114 int ep = s->globalmc[ref].perspective_exp;
1115 int (*A)[2] = s->globalmc[ref].zrs;
1116 int *b = s->globalmc[ref].pan_tilt;
1117 int *c = s->globalmc[ref].perspective;
1119 int m = (1<<ep) - (c[0]*x + c[1]*y);
1120 int mx = m * ((A[0][0] * x + A[0][1]*y) + (1<<ez) * b[0]);
1121 int my = m * ((A[1][0] * x + A[1][1]*y) + (1<<ez) * b[1]);
/* scale back by 2^(ez+ep) */
1123 block->u.mv[ref][0] = (mx + (1<<(ez+ep))) >> (ez+ep);
1124 block->u.mv[ref][1] = (my + (1<<(ez+ep))) >> (ez+ep);
/* Decode the prediction mode and the DC values or motion vectors of the
 * block at (x, y).  Each value is a neighbour-based prediction combined
 * with a residual from the arithmetic decoders: arith[0] carries the mode
 * bits, arith[1..3] the DC residuals of the three components, and
 * arith[4 + 2*i] / arith[5 + 2*i] the x / y vector residuals for
 * reference i. */
1127 static void decode_block_params(DiracContext *s, DiracArith arith[8], DiracBlock *block,
1128 int stride, int x, int y)
/* predicted flag XOR decoded bit gives the actual flag */
1132 block->ref = pred_block_mode(block, stride, x, y, DIRAC_REF_MASK_REF1);
1133 block->ref ^= dirac_get_arith_bit(arith, CTX_PMODE_REF1);
1135 if (s->num_refs == 2) {
1136 block->ref |= pred_block_mode(block, stride, x, y, DIRAC_REF_MASK_REF2);
1137 block->ref ^= dirac_get_arith_bit(arith, CTX_PMODE_REF2) << 1;
1141 pred_block_dc(block, stride, x, y);
1142 for (i = 0; i < 3; i++)
1143 block->u.dc[i] += dirac_get_arith_int(arith+1+i, CTX_DC_F1, CTX_DC_DATA);
1147 if (s->globalmc_flag) {
1148 block->ref |= pred_block_mode(block, stride, x, y, DIRAC_REF_MASK_GLOBAL);
1149 block->ref ^= dirac_get_arith_bit(arith, CTX_GLOBAL_BLOCK) << 2;
1152 for (i = 0; i < s->num_refs; i++)
1153 if (block->ref & (i+1)) {
1154 if (block->ref & DIRAC_REF_MASK_GLOBAL) {
1155 global_mv(s, block, x, y, i);
1157 pred_mv(block, stride, x, y, i);
1158 block->u.mv[i][0] += dirac_get_arith_int(arith + 4 + 2 * i, CTX_MV_F1, CTX_MV_DATA);
1159 block->u.mv[i][1] += dirac_get_arith_int(arith + 5 + 2 * i, CTX_MV_F1, CTX_MV_DATA);
1165 * Copies the current block to the other blocks covered by the current superblock split mode
/* Replicate the data of block over a size x size square of blocks; stride
 * is the number of blocks per row.
 * NOTE(review): the copy statements themselves are not visible in this
 * excerpt; the visible loops cover the rest of the first row and then all
 * columns of the following rows. */
1167 static void propagate_block_data(DiracBlock *block, int stride, int size)
1170 DiracBlock *dst = block;
1172 for (x = 1; x < size; x++)
1175 for (y = 1; y < size; y++) {
1177 for (x = 0; x < size; x++)
1183 * Dirac Specification ->
1184 * 12. Block motion data syntax
/* Decode the block motion data of the current picture: first the split
 * mode of every superblock, then, per superblock, the parameters of its
 * 1, 4 or 16 coded blocks, which are propagated to the blocks they cover. */
1186 static int dirac_unpack_block_motion_data(DiracContext *s)
1188 GetBitContext *gb = &s->gb;
1189 uint8_t *sbsplit = s->sbsplit;
1191 DiracArith arith[8];
1195 /* [DIRAC_STD] 11.2.4 and 12.2.1 Number of blocks and superblocks */
/* a superblock covers 4x4 blocks.
 * NOTE(review): xbsep/ybsep are divisors here, and blwidth * blheight
 * entries of s->blmotion are addressed below; confirm that both the
 * non-zero requirement and the buffer size allocated in
 * alloc_sequence_buffers() are guaranteed for all accepted block
 * parameters. */
1196 s->sbwidth = DIVRNDUP(s->source.width, 4*s->plane[0].xbsep);
1197 s->sbheight = DIVRNDUP(s->source.height, 4*s->plane[0].ybsep);
1198 s->blwidth = 4 * s->sbwidth;
1199 s->blheight = 4 * s->sbheight;
1201 /* [DIRAC_STD] 12.3.1 Superblock splitting modes. superblock_split_modes()
1202 decode superblock split modes */
1203 ff_dirac_init_arith_decoder(arith, gb, svq3_get_ue_golomb(gb)); /* svq3_get_ue_golomb(gb) is the length */
1204 for (y = 0; y < s->sbheight; y++) {
1205 for (x = 0; x < s->sbwidth; x++) {
1206 unsigned int split = dirac_get_arith_uint(arith, CTX_SB_F1, CTX_SB_DATA);
/* residual plus prediction, wrapped into the valid modes 0..2 */
1209 sbsplit[x] = (split + pred_sbsplit(sbsplit+x, s->sbwidth, x, y)) % 3;
1211 sbsplit += s->sbwidth;
1214 /* setup arith decoding */
/* decoder 0: block modes; 4..7: vector components per reference;
 * 1..3: DC values.  Each decoder is preceded by its length. */
1215 ff_dirac_init_arith_decoder(arith, gb, svq3_get_ue_golomb(gb));
1216 for (i = 0; i < s->num_refs; i++) {
1217 ff_dirac_init_arith_decoder(arith + 4 + 2 * i, gb, svq3_get_ue_golomb(gb));
1218 ff_dirac_init_arith_decoder(arith + 5 + 2 * i, gb, svq3_get_ue_golomb(gb));
1220 for (i = 0; i < 3; i++)
1221 ff_dirac_init_arith_decoder(arith+1+i, gb, svq3_get_ue_golomb(gb));
1223 for (y = 0; y < s->sbheight; y++)
1224 for (x = 0; x < s->sbwidth; x++) {
/* split mode 0, 1, 2 means 1, 2, 4 coded blocks per side, each covering
 * 4, 2, 1 block positions */
1225 int blkcnt = 1 << s->sbsplit[y * s->sbwidth + x];
1226 int step = 4 >> s->sbsplit[y * s->sbwidth + x];
1228 for (q = 0; q < blkcnt; q++)
1229 for (p = 0; p < blkcnt; p++) {
1230 int bx = 4 * x + p*step;
1231 int by = 4 * y + q*step;
1232 DiracBlock *block = &s->blmotion[by*s->blwidth + bx];
1233 decode_block_params(s, arith, block, s->blwidth, bx, by);
1234 propagate_block_data(block, s->blwidth, step);
/*
 * Weight for position i along one axis of a block of length blen; `offset`
 * is the overlap parameter (the visible callers pass p->xoffset / p->yoffset).
 * NOTE(review): the first branch of the if/else chain and the final return
 * are not visible in this copy of the file.
 */
1241 static int weight(int i, int blen, int offset)
/* Ramp value for index i. NOTE(review): the expansion is an unparenthesised
   ?: expression that reads the enclosing function's `offset`, and no #undef
   is visible, so it is only safe as a complete expression inside weight(). */
1243 #define ROLLOFF(i) offset == 1 ? ((i) ? 5 : 3) : \
1244 (1 + (6*(i) + offset - 1) / (2*offset - 1))
/* the last 2*offset positions use the ramp mirrored from the far edge */
1248 else if (i > blen-1 - 2*offset)
1249 return ROLLOFF(blen-1 - i);
/*
 * Fill one row of an OBMC weight table. Every entry is the row factor wy
 * multiplied by a horizontal factor: weight() in the ramped part, the
 * constant 8 in the halves selected by `left` / `right`.
 */
1253 static void init_obmc_weight_row(Plane *p, uint8_t *obmc_weight, int stride,
1254 int left, int right, int wy)
/* left != 0: the left half of the block gets the flat factor 8 */
1257 for (x = 0; left && x < p->xblen >> 1; x++)
1258 obmc_weight[x] = wy*8;
/* ramped part; with right == 1 it stops at the middle of the block */
1259 for (; x < p->xblen >> right; x++)
1260 obmc_weight[x] = wy*weight(x, p->xblen, p->xoffset);
/* whatever is left of the block (the right half when right == 1, nothing
   when right == 0) gets the flat factor 8 */
1261 for (; x < p->xblen; x++)
1262 obmc_weight[x] = wy*8;
/* entries from xblen up to stride; the loop body is not visible in this
   copy of the file */
1263 for (; x < stride; x++)
/*
 * Fill a complete OBMC weight table of p->yblen rows, `stride` entries apart.
 * The vertical factor passed to init_obmc_weight_row() is weight() in the
 * ramped rows and the constant 8 in the halves selected by `top` / `bottom`;
 * `left` / `right` are passed through unchanged.
 */
1267 static void init_obmc_weight(Plane *p, uint8_t *obmc_weight, int stride,
1268 int left, int right, int top, int bottom)
/* top != 0: the upper half of the block uses the flat vertical factor 8 */
1271 for (y = 0; top && y < p->yblen >> 1; y++) {
1272 init_obmc_weight_row(p, obmc_weight, stride, left, right, 8);
1273 obmc_weight += stride;
/* ramped rows; with bottom == 1 they stop at the middle of the block */
1275 for (; y < p->yblen >> bottom; y++) {
1276 int wy = weight(y, p->yblen, p->yoffset);
1277 init_obmc_weight_row(p, obmc_weight, stride, left, right, wy);
1278 obmc_weight += stride;
/* remaining rows (the lower half when bottom == 1) use the flat factor 8 */
1280 for (; y < p->yblen; y++) {
1281 init_obmc_weight_row(p, obmc_weight, stride, left, right, 8);
1282 obmc_weight += stride;
/*
 * (Re)build the three weight tables in s->obmc_weight[] for block row `by`.
 * Table 0 is built with left = 1, table 1 with neither side flag and table 2
 * with right = 1; all three share the same top / bottom flags.
 * NOTE(review): the definition of `top` is not visible in this copy of the
 * file (presumably by == 0 -- TODO confirm).
 */
1286 static void init_obmc_weights(DiracContext *s, Plane *p, int by)
1289 int bottom = by == s->blheight-1;
1291 /* don't bother re-initing for rows 2 to blheight-2, the weights don't change */
/* by == 1 is included so the tables switch from the top-row variant to the
   interior variant once */
1292 if (top || bottom || by == 1) {
1293 init_obmc_weight(p, s->obmc_weight[0], MAX_BLOCKSIZE, 1, 0, top, bottom);
1294 init_obmc_weight(p, s->obmc_weight[1], MAX_BLOCKSIZE, 0, 0, top, bottom);
1295 init_obmc_weight(p, s->obmc_weight[2], MAX_BLOCKSIZE, 0, 1, top, bottom);
/* Weight table looked up in this file as epel_weights[my&3][mx&3]; each entry
   is a group of 4 bytes. The table contents are not visible in this copy of
   the file. */
1299 static const uint8_t epel_weights[4][4][4] = {
1319 * For block x,y, determine which of the hpel planes to do bilinear
1320 * interpolation from and set src[] to the location in each hpel plane
1323 * @return the index of the put_dirac_pixels_tab function to use
1324 * 0 for 1 plane (fpel,hpel), 1 for 2 planes (qpel), 2 for 4 planes (qpel), and 3 for epel
/*
 * Set up the source pointers for motion compensation of one block.
 * Reads the block's vector for `ref`, splits it into an integer part and a
 * sub-pel fraction (normalised to 1/8 pel), fills src[0..3] with pointers
 * into the reference's half-pel planes and, on the epel path, stores a
 * pointer to the matching epel_weights entry in src[4].
 * Returns (nplanes >> 1) + epel, which the caller uses as an index into its
 * pixel-function tables.
 * NOTE(review): this copy of the file is missing several original lines
 * (braces, short statements and conditions), so the conditions guarding the
 * individual steps below are not all visible here.
 */
1326 static int mc_subpel(DiracContext *s, DiracBlock *block, const uint8_t *src[5],
1327 int x, int y, int ref, int plane)
1329 Plane *p = &s->plane[plane];
1330 uint8_t **ref_hpel = s->ref_pics[ref]->hpel[plane];
1331 int motion_x = block->u.mv[ref][0];
1332 int motion_y = block->u.mv[ref][1];
1333 int mx, my, i, epel, nplanes = 0;
/* scale the vector by the chroma subsampling shifts (presumably for chroma
   planes only; the guarding condition is not visible here) */
1336 motion_x >>= s->chroma_x_shift;
1337 motion_y >>= s->chroma_y_shift;
/* sub-pel fraction = the low mv_precision bits. The mask is built from a
   positive constant because left-shifting a negative value (-1 << n) is
   undefined behaviour in C; the resulting mask is the same. */
1340 mx = motion_x & ((1 << s->mv_precision) - 1);
1341 my = motion_y & ((1 << s->mv_precision) - 1);
/* integer part of the vector */
1342 motion_x >>= s->mv_precision;
1343 motion_y >>= s->mv_precision;
1344 /* normalize subpel coordinates to epel */
1345 /* TODO: template this function? */
1346 mx <<= 3 - s->mv_precision;
1347 my <<= 3 - s->mv_precision;
/* single-plane case: pick the half-pel plane directly from the fraction */
1356 src[0] = ref_hpel[(my>>1)+(mx>>2)] + y*p->stride + x;
/* multi-plane case: start from all four half-pel planes */
1360 for (i = 0; i < 4; i++)
1361 src[i] = ref_hpel[i] + y*p->stride + x;
1363 /* if we're interpolating in the right/bottom halves, adjust the planes as needed
1364 we increment x/y because the edge changes for half of the pixels */
1371 src[0] += p->stride;
1372 src[1] += p->stride;
1380 /* check if we really only need 2 planes since either mx or my is
1381 a hpel position. (epel weights of 0 handle this there) */
1383 /* mx == 0: average [0] and [2]
1384 mx == 4: average [1] and [3] */
1385 src[!mx] = src[2 + !!mx];
1387 } else if (!(my&3)) {
1388 src[0] = src[(my>>1) ];
1389 src[1] = src[(my>>1)+1];
1393 /* adjust the ordering if needed so the weights work */
1395 FFSWAP(const uint8_t *, src[0], src[1]);
1396 FFSWAP(const uint8_t *, src[2], src[3]);
1399 FFSWAP(const uint8_t *, src[0], src[2]);
1400 FFSWAP(const uint8_t *, src[1], src[3]);
1402 src[4] = epel_weights[my&3][mx&3];
1406 /* fixme: v/h _edge_pos */
/* block reaches outside the padded plane: build each source in the edge
   emulation buffers and read from those instead (a negative x or y becomes
   a huge unsigned value and therefore also takes this path) */
1407 if ((unsigned)x > p->width +EDGE_WIDTH/2 - p->xblen ||
1408 (unsigned)y > p->height+EDGE_WIDTH/2 - p->yblen) {
1409 for (i = 0; i < nplanes; i++) {
1410 ff_emulated_edge_mc(s->edge_emu_buffer[i], src[i], p->stride,
1411 p->xblen, p->yblen, x, y,
1412 p->width+EDGE_WIDTH/2, p->height+EDGE_WIDTH/2);
1413 src[i] = s->edge_emu_buffer[i];
1416 return (nplanes>>1) + epel;
/*
 * Accumulate a constant (DC) block into dst: every destination entry is
 * increased by dc times the corresponding OBMC weight.
 * The inner loop handles two entries per iteration, so it writes index
 * x+1 as well; an odd xblen would touch one entry past xblen.
 * NOTE(review): the statement advancing dst between rows is not visible in
 * this copy of the file (presumably dst += stride).
 */
1419 static void add_dc(uint16_t *dst, int dc, int stride,
1420 uint8_t *obmc_weight, int xblen, int yblen)
1425 for (y = 0; y < yblen; y++) {
1426 for (x = 0; x < xblen; x += 2) {
1427 dst[x ] += dc * obmc_weight[x ];
1428 dst[x+1] += dc * obmc_weight[x+1];
/* rows of the weight table are MAX_BLOCKSIZE entries apart */
1431 obmc_weight += MAX_BLOCKSIZE;
/*
 * Motion-compensate one block and accumulate it, OBMC-weighted, into mctmp.
 * The low two bits of block->ref select the path: a DC fill via add_dc(), a
 * prediction from one reference (index (ref & 3) - 1), or a prediction from
 * references 0 and 1 combined.
 * NOTE(review): the case labels, breaks and some conditions are not visible
 * in this copy of the file.
 */
1435 static void block_mc(DiracContext *s, DiracBlock *block,
1436 uint16_t *mctmp, uint8_t *obmc_weight,
1437 int plane, int dstx, int dsty)
1439 Plane *p = &s->plane[plane];
1440 const uint8_t *src[5];
1443 switch (block->ref&3) {
/* DC path: weights are applied inside add_dc() */
1445 add_dc(mctmp, block->u.dc[plane], p->stride, obmc_weight, p->xblen, p->yblen);
/* single reference: fetch the prediction into the scratch buffer */
1449 idx = mc_subpel(s, block, src, dstx, dsty, (block->ref&3)-1, plane);
1450 s->put_pixels_tab[idx](s->mcscratch, src, p->stride, p->yblen);
/* NOTE(review): select_dsp_funcs() may set weight_func to NULL, so this call
   needs a guard; the guard is not visible in this copy -- confirm */
1452 s->weight_func(s->mcscratch, p->stride, s->weight_log2denom,
1453 s->weight[0] + s->weight[1], p->yblen);
/* two references: prediction from reference 0 goes to the scratch buffer,
   prediction from reference 1 is either bi-weighted with it (via a second
   area at mcscratch + 32) or averaged into it */
1456 idx = mc_subpel(s, block, src, dstx, dsty, 0, plane);
1457 s->put_pixels_tab[idx](s->mcscratch, src, p->stride, p->yblen);
1458 idx = mc_subpel(s, block, src, dstx, dsty, 1, plane);
1459 if (s->biweight_func) {
1460 /* fixme: +32 is a quick hack */
1461 s->put_pixels_tab[idx](s->mcscratch + 32, src, p->stride, p->yblen);
1462 s->biweight_func(s->mcscratch, s->mcscratch+32, p->stride, s->weight_log2denom,
1463 s->weight[0], s->weight[1], p->yblen);
1465 s->avg_pixels_tab[idx](s->mcscratch, src, p->stride, p->yblen);
/* accumulate the scratch block into mctmp using the OBMC weights */
1468 s->add_obmc(mctmp, s->mcscratch, p->stride, obmc_weight, p->yblen);
/*
 * Motion-compensate one row of blocks into mctmp. The first block uses
 * weight table 0 and starts at x = -xoffset, blocks 1 .. blwidth-2 use
 * table 1, and the block after the loop uses table 2.
 * NOTE(review): the statements advancing mctmp and dstx between blocks are
 * not visible in this copy of the file.
 */
1471 static void mc_row(DiracContext *s, DiracBlock *block, uint16_t *mctmp, int plane, int dsty)
1473 Plane *p = &s->plane[plane];
/* x position of the second block: one block separation after -xoffset */
1474 int x, dstx = p->xbsep - p->xoffset;
1476 block_mc(s, block, mctmp, s->obmc_weight[0], plane, -p->xoffset, dsty);
1479 for (x = 1; x < s->blwidth-1; x++) {
1480 block_mc(s, block+x, mctmp, s->obmc_weight[1], plane, dstx, dsty);
/* x == blwidth-1 here: the last block of the row */
1484 block_mc(s, block+x, mctmp, s->obmc_weight[2], plane, dstx, dsty);
/*
 * Copy the put/avg pixel function tables and pick the OBMC and weighting
 * functions for table index idx from s->diracdsp.
 * NOTE(review): the computation of idx is not visible in this copy of the
 * file (presumably derived from xblen -- TODO confirm).
 */
1487 static void select_dsp_funcs(DiracContext *s, int width, int height, int xblen, int yblen)
1495 memcpy(s->put_pixels_tab, s->diracdsp.put_dirac_pixels_tab[idx], sizeof(s->put_pixels_tab));
1496 memcpy(s->avg_pixels_tab, s->diracdsp.avg_dirac_pixels_tab[idx], sizeof(s->avg_pixels_tab));
1497 s->add_obmc = s->diracdsp.add_dirac_obmc[idx];
/* explicit weighting functions are only installed when the weights differ
   from 1, 1 or weight_log2denom is above 1; otherwise both pointers are
   cleared, so callers have to test them before calling */
1498 if (s->weight_log2denom > 1 || s->weight[0] != 1 || s->weight[1] != 1) {
1499 s->weight_func = s->diracdsp.weight_dirac_pixels_tab[idx];
1500 s->biweight_func = s->diracdsp.biweight_dirac_pixels_tab[idx];
1502 s->weight_func = NULL;
1503 s->biweight_func = NULL;
/*
 * Prepare the half-pel planes of one plane of a reference frame:
 * hpel[plane][0] is the decoded plane itself, hpel[plane][1..3] are
 * allocated on first use and filled by dirac_hpel_filter() once per frame
 * (tracked by ref->interpolated[plane]). Edges are drawn on every plane.
 */
1507 static void interpolate_refplane(DiracContext *s, DiracFrame *ref, int plane, int width, int height)
1509 /* chroma allocates an edge of 8 when subsampled
1510 which for 4:2:2 means an h edge of 16 and v edge of 8
1511 just use 8 for everything for the moment */
1512 int i, edge = EDGE_WIDTH/2;
1514 ref->hpel[plane][0] = ref->avframe.data[plane];
1515 s->dsp.draw_edges(ref->hpel[plane][0], ref->avframe.linesize[plane], width, height, edge, edge, EDGE_TOP | EDGE_BOTTOM); /* EDGE_TOP | EDGE_BOTTOM values just copied to make it build, this needs to be ensured */
1517 /* no need for hpel if we only have fpel vectors */
1518 if (!s->mv_precision)
/* allocate the three interpolated planes: room for `edge` extra rows above
   and below plus 32 spare bytes, with the plane start placed `edge` rows
   and 16 bytes into the allocation.
   NOTE(review): the av_malloc() result is not checked in the visible lines
   before it is offset and later written through. */
1521 for (i = 1; i < 4; i++) {
1522 if (!ref->hpel_base[plane][i])
1523 ref->hpel_base[plane][i] = av_malloc((height+2*edge) * ref->avframe.linesize[plane] + 32);
1524 /* we need to be 16-byte aligned even for chroma */
1525 ref->hpel[plane][i] = ref->hpel_base[plane][i] + edge*ref->avframe.linesize[plane] + 16;
/* interpolate only once per reference plane */
1528 if (!ref->interpolated[plane]) {
1529 s->diracdsp.dirac_hpel_filter(ref->hpel[plane][1], ref->hpel[plane][2],
1530 ref->hpel[plane][3], ref->hpel[plane][0],
1531 ref->avframe.linesize[plane], width, height);
1532 s->dsp.draw_edges(ref->hpel[plane][1], ref->avframe.linesize[plane], width, height, edge, edge, EDGE_TOP | EDGE_BOTTOM);
1533 s->dsp.draw_edges(ref->hpel[plane][2], ref->avframe.linesize[plane], width, height, edge, edge, EDGE_TOP | EDGE_BOTTOM);
1534 s->dsp.draw_edges(ref->hpel[plane][3], ref->avframe.linesize[plane], width, height, edge, edge, EDGE_TOP | EDGE_BOTTOM);
1536 ref->interpolated[plane] = 1;
1540 * Dirac Specification ->
1541 * 13.0 Transform data syntax. transform_data()
/*
 * Reconstruct the current picture, one component at a time: decode the
 * wavelet coefficients, run the inverse transform in slices and write the
 * result to the output frame -- directly for intra pictures, added to the
 * OBMC motion-compensated prediction for inter pictures.
 * NOTE(review): this copy of the file is missing several original lines
 * (declarations, braces, returns), so some guarding conditions and the
 * return values are not visible here.
 */
1543 static int dirac_decode_frame_internal(DiracContext *s)
1546 int y, i, comp, dsty;
1549 /* [DIRAC_STD] 13.5.1 low_delay_transform_data() */
/* clear the coefficient buffers of all three components (the condition
   guarding this loop is not visible here) */
1550 for (comp = 0; comp < 3; comp++) {
1551 Plane *p = &s->plane[comp];
1552 memset(p->idwt_buf, 0, p->idwt_stride * p->idwt_height * sizeof(IDWTELEM));
1558 for (comp = 0; comp < 3; comp++) {
1559 Plane *p = &s->plane[comp];
1560 uint8_t *frame = s->current_picture->avframe.data[comp];
1562 /* FIXME: small resolutions */
/* the four edge emulation buffers are carved out of one base buffer,
   FFALIGN(p->width, 16) bytes apart */
1563 for (i = 0; i < 4; i++)
1564 s->edge_emu_buffer[i] = s->edge_emu_buffer_base + i*FFALIGN(p->width, 16);
1566 if (!s->zero_res && !s->low_delay)
1568 memset(p->idwt_buf, 0, p->idwt_stride * p->idwt_height * sizeof(IDWTELEM));
1569 decode_component(s, comp); /* [DIRAC_STD] 13.4.1 core_transform_data() */
/* set up the sliced inverse wavelet transform for this plane; the handling
   of a non-zero result is not visible here */
1571 if (ff_spatial_idwt_init2(&d, p->idwt_buf, p->idwt_width, p->idwt_height, p->idwt_stride,
1572 s->wavelet_idx+2, s->wavelet_depth, p->idwt_tmp))
1575 if (!s->num_refs) { /* intra */
/* 16 rows at a time: transform up to row y+16, then clamp and store.
   NOTE(review): the last slice always covers 16 rows even when p->height
   is not a multiple of 16 -- presumably the buffers are padded; confirm */
1576 for (y = 0; y < p->height; y += 16) {
1577 ff_spatial_idwt_slice2(&d, y+16); /* decode */
1578 s->diracdsp.put_signed_rect_clamped(frame + y*p->stride, p->stride,
1579 p->idwt_buf + y*p->idwt_stride, p->idwt_stride, p->width, 16);
1581 } else { /* inter */
/* number of mctmp entries covered by one row of blocks */
1582 int rowheight = p->ybsep*p->stride;
1584 select_dsp_funcs(s, p->width, p->height, p->xblen, p->yblen);
1586 for (i = 0; i < s->num_refs; i++)
1587 interpolate_refplane(s, s->ref_pics[i], comp, p->width, p->height);
/* clear the first 4*yoffset*stride bytes of the accumulation buffer */
1589 memset(s->mctmp, 0, 4*p->yoffset*p->stride);
/* one row of blocks per iteration; dsty is the picture row at which the
   current block row starts (its initialisation and update are not visible
   here) and start is dsty clipped to 0 */
1592 for (y = 0; y < s->blheight; y++) {
1594 start = FFMAX(dsty, 0);
1595 uint16_t *mctmp = s->mctmp + y*rowheight;
1596 DiracBlock *blocks = s->blmotion + y*s->blwidth;
1598 init_obmc_weights(s, p, y);
/* h = number of picture rows finished by this block row: everything up
   to the bottom of the plane for the last row, otherwise one block
   separation minus the part clipped away at the top */
1600 if (y == s->blheight-1 || start+p->ybsep > p->height)
1601 h = p->height - start;
1603 h = p->ybsep - (start - dsty);
/* clear 2*rowheight bytes of accumulator beyond the overlap region, then
   accumulate this row's prediction */
1607 memset(mctmp+2*p->yoffset*p->stride, 0, 2*rowheight);
1608 mc_row(s, blocks, mctmp, comp, dsty);
/* skip the clipped rows and the left overlap, transform enough rows and
   add the residual to the prediction with clamping */
1610 mctmp += (start - dsty)*p->stride + p->xoffset;
1611 ff_spatial_idwt_slice2(&d, start + h); /* decode */
1612 s->diracdsp.add_rect_clamped(frame + start*p->stride, mctmp, p->stride,
1613 p->idwt_buf + start*p->idwt_stride, p->idwt_stride, p->width, h);
1625 * Dirac Specification ->
1626 * 11.1.1 Picture Header. picture_header()
/*
 * Parse the picture header: read the picture number, resolve the reference
 * pictures, update the reference list (retire / add) and then unpack the
 * prediction parameters, block motion data and wavelet parameters.
 * NOTE(review): this copy of the file is missing several original lines
 * (declarations, braces, returns and some conditions); the return values
 * and some guards are therefore not visible here.
 */
1628 static int dirac_decode_picture_header(DiracContext *s)
1631 int i, j, refnum, refdist;
1632 GetBitContext *gb = &s->gb;
1634 /* [DIRAC_STD] 11.1.1 Picture Header. picture_header() PICTURE_NUM */
1635 picnum = s->current_picture->avframe.display_picture_number = get_bits_long(gb, 32);
1638 av_log(s->avctx,AV_LOG_DEBUG,"PICTURE_NUM: %d\n",picnum);
1640 /* if this is the first keyframe after a sequence header, start our
1641 reordering from here */
1642 if (s->frame_number < 0)
1643 s->frame_number = picnum;
1645 s->ref_pics[0] = s->ref_pics[1] = NULL;
1646 for (i = 0; i < s->num_refs; i++) {
/* references are coded as a signed offset from the current picture number */
1647 refnum = picnum + dirac_get_se_golomb(gb);
1650 /* find the closest reference to the one we want */
1651 /* Jordi: this is needed if the referenced picture hasn't yet arrived */
/* the search stops early once refdist reaches 0, i.e. on an exact match
   (the initial value of refdist is not visible here) */
1652 for (j = 0; j < MAX_REFERENCE_FRAMES && refdist; j++)
1653 if (s->ref_frames[j]
1654 && FFABS(s->ref_frames[j]->avframe.display_picture_number - refnum) < refdist) {
1655 s->ref_pics[i] = s->ref_frames[j];
1656 refdist = FFABS(s->ref_frames[j]->avframe.display_picture_number - refnum);
1659 if (!s->ref_pics[i] || refdist)
1660 av_log(s->avctx, AV_LOG_DEBUG, "Reference not found\n");
1662 /* if there were no references at all, allocate one */
/* NOTE(review): the get_buffer() result is not checked in the visible
   lines, so a failed allocation would leave a reference without data */
1663 if (!s->ref_pics[i])
1664 for (j = 0; j < MAX_FRAMES; j++)
1665 if (!s->all_frames[j].avframe.data[0]) {
1666 s->ref_pics[i] = &s->all_frames[j];
1667 s->avctx->get_buffer(s->avctx, &s->ref_pics[i]->avframe);
1671 /* retire the reference frames that are not used anymore */
1672 if (s->current_picture->avframe.reference) {
/* the picture to retire is also coded relative to picnum; an offset of 0
   means nothing is retired */
1673 retire = picnum + dirac_get_se_golomb(gb);
1674 if (retire != picnum) {
1675 DiracFrame *retire_pic = remove_frame(s->ref_frames, retire);
/* keep only the DELAYED_PIC_REF bit so a frame still waiting for output
   is not released yet */
1678 retire_pic->avframe.reference &= DELAYED_PIC_REF;
1680 av_log(s->avctx, AV_LOG_DEBUG, "Frame to retire not found\n");
1683 /* if reference array is full, remove the oldest as per the spec */
1684 while (add_frame(s->ref_frames, MAX_REFERENCE_FRAMES, s->current_picture)) {
1685 av_log(s->avctx, AV_LOG_ERROR, "Reference frame overflow\n");
1686 remove_frame(s->ref_frames, s->ref_frames[0]->avframe.display_picture_number)->avframe.reference &= DELAYED_PIC_REF;
1691 if (dirac_unpack_prediction_parameters(s)) /* [DIRAC_STD] 11.2 Picture Prediction Data. picture_prediction() */
1693 if (dirac_unpack_block_motion_data(s)) /* [DIRAC_STD] 12. Block motion data syntax */
1696 if (dirac_unpack_idwt_params(s)) /* [DIRAC_STD] 11.3 Wavelet transform data */
/*
 * Take the frame with the lowest picture number out of the NULL-terminated
 * s->delay_frames list and hand it to the caller through *picture and
 * *data_size.
 * NOTE(review): some original lines are missing from this copy of the file
 * (declaration of i / out_idx, the update of out_idx, any NULL check on
 * `out`, the return).
 */
1703 static int get_delayed_pic(DiracContext *s, AVFrame *picture, int *data_size)
1705 DiracFrame *out = s->delay_frames[0];
1708 /* find frame with lowest picture number */
1709 for (i = 1; s->delay_frames[i]; i++)
1710 if (s->delay_frames[i]->avframe.display_picture_number < out->avframe.display_picture_number) {
1711 out = s->delay_frames[i];
/* close the gap: shift every later entry, including the terminating NULL,
   down by one slot */
1715 for (i = out_idx; s->delay_frames[i]; i++)
1716 s->delay_frames[i] = s->delay_frames[i+1];
/* toggle the delayed-output flag and return the frame by value */
1719 out->avframe.reference ^= DELAYED_PIC_REF;
1720 *data_size = sizeof(AVFrame);
1721 *(AVFrame *)picture = out->avframe;
1728 * Dirac Specification ->
1729 * 9.6 Parse Info Header Syntax. parse_info()
1730 * 4 byte start code + byte parse code + 4 byte size + 4 byte previous size
1732 #define DATA_UNIT_HEADER_SIZE 13
1734 /* [DIRAC_STD] dirac_decode_data_unit makes reference to the while defined in 9.3
1735 inside the function parse_sequence() */
/*
 * Handle one data unit starting at buf (parse info header included) of
 * `size` bytes. The parse code in buf[4] selects the action: sequence
 * header, end of sequence, auxiliary data, or picture data (bit 0x8 set).
 * NOTE(review): this copy of the file is missing several original lines
 * (returns, braces, short declarations and conditions); the return values
 * are therefore not visible here.
 */
1736 static int dirac_decode_data_unit(AVCodecContext *avctx, const uint8_t *buf, int size)
1738 DiracContext *s = avctx->priv_data;
1739 DiracFrame *pic = NULL;
/* buf[4] is read before the size check below; the visible caller only passes
   positions with more than DATA_UNIT_HEADER_SIZE bytes left in the packet */
1740 int i, parse_code = buf[4];
1743 if (size < DATA_UNIT_HEADER_SIZE)
/* the payload starts right after the 13-byte parse info header */
1746 init_get_bits(&s->gb, &buf[13], 8*(size - DATA_UNIT_HEADER_SIZE));
1748 if (parse_code == pc_seq_header) {
1749 if (s->seen_sequence_header)
1752 /* [DIRAC_STD] 10. Sequence header */
1753 if (avpriv_dirac_parse_sequence_header(avctx, &s->gb, &s->source))
1756 avcodec_get_chroma_sub_sample(avctx->pix_fmt, &s->chroma_x_shift, &s->chroma_y_shift);
1758 if (alloc_sequence_buffers(s))
1761 s->seen_sequence_header = 1;
1762 } else if (parse_code == pc_eos) { /* [DIRAC_STD] End of Sequence */
1763 free_sequence_buffers(s);
1764 s->seen_sequence_header = 0;
1765 } else if (parse_code == pc_aux_data) {
1766 if (buf[13] == 1) { /* encoder implementation/version */
1768 /* versions older than 1.0.8 don't store quant delta for
1769 subbands with only one codeblock */
/* NOTE(review): buf+14 points into raw packet bytes; sscanf() expects a
   NUL-terminated string and nothing visible here guarantees a terminator
   inside the data unit, so this can read past it -- needs a bounded copy */
1770 if (sscanf(buf+14, "Schroedinger %d.%d.%d", ver, ver+1, ver+2) == 3)
1771 if (ver[0] == 1 && ver[1] == 0 && ver[2] <= 7)
1772 s->old_delta_quant = 1;
1774 } else if (parse_code & 0x8) { /* picture data unit */
1775 if (!s->seen_sequence_header) {
1776 av_log(avctx, AV_LOG_DEBUG, "Dropping frame without sequence header\n");
1780 /* find an unused frame */
/* no break is visible after the assignment, so the last unused slot is the
   one that ends up in pic */
1781 for (i = 0; i < MAX_FRAMES; i++)
1782 if (s->all_frames[i].avframe.data[0] == NULL)
1783 pic = &s->all_frames[i];
1785 av_log(avctx, AV_LOG_ERROR, "framelist full\n");
1789 avcodec_get_frame_defaults(&pic->avframe);
1791 /* [DIRAC_STD] Defined in 9.6.1 ... */
/* the low two bits of the parse code give the number of references; the
   error message below shows that a value of 3 is rejected */
1792 tmp = parse_code & 0x03; /* [DIRAC_STD] num_refs() */
1794 av_log(avctx, AV_LOG_ERROR, "num_refs of 3\n");
1798 s->is_arith = (parse_code & 0x48) == 0x08; /* [DIRAC_STD] using_ac() */
1799 s->low_delay = (parse_code & 0x88) == 0x88; /* [DIRAC_STD] is_low_delay() */
1800 pic->avframe.reference = (parse_code & 0x0C) == 0x0C; /* [DIRAC_STD] is_reference() */
1801 pic->avframe.key_frame = s->num_refs == 0; /* [DIRAC_STD] is_intra() */
1802 pic->avframe.pict_type = s->num_refs + 1; /* Definition of AVPictureType in avutil.h */
1804 if (avctx->get_buffer(avctx, &pic->avframe) < 0) {
1805 av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
/* publish the new picture and take the plane strides from its buffers */
1808 s->current_picture = pic;
1809 s->plane[0].stride = pic->avframe.linesize[0];
1810 s->plane[1].stride = pic->avframe.linesize[1];
1811 s->plane[2].stride = pic->avframe.linesize[2];
1813 /* [DIRAC_STD] 11.1 Picture parse. picture_parse() */
1814 if (dirac_decode_picture_header(s))
1817 /* [DIRAC_STD] 13.0 Transform data syntax. transform_data() */
1818 if (dirac_decode_frame_internal(s))
/*
 * Decoder entry point for one packet: release frames that are no longer
 * referenced, scan the packet for "BBCD" parse info headers, decode every
 * data unit found, and then decide which frame (if any) to output so that
 * frames leave in picture-number order.
 * An empty packet flushes one delayed frame via get_delayed_pic().
 * NOTE(review): this copy of the file is missing several original lines
 * (braces, breaks, returns, the outer loop header and some conditions), so
 * the return values and parts of the control flow are not visible here.
 */
1824 static int dirac_decode_frame(AVCodecContext *avctx, void *data, int *data_size, AVPacket *pkt)
1826 DiracContext *s = avctx->priv_data;
/* NOTE(review): `data` is written elsewhere in this function as an AVFrame;
   reading it through a DiracFrame pointer presumably relies on avframe being
   the first member of DiracFrame -- confirm */
1827 DiracFrame *picture = data;
1828 uint8_t *buf = pkt->data;
1829 int buf_size = pkt->size;
1830 int i, data_unit_size, buf_idx = 0;
1832 /* release unused frames */
1833 for (i = 0; i < MAX_FRAMES; i++)
1834 if (s->all_frames[i].avframe.data[0] && !s->all_frames[i].avframe.reference) {
1835 avctx->release_buffer(avctx, &s->all_frames[i].avframe);
1836 memset(s->all_frames[i].interpolated, 0, sizeof(s->all_frames[i].interpolated));
1839 s->current_picture = NULL;
1842 /* end of stream, so flush delayed pics */
1844 return get_delayed_pic(s, (AVFrame *)data, data_size);
1847 /*[DIRAC_STD] Here starts the code from parse_info() defined in 9.6
1848 [DIRAC_STD] PARSE_INFO_PREFIX = "BBCD" as defined in ISO/IEC 646
1849 BBCD start code search */
1850 for (; buf_idx + DATA_UNIT_HEADER_SIZE < buf_size; buf_idx++) {
1851 if (buf[buf_idx ] == 'B' && buf[buf_idx+1] == 'B' &&
1852 buf[buf_idx+2] == 'C' && buf[buf_idx+3] == 'D')
1855 /* BBCD found or end of data */
1856 if (buf_idx + DATA_UNIT_HEADER_SIZE >= buf_size)
/* the 32-bit size field follows the 4-byte prefix and the parse code byte */
1859 data_unit_size = AV_RB32(buf+buf_idx+5);
/* Reject sizes that are zero, negative (field values of 2^31 and above wrap
   when stored in an int) or larger than what is left in the packet. The
   comparison is written against the remaining length so that the addition
   buf_idx + data_unit_size can never overflow, and so that buf_idx can only
   move forward when the size is added to it below. */
1860 if (data_unit_size <= 0 || data_unit_size > buf_size - buf_idx) {
/* a size of exactly 0 is skipped silently, as before */
1861 if (data_unit_size)
1862 av_log(s->avctx, AV_LOG_ERROR,
1863 "Data unit with size %d is larger than input buffer, discarding\n",
1868 /* [DIRAC_STD] dirac_decode_data_unit makes reference to the while defined in 9.3 inside the function parse_sequence() */
1869 if (dirac_decode_data_unit(avctx, buf+buf_idx, data_unit_size))
1871 av_log(s->avctx, AV_LOG_ERROR,"Error in dirac_decode_data_unit\n");
/* continue the search after this data unit */
1874 buf_idx += data_unit_size;
/* no picture was decoded from this packet */
1877 if (!s->current_picture)
/* the decoded picture is ahead of the next one due for output: queue it and
   output the due picture if it is already waiting in the delay list */
1880 if (s->current_picture->avframe.display_picture_number > s->frame_number) {
1881 DiracFrame *delayed_frame = remove_frame(s->delay_frames, s->frame_number);
1883 s->current_picture->avframe.reference |= DELAYED_PIC_REF;
1885 if (add_frame(s->delay_frames, MAX_DELAY, s->current_picture)) {
1886 int min_num = s->delay_frames[0]->avframe.display_picture_number;
1887 /* Too many delayed frames, so we display the frame with the lowest pts */
1888 av_log(avctx, AV_LOG_ERROR, "Delay frame overflow\n");
1889 delayed_frame = s->delay_frames[0];
1891 for (i = 1; s->delay_frames[i]; i++)
1892 if (s->delay_frames[i]->avframe.display_picture_number < min_num)
1893 min_num = s->delay_frames[i]->avframe.display_picture_number;
/* make room by taking out the lowest-numbered frame, then retry the add */
1895 delayed_frame = remove_frame(s->delay_frames, min_num);
1896 add_frame(s->delay_frames, MAX_DELAY, s->current_picture);
1899 if (delayed_frame) {
1900 delayed_frame->avframe.reference ^= DELAYED_PIC_REF;
1901 *(AVFrame*)data = delayed_frame->avframe;
1902 *data_size = sizeof(AVFrame);
1904 } else if (s->current_picture->avframe.display_picture_number == s->frame_number) {
1905 /* The right frame at the right time :-) */
1906 *(AVFrame*)data = s->current_picture->avframe;
1907 *data_size = sizeof(AVFrame);
/* the next picture due is the one after the picture just written to `data`
   (the condition guarding this update is not visible here) */
1911 s->frame_number = picture->avframe.display_picture_number + 1;
/* Codec registration entry: wires the init / close / decode / flush callbacks
   of this file into an AVCodec. CODEC_CAP_DELAY is set, matching the delayed
   output performed by dirac_decode_frame() and get_delayed_pic().
   NOTE(review): the .name field and the closing brace are not visible in this
   copy of the file. */
1916 AVCodec ff_dirac_decoder = {
1918 .type = AVMEDIA_TYPE_VIDEO,
1919 .id = CODEC_ID_DIRAC,
1920 .priv_data_size = sizeof(DiracContext),
1921 .init = dirac_decode_init,
1922 .close = dirac_decode_end,
1923 .decode = dirac_decode_frame,
1924 .capabilities = CODEC_CAP_DELAY,
1925 .flush = dirac_decode_flush,
1926 .long_name = NULL_IF_CONFIG_SMALL("BBC Dirac VC-2"),