2 * Copyright (C) 2007 Marco Gerards <marco@gnu.org>
3 * Copyright (C) 2009 David Conrad
4 * Copyright (C) 2011 Jordi Ortiz
6 * This file is part of FFmpeg.
8 * FFmpeg is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
13 * FFmpeg is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with FFmpeg; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24 * @file libavcodec/diracdec.c
26 * @author Marco Gerards <marco@gnu.org>, David Conrad, Jordi Ortiz <nenjordi@gmail.com>
32 #include "bytestream.h"
34 #include "dirac_arith.h"
35 #include "mpeg12data.h"
41 * The spec limits the number of wavelet decompositions to 4 for both
42 * level 1 (VC-2) and 128 (long-gop default).
43 * 5 decompositions is the maximum before >16-bit buffers are needed.
44 * Schroedinger allows this for DD 9,7 and 13,7 wavelets only, limiting
45 * the others to 4 decompositions (or 3 for the fidelity filter).
47 * We use this instead of MAX_DECOMPOSITIONS to save some memory.
49 #define MAX_DWT_LEVELS 5
52 * The spec limits this to 3 for frame coding, but in practice can be as high as 6
54 #define MAX_REFERENCE_FRAMES 8
55 #define MAX_DELAY 5 /* limit for main profile for frame coding (TODO: field coding) */
56 #define MAX_FRAMES (MAX_REFERENCE_FRAMES + MAX_DELAY + 1)
57 #define MAX_QUANT 68 /* max quant for VC-2 */
58 #define MAX_BLOCKSIZE 32 /* maximum xblen/yblen we support */
61 * DiracBlock->ref flags, if set then the block does MC from the given ref
63 #define DIRAC_REF_MASK_REF1 1
64 #define DIRAC_REF_MASK_REF2 2
65 #define DIRAC_REF_MASK_GLOBAL 4
68 * Value of Picture.reference when Picture is not a reference picture, but
69 * is held for delayed output.
71 #define DELAYED_PIC_REF 4
73 #define ff_emulated_edge_mc ff_emulated_edge_mc_8 /* Fix: change the calls to this function regarding bit depth */
75 #define CALC_PADDING(size, depth) \
76 (((size + (1 << depth) - 1) >> depth) << depth)
78 #define DIVRNDUP(a, b) (((a) + (b) - 1) / (b))
82 int interpolated[3]; /* 1 if hpel[] is valid */
84 uint8_t *hpel_base[3][4];
91 } u; /* anonymous unions aren't in C99 :( */
95 typedef struct SubBand {
103 struct SubBand *parent;
107 const uint8_t *coeff_data;
110 typedef struct Plane {
119 IDWTELEM *idwt_buf_base;
125 /* block separation (block n+1 starts after this many pixels in block n) */
128 /* amount of overspill on each edge (half of the overlap between blocks) */
132 SubBand band[MAX_DWT_LEVELS][4];
135 typedef struct DiracContext {
136 AVCodecContext *avctx;
138 DiracDSPContext diracdsp;
140 dirac_source_params source;
141 int seen_sequence_header;
142 int frame_number; /* number of the next frame to display */
147 int zero_res; /* zero residue flag */
148 int is_arith; /* whether coeffs use arith or golomb coding */
149 int low_delay; /* use the low delay syntax */
150 int globalmc_flag; /* use global motion compensation */
151 int num_refs; /* number of reference pictures */
153 /* wavelet decoding */
154 unsigned wavelet_depth; /* depth of the IDWT */
155 unsigned wavelet_idx;
158 * schroedinger older than 1.0.8 doesn't store
159 * quant delta if only one codebook exists in a band
161 unsigned old_delta_quant;
162 unsigned codeblock_mode;
167 } codeblock[MAX_DWT_LEVELS+1];
170 unsigned num_x; /* number of horizontal slices */
171 unsigned num_y; /* number of vertical slices */
172 AVRational bytes; /* average bytes per slice */
173 uint8_t quant[MAX_DWT_LEVELS][4]; /* [DIRAC_STD] E.1 */
177 int pan_tilt[2]; /* pan/tilt vector */
178 int zrs[2][2]; /* zoom/rotate/shear matrix */
179 int perspective[2]; /* perspective vector */
181 unsigned perspective_exp;
184 /* motion compensation */
185 uint8_t mv_precision; /* [DIRAC_STD] REFS_WT_PRECISION */
186 int16_t weight[2]; /* [DIRAC_STD] REF1_WT and REF2_WT */
187 unsigned weight_log2denom; /* [DIRAC_STD] REFS_WT_PRECISION */
189 int blwidth; /* number of blocks (horizontally) */
190 int blheight; /* number of blocks (vertically) */
191 int sbwidth; /* number of superblocks (horizontally) */
192 int sbheight; /* number of superblocks (vertically) */
195 DiracBlock *blmotion;
197 uint8_t *edge_emu_buffer[4];
198 uint8_t *edge_emu_buffer_base;
200 uint16_t *mctmp; /* buffer holding the MC data multipled by OBMC weights */
203 DECLARE_ALIGNED(16, uint8_t, obmc_weight)[3][MAX_BLOCKSIZE*MAX_BLOCKSIZE];
205 void (*put_pixels_tab[4])(uint8_t *dst, const uint8_t *src[5], int stride, int h);
206 void (*avg_pixels_tab[4])(uint8_t *dst, const uint8_t *src[5], int stride, int h);
207 void (*add_obmc)(uint16_t *dst, const uint8_t *src, int stride, const uint8_t *obmc_weight, int yblen);
208 dirac_weight_func weight_func;
209 dirac_biweight_func biweight_func;
211 DiracFrame *current_picture;
212 DiracFrame *ref_pics[2];
214 DiracFrame *ref_frames[MAX_REFERENCE_FRAMES+1];
215 DiracFrame *delay_frames[MAX_DELAY+1];
216 DiracFrame all_frames[MAX_FRAMES];
220 * Dirac Specification ->
221 * Parse code values. 9.6.1 Table 9.1
223 enum dirac_parse_code {
224 pc_seq_header = 0x00,
237 static const uint8_t default_qmat[][4][4] = {
238 { { 5, 3, 3, 0}, { 0, 4, 4, 1}, { 0, 5, 5, 2}, { 0, 6, 6, 3} },
239 { { 4, 2, 2, 0}, { 0, 4, 4, 2}, { 0, 5, 5, 3}, { 0, 7, 7, 5} },
240 { { 5, 3, 3, 0}, { 0, 4, 4, 1}, { 0, 5, 5, 2}, { 0, 6, 6, 3} },
241 { { 8, 4, 4, 0}, { 0, 4, 4, 0}, { 0, 4, 4, 0}, { 0, 4, 4, 0} },
242 { { 8, 4, 4, 0}, { 0, 4, 4, 0}, { 0, 4, 4, 0}, { 0, 4, 4, 0} },
243 { { 0, 4, 4, 8}, { 0, 8, 8, 12}, { 0, 13, 13, 17}, { 0, 17, 17, 21} },
244 { { 3, 1, 1, 0}, { 0, 4, 4, 2}, { 0, 6, 6, 5}, { 0, 9, 9, 7} },
247 static const int qscale_tab[MAX_QUANT+1] = {
248 4, 5, 6, 7, 8, 10, 11, 13,
249 16, 19, 23, 27, 32, 38, 45, 54,
250 64, 76, 91, 108, 128, 152, 181, 215,
251 256, 304, 362, 431, 512, 609, 724, 861,
252 1024, 1218, 1448, 1722, 2048, 2435, 2896, 3444,
253 4096, 4871, 5793, 6889, 8192, 9742, 11585, 13777,
254 16384, 19484, 23170, 27554, 32768, 38968, 46341, 55109,
258 static const int qoffset_intra_tab[MAX_QUANT+1] = {
259 1, 2, 3, 4, 4, 5, 6, 7,
260 8, 10, 12, 14, 16, 19, 23, 27,
261 32, 38, 46, 54, 64, 76, 91, 108,
262 128, 152, 181, 216, 256, 305, 362, 431,
263 512, 609, 724, 861, 1024, 1218, 1448, 1722,
264 2048, 2436, 2897, 3445, 4096, 4871, 5793, 6889,
265 8192, 9742, 11585, 13777, 16384, 19484, 23171, 27555,
269 static const int qoffset_inter_tab[MAX_QUANT+1] = {
270 1, 2, 2, 3, 3, 4, 4, 5,
271 6, 7, 9, 10, 12, 14, 17, 20,
272 24, 29, 34, 41, 48, 57, 68, 81,
273 96, 114, 136, 162, 192, 228, 272, 323,
274 384, 457, 543, 646, 768, 913, 1086, 1292,
275 1536, 1827, 2172, 2583, 3072, 3653, 4344, 5166,
276 6144, 7307, 8689, 10333, 12288, 14613, 17378, 20666,
/**
 * Divide by 3 using a multiply and a shift ("magic number" division taken
 * from schroedinger).
 *
 * 21845 * 3 == 65535, so multiplying by 21845 and shifting right by 16
 * approximates x/3; the +1 and +10922 terms make the result round to the
 * nearest integer (e.g. 1 -> 0, 2 -> 1, 4 -> 1, 5 -> 2).
 *
 * @param x value to divide
 * @return approximately x/3, rounded to nearest
 */
static inline int divide3(int x)
{
    return ((x+1)*21845 + 10922) >> 16;
}
286 static DiracFrame *remove_frame(DiracFrame *framelist[], int picnum)
288 DiracFrame *remove_pic = NULL;
289 int i, remove_idx = -1;
291 for (i = 0; framelist[i]; i++)
292 if (framelist[i]->avframe.display_picture_number == picnum) {
293 remove_pic = framelist[i];
298 for (i = remove_idx; framelist[i]; i++)
299 framelist[i] = framelist[i+1];
304 static int add_frame(DiracFrame *framelist[], int maxframes, DiracFrame *frame)
307 for (i = 0; i < maxframes; i++)
309 framelist[i] = frame;
315 static int alloc_sequence_buffers(DiracContext *s)
317 int sbwidth = DIVRNDUP(s->source.width, 4);
318 int sbheight = DIVRNDUP(s->source.height, 4);
319 int i, w, h, top_padding;
321 /* todo: think more about this / use or set Plane here */
322 for (i = 0; i < 3; i++) {
323 int max_xblen = MAX_BLOCKSIZE >> (i ? s->chroma_x_shift : 0);
324 int max_yblen = MAX_BLOCKSIZE >> (i ? s->chroma_y_shift : 0);
325 w = s->source.width >> (i ? s->chroma_x_shift : 0);
326 h = s->source.height >> (i ? s->chroma_y_shift : 0);
328 /* we allocate the max we support here since num decompositions can
329 * change from frame to frame. Stride is aligned to 16 for SIMD, and
330 * 1<<MAX_DWT_LEVELS top padding to avoid if(y>0) in arith decoding
331 * MAX_BLOCKSIZE padding for MC: blocks can spill up to half of that
333 top_padding = FFMAX(1<<MAX_DWT_LEVELS, max_yblen/2);
334 w = FFALIGN(CALC_PADDING(w, MAX_DWT_LEVELS), 8); /* FIXME: Should this be 16 for SSE??? */
335 h = top_padding + CALC_PADDING(h, MAX_DWT_LEVELS) + max_yblen/2;
337 s->plane[i].idwt_buf_base = av_mallocz((w+max_xblen)*h * sizeof(IDWTELEM));
338 s->plane[i].idwt_tmp = av_malloc((w+16) * sizeof(IDWTELEM));
339 s->plane[i].idwt_buf = s->plane[i].idwt_buf_base + top_padding*w;
340 if (!s->plane[i].idwt_buf_base || !s->plane[i].idwt_tmp)
341 return AVERROR(ENOMEM);
345 h = s->source.height;
347 /* fixme: allocate using real stride here */
348 s->sbsplit = av_malloc(sbwidth * sbheight);
349 s->blmotion = av_malloc(sbwidth * sbheight * 4 * sizeof(*s->blmotion));
350 s->edge_emu_buffer_base = av_malloc((w+64)*MAX_BLOCKSIZE);
352 s->mctmp = av_malloc((w+64+MAX_BLOCKSIZE) * (h*MAX_BLOCKSIZE) * sizeof(*s->mctmp));
353 s->mcscratch = av_malloc((w+64)*MAX_BLOCKSIZE);
355 if (!s->sbsplit || !s->blmotion)
356 return AVERROR(ENOMEM);
360 static void free_sequence_buffers(DiracContext *s)
364 for (i = 0; i < MAX_FRAMES; i++) {
365 if (s->all_frames[i].avframe.data[0]) {
366 s->avctx->release_buffer(s->avctx, &s->all_frames[i].avframe);
367 memset(s->all_frames[i].interpolated, 0, sizeof(s->all_frames[i].interpolated));
370 for (j = 0; j < 3; j++)
371 for (k = 1; k < 4; k++)
372 av_freep(&s->all_frames[i].hpel_base[j][k]);
375 memset(s->ref_frames, 0, sizeof(s->ref_frames));
376 memset(s->delay_frames, 0, sizeof(s->delay_frames));
378 for (i = 0; i < 3; i++) {
379 av_freep(&s->plane[i].idwt_buf_base);
380 av_freep(&s->plane[i].idwt_tmp);
383 av_freep(&s->sbsplit);
384 av_freep(&s->blmotion);
385 av_freep(&s->edge_emu_buffer_base);
388 av_freep(&s->mcscratch);
391 static av_cold int dirac_decode_init(AVCodecContext *avctx)
393 DiracContext *s = avctx->priv_data;
395 s->frame_number = -1;
397 if (avctx->flags&CODEC_FLAG_EMU_EDGE) {
398 av_log(avctx, AV_LOG_ERROR, "Edge emulation not supported!\n");
399 return AVERROR_PATCHWELCOME;
402 dsputil_init(&s->dsp, avctx);
403 ff_diracdsp_init(&s->diracdsp);
408 static void dirac_decode_flush(AVCodecContext *avctx)
410 DiracContext *s = avctx->priv_data;
411 free_sequence_buffers(s);
412 s->seen_sequence_header = 0;
413 s->frame_number = -1;
416 static av_cold int dirac_decode_end(AVCodecContext *avctx)
418 dirac_decode_flush(avctx);
422 #define SIGN_CTX(x) (CTX_SIGN_ZERO + ((x) > 0) - ((x) < 0))
424 static inline void coeff_unpack_arith(DiracArith *c, int qfactor, int qoffset,
425 SubBand *b, IDWTELEM *buf, int x, int y)
429 int pred_ctx = CTX_ZPZN_F1;
431 /* Check if the parent subband has a 0 in the corresponding position */
433 pred_ctx += !!b->parent->ibuf[b->parent->stride * (y>>1) + (x>>1)] << 1;
435 if (b->orientation == subband_hl)
436 sign_pred = buf[-b->stride];
438 /* Determine if the pixel has only zeros in its neighbourhood */
440 pred_ctx += !(buf[-1] | buf[-b->stride] | buf[-1-b->stride]);
441 if (b->orientation == subband_lh)
444 pred_ctx += !buf[-b->stride];
447 coeff = dirac_get_arith_uint(c, pred_ctx, CTX_COEFF_DATA);
449 coeff = (coeff * qfactor + qoffset + 2) >> 2;
450 sign = dirac_get_arith_bit(c, SIGN_CTX(sign_pred));
451 coeff = (coeff ^ -sign) + sign;
456 static inline int coeff_unpack_golomb(GetBitContext *gb, int qfactor, int qoffset)
460 coeff = svq3_get_ue_golomb(gb);
462 coeff = (coeff * qfactor + qoffset + 2) >> 2;
463 sign = get_bits1(gb);
464 coeff = (coeff ^ -sign) + sign;
470 * Decode the coeffs in the rectangle defined by left, right, top, bottom
471 * [DIRAC_STD] 13.4.3.2 Codeblock unpacking loop. codeblock()
473 static inline void codeblock(DiracContext *s, SubBand *b,
474 GetBitContext *gb, DiracArith *c,
475 int left, int right, int top, int bottom,
476 int blockcnt_one, int is_arith)
478 int x, y, zero_block;
479 int qoffset, qfactor;
482 /* check for any coded coefficients in this codeblock */
485 zero_block = dirac_get_arith_bit(c, CTX_ZERO_BLOCK);
487 zero_block = get_bits1(gb);
493 if (s->codeblock_mode && !(s->old_delta_quant && blockcnt_one)) {
494 int quant = b->quant;
496 quant += dirac_get_arith_int(c, CTX_DELTA_Q_F, CTX_DELTA_Q_DATA);
498 quant += dirac_get_se_golomb(gb);
500 av_log(s->avctx, AV_LOG_ERROR, "Invalid quant\n");
506 b->quant = FFMIN(b->quant, MAX_QUANT);
508 qfactor = qscale_tab[b->quant];
509 /* TODO: context pointer? */
511 qoffset = qoffset_intra_tab[b->quant];
513 qoffset = qoffset_inter_tab[b->quant];
515 buf = b->ibuf + top * b->stride;
516 for (y = top; y < bottom; y++) {
517 for (x = left; x < right; x++) {
518 /* [DIRAC_STD] 13.4.4 Subband coefficients. coeff_unpack() */
520 coeff_unpack_arith(c, qfactor, qoffset, b, buf+x, x, y);
522 buf[x] = coeff_unpack_golomb(gb, qfactor, qoffset);
529 * Dirac Specification ->
530 * 13.3 intra_dc_prediction(band)
532 static inline void intra_dc_prediction(SubBand *b)
534 IDWTELEM *buf = b->ibuf;
537 for (x = 1; x < b->width; x++)
541 for (y = 1; y < b->height; y++) {
542 buf[0] += buf[-b->stride];
544 for (x = 1; x < b->width; x++) {
545 int pred = buf[x - 1] + buf[x - b->stride] + buf[x - b->stride-1];
546 buf[x] += divide3(pred);
553 * Dirac Specification ->
554 * 13.4.2 Non-skipped subbands. subband_coeffs()
556 static av_always_inline void decode_subband_internal(DiracContext *s, SubBand *b, int is_arith)
558 int cb_x, cb_y, left, right, top, bottom;
561 int cb_width = s->codeblock[b->level + (b->orientation != subband_ll)].width;
562 int cb_height = s->codeblock[b->level + (b->orientation != subband_ll)].height;
563 int blockcnt_one = (cb_width + cb_height) == 2;
568 init_get_bits(&gb, b->coeff_data, b->length*8);
571 ff_dirac_init_arith_decoder(&c, &gb, b->length);
574 for (cb_y = 0; cb_y < cb_height; cb_y++) {
575 bottom = (b->height * (cb_y+1)) / cb_height;
577 for (cb_x = 0; cb_x < cb_width; cb_x++) {
578 right = (b->width * (cb_x+1)) / cb_width;
579 codeblock(s, b, &gb, &c, left, right, top, bottom, blockcnt_one, is_arith);
585 if (b->orientation == subband_ll && s->num_refs == 0)
586 intra_dc_prediction(b);
589 static int decode_subband_arith(AVCodecContext *avctx, void *b)
591 DiracContext *s = avctx->priv_data;
592 decode_subband_internal(s, b, 1);
596 static int decode_subband_golomb(AVCodecContext *avctx, void *arg)
598 DiracContext *s = avctx->priv_data;
600 decode_subband_internal(s, *b, 0);
605 * Dirac Specification ->
606 * [DIRAC_STD] 13.4.1 core_transform_data()
608 static void decode_component(DiracContext *s, int comp)
610 AVCodecContext *avctx = s->avctx;
611 SubBand *bands[3*MAX_DWT_LEVELS+1];
612 enum dirac_subband orientation;
613 int level, num_bands = 0;
615 /* Unpack all subbands at all levels. */
616 for (level = 0; level < s->wavelet_depth; level++) {
617 for (orientation = !!level; orientation < 4; orientation++) {
618 SubBand *b = &s->plane[comp].band[level][orientation];
619 bands[num_bands++] = b;
621 align_get_bits(&s->gb);
622 /* [DIRAC_STD] 13.4.2 subband() */
623 b->length = svq3_get_ue_golomb(&s->gb);
625 b->quant = svq3_get_ue_golomb(&s->gb);
626 align_get_bits(&s->gb);
627 b->coeff_data = s->gb.buffer + get_bits_count(&s->gb)/8;
628 b->length = FFMIN(b->length, get_bits_left(&s->gb)/8);
629 skip_bits_long(&s->gb, b->length*8);
632 /* arithmetic coding has inter-level dependencies, so we can only execute one level at a time */
634 avctx->execute(avctx, decode_subband_arith, &s->plane[comp].band[level][!!level],
635 NULL, 4-!!level, sizeof(SubBand));
637 /* golomb coding has no inter-level dependencies, so we can execute all subbands in parallel */
639 avctx->execute(avctx, decode_subband_golomb, bands, NULL, num_bands, sizeof(SubBand*));
642 /* [DIRAC_STD] 13.5.5.2 Luma slice subband data. luma_slice_band(level,orient,sx,sy) --> if b2 == NULL */
643 /* [DIRAC_STD] 13.5.5.3 Chroma slice subband data. chroma_slice_band(level,orient,sx,sy) --> if b2 != NULL */
644 static void lowdelay_subband(DiracContext *s, GetBitContext *gb, int quant,
645 int slice_x, int slice_y, int bits_end,
646 SubBand *b1, SubBand *b2)
648 int left = b1->width * slice_x / s->lowdelay.num_x;
649 int right = b1->width *(slice_x+1) / s->lowdelay.num_x;
650 int top = b1->height * slice_y / s->lowdelay.num_y;
651 int bottom = b1->height *(slice_y+1) / s->lowdelay.num_y;
653 int qfactor = qscale_tab[FFMIN(quant, MAX_QUANT)];
654 int qoffset = qoffset_intra_tab[FFMIN(quant, MAX_QUANT)];
656 IDWTELEM *buf1 = b1->ibuf + top * b1->stride;
657 IDWTELEM *buf2 = b2 ? b2->ibuf + top * b2->stride : NULL;
659 /* we have to constantly check for overread since the spec explicitly
660 requires this, with the meaning that all remaining coeffs are set to 0 */
661 if (get_bits_count(gb) >= bits_end)
664 for (y = top; y < bottom; y++) {
665 for (x = left; x < right; x++) {
666 buf1[x] = coeff_unpack_golomb(gb, qfactor, qoffset);
667 if (get_bits_count(gb) >= bits_end)
670 buf2[x] = coeff_unpack_golomb(gb, qfactor, qoffset);
671 if (get_bits_count(gb) >= bits_end)
681 struct lowdelay_slice {
690 * Dirac Specification ->
691 * 13.5.2 Slices. slice(sx,sy)
693 static int decode_lowdelay_slice(AVCodecContext *avctx, void *arg)
695 DiracContext *s = avctx->priv_data;
696 struct lowdelay_slice *slice = arg;
697 GetBitContext *gb = &slice->gb;
698 enum dirac_subband orientation;
699 int level, quant, chroma_bits, chroma_end;
701 int quant_base = get_bits(gb, 7); /*[DIRAC_STD] qindex */
702 int length_bits = av_log2(8 * slice->bytes)+1;
703 int luma_bits = get_bits_long(gb, length_bits);
704 int luma_end = get_bits_count(gb) + FFMIN(luma_bits, get_bits_left(gb));
706 /* [DIRAC_STD] 13.5.5.2 luma_slice_band */
707 for (level = 0; level < s->wavelet_depth; level++)
708 for (orientation = !!level; orientation < 4; orientation++) {
709 quant = FFMAX(quant_base - s->lowdelay.quant[level][orientation], 0);
710 lowdelay_subband(s, gb, quant, slice->slice_x, slice->slice_y, luma_end,
711 &s->plane[0].band[level][orientation], NULL);
714 /* consume any unused bits from luma */
715 skip_bits_long(gb, get_bits_count(gb) - luma_end);
717 chroma_bits = 8*slice->bytes - 7 - length_bits - luma_bits;
718 chroma_end = get_bits_count(gb) + FFMIN(chroma_bits, get_bits_left(gb));
719 /* [DIRAC_STD] 13.5.5.3 chroma_slice_band */
720 for (level = 0; level < s->wavelet_depth; level++)
721 for (orientation = !!level; orientation < 4; orientation++) {
722 quant = FFMAX(quant_base - s->lowdelay.quant[level][orientation], 0);
723 lowdelay_subband(s, gb, quant, slice->slice_x, slice->slice_y, chroma_end,
724 &s->plane[1].band[level][orientation],
725 &s->plane[2].band[level][orientation]);
732 * Dirac Specification ->
733 * 13.5.1 low_delay_transform_data()
735 static void decode_lowdelay(DiracContext *s)
737 AVCodecContext *avctx = s->avctx;
738 int slice_x, slice_y, bytes, bufsize;
740 struct lowdelay_slice *slices;
743 slices = av_mallocz(s->lowdelay.num_x * s->lowdelay.num_y * sizeof(struct lowdelay_slice));
745 align_get_bits(&s->gb);
746 /*[DIRAC_STD] 13.5.2 Slices. slice(sx,sy) */
747 buf = s->gb.buffer + get_bits_count(&s->gb)/8;
748 bufsize = get_bits_left(&s->gb);
750 for (slice_y = 0; bufsize > 0 && slice_y < s->lowdelay.num_y; slice_y++)
751 for (slice_x = 0; bufsize > 0 && slice_x < s->lowdelay.num_x; slice_x++) {
752 bytes = (slice_num+1) * s->lowdelay.bytes.num / s->lowdelay.bytes.den
753 - slice_num * s->lowdelay.bytes.num / s->lowdelay.bytes.den;
755 slices[slice_num].bytes = bytes;
756 slices[slice_num].slice_x = slice_x;
757 slices[slice_num].slice_y = slice_y;
758 init_get_bits(&slices[slice_num].gb, buf, bufsize);
765 avctx->execute(avctx, decode_lowdelay_slice, slices, NULL, slice_num,
766 sizeof(struct lowdelay_slice)); /* [DIRAC_STD] 13.5.2 Slices */
767 intra_dc_prediction(&s->plane[0].band[0][0]); /* [DIRAC_STD] 13.3 intra_dc_prediction() */
768 intra_dc_prediction(&s->plane[1].band[0][0]); /* [DIRAC_STD] 13.3 intra_dc_prediction() */
769 intra_dc_prediction(&s->plane[2].band[0][0]); /* [DIRAC_STD] 13.3 intra_dc_prediction() */
773 static void init_planes(DiracContext *s)
775 int i, w, h, level, orientation;
777 for (i = 0; i < 3; i++) {
778 Plane *p = &s->plane[i];
780 p->width = s->source.width >> (i ? s->chroma_x_shift : 0);
781 p->height = s->source.height >> (i ? s->chroma_y_shift : 0);
782 p->idwt_width = w = CALC_PADDING(p->width , s->wavelet_depth);
783 p->idwt_height = h = CALC_PADDING(p->height, s->wavelet_depth);
784 p->idwt_stride = FFALIGN(p->idwt_width, 8);
786 for (level = s->wavelet_depth-1; level >= 0; level--) {
789 for (orientation = !!level; orientation < 4; orientation++) {
790 SubBand *b = &p->band[level][orientation];
792 b->ibuf = p->idwt_buf;
794 b->stride = p->idwt_stride << (s->wavelet_depth - level);
797 b->orientation = orientation;
802 b->ibuf += b->stride>>1;
805 b->parent = &p->band[level-1][orientation];
810 p->xblen = s->plane[0].xblen >> s->chroma_x_shift;
811 p->yblen = s->plane[0].yblen >> s->chroma_y_shift;
812 p->xbsep = s->plane[0].xbsep >> s->chroma_x_shift;
813 p->ybsep = s->plane[0].ybsep >> s->chroma_y_shift;
816 p->xoffset = (p->xblen - p->xbsep)/2;
817 p->yoffset = (p->yblen - p->ybsep)/2;
822 * Unpack the motion compensation parameters
823 * Dirac Specification ->
824 * 11.2 Picture prediction data. picture_prediction()
826 static int dirac_unpack_prediction_parameters(DiracContext *s)
828 static const uint8_t default_blen[] = { 4, 12, 16, 24 };
829 static const uint8_t default_bsep[] = { 4, 8, 12, 16 };
831 GetBitContext *gb = &s->gb;
835 /* [DIRAC_STD] 11.2.2 Block parameters. block_parameters() */
836 /* Luma and Chroma are equal. 11.2.3 */
837 idx = svq3_get_ue_golomb(gb); /* [DIRAC_STD] index */
840 av_log(s->avctx, AV_LOG_ERROR, "Block prediction index too high\n");
845 s->plane[0].xblen = svq3_get_ue_golomb(gb);
846 s->plane[0].yblen = svq3_get_ue_golomb(gb);
847 s->plane[0].xbsep = svq3_get_ue_golomb(gb);
848 s->plane[0].ybsep = svq3_get_ue_golomb(gb);
850 /*[DIRAC_STD] preset_block_params(index). Table 11.1 */
851 s->plane[0].xblen = default_blen[idx-1];
852 s->plane[0].yblen = default_blen[idx-1];
853 s->plane[0].xbsep = default_bsep[idx-1];
854 s->plane[0].ybsep = default_bsep[idx-1];
856 /*[DIRAC_STD] 11.2.4 motion_data_dimensions()
857 Calculated in function dirac_unpack_block_motion_data */
859 if (s->plane[0].xbsep < s->plane[0].xblen/2 || s->plane[0].ybsep < s->plane[0].yblen/2) {
860 av_log(s->avctx, AV_LOG_ERROR, "Block separation too small\n");
863 if (s->plane[0].xbsep > s->plane[0].xblen || s->plane[0].ybsep > s->plane[0].yblen) {
864 av_log(s->avctx, AV_LOG_ERROR, "Block seperation greater than size\n");
867 if (FFMAX(s->plane[0].xblen, s->plane[0].yblen) > MAX_BLOCKSIZE) {
868 av_log(s->avctx, AV_LOG_ERROR, "Unsupported large block size\n");
872 /*[DIRAC_STD] 11.2.5 Motion vector precision. motion_vector_precision()
873 Read motion vector precision */
874 s->mv_precision = svq3_get_ue_golomb(gb);
875 if (s->mv_precision > 3) {
876 av_log(s->avctx, AV_LOG_ERROR, "MV precision finer than eighth-pel\n");
880 /*[DIRAC_STD] 11.2.6 Global motion. global_motion()
881 Read the global motion compensation parameters */
882 s->globalmc_flag = get_bits1(gb);
883 if (s->globalmc_flag) {
884 memset(s->globalmc, 0, sizeof(s->globalmc));
885 /* [DIRAC_STD] pan_tilt(gparams) */
886 for (ref = 0; ref < s->num_refs; ref++) {
888 s->globalmc[ref].pan_tilt[0] = dirac_get_se_golomb(gb);
889 s->globalmc[ref].pan_tilt[1] = dirac_get_se_golomb(gb);
891 /* [DIRAC_STD] zoom_rotate_shear(gparams)
892 zoom/rotation/shear parameters */
894 s->globalmc[ref].zrs_exp = svq3_get_ue_golomb(gb);
895 s->globalmc[ref].zrs[0][0] = dirac_get_se_golomb(gb);
896 s->globalmc[ref].zrs[0][1] = dirac_get_se_golomb(gb);
897 s->globalmc[ref].zrs[1][0] = dirac_get_se_golomb(gb);
898 s->globalmc[ref].zrs[1][1] = dirac_get_se_golomb(gb);
900 s->globalmc[ref].zrs[0][0] = 1;
901 s->globalmc[ref].zrs[1][1] = 1;
903 /* [DIRAC_STD] perspective(gparams) */
905 s->globalmc[ref].perspective_exp = svq3_get_ue_golomb(gb);
906 s->globalmc[ref].perspective[0] = dirac_get_se_golomb(gb);
907 s->globalmc[ref].perspective[1] = dirac_get_se_golomb(gb);
912 /*[DIRAC_STD] 11.2.7 Picture prediction mode. prediction_mode()
913 Picture prediction mode, not currently used. */
914 if (svq3_get_ue_golomb(gb)) {
915 av_log(s->avctx, AV_LOG_ERROR, "Unknown picture prediction mode\n");
919 /* [DIRAC_STD] 11.2.8 Reference picture weight. reference_picture_weights()
920 just data read, weight calculation will be done later on. */
921 s->weight_log2denom = 1;
926 s->weight_log2denom = svq3_get_ue_golomb(gb);
927 s->weight[0] = dirac_get_se_golomb(gb);
928 if (s->num_refs == 2)
929 s->weight[1] = dirac_get_se_golomb(gb);
935 * Dirac Specification ->
936 * 11.3 Wavelet transform data. wavelet_transform()
938 static int dirac_unpack_idwt_params(DiracContext *s)
940 GetBitContext *gb = &s->gb;
944 #define CHECKEDREAD(dst, cond, errmsg) \
945 tmp = svq3_get_ue_golomb(gb); \
947 av_log(s->avctx, AV_LOG_ERROR, errmsg); \
954 s->zero_res = s->num_refs ? get_bits1(gb) : 0;
958 /*[DIRAC_STD] 11.3.1 Transform parameters. transform_parameters() */
959 CHECKEDREAD(s->wavelet_idx, tmp > 6, "wavelet_idx is too big\n")
961 CHECKEDREAD(s->wavelet_depth, tmp > MAX_DWT_LEVELS || tmp < 1, "invalid number of DWT decompositions\n")
964 /* Codeblock parameters (core syntax only) */
966 for (i = 0; i <= s->wavelet_depth; i++) {
967 CHECKEDREAD(s->codeblock[i].width , tmp < 1, "codeblock width invalid\n")
968 CHECKEDREAD(s->codeblock[i].height, tmp < 1, "codeblock height invalid\n")
971 CHECKEDREAD(s->codeblock_mode, tmp > 1, "unknown codeblock mode\n")
973 for (i = 0; i <= s->wavelet_depth; i++)
974 s->codeblock[i].width = s->codeblock[i].height = 1;
976 /* Slice parameters + quantization matrix*/
977 /*[DIRAC_STD] 11.3.4 Slice coding Parameters (low delay syntax only). slice_parameters() */
978 s->lowdelay.num_x = svq3_get_ue_golomb(gb);
979 s->lowdelay.num_y = svq3_get_ue_golomb(gb);
980 s->lowdelay.bytes.num = svq3_get_ue_golomb(gb);
981 s->lowdelay.bytes.den = svq3_get_ue_golomb(gb);
983 /* [DIRAC_STD] 11.3.5 Quantisation matrices (low-delay syntax). quant_matrix() */
985 av_log(s->avctx,AV_LOG_DEBUG,"Low Delay: Has Custom Quantization Matrix!\n");
986 /* custom quantization matrix */
987 s->lowdelay.quant[0][0] = svq3_get_ue_golomb(gb);
988 for (level = 0; level < s->wavelet_depth; level++) {
989 s->lowdelay.quant[level][1] = svq3_get_ue_golomb(gb);
990 s->lowdelay.quant[level][2] = svq3_get_ue_golomb(gb);
991 s->lowdelay.quant[level][3] = svq3_get_ue_golomb(gb);
994 /* default quantization matrix */
995 for (level = 0; level < s->wavelet_depth; level++)
996 for (i = 0; i < 4; i++) {
997 s->lowdelay.quant[level][i] = default_qmat[s->wavelet_idx][level][i];
998 /* haar with no shift differs for different depths */
999 if (s->wavelet_idx == 3)
1000 s->lowdelay.quant[level][i] += 4*(s->wavelet_depth-1 - level);
/**
 * Predict a superblock's split mode from its already-decoded neighbours.
 *
 * @param sbsplit pointer to the current entry of the split-mode map; the
 *                neighbours are read at negative offsets from it
 * @param stride  number of entries per row of the map
 * @param x       column of the current superblock (0 = no left neighbour)
 * @param y       row of the current superblock (0 = no top neighbour)
 * @return 0 for the top-left superblock, the single available neighbour on
 *         the first row/column, otherwise the rounded mean of the left, top
 *         and top-left neighbours
 */
static inline int pred_sbsplit(uint8_t *sbsplit, int stride, int x, int y)
{
    /* rounded sum/3 for every possible sum of three split modes (0..6) */
    static const uint8_t avgsplit[7] = { 0, 0, 1, 1, 1, 2, 2 };

    if (!(x | y))
        return 0;
    if (!y)
        return sbsplit[-1];
    if (!x)
        return sbsplit[-stride];

    return avgsplit[sbsplit[-1] + sbsplit[-stride] + sbsplit[-stride-1]];
}
1021 static inline int pred_block_mode(DiracBlock *block, int stride, int x, int y, int refmask)
1028 return block[-1].ref & refmask;
1030 return block[-stride].ref & refmask;
1032 /* return the majority */
1033 pred = (block[-1].ref & refmask) + (block[-stride].ref & refmask) + (block[-stride-1].ref & refmask);
1034 return (pred >> 1) & refmask;
1037 static inline void pred_block_dc(DiracBlock *block, int stride, int x, int y)
1041 memset(block->u.dc, 0, sizeof(block->u.dc));
1043 if (x && !(block[-1].ref & 3)) {
1044 for (i = 0; i < 3; i++)
1045 block->u.dc[i] += block[-1].u.dc[i];
1049 if (y && !(block[-stride].ref & 3)) {
1050 for (i = 0; i < 3; i++)
1051 block->u.dc[i] += block[-stride].u.dc[i];
1055 if (x && y && !(block[-1-stride].ref & 3)) {
1056 for (i = 0; i < 3; i++)
1057 block->u.dc[i] += block[-1-stride].u.dc[i];
1062 for (i = 0; i < 3; i++)
1063 block->u.dc[i] = (block->u.dc[i]+1)>>1;
1064 } else if (n == 3) {
1065 for (i = 0; i < 3; i++)
1066 block->u.dc[i] = divide3(block->u.dc[i]);
1070 static inline void pred_mv(DiracBlock *block, int stride, int x, int y, int ref)
1073 int refmask = ref+1;
1074 int mask = refmask | DIRAC_REF_MASK_GLOBAL; /* exclude gmc blocks */
1077 if (x && (block[-1].ref & mask) == refmask)
1078 pred[n++] = block[-1].u.mv[ref];
1080 if (y && (block[-stride].ref & mask) == refmask)
1081 pred[n++] = block[-stride].u.mv[ref];
1083 if (x && y && (block[-stride-1].ref & mask) == refmask)
1084 pred[n++] = block[-stride-1].u.mv[ref];
1088 block->u.mv[ref][0] = 0;
1089 block->u.mv[ref][1] = 0;
1092 block->u.mv[ref][0] = pred[0][0];
1093 block->u.mv[ref][1] = pred[0][1];
1096 block->u.mv[ref][0] = (pred[0][0] + pred[1][0] + 1) >> 1;
1097 block->u.mv[ref][1] = (pred[0][1] + pred[1][1] + 1) >> 1;
1100 block->u.mv[ref][0] = mid_pred(pred[0][0], pred[1][0], pred[2][0]);
1101 block->u.mv[ref][1] = mid_pred(pred[0][1], pred[1][1], pred[2][1]);
1106 static void global_mv(DiracContext *s, DiracBlock *block, int x, int y, int ref)
1108 int ez = s->globalmc[ref].zrs_exp;
1109 int ep = s->globalmc[ref].perspective_exp;
1110 int (*A)[2] = s->globalmc[ref].zrs;
1111 int *b = s->globalmc[ref].pan_tilt;
1112 int *c = s->globalmc[ref].perspective;
1114 int m = (1<<ep) - (c[0]*x + c[1]*y);
1115 int mx = m * ((A[0][0] * x + A[0][1]*y) + (1<<ez) * b[0]);
1116 int my = m * ((A[1][0] * x + A[1][1]*y) + (1<<ez) * b[1]);
1118 block->u.mv[ref][0] = (mx + (1<<(ez+ep))) >> (ez+ep);
1119 block->u.mv[ref][1] = (my + (1<<(ez+ep))) >> (ez+ep);
1122 static void decode_block_params(DiracContext *s, DiracArith arith[8], DiracBlock *block,
1123 int stride, int x, int y)
1127 block->ref = pred_block_mode(block, stride, x, y, DIRAC_REF_MASK_REF1);
1128 block->ref ^= dirac_get_arith_bit(arith, CTX_PMODE_REF1);
1130 if (s->num_refs == 2) {
1131 block->ref |= pred_block_mode(block, stride, x, y, DIRAC_REF_MASK_REF2);
1132 block->ref ^= dirac_get_arith_bit(arith, CTX_PMODE_REF2) << 1;
1136 pred_block_dc(block, stride, x, y);
1137 for (i = 0; i < 3; i++)
1138 block->u.dc[i] += dirac_get_arith_int(arith+1+i, CTX_DC_F1, CTX_DC_DATA);
1142 if (s->globalmc_flag) {
1143 block->ref |= pred_block_mode(block, stride, x, y, DIRAC_REF_MASK_GLOBAL);
1144 block->ref ^= dirac_get_arith_bit(arith, CTX_GLOBAL_BLOCK) << 2;
1147 for (i = 0; i < s->num_refs; i++)
1148 if (block->ref & (i+1)) {
1149 if (block->ref & DIRAC_REF_MASK_GLOBAL) {
1150 global_mv(s, block, x, y, i);
1152 pred_mv(block, stride, x, y, i);
1153 block->u.mv[i][0] += dirac_get_arith_int(arith + 4 + 2 * i, CTX_MV_F1, CTX_MV_DATA);
1154 block->u.mv[i][1] += dirac_get_arith_int(arith + 5 + 2 * i, CTX_MV_F1, CTX_MV_DATA);
1160 * Copies the current block to the other blocks covered by the current superblock split mode
1162 static void propagate_block_data(DiracBlock *block, int stride, int size)
1165 DiracBlock *dst = block;
1167 for (x = 1; x < size; x++)
1170 for (y = 1; y < size; y++) {
1172 for (x = 0; x < size; x++)
/*
 * Reads the superblock split modes and then the per-block prediction
 * parameters of the current picture into s->sbsplit and s->blmotion.
 * The block / superblock grid dimensions in s are updated as a side effect.
 */
1178 * Dirac Specification ->
1179 * 12. Block motion data syntax
1181 static int dirac_unpack_block_motion_data(DiracContext *s)
1183 GetBitContext *gb = &s->gb;
1184 uint8_t *sbsplit = s->sbsplit;
/* arith[0]: split modes, later block modes; [1..3]: DC; [4..7]: vectors */
1186 DiracArith arith[8];
1190 /* [DIRAC_STD] 11.2.4 and 12.2.1 Number of blocks and superblocks */
/* a superblock spans 4x4 blocks; partial superblocks at the edges count fully */
1191 s->sbwidth = DIVRNDUP(s->source.width, 4*s->plane[0].xbsep);
1192 s->sbheight = DIVRNDUP(s->source.height, 4*s->plane[0].ybsep);
1193 s->blwidth = 4 * s->sbwidth;
1194 s->blheight = 4 * s->sbheight;
1196 /* [DIRAC_STD] 12.3.1 Superblock splitting modes. superblock_split_modes()
1197 decode superblock split modes */
1198 ff_dirac_init_arith_decoder(arith, gb, svq3_get_ue_golomb(gb)); /* svq3_get_ue_golomb(gb) is the length */
1199 for (y = 0; y < s->sbheight; y++) {
1200 for (x = 0; x < s->sbwidth; x++) {
1201 unsigned int split = dirac_get_arith_uint(arith, CTX_SB_F1, CTX_SB_DATA);
/* decoded residual plus neighbour prediction, wrapped into 0..2 */
1204 sbsplit[x] = (split + pred_sbsplit(sbsplit+x, s->sbwidth, x, y)) % 3;
/* advance the local cursor to the next superblock row */
1206 sbsplit += s->sbwidth;
1209 /* setup arith decoding */
/* each decoder is initialised with a length read from the bitstream, in this
   order: block modes, x / y vector residuals per reference, three DC residuals */
1210 ff_dirac_init_arith_decoder(arith, gb, svq3_get_ue_golomb(gb));
1211 for (i = 0; i < s->num_refs; i++) {
1212 ff_dirac_init_arith_decoder(arith + 4 + 2 * i, gb, svq3_get_ue_golomb(gb));
1213 ff_dirac_init_arith_decoder(arith + 5 + 2 * i, gb, svq3_get_ue_golomb(gb));
1215 for (i = 0; i < 3; i++)
1216 ff_dirac_init_arith_decoder(arith+1+i, gb, svq3_get_ue_golomb(gb));
1218 for (y = 0; y < s->sbheight; y++)
1219 for (x = 0; x < s->sbwidth; x++) {
/* split mode 0 / 1 / 2 -> 1 / 2 / 4 coded blocks per side, each standing
   for a step x step area (4 / 2 / 1) of the block grid */
1220 int blkcnt = 1 << s->sbsplit[y * s->sbwidth + x];
1221 int step = 4 >> s->sbsplit[y * s->sbwidth + x];
1223 for (q = 0; q < blkcnt; q++)
1224 for (p = 0; p < blkcnt; p++) {
1225 int bx = 4 * x + p*step;
1226 int by = 4 * y + q*step;
1227 DiracBlock *block = &s->blmotion[by*s->blwidth + bx];
/* decode the top-left block of the area, then replicate it over the area */
1228 decode_block_params(s, arith, block, s->blwidth, bx, by);
1229 propagate_block_data(block, s->blwidth, step);
/**
 * Roll-off value of the overlapped-block window at a given distance
 * from the block edge.
 *
 * @param dist   distance in samples from the block edge (0 = edge sample)
 * @param offset overlap offset of the block in this dimension
 * @return 3 or 5 when offset is 1, otherwise the linear ramp
 *         1 + (6*dist + offset - 1) / (2*offset - 1)
 */
static int obmc_rolloff(int dist, int offset)
{
    /* an overlap of a single sample uses the fixed pair 3, 5 */
    if (offset == 1)
        return dist ? 5 : 3;

    return 1 + (6*dist + offset - 1) / (2*offset - 1);
}

/**
 * One-dimensional OBMC window weight of sample i in a block of length blen.
 *
 * The first and last 2*offset samples ramp up / down symmetrically, every
 * other sample has the flat weight 8.
 *
 * NOTE(review): the leading-edge branch and the flat return value were
 * restored from the mirrored trailing-edge branch and from the flat 8x
 * weights used by init_obmc_weight_row(); confirm against upstream.
 *
 * @param i      sample position inside the block, 0 <= i < blen
 * @param blen   block length in this dimension
 * @param offset overlap offset in this dimension
 * @return window weight of sample i
 */
static int weight(int i, int blen, int offset)
{
    if (i < 2*offset)
        return obmc_rolloff(i, offset);
    if (i > blen-1 - 2*offset)
        return obmc_rolloff(blen-1 - i, offset);
    return 8;
}
/*
 * Fill one row of an OBMC weight table.
 *
 * p            plane supplying xblen and xoffset
 * obmc_weight  row to write
 * stride       row length of the table; columns past xblen are visited by
 *              the last loop
 * left, right  non-zero when the block sits at the left / right picture
 *              edge: that half of the row gets the flat weight instead of
 *              the roll-off
 * wy           vertical weight of this row, multiplied into every entry
 */
1248 static void init_obmc_weight_row(Plane *p, uint8_t *obmc_weight, int stride,
1249 int left, int right, int wy)
/* left edge block: the first half of the row is flat (horizontal weight 8) */
1252 for (x = 0; left && x < p->xblen >> 1; x++)
1253 obmc_weight[x] = wy*8;
/* windowed part; for a right edge block it stops at xblen/2 */
1254 for (; x < p->xblen >> right; x++)
1255 obmc_weight[x] = wy*weight(x, p->xblen, p->xoffset);
/* whatever is left of the block width is flat again */
1256 for (; x < p->xblen; x++)
1257 obmc_weight[x] = wy*8;
/* padding columns up to stride. NOTE(review): the loop body is not visible
   in this excerpt. */
1258 for (; x < stride; x++)
/*
 * Fill a complete OBMC weight table, row by row.
 *
 * The vertical handling mirrors init_obmc_weight_row(): for a top edge
 * block the upper half of the rows uses the flat vertical weight 8, for a
 * bottom edge block the windowed part stops at yblen/2 and the rest is
 * flat; all other rows use weight(y, yblen, yoffset).
 *
 * obmc_weight  table to write, stride entries per row
 * left, right, top, bottom  non-zero when the block touches that edge
 */
1262 static void init_obmc_weight(Plane *p, uint8_t *obmc_weight, int stride,
1263 int left, int right, int top, int bottom)
/* top edge block: flat upper half */
1266 for (y = 0; top && y < p->yblen >> 1; y++) {
1267 init_obmc_weight_row(p, obmc_weight, stride, left, right, 8);
1268 obmc_weight += stride;
/* windowed rows */
1270 for (; y < p->yblen >> bottom; y++) {
1271 int wy = weight(y, p->yblen, p->yoffset);
1272 init_obmc_weight_row(p, obmc_weight, stride, left, right, wy);
1273 obmc_weight += stride;
/* remaining rows (bottom edge block): flat */
1275 for (; y < p->yblen; y++) {
1276 init_obmc_weight_row(p, obmc_weight, stride, left, right, 8);
1277 obmc_weight += stride;
/*
 * (Re)build the three OBMC weight tables used for block row by:
 * [0] block at the left edge, [1] interior block, [2] block at the right
 * edge. All tables use MAX_BLOCKSIZE as row stride.
 * NOTE(review): the definition of `top` is not visible in this excerpt.
 */
1281 static void init_obmc_weights(DiracContext *s, Plane *p, int by)
/* last block row of the picture */
1284 int bottom = by == s->blheight-1;
1286 /* don't bother re-initing for rows 2 to blheight-2, the weights don't change */
1287 if (top || bottom || by == 1) {
1288 init_obmc_weight(p, s->obmc_weight[0], MAX_BLOCKSIZE, 1, 0, top, bottom);
1289 init_obmc_weight(p, s->obmc_weight[1], MAX_BLOCKSIZE, 0, 0, top, bottom);
1290 init_obmc_weight(p, s->obmc_weight[2], MAX_BLOCKSIZE, 0, 1, top, bottom);
/* Blend weight rows, looked up as epel_weights[my&3][mx&3] by mc_subpel(),
   which hands the selected 4-entry row on through src[4]. */
1294 static const uint8_t epel_weights[4][4][4] = {
/*
 * s      decoder context (mv_precision, chroma shifts, reference pictures,
 *        edge emulation buffers)
 * block  block whose vector for reference `ref` is used
 * src    out: source pointers, one per plane to blend; src[4] may carry a
 *        weight row
 * x, y   position of the block in the plane
 * ref    reference index into s->ref_pics
 * plane  plane index into s->plane
 */
1314 * For block x,y, determine which of the hpel planes to do bilinear
1315 * interpolation from and set src[] to the location in each hpel plane
1318 * @return the index of the put_dirac_pixels_tab function to use
1319 * 0 for 1 plane (fpel,hpel), 1 for 2 planes (qpel), 2 for 4 planes (qpel), and 3 for epel
1321 static int mc_subpel(DiracContext *s, DiracBlock *block, const uint8_t *src[5],
1322 int x, int y, int ref, int plane)
1324 Plane *p = &s->plane[plane];
1325 uint8_t **ref_hpel = s->ref_pics[ref]->hpel[plane];
1326 int motion_x = block->u.mv[ref][0];
1327 int motion_y = block->u.mv[ref][1];
1328 int mx, my, i, epel, nplanes = 0;
/* scale the vector by the chroma subsampling shifts.
   NOTE(review): presumably only done for chroma planes; the guard is not
   visible in this excerpt - confirm. */
1331 motion_x >>= s->chroma_x_shift;
1332 motion_y >>= s->chroma_y_shift;
/* split the vector: the low mv_precision bits are the fractional part, the
   remaining bits the whole-sample part.
   NOTE(review): -1 << n left-shifts a negative value, which is undefined
   behaviour in ISO C; (1 << n) - 1 gives the same mask without it. */
1335 mx = motion_x & ~(-1 << s->mv_precision);
1336 my = motion_y & ~(-1 << s->mv_precision);
1337 motion_x >>= s->mv_precision;
1338 motion_y >>= s->mv_precision;
1339 /* normalize subpel coordinates to epel */
1340 /* TODO: template this function? */
1341 mx <<= 3 - s->mv_precision;
1342 my <<= 3 - s->mv_precision;
/* one-plane case: (my>>1)+(mx>>2) selects which hpel plane to read from */
1351 src[0] = ref_hpel[(my>>1)+(mx>>2)] + y*p->stride + x;
/* otherwise start from all four hpel planes at the block position */
1355 for (i = 0; i < 4; i++)
1356 src[i] = ref_hpel[i] + y*p->stride + x;
1358 /* if we're interpolating in the right/bottom halves, adjust the planes as needed
1359 we increment x/y because the edge changes for half of the pixels */
1366 src[0] += p->stride;
1367 src[1] += p->stride;
1375 /* check if we really only need 2 planes since either mx or my is
1376 a hpel position. (epel weights of 0 handle this there) */
1378 /* mx == 0: average [0] and [2]
1379 mx == 4: average [1] and [3] */
1380 src[!mx] = src[2 + !!mx];
1382 } else if (!(my&3)) {
1383 src[0] = src[(my>>1) ];
1384 src[1] = src[(my>>1)+1];
1388 /* adjust the ordering if needed so the weights work */
1390 FFSWAP(const uint8_t *, src[0], src[1]);
1391 FFSWAP(const uint8_t *, src[2], src[3]);
1394 FFSWAP(const uint8_t *, src[0], src[2]);
1395 FFSWAP(const uint8_t *, src[1], src[3]);
/* the fifth slot carries the weight row for this fractional position */
1397 src[4] = epel_weights[my&3][mx&3];
1401 /* fixme: v/h _edge_pos */
/* block reaches outside the padded plane: redirect every used source to an
   edge-extended copy. Assuming int-typed plane dimensions, the (unsigned)
   casts make negative x / y take this path as well. */
1402 if ((unsigned)x > p->width +EDGE_WIDTH/2 - p->xblen ||
1403 (unsigned)y > p->height+EDGE_WIDTH/2 - p->yblen) {
1404 for (i = 0; i < nplanes; i++) {
1405 ff_emulated_edge_mc(s->edge_emu_buffer[i], src[i], p->stride,
1406 p->xblen, p->yblen, x, y,
1407 p->width+EDGE_WIDTH/2, p->height+EDGE_WIDTH/2);
1408 src[i] = s->edge_emu_buffer[i];
/* function table index: plane count halved, plus the epel flag */
1411 return (nplanes>>1) + epel;
/*
 * Accumulate a weighted constant (DC) block into the OBMC accumulator.
 *
 * dst          accumulator to add into
 * dc           DC value of the block
 * stride       row stride of dst (use not visible in this excerpt)
 * obmc_weight  weight table, MAX_BLOCKSIZE entries per row
 * xblen, yblen block size; two samples are handled per iteration, so
 *              dst[x+1] is touched for every even x below xblen
 *              NOTE(review): this relies on xblen being even - confirm
 */
1414 static void add_dc(uint16_t *dst, int dc, int stride,
1415 uint8_t *obmc_weight, int xblen, int yblen)
1420 for (y = 0; y < yblen; y++) {
1421 for (x = 0; x < xblen; x += 2) {
1422 dst[x ] += dc * obmc_weight[x ];
1423 dst[x+1] += dc * obmc_weight[x+1];
/* next row of the weight table */
1426 obmc_weight += MAX_BLOCKSIZE;
/*
 * Motion-compensate one block and accumulate it, OBMC-weighted, into mctmp.
 *
 * The two low bits of block->ref select the mode: a DC block goes through
 * add_dc(), a single-reference block fetches from reference (ref&3)-1, and
 * a two-reference block combines both references.
 * NOTE(review): the case labels are not visible in this excerpt.
 *
 * mctmp        OBMC accumulator positioned at this block
 * obmc_weight  weight table for this block position
 * dstx, dsty   block position in the plane, forwarded to mc_subpel()
 */
1430 static void block_mc(DiracContext *s, DiracBlock *block,
1431 uint16_t *mctmp, uint8_t *obmc_weight,
1432 int plane, int dstx, int dsty)
1434 Plane *p = &s->plane[plane];
1435 const uint8_t *src[5];
1438 switch (block->ref&3) {
/* no reference: weighted constant block */
1440 add_dc(mctmp, block->u.dc[plane], p->stride, obmc_weight, p->xblen, p->yblen);
/* exactly one reference: fetch the prediction into the scratch buffer */
1444 idx = mc_subpel(s, block, src, dstx, dsty, (block->ref&3)-1, plane);
1445 s->put_pixels_tab[idx](s->mcscratch, src, p->stride, p->yblen);
/* NOTE(review): select_dsp_funcs() may leave weight_func NULL; presumably
   this call is guarded by a check that is not visible here - confirm. */
1447 s->weight_func(s->mcscratch, p->stride, s->weight_log2denom,
1448 s->weight[0] + s->weight[1], p->yblen);
/* two references: reference 0 goes to the scratch buffer first */
1451 idx = mc_subpel(s, block, src, dstx, dsty, 0, plane);
1452 s->put_pixels_tab[idx](s->mcscratch, src, p->stride, p->yblen);
1453 idx = mc_subpel(s, block, src, dstx, dsty, 1, plane);
/* explicit weights: reference 1 is put 32 bytes further into the scratch
   buffer and both halves are blended; otherwise it is averaged in place */
1454 if (s->biweight_func) {
1455 /* fixme: +32 is a quick hack */
1456 s->put_pixels_tab[idx](s->mcscratch + 32, src, p->stride, p->yblen);
1457 s->biweight_func(s->mcscratch, s->mcscratch+32, p->stride, s->weight_log2denom,
1458 s->weight[0], s->weight[1], p->yblen);
1460 s->avg_pixels_tab[idx](s->mcscratch, src, p->stride, p->yblen);
/* accumulate the prediction with the OBMC window */
1463 s->add_obmc(mctmp, s->mcscratch, p->stride, obmc_weight, p->yblen);
/*
 * Motion-compensate one row of blocks into the OBMC accumulator.
 *
 * The first block uses the left-edge weight table [0], the blocks in
 * between the interior table [1] and the last block the right-edge
 * table [2].
 * NOTE(review): the statements advancing dstx / mctmp between blocks are
 * not visible in this excerpt.
 */
1466 static void mc_row(DiracContext *s, DiracBlock *block, uint16_t *mctmp, int plane, int dsty)
1468 Plane *p = &s->plane[plane];
/* dstx starts at the position of the second block */
1469 int x, dstx = p->xbsep - p->xoffset;
/* the first block starts xoffset samples left of the plane */
1471 block_mc(s, block, mctmp, s->obmc_weight[0], plane, -p->xoffset, dsty);
1474 for (x = 1; x < s->blwidth-1; x++) {
1475 block_mc(s, block+x, mctmp, s->obmc_weight[1], plane, dstx, dsty);
/* x is blwidth-1 here: the last block of the row */
1479 block_mc(s, block+x, mctmp, s->obmc_weight[2], plane, dstx, dsty);
/*
 * Copy the DSP function pointers matching the current block size into the
 * context.
 * NOTE(review): the computation of idx is not visible in this excerpt.
 */
1482 static void select_dsp_funcs(DiracContext *s, int width, int height, int xblen, int yblen)
1490 memcpy(s->put_pixels_tab, s->diracdsp.put_dirac_pixels_tab[idx], sizeof(s->put_pixels_tab));
1491 memcpy(s->avg_pixels_tab, s->diracdsp.avg_dirac_pixels_tab[idx], sizeof(s->avg_pixels_tab));
1492 s->add_obmc = s->diracdsp.add_dirac_obmc[idx];
/* the explicit weighting functions are only installed when the reference
   weights are non-trivial; block_mc() tests biweight_func for NULL */
1493 if (s->weight_log2denom > 1 || s->weight[0] != 1 || s->weight[1] != 1) {
1494 s->weight_func = s->diracdsp.weight_dirac_pixels_tab[idx];
1495 s->biweight_func = s->diracdsp.biweight_dirac_pixels_tab[idx];
1497 s->weight_func = NULL;
1498 s->biweight_func = NULL;
/*
 * Prepare the half-pel planes of one plane of a reference frame.
 *
 * hpel[plane][0] is the decoded plane itself; planes 1..3 are allocated on
 * first use and filled by dirac_hpel_filter(). ref->interpolated[plane]
 * records that the filtering was done so it is not repeated.
 */
1502 static void interpolate_refplane(DiracContext *s, DiracFrame *ref, int plane, int width, int height)
1504 /* chroma allocates an edge of 8 when subsampled
1505 which for 4:2:2 means an h edge of 16 and v edge of 8
1506 just use 8 for everything for the moment */
1507 int i, edge = EDGE_WIDTH/2;
1509 ref->hpel[plane][0] = ref->avframe.data[plane];
1510 s->dsp.draw_edges(ref->hpel[plane][0], ref->avframe.linesize[plane], width, height, edge, edge, EDGE_TOP | EDGE_BOTTOM); /* EDGE_TOP | EDGE_BOTTOM values just copied to make it build, this needs to be ensured */
1512 /* no need for hpel if we only have fpel vectors */
1513 if (!s->mv_precision)
1516 for (i = 1; i < 4; i++) {
/* room for `edge` extra rows above and below plus 32 spare bytes.
   NOTE(review): no NULL check on the av_malloc() result is visible before
   it is offset and used below - confirm and handle allocation failure. */
1517 if (!ref->hpel_base[plane][i])
1518 ref->hpel_base[plane][i] = av_malloc((height+2*edge) * ref->avframe.linesize[plane] + 32);
1519 /* we need to be 16-byte aligned even for chroma */
1520 ref->hpel[plane][i] = ref->hpel_base[plane][i] + edge*ref->avframe.linesize[plane] + 16;
1523 if (!ref->interpolated[plane]) {
/* planes 1..3 are derived from plane 0 and then get the same edge treatment */
1524 s->diracdsp.dirac_hpel_filter(ref->hpel[plane][1], ref->hpel[plane][2],
1525 ref->hpel[plane][3], ref->hpel[plane][0],
1526 ref->avframe.linesize[plane], width, height);
1527 s->dsp.draw_edges(ref->hpel[plane][1], ref->avframe.linesize[plane], width, height, edge, edge, EDGE_TOP | EDGE_BOTTOM);
1528 s->dsp.draw_edges(ref->hpel[plane][2], ref->avframe.linesize[plane], width, height, edge, edge, EDGE_TOP | EDGE_BOTTOM);
1529 s->dsp.draw_edges(ref->hpel[plane][3], ref->avframe.linesize[plane], width, height, edge, edge, EDGE_TOP | EDGE_BOTTOM);
1531 ref->interpolated[plane] = 1;
/*
 * Reconstructs the three planes of s->current_picture: decodes the
 * coefficients, runs the inverse wavelet transform in slices and writes the
 * result to the frame - directly for intra pictures, added to the
 * motion-compensated prediction for inter pictures.
 */
1535 * Dirac Specification ->
1536 * 13.0 Transform data syntax. transform_data()
1538 static int dirac_decode_frame_internal(DiracContext *s)
1541 int y, i, comp, dsty;
1544 /* [DIRAC_STD] 13.5.1 low_delay_transform_data() */
1545 for (comp = 0; comp < 3; comp++) {
1546 Plane *p = &s->plane[comp];
1547 memset(p->idwt_buf, 0, p->idwt_stride * p->idwt_height * sizeof(IDWTELEM));
1553 for (comp = 0; comp < 3; comp++) {
1554 Plane *p = &s->plane[comp];
1555 uint8_t *frame = s->current_picture->avframe.data[comp];
1557 /* FIXME: small resolutions */
/* carve the four edge emulation buffers out of one allocation, spaced
   FFALIGN(width, 16) bytes apart */
1558 for (i = 0; i < 4; i++)
1559 s->edge_emu_buffer[i] = s->edge_emu_buffer_base + i*FFALIGN(p->width, 16);
1561 if (!s->zero_res && !s->low_delay)
1563 memset(p->idwt_buf, 0, p->idwt_stride * p->idwt_height * sizeof(IDWTELEM));
1564 decode_component(s, comp); /* [DIRAC_STD] 13.4.1 core_transform_data() */
1566 if (ff_spatial_idwt_init2(&d, p->idwt_buf, p->idwt_width, p->idwt_height, p->idwt_stride,
1567 s->wavelet_idx+2, s->wavelet_depth, p->idwt_tmp))
1570 if (!s->num_refs) { /* intra */
/* transform and output in strips of 16 rows.
   NOTE(review): the final strip is 16 rows as well, also when p->height is
   not a multiple of 16 - verify that the buffers are padded accordingly. */
1571 for (y = 0; y < p->height; y += 16) {
1572 ff_spatial_idwt_slice2(&d, y+16); /* decode */
1573 s->diracdsp.put_signed_rect_clamped(frame + y*p->stride, p->stride,
1574 p->idwt_buf + y*p->idwt_stride, p->idwt_stride, p->width, 16);
1576 } else { /* inter */
/* accumulator elements per block row */
1577 int rowheight = p->ybsep*p->stride;
1579 select_dsp_funcs(s, p->width, p->height, p->xblen, p->yblen);
1581 for (i = 0; i < s->num_refs; i++)
1582 interpolate_refplane(s, s->ref_pics[i], comp, p->width, p->height);
/* the size is in bytes: with 16-bit accumulator entries this clears the
   first 2*yoffset rows */
1584 memset(s->mctmp, 0, 4*p->yoffset*p->stride);
1587 for (y = 0; y < s->blheight; y++) {
/* first row of this block row that lies inside the picture */
1589 start = FFMAX(dsty, 0);
1590 uint16_t *mctmp = s->mctmp + y*rowheight;
1591 DiracBlock *blocks = s->blmotion + y*s->blwidth;
1593 init_obmc_weights(s, p, y);
/* h = number of picture rows completed by this block row */
1595 if (y == s->blheight-1 || start+p->ybsep > p->height)
1596 h = p->height - start;
1598 h = p->ybsep - (start - dsty);
/* clear the accumulator rows this block row touches for the first time
   (2*rowheight bytes = one block row of 16-bit entries) */
1602 memset(mctmp+2*p->yoffset*p->stride, 0, 2*rowheight);
1603 mc_row(s, blocks, mctmp, comp, dsty);
/* skip to the first visible row and past the left overlap, then add the
   residual slice and clamp into the output frame */
1605 mctmp += (start - dsty)*p->stride + p->xoffset;
1606 ff_spatial_idwt_slice2(&d, start + h); /* decode */
1607 s->diracdsp.add_rect_clamped(frame + start*p->stride, mctmp, p->stride,
1608 p->idwt_buf + start*p->idwt_stride, p->idwt_stride, p->width, h);
/*
 * Parses the picture number and the reference picture numbers, resolves the
 * references against s->ref_frames, updates the reference list (retiring
 * and adding frames) and then calls the prediction / motion / wavelet
 * parameter parsers.
 */
1620 * Dirac Specification ->
1621 * 11.1.1 Picture Header. picture_header()
1623 static int dirac_decode_picture_header(DiracContext *s)
1626 int i, j, refnum, refdist;
1627 GetBitContext *gb = &s->gb;
1629 /* [DIRAC_STD] 11.1.1 Picture Header. picture_header() PICTURE_NUM */
1630 picnum = s->current_picture->avframe.display_picture_number = get_bits_long(gb, 32);
1633 av_log(s->avctx,AV_LOG_DEBUG,"PICTURE_NUM: %d\n",picnum);
1635 /* if this is the first keyframe after a sequence header, start our
1636 reordering from here */
1637 if (s->frame_number < 0)
1638 s->frame_number = picnum;
1640 s->ref_pics[0] = s->ref_pics[1] = NULL;
1641 for (i = 0; i < s->num_refs; i++) {
/* reference numbers are coded relative to the current picture number */
1642 refnum = picnum + dirac_get_se_golomb(gb);
1645 /* find the closest reference to the one we want */
1646 /* Jordi: this is needed if the referenced picture hasn't yet arrived */
/* the search stops early once an exact match (distance 0) was found */
1647 for (j = 0; j < MAX_REFERENCE_FRAMES && refdist; j++)
1648 if (s->ref_frames[j]
1649 && FFABS(s->ref_frames[j]->avframe.display_picture_number - refnum) < refdist) {
1650 s->ref_pics[i] = s->ref_frames[j];
1651 refdist = FFABS(s->ref_frames[j]->avframe.display_picture_number - refnum);
1654 if (!s->ref_pics[i] || refdist)
1655 av_log(s->avctx, AV_LOG_DEBUG, "Reference not found\n");
1657 /* if there were no references at all, allocate one */
/* NOTE(review): the get_buffer() result is discarded here, whereas
   dirac_decode_data_unit() checks it - consider handling failure. */
1658 if (!s->ref_pics[i])
1659 for (j = 0; j < MAX_FRAMES; j++)
1660 if (!s->all_frames[j].avframe.data[0]) {
1661 s->ref_pics[i] = &s->all_frames[j];
1662 s->avctx->get_buffer(s->avctx, &s->ref_pics[i]->avframe);
1666 /* retire the reference frames that are not used anymore */
1667 if (s->current_picture->avframe.reference) {
/* the retired picture number is also coded relative to picnum; a value
   equal to picnum means nothing is retired */
1668 retire = picnum + dirac_get_se_golomb(gb);
1669 if (retire != picnum) {
1670 DiracFrame *retire_pic = remove_frame(s->ref_frames, retire);
/* only the delayed-output flag survives in the reference field */
1673 retire_pic->avframe.reference &= DELAYED_PIC_REF;
1675 av_log(s->avctx, AV_LOG_DEBUG, "Frame to retire not found\n");
1678 /* if reference array is full, remove the oldest as per the spec */
/* NOTE(review): the remove_frame() result is dereferenced without a check,
   although the retire path above allows for a lookup that finds nothing. */
1679 while (add_frame(s->ref_frames, MAX_REFERENCE_FRAMES, s->current_picture)) {
1680 av_log(s->avctx, AV_LOG_ERROR, "Reference frame overflow\n");
1681 remove_frame(s->ref_frames, s->ref_frames[0]->avframe.display_picture_number)->avframe.reference &= DELAYED_PIC_REF;
1686 if (dirac_unpack_prediction_parameters(s)) /* [DIRAC_STD] 11.2 Picture Prediction Data. picture_prediction() */
1688 if (dirac_unpack_block_motion_data(s)) /* [DIRAC_STD] 12. Block motion data syntax */
1691 if (dirac_unpack_idwt_params(s)) /* [DIRAC_STD] 11.3 Wavelet transform data */
/*
 * Output the delayed frame with the lowest picture number and remove it
 * from s->delay_frames.
 *
 * picture    receives a copy of the chosen frame's AVFrame
 * data_size  set to sizeof(AVFrame) when a frame is output
 */
1698 static int get_delayed_pic(DiracContext *s, AVFrame *picture, int *data_size)
/* the list is scanned up to its first NULL entry */
1700 DiracFrame *out = s->delay_frames[0];
1703 /* find frame with lowest picture number */
1704 for (i = 1; s->delay_frames[i]; i++)
1705 if (s->delay_frames[i]->avframe.display_picture_number < out->avframe.display_picture_number) {
1706 out = s->delay_frames[i];
/* close the gap: every later entry, including the terminating NULL, moves
   one slot down */
1710 for (i = out_idx; s->delay_frames[i]; i++)
1711 s->delay_frames[i] = s->delay_frames[i+1];
/* toggle the delayed-output flag and hand the frame to the caller.
   NOTE(review): presumably guarded by a check that `out` is non-NULL; the
   guard is not visible in this excerpt - confirm. */
1714 out->avframe.reference ^= DELAYED_PIC_REF;
1715 *data_size = sizeof(AVFrame);
1716 *(AVFrame *)picture = out->avframe;
/* 4 + 1 + 4 + 4 = 13 bytes; the payload of a data unit starts at offset 13 */
1723 * Dirac Specification ->
1724 * 9.6 Parse Info Header Syntax. parse_info()
1725 * 4 byte start code + byte parse code + 4 byte size + 4 byte previous size
1727 #define DATA_UNIT_HEADER_SIZE 13
/*
 * Decode one data unit: sequence header, end of sequence, auxiliary data
 * or a picture, selected by the parse code at byte 4 of the unit.
 *
 * buf   start of the data unit (at its start code)
 * size  size of the data unit in bytes, header included
 */
1729 /* [DIRAC_STD] dirac_decode_data_unit makes reference to the while defined in 9.3
1730 inside the function parse_sequence() */
1731 static int dirac_decode_data_unit(AVCodecContext *avctx, const uint8_t *buf, int size)
1733 DiracContext *s = avctx->priv_data;
1734 DiracFrame *pic = NULL;
/* NOTE(review): buf[4] is read before the size check below; this is only
   safe because the caller has verified that a full header is present. */
1735 int i, parse_code = buf[4];
1738 if (size < DATA_UNIT_HEADER_SIZE)
/* the bit reader covers the payload that follows the 13-byte header */
1741 init_get_bits(&s->gb, &buf[13], 8*(size - DATA_UNIT_HEADER_SIZE));
1743 if (parse_code == pc_seq_header) {
1744 if (s->seen_sequence_header)
1747 /* [DIRAC_STD] 10. Sequence header */
1748 if (avpriv_dirac_parse_sequence_header(avctx, &s->gb, &s->source))
1751 avcodec_get_chroma_sub_sample(avctx->pix_fmt, &s->chroma_x_shift, &s->chroma_y_shift);
1753 if (alloc_sequence_buffers(s))
1756 s->seen_sequence_header = 1;
1757 } else if (parse_code == pc_eos) { /* [DIRAC_STD] End of Sequence */
1758 free_sequence_buffers(s);
1759 s->seen_sequence_header = 0;
1760 } else if (parse_code == pc_aux_data) {
/* NOTE(review): buf[13] lies outside the unit when size is exactly 13, and
   sscanf() below scans packet bytes as a C string, so it depends on a
   terminating NUL inside the buffer - consider bounding both. */
1761 if (buf[13] == 1) { /* encoder implementation/version */
1763 /* versions older than 1.0.8 don't store quant delta for
1764 subbands with only one codeblock */
1765 if (sscanf(buf+14, "Schroedinger %d.%d.%d", ver, ver+1, ver+2) == 3)
1766 if (ver[0] == 1 && ver[1] == 0 && ver[2] <= 7)
1767 s->old_delta_quant = 1;
1769 } else if (parse_code & 0x8) { /* picture data unit */
1770 if (!s->seen_sequence_header) {
1771 av_log(avctx, AV_LOG_DEBUG, "Dropping frame without sequence header\n");
1775 /* find an unused frame */
/* a frame slot counts as unused while it has no data buffer attached */
1776 for (i = 0; i < MAX_FRAMES; i++)
1777 if (s->all_frames[i].avframe.data[0] == NULL)
1778 pic = &s->all_frames[i];
1780 av_log(avctx, AV_LOG_ERROR, "framelist full\n");
1784 avcodec_get_frame_defaults(&pic->avframe);
1786 /* [DIRAC_STD] Defined in 9.6.1 ... */
/* the two low bits of the parse code give the number of references */
1787 tmp = parse_code & 0x03; /* [DIRAC_STD] num_refs() */
1789 av_log(avctx, AV_LOG_ERROR, "num_refs of 3\n");
1793 s->is_arith = (parse_code & 0x48) == 0x08; /* [DIRAC_STD] using_ac() */
1794 s->low_delay = (parse_code & 0x88) == 0x88; /* [DIRAC_STD] is_low_delay() */
1795 pic->avframe.reference = (parse_code & 0x0C) == 0x0C; /* [DIRAC_STD] is_reference() */
1796 pic->avframe.key_frame = s->num_refs == 0; /* [DIRAC_STD] is_intra() */
1797 pic->avframe.pict_type = s->num_refs + 1; /* Definition of AVPictureType in avutil.h */
1799 if (avctx->get_buffer(avctx, &pic->avframe) < 0) {
1800 av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
/* the plane strides follow the line sizes of the freshly acquired buffer */
1803 s->current_picture = pic;
1804 s->plane[0].stride = pic->avframe.linesize[0];
1805 s->plane[1].stride = pic->avframe.linesize[1];
1806 s->plane[2].stride = pic->avframe.linesize[2];
1808 /* [DIRAC_STD] 11.1 Picture parse. picture_parse() */
1809 if (dirac_decode_picture_header(s))
1812 /* [DIRAC_STD] 13.0 Transform data syntax. transform_data() */
1813 if (dirac_decode_frame_internal(s))
/*
 * Decoder entry point: releases frames that are no longer needed, walks
 * the packet data unit by data unit and outputs at most one frame in
 * display order.
 *
 * data       output AVFrame storage
 * data_size  set to sizeof(AVFrame) when a frame is output
 * pkt        input packet; the flush branch below returns delayed frames
 */
1819 static int dirac_decode_frame(AVCodecContext *avctx, void *data, int *data_size, AVPacket *pkt)
1821 DiracContext *s = avctx->priv_data;
/* alias of the output storage, read back on the last line of the function */
1822 DiracFrame *picture = data;
1823 uint8_t *buf = pkt->data;
1824 int buf_size = pkt->size;
1825 int i, data_unit_size, buf_idx = 0;
1827 /* release unused frames */
/* releasing a frame also invalidates its cached half-pel interpolation */
1828 for (i = 0; i < MAX_FRAMES; i++)
1829 if (s->all_frames[i].avframe.data[0] && !s->all_frames[i].avframe.reference) {
1830 avctx->release_buffer(avctx, &s->all_frames[i].avframe);
1831 memset(s->all_frames[i].interpolated, 0, sizeof(s->all_frames[i].interpolated));
1834 s->current_picture = NULL;
1837 /* end of stream, so flush delayed pics */
1839 return get_delayed_pic(s, (AVFrame *)data, data_size);
1842 /*[DIRAC_STD] Here starts the code from parse_info() defined in 9.6
1843 [DIRAC_STD] PARSE_INFO_PREFIX = "BBCD" as defined in ISO/IEC 646
1844 BBCD start code search */
/* the scan only considers positions that still leave room for a full header */
1845 for (; buf_idx + DATA_UNIT_HEADER_SIZE < buf_size; buf_idx++) {
1846 if (buf[buf_idx ] == 'B' && buf[buf_idx+1] == 'B' &&
1847 buf[buf_idx+2] == 'C' && buf[buf_idx+3] == 'D')
1850 /* BBCD found or end of data */
1851 if (buf_idx + DATA_UNIT_HEADER_SIZE >= buf_size)
/* 32-bit size field at offset 5, right after start code and parse code.
   NOTE(review): the value comes from the stream and is stored in an int;
   buf_idx + data_unit_size can overflow (or the size can be negative) -
   comparing data_unit_size against buf_size - buf_idx avoids the addition. */
1854 data_unit_size = AV_RB32(buf+buf_idx+5);
1855 if (buf_idx + data_unit_size > buf_size || !data_unit_size) {
1856 if(buf_idx + data_unit_size > buf_size)
1857 av_log(s->avctx, AV_LOG_ERROR,
1858 "Data unit with size %d is larger than input buffer, discarding\n",
1863 /* [DIRAC_STD] dirac_decode_data_unit makes reference to the while defined in 9.3 inside the function parse_sequence() */
1864 if (dirac_decode_data_unit(avctx, buf+buf_idx, data_unit_size))
1866 av_log(s->avctx, AV_LOG_ERROR,"Error in dirac_decode_data_unit\n");
1869 buf_idx += data_unit_size;
1872 if (!s->current_picture)
/* the decoded picture is ahead of the next one due for display: queue it
   and output the due frame instead if it is already waiting */
1875 if (s->current_picture->avframe.display_picture_number > s->frame_number) {
1876 DiracFrame *delayed_frame = remove_frame(s->delay_frames, s->frame_number);
1878 s->current_picture->avframe.reference |= DELAYED_PIC_REF;
1880 if (add_frame(s->delay_frames, MAX_DELAY, s->current_picture)) {
1881 int min_num = s->delay_frames[0]->avframe.display_picture_number;
1882 /* Too many delayed frames, so we display the frame with the lowest pts */
1883 av_log(avctx, AV_LOG_ERROR, "Delay frame overflow\n");
1884 delayed_frame = s->delay_frames[0];
1886 for (i = 1; s->delay_frames[i]; i++)
1887 if (s->delay_frames[i]->avframe.display_picture_number < min_num)
1888 min_num = s->delay_frames[i]->avframe.display_picture_number;
/* the queue has room again once the lowest-numbered frame is taken out */
1890 delayed_frame = remove_frame(s->delay_frames, min_num);
1891 add_frame(s->delay_frames, MAX_DELAY, s->current_picture);
1894 if (delayed_frame) {
1895 delayed_frame->avframe.reference ^= DELAYED_PIC_REF;
1896 *(AVFrame*)data = delayed_frame->avframe;
1897 *data_size = sizeof(AVFrame);
1899 } else if (s->current_picture->avframe.display_picture_number == s->frame_number) {
1900 /* The right frame at the right time :-) */
1901 *(AVFrame*)data = s->current_picture->avframe;
1902 *data_size = sizeof(AVFrame);
/* the next frame due is the one after the frame just written to `data`.
   NOTE(review): `data` holds an AVFrame but is read through a DiracFrame
   pointer; presumably this relies on avframe being the first member of
   DiracFrame - confirm. */
1906 s->frame_number = picture->avframe.display_picture_number + 1;
/* Codec registration entry. CODEC_CAP_DELAY is set because
   dirac_decode_frame() holds frames back for reordering and returns them
   through get_delayed_pic() when flushing. */
1911 AVCodec ff_dirac_decoder = {
1913 .type = AVMEDIA_TYPE_VIDEO,
1914 .id = CODEC_ID_DIRAC,
1915 .priv_data_size = sizeof(DiracContext),
1916 .init = dirac_decode_init,
1917 .close = dirac_decode_end,
1918 .decode = dirac_decode_frame,
1919 .capabilities = CODEC_CAP_DELAY,
1920 .flush = dirac_decode_flush,
1921 .long_name = NULL_IF_CONFIG_SMALL("BBC Dirac VC-2"),