2 * Copyright (C) 2007 Marco Gerards <marco@gnu.org>
3 * Copyright (C) 2009 David Conrad
4 * Copyright (C) 2011 Jordi Ortiz
6 * This file is part of FFmpeg.
8 * FFmpeg is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
13 * FFmpeg is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with FFmpeg; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24 * @file libavcodec/diracdec.c
26 * @author Marco Gerards <marco@gnu.org>, David Conrad, Jordi Ortiz <nenjordi@gmail.com>
32 #include "bytestream.h"
34 #include "dirac_arith.h"
35 #include "mpeg12data.h"
41 * The spec limits the number of wavelet decompositions to 4 for both
42 * level 1 (VC-2) and 128 (long-gop default).
43 * 5 decompositions is the maximum before >16-bit buffers are needed.
44 * Schroedinger allows this for DD 9,7 and 13,7 wavelets only, limiting
45 * the others to 4 decompositions (or 3 for the fidelity filter).
47 * We use this instead of MAX_DECOMPOSITIONS to save some memory.
49 #define MAX_DWT_LEVELS 5
52 * The spec limits this to 3 for frame coding, but in practice can be as high as 6
54 #define MAX_REFERENCE_FRAMES 8
55 #define MAX_DELAY 5 /* limit for main profile for frame coding (TODO: field coding) */
56 #define MAX_FRAMES (MAX_REFERENCE_FRAMES + MAX_DELAY + 1)
57 #define MAX_QUANT 68 /* max quant for VC-2 */
58 #define MAX_BLOCKSIZE 32 /* maximum xblen/yblen we support */
61 * DiracBlock->ref flags, if set then the block does MC from the given ref
63 #define DIRAC_REF_MASK_REF1 1
64 #define DIRAC_REF_MASK_REF2 2
65 #define DIRAC_REF_MASK_GLOBAL 4
68 * Value of Picture.reference when Picture is not a reference picture, but
69 * is held for delayed output.
71 #define DELAYED_PIC_REF 4
73 #define ff_emulated_edge_mc ff_emulated_edge_mc_8 /* Fix: change the calls to this function regarding bit depth */
/* Round size up to the next multiple of (1 << depth).
 * Both arguments are fully parenthesized so that expressions such as
 * "a | b" or "n + 1" can be passed safely; depth is expanded three times
 * and must therefore be free of side effects. */
#define CALC_PADDING(size, depth) \
    ((((size) + (1 << (depth)) - 1) >> (depth)) << (depth))
/* Integer division of a by b, rounded up (for non-negative a and positive b).
 * b is expanded twice, so it must not have side effects. */
78 #define DIVRNDUP(a, b) (((a) + (b) - 1) / (b))
82 int interpolated[3]; /* 1 if hpel[] is valid */
84 uint8_t *hpel_base[3][4];
91 } u; /* anonymous unions aren't in C99 :( */
95 typedef struct SubBand {
103 struct SubBand *parent;
107 const uint8_t *coeff_data;
110 typedef struct Plane {
119 IDWTELEM *idwt_buf_base;
125 /* block separation (block n+1 starts after this many pixels in block n) */
128 /* amount of overspill on each edge (half of the overlap between blocks) */
132 SubBand band[MAX_DWT_LEVELS][4];
135 typedef struct DiracContext {
136 AVCodecContext *avctx;
138 DiracDSPContext diracdsp;
140 dirac_source_params source;
141 int seen_sequence_header;
142 int frame_number; /* number of the next frame to display */
147 int zero_res; /* zero residue flag */
148 int is_arith; /* whether coeffs use arith or golomb coding */
149 int low_delay; /* use the low delay syntax */
150 int globalmc_flag; /* use global motion compensation */
151 int num_refs; /* number of reference pictures */
153 /* wavelet decoding */
154 unsigned wavelet_depth; /* depth of the IDWT */
155 unsigned wavelet_idx;
158 * schroedinger older than 1.0.8 doesn't store
159 * quant delta if only one codebook exists in a band
161 unsigned old_delta_quant;
162 unsigned codeblock_mode;
167 } codeblock[MAX_DWT_LEVELS+1];
170 unsigned num_x; /* number of horizontal slices */
171 unsigned num_y; /* number of vertical slices */
172 AVRational bytes; /* average bytes per slice */
173 uint8_t quant[MAX_DWT_LEVELS][4]; /* [DIRAC_STD] E.1 */
177 int pan_tilt[2]; /* pan/tilt vector */
178 int zrs[2][2]; /* zoom/rotate/shear matrix */
179 int perspective[2]; /* perspective vector */
181 unsigned perspective_exp;
184 /* motion compensation */
185 uint8_t mv_precision; /* [DIRAC_STD] 11.2.5 motion_vector_precision() */
186 int16_t weight[2]; /* [DIRAC_STD] REF1_WT and REF2_WT */
187 unsigned weight_log2denom; /* [DIRAC_STD] REFS_WT_PRECISION */
189 int blwidth; /* number of blocks (horizontally) */
190 int blheight; /* number of blocks (vertically) */
191 int sbwidth; /* number of superblocks (horizontally) */
192 int sbheight; /* number of superblocks (vertically) */
/* per-block motion data, addressed as blmotion[by*blwidth + bx] */
195 DiracBlock *blmotion;
197 uint8_t *edge_emu_buffer[4];
198 uint8_t *edge_emu_buffer_base;
200 uint16_t *mctmp; /* buffer holding the MC data multiplied by OBMC weights */
203 DECLARE_ALIGNED(16, uint8_t, obmc_weight)[3][MAX_BLOCKSIZE*MAX_BLOCKSIZE];
205 void (*put_pixels_tab[4])(uint8_t *dst, const uint8_t *src[5], int stride, int h);
206 void (*avg_pixels_tab[4])(uint8_t *dst, const uint8_t *src[5], int stride, int h);
207 void (*add_obmc)(uint16_t *dst, const uint8_t *src, int stride, const uint8_t *obmc_weight, int yblen);
208 dirac_weight_func weight_func;
209 dirac_biweight_func biweight_func;
211 DiracFrame *current_picture;
212 DiracFrame *ref_pics[2];
214 DiracFrame *ref_frames[MAX_REFERENCE_FRAMES+1];
215 DiracFrame *delay_frames[MAX_DELAY+1];
216 DiracFrame all_frames[MAX_FRAMES];
220 * Dirac Specification ->
221 * Parse code values. 9.6.1 Table 9.1
223 enum dirac_parse_code {
224 pc_seq_header = 0x00,
237 static const uint8_t default_qmat[][4][4] = {
238 { { 5, 3, 3, 0}, { 0, 4, 4, 1}, { 0, 5, 5, 2}, { 0, 6, 6, 3} },
239 { { 4, 2, 2, 0}, { 0, 4, 4, 2}, { 0, 5, 5, 3}, { 0, 7, 7, 5} },
240 { { 5, 3, 3, 0}, { 0, 4, 4, 1}, { 0, 5, 5, 2}, { 0, 6, 6, 3} },
241 { { 8, 4, 4, 0}, { 0, 4, 4, 0}, { 0, 4, 4, 0}, { 0, 4, 4, 0} },
242 { { 8, 4, 4, 0}, { 0, 4, 4, 0}, { 0, 4, 4, 0}, { 0, 4, 4, 0} },
243 { { 0, 4, 4, 8}, { 0, 8, 8, 12}, { 0, 13, 13, 17}, { 0, 17, 17, 21} },
244 { { 3, 1, 1, 0}, { 0, 4, 4, 2}, { 0, 6, 6, 5}, { 0, 9, 9, 7} },
247 static const int qscale_tab[MAX_QUANT+1] = {
248 4, 5, 6, 7, 8, 10, 11, 13,
249 16, 19, 23, 27, 32, 38, 45, 54,
250 64, 76, 91, 108, 128, 152, 181, 215,
251 256, 304, 362, 431, 512, 609, 724, 861,
252 1024, 1218, 1448, 1722, 2048, 2435, 2896, 3444,
253 4096, 4871, 5793, 6889, 8192, 9742, 11585, 13777,
254 16384, 19484, 23170, 27554, 32768, 38968, 46341, 55109,
258 static const int qoffset_intra_tab[MAX_QUANT+1] = {
259 1, 2, 3, 4, 4, 5, 6, 7,
260 8, 10, 12, 14, 16, 19, 23, 27,
261 32, 38, 46, 54, 64, 76, 91, 108,
262 128, 152, 181, 216, 256, 305, 362, 431,
263 512, 609, 724, 861, 1024, 1218, 1448, 1722,
264 2048, 2436, 2897, 3445, 4096, 4871, 5793, 6889,
265 8192, 9742, 11585, 13777, 16384, 19484, 23171, 27555,
269 static const int qoffset_inter_tab[MAX_QUANT+1] = {
270 1, 2, 2, 3, 3, 4, 4, 5,
271 6, 7, 9, 10, 12, 14, 17, 20,
272 24, 29, 34, 41, 48, 57, 68, 81,
273 96, 114, 136, 162, 192, 228, 272, 323,
274 384, 457, 543, 646, 768, 913, 1086, 1292,
275 1536, 1827, 2172, 2583, 3072, 3653, 4344, 5166,
276 6144, 7307, 8689, 10333, 12288, 14613, 17378, 20666,
/**
 * Division by 3 using a 16-bit fixed-point reciprocal (magic numbers taken
 * from schroedinger).
 *
 * The result is (x + 1) / 3 rounded down for the small magnitudes this
 * decoder feeds it, i.e. x / 3 rounded to the nearest integer.
 *
 * @param x dividend
 * @return ((x + 1) * 21845 + 10922) >> 16
 */
static inline int divide3(int x)
{
    const int recip = 21845; /* 65536 / 3, rounded down */
    const int bias  = 10922; /* rounding term added before the shift */
    int scaled      = (x + 1) * recip;

    return (scaled + bias) >> 16;
}
286 static DiracFrame *remove_frame(DiracFrame *framelist[], int picnum)
288 DiracFrame *remove_pic = NULL;
289 int i, remove_idx = -1;
291 for (i = 0; framelist[i]; i++)
292 if (framelist[i]->avframe.display_picture_number == picnum) {
293 remove_pic = framelist[i];
298 for (i = remove_idx; framelist[i]; i++)
299 framelist[i] = framelist[i+1];
304 static int add_frame(DiracFrame *framelist[], int maxframes, DiracFrame *frame)
307 for (i = 0; i < maxframes; i++)
309 framelist[i] = frame;
315 static int alloc_sequence_buffers(DiracContext *s)
317 int sbwidth = DIVRNDUP(s->source.width, 4);
318 int sbheight = DIVRNDUP(s->source.height, 4);
319 int i, w, h, top_padding;
321 /* todo: think more about this / use or set Plane here */
322 for (i = 0; i < 3; i++) {
323 int max_xblen = MAX_BLOCKSIZE >> (i ? s->chroma_x_shift : 0);
324 int max_yblen = MAX_BLOCKSIZE >> (i ? s->chroma_y_shift : 0);
325 w = s->source.width >> (i ? s->chroma_x_shift : 0);
326 h = s->source.height >> (i ? s->chroma_y_shift : 0);
328 /* we allocate the max we support here since num decompositions can
329 * change from frame to frame. Stride is aligned to 16 for SIMD, and
330 * 1<<MAX_DWT_LEVELS top padding to avoid if(y>0) in arith decoding
331 * MAX_BLOCKSIZE padding for MC: blocks can spill up to half of that
333 top_padding = FFMAX(1<<MAX_DWT_LEVELS, max_yblen/2);
334 w = FFALIGN(CALC_PADDING(w, MAX_DWT_LEVELS), 8); /* FIXME: Should this be 16 for SSE??? */
335 h = top_padding + CALC_PADDING(h, MAX_DWT_LEVELS) + max_yblen/2;
337 s->plane[i].idwt_buf_base = av_mallocz((w+max_xblen)*h * sizeof(IDWTELEM));
338 s->plane[i].idwt_tmp = av_malloc((w+16) * sizeof(IDWTELEM));
339 s->plane[i].idwt_buf = s->plane[i].idwt_buf_base + top_padding*w;
340 if (!s->plane[i].idwt_buf_base || !s->plane[i].idwt_tmp)
341 return AVERROR(ENOMEM);
345 h = s->source.height;
347 /* fixme: allocate using real stride here */
348 s->sbsplit = av_malloc(sbwidth * sbheight);
349 s->blmotion = av_malloc(sbwidth * sbheight * 4 * sizeof(*s->blmotion));
350 s->edge_emu_buffer_base = av_malloc((w+64)*MAX_BLOCKSIZE);
352 s->mctmp = av_malloc((w+64+MAX_BLOCKSIZE) * (h*MAX_BLOCKSIZE) * sizeof(*s->mctmp));
353 s->mcscratch = av_malloc((w+64)*MAX_BLOCKSIZE);
355 if (!s->sbsplit || !s->blmotion)
356 return AVERROR(ENOMEM);
360 static void free_sequence_buffers(DiracContext *s)
364 for (i = 0; i < MAX_FRAMES; i++) {
365 if (s->all_frames[i].avframe.data[0]) {
366 s->avctx->release_buffer(s->avctx, &s->all_frames[i].avframe);
367 memset(s->all_frames[i].interpolated, 0, sizeof(s->all_frames[i].interpolated));
370 for (j = 0; j < 3; j++)
371 for (k = 1; k < 4; k++)
372 av_freep(&s->all_frames[i].hpel_base[j][k]);
375 memset(s->ref_frames, 0, sizeof(s->ref_frames));
376 memset(s->delay_frames, 0, sizeof(s->delay_frames));
378 for (i = 0; i < 3; i++) {
379 av_freep(&s->plane[i].idwt_buf_base);
380 av_freep(&s->plane[i].idwt_tmp);
383 av_freep(&s->sbsplit);
384 av_freep(&s->blmotion);
385 av_freep(&s->edge_emu_buffer_base);
388 av_freep(&s->mcscratch);
391 static av_cold int dirac_decode_init(AVCodecContext *avctx)
393 DiracContext *s = avctx->priv_data;
395 s->frame_number = -1;
397 if (avctx->flags&CODEC_FLAG_EMU_EDGE) {
398 av_log(avctx, AV_LOG_ERROR, "Edge emulation not supported!\n");
399 return AVERROR_PATCHWELCOME;
402 dsputil_init(&s->dsp, avctx);
403 ff_diracdsp_init(&s->diracdsp);
408 static void dirac_decode_flush(AVCodecContext *avctx)
410 DiracContext *s = avctx->priv_data;
411 free_sequence_buffers(s);
412 s->seen_sequence_header = 0;
413 s->frame_number = -1;
416 static av_cold int dirac_decode_end(AVCodecContext *avctx)
418 dirac_decode_flush(avctx);
/* Context used when arithmetic-decoding a sign bit: CTX_SIGN_ZERO plus one
 * when x is positive, minus one when x is negative, unchanged when x is 0. */
422 #define SIGN_CTX(x) (CTX_SIGN_ZERO + ((x) > 0) - ((x) < 0))
424 static inline void coeff_unpack_arith(DiracArith *c, int qfactor, int qoffset,
425 SubBand *b, IDWTELEM *buf, int x, int y)
429 int pred_ctx = CTX_ZPZN_F1;
431 /* Check if the parent subband has a 0 in the corresponding position */
433 pred_ctx += !!b->parent->ibuf[b->parent->stride * (y>>1) + (x>>1)] << 1;
435 if (b->orientation == subband_hl)
436 sign_pred = buf[-b->stride];
438 /* Determine if the pixel has only zeros in its neighbourhood */
440 pred_ctx += !(buf[-1] | buf[-b->stride] | buf[-1-b->stride]);
441 if (b->orientation == subband_lh)
444 pred_ctx += !buf[-b->stride];
447 coeff = dirac_get_arith_uint(c, pred_ctx, CTX_COEFF_DATA);
449 coeff = (coeff * qfactor + qoffset + 2) >> 2;
450 sign = dirac_get_arith_bit(c, SIGN_CTX(sign_pred));
451 coeff = (coeff ^ -sign) + sign;
456 static inline int coeff_unpack_golomb(GetBitContext *gb, int qfactor, int qoffset)
460 coeff = svq3_get_ue_golomb(gb);
462 coeff = (coeff * qfactor + qoffset + 2) >> 2;
463 sign = get_bits1(gb);
464 coeff = (coeff ^ -sign) + sign;
470 * Decode the coeffs in the rectangle defined by left, right, top, bottom
471 * [DIRAC_STD] 13.4.3.2 Codeblock unpacking loop. codeblock()
473 static inline void codeblock(DiracContext *s, SubBand *b,
474 GetBitContext *gb, DiracArith *c,
475 int left, int right, int top, int bottom,
476 int blockcnt_one, int is_arith)
478 int x, y, zero_block;
479 int qoffset, qfactor;
482 /* check for any coded coefficients in this codeblock */
485 zero_block = dirac_get_arith_bit(c, CTX_ZERO_BLOCK);
487 zero_block = get_bits1(gb);
493 if (s->codeblock_mode && !(s->old_delta_quant && blockcnt_one)) {
495 b->quant += dirac_get_arith_int(c, CTX_DELTA_Q_F, CTX_DELTA_Q_DATA);
497 b->quant += dirac_get_se_golomb(gb);
500 b->quant = FFMIN(b->quant, MAX_QUANT);
502 qfactor = qscale_tab[b->quant];
503 /* TODO: context pointer? */
505 qoffset = qoffset_intra_tab[b->quant];
507 qoffset = qoffset_inter_tab[b->quant];
509 buf = b->ibuf + top * b->stride;
510 for (y = top; y < bottom; y++) {
511 for (x = left; x < right; x++) {
512 /* [DIRAC_STD] 13.4.4 Subband coefficients. coeff_unpack() */
514 coeff_unpack_arith(c, qfactor, qoffset, b, buf+x, x, y);
516 buf[x] = coeff_unpack_golomb(gb, qfactor, qoffset);
523 * Dirac Specification ->
524 * 13.3 intra_dc_prediction(band)
526 static inline void intra_dc_prediction(SubBand *b)
528 IDWTELEM *buf = b->ibuf;
531 for (x = 1; x < b->width; x++)
535 for (y = 1; y < b->height; y++) {
536 buf[0] += buf[-b->stride];
538 for (x = 1; x < b->width; x++) {
539 int pred = buf[x - 1] + buf[x - b->stride] + buf[x - b->stride-1];
540 buf[x] += divide3(pred);
547 * Dirac Specification ->
548 * 13.4.2 Non-skipped subbands. subband_coeffs()
550 static av_always_inline void decode_subband_internal(DiracContext *s, SubBand *b, int is_arith)
552 int cb_x, cb_y, left, right, top, bottom;
555 int cb_width = s->codeblock[b->level + (b->orientation != subband_ll)].width;
556 int cb_height = s->codeblock[b->level + (b->orientation != subband_ll)].height;
557 int blockcnt_one = (cb_width + cb_height) == 2;
562 init_get_bits(&gb, b->coeff_data, b->length*8);
565 ff_dirac_init_arith_decoder(&c, &gb, b->length);
568 for (cb_y = 0; cb_y < cb_height; cb_y++) {
569 bottom = (b->height * (cb_y+1)) / cb_height;
571 for (cb_x = 0; cb_x < cb_width; cb_x++) {
572 right = (b->width * (cb_x+1)) / cb_width;
573 codeblock(s, b, &gb, &c, left, right, top, bottom, blockcnt_one, is_arith);
579 if (b->orientation == subband_ll && s->num_refs == 0)
580 intra_dc_prediction(b);
583 static int decode_subband_arith(AVCodecContext *avctx, void *b)
585 DiracContext *s = avctx->priv_data;
586 decode_subband_internal(s, b, 1);
590 static int decode_subband_golomb(AVCodecContext *avctx, void *arg)
592 DiracContext *s = avctx->priv_data;
594 decode_subband_internal(s, *b, 0);
599 * Dirac Specification ->
600 * [DIRAC_STD] 13.4.1 core_transform_data()
602 static void decode_component(DiracContext *s, int comp)
604 AVCodecContext *avctx = s->avctx;
605 SubBand *bands[3*MAX_DWT_LEVELS+1];
606 enum dirac_subband orientation;
607 int level, num_bands = 0;
609 /* Unpack all subbands at all levels. */
610 for (level = 0; level < s->wavelet_depth; level++) {
611 for (orientation = !!level; orientation < 4; orientation++) {
612 SubBand *b = &s->plane[comp].band[level][orientation];
613 bands[num_bands++] = b;
615 align_get_bits(&s->gb);
616 /* [DIRAC_STD] 13.4.2 subband() */
617 b->length = svq3_get_ue_golomb(&s->gb);
619 b->quant = svq3_get_ue_golomb(&s->gb);
620 align_get_bits(&s->gb);
621 b->coeff_data = s->gb.buffer + get_bits_count(&s->gb)/8;
622 b->length = FFMIN(b->length, get_bits_left(&s->gb)/8);
623 skip_bits_long(&s->gb, b->length*8);
626 /* arithmetic coding has inter-level dependencies, so we can only execute one level at a time */
628 avctx->execute(avctx, decode_subband_arith, &s->plane[comp].band[level][!!level],
629 NULL, 4-!!level, sizeof(SubBand));
631 /* golomb coding has no inter-level dependencies, so we can execute all subbands in parallel */
633 avctx->execute(avctx, decode_subband_golomb, bands, NULL, num_bands, sizeof(SubBand*));
636 /* [DIRAC_STD] 13.5.5.2 Luma slice subband data. luma_slice_band(level,orient,sx,sy) --> if b2 == NULL */
637 /* [DIRAC_STD] 13.5.5.3 Chroma slice subband data. chroma_slice_band(level,orient,sx,sy) --> if b2 != NULL */
638 static void lowdelay_subband(DiracContext *s, GetBitContext *gb, int quant,
639 int slice_x, int slice_y, int bits_end,
640 SubBand *b1, SubBand *b2)
642 int left = b1->width * slice_x / s->lowdelay.num_x;
643 int right = b1->width *(slice_x+1) / s->lowdelay.num_x;
644 int top = b1->height * slice_y / s->lowdelay.num_y;
645 int bottom = b1->height *(slice_y+1) / s->lowdelay.num_y;
647 int qfactor = qscale_tab[FFMIN(quant, MAX_QUANT)];
648 int qoffset = qoffset_intra_tab[FFMIN(quant, MAX_QUANT)];
650 IDWTELEM *buf1 = b1->ibuf + top * b1->stride;
651 IDWTELEM *buf2 = b2 ? b2->ibuf + top * b2->stride : NULL;
653 /* we have to constantly check for overread since the spec explicitly
654 requires this, with the meaning that all remaining coeffs are set to 0 */
655 if (get_bits_count(gb) >= bits_end)
658 for (y = top; y < bottom; y++) {
659 for (x = left; x < right; x++) {
660 buf1[x] = coeff_unpack_golomb(gb, qfactor, qoffset);
661 if (get_bits_count(gb) >= bits_end)
664 buf2[x] = coeff_unpack_golomb(gb, qfactor, qoffset);
665 if (get_bits_count(gb) >= bits_end)
675 struct lowdelay_slice {
684 * Dirac Specification ->
685 * 13.5.2 Slices. slice(sx,sy)
687 static int decode_lowdelay_slice(AVCodecContext *avctx, void *arg)
689 DiracContext *s = avctx->priv_data;
690 struct lowdelay_slice *slice = arg;
691 GetBitContext *gb = &slice->gb;
692 enum dirac_subband orientation;
693 int level, quant, chroma_bits, chroma_end;
695 int quant_base = get_bits(gb, 7); /*[DIRAC_STD] qindex */
696 int length_bits = av_log2(8 * slice->bytes)+1;
697 int luma_bits = get_bits_long(gb, length_bits);
698 int luma_end = get_bits_count(gb) + FFMIN(luma_bits, get_bits_left(gb));
700 /* [DIRAC_STD] 13.5.5.2 luma_slice_band */
701 for (level = 0; level < s->wavelet_depth; level++)
702 for (orientation = !!level; orientation < 4; orientation++) {
703 quant = FFMAX(quant_base - s->lowdelay.quant[level][orientation], 0);
704 lowdelay_subband(s, gb, quant, slice->slice_x, slice->slice_y, luma_end,
705 &s->plane[0].band[level][orientation], NULL);
708 /* consume any unused bits from luma */
709 skip_bits_long(gb, get_bits_count(gb) - luma_end);
711 chroma_bits = 8*slice->bytes - 7 - length_bits - luma_bits;
712 chroma_end = get_bits_count(gb) + FFMIN(chroma_bits, get_bits_left(gb));
713 /* [DIRAC_STD] 13.5.5.3 chroma_slice_band */
714 for (level = 0; level < s->wavelet_depth; level++)
715 for (orientation = !!level; orientation < 4; orientation++) {
716 quant = FFMAX(quant_base - s->lowdelay.quant[level][orientation], 0);
717 lowdelay_subband(s, gb, quant, slice->slice_x, slice->slice_y, chroma_end,
718 &s->plane[1].band[level][orientation],
719 &s->plane[2].band[level][orientation]);
726 * Dirac Specification ->
727 * 13.5.1 low_delay_transform_data()
729 static void decode_lowdelay(DiracContext *s)
731 AVCodecContext *avctx = s->avctx;
732 int slice_x, slice_y, bytes, bufsize;
734 struct lowdelay_slice *slices;
737 slices = av_mallocz(s->lowdelay.num_x * s->lowdelay.num_y * sizeof(struct lowdelay_slice));
739 align_get_bits(&s->gb);
740 /*[DIRAC_STD] 13.5.2 Slices. slice(sx,sy) */
741 buf = s->gb.buffer + get_bits_count(&s->gb)/8;
742 bufsize = get_bits_left(&s->gb);
744 for (slice_y = 0; bufsize > 0 && slice_y < s->lowdelay.num_y; slice_y++)
745 for (slice_x = 0; bufsize > 0 && slice_x < s->lowdelay.num_x; slice_x++) {
746 bytes = (slice_num+1) * s->lowdelay.bytes.num / s->lowdelay.bytes.den
747 - slice_num * s->lowdelay.bytes.num / s->lowdelay.bytes.den;
749 slices[slice_num].bytes = bytes;
750 slices[slice_num].slice_x = slice_x;
751 slices[slice_num].slice_y = slice_y;
752 init_get_bits(&slices[slice_num].gb, buf, bufsize);
759 avctx->execute(avctx, decode_lowdelay_slice, slices, NULL, slice_num,
760 sizeof(struct lowdelay_slice)); /* [DIRAC_STD] 13.5.2 Slices */
761 intra_dc_prediction(&s->plane[0].band[0][0]); /* [DIRAC_STD] 13.3 intra_dc_prediction() */
762 intra_dc_prediction(&s->plane[1].band[0][0]); /* [DIRAC_STD] 13.3 intra_dc_prediction() */
763 intra_dc_prediction(&s->plane[2].band[0][0]); /* [DIRAC_STD] 13.3 intra_dc_prediction() */
767 static void init_planes(DiracContext *s)
769 int i, w, h, level, orientation;
771 for (i = 0; i < 3; i++) {
772 Plane *p = &s->plane[i];
774 p->width = s->source.width >> (i ? s->chroma_x_shift : 0);
775 p->height = s->source.height >> (i ? s->chroma_y_shift : 0);
776 p->idwt_width = w = CALC_PADDING(p->width , s->wavelet_depth);
777 p->idwt_height = h = CALC_PADDING(p->height, s->wavelet_depth);
778 p->idwt_stride = FFALIGN(p->idwt_width, 8);
780 for (level = s->wavelet_depth-1; level >= 0; level--) {
783 for (orientation = !!level; orientation < 4; orientation++) {
784 SubBand *b = &p->band[level][orientation];
786 b->ibuf = p->idwt_buf;
788 b->stride = p->idwt_stride << (s->wavelet_depth - level);
791 b->orientation = orientation;
796 b->ibuf += b->stride>>1;
799 b->parent = &p->band[level-1][orientation];
804 p->xblen = s->plane[0].xblen >> s->chroma_x_shift;
805 p->yblen = s->plane[0].yblen >> s->chroma_y_shift;
806 p->xbsep = s->plane[0].xbsep >> s->chroma_x_shift;
807 p->ybsep = s->plane[0].ybsep >> s->chroma_y_shift;
810 p->xoffset = (p->xblen - p->xbsep)/2;
811 p->yoffset = (p->yblen - p->ybsep)/2;
816 * Unpack the motion compensation parameters
817 * Dirac Specification ->
818 * 11.2 Picture prediction data. picture_prediction()
820 static int dirac_unpack_prediction_parameters(DiracContext *s)
822 static const uint8_t default_blen[] = { 4, 12, 16, 24 };
823 static const uint8_t default_bsep[] = { 4, 8, 12, 16 };
825 GetBitContext *gb = &s->gb;
829 /* [DIRAC_STD] 11.2.2 Block parameters. block_parameters() */
830 /* Luma and Chroma are equal. 11.2.3 */
831 idx = svq3_get_ue_golomb(gb); /* [DIRAC_STD] index */
834 av_log(s->avctx, AV_LOG_ERROR, "Block prediction index too high\n");
839 s->plane[0].xblen = svq3_get_ue_golomb(gb);
840 s->plane[0].yblen = svq3_get_ue_golomb(gb);
841 s->plane[0].xbsep = svq3_get_ue_golomb(gb);
842 s->plane[0].ybsep = svq3_get_ue_golomb(gb);
844 /*[DIRAC_STD] preset_block_params(index). Table 11.1 */
845 s->plane[0].xblen = default_blen[idx-1];
846 s->plane[0].yblen = default_blen[idx-1];
847 s->plane[0].xbsep = default_bsep[idx-1];
848 s->plane[0].ybsep = default_bsep[idx-1];
850 /*[DIRAC_STD] 11.2.4 motion_data_dimensions()
851 Calculated in function dirac_unpack_block_motion_data */
853 if (s->plane[0].xbsep < s->plane[0].xblen/2 || s->plane[0].ybsep < s->plane[0].yblen/2) {
854 av_log(s->avctx, AV_LOG_ERROR, "Block separation too small\n");
857 if (s->plane[0].xbsep > s->plane[0].xblen || s->plane[0].ybsep > s->plane[0].yblen) {
858 av_log(s->avctx, AV_LOG_ERROR, "Block seperation greater than size\n");
861 if (FFMAX(s->plane[0].xblen, s->plane[0].yblen) > MAX_BLOCKSIZE) {
862 av_log(s->avctx, AV_LOG_ERROR, "Unsupported large block size\n");
866 /*[DIRAC_STD] 11.2.5 Motion vector precision. motion_vector_precision()
867 Read motion vector precision */
868 s->mv_precision = svq3_get_ue_golomb(gb);
869 if (s->mv_precision > 3) {
870 av_log(s->avctx, AV_LOG_ERROR, "MV precision finer than eighth-pel\n");
874 /*[DIRAC_STD] 11.2.6 Global motion. global_motion()
875 Read the global motion compensation parameters */
876 s->globalmc_flag = get_bits1(gb);
877 if (s->globalmc_flag) {
878 memset(s->globalmc, 0, sizeof(s->globalmc));
879 /* [DIRAC_STD] pan_tilt(gparams) */
880 for (ref = 0; ref < s->num_refs; ref++) {
882 s->globalmc[ref].pan_tilt[0] = dirac_get_se_golomb(gb);
883 s->globalmc[ref].pan_tilt[1] = dirac_get_se_golomb(gb);
885 /* [DIRAC_STD] zoom_rotate_shear(gparams)
886 zoom/rotation/shear parameters */
888 s->globalmc[ref].zrs_exp = svq3_get_ue_golomb(gb);
889 s->globalmc[ref].zrs[0][0] = dirac_get_se_golomb(gb);
890 s->globalmc[ref].zrs[0][1] = dirac_get_se_golomb(gb);
891 s->globalmc[ref].zrs[1][0] = dirac_get_se_golomb(gb);
892 s->globalmc[ref].zrs[1][1] = dirac_get_se_golomb(gb);
894 s->globalmc[ref].zrs[0][0] = 1;
895 s->globalmc[ref].zrs[1][1] = 1;
897 /* [DIRAC_STD] perspective(gparams) */
899 s->globalmc[ref].perspective_exp = svq3_get_ue_golomb(gb);
900 s->globalmc[ref].perspective[0] = dirac_get_se_golomb(gb);
901 s->globalmc[ref].perspective[1] = dirac_get_se_golomb(gb);
906 /*[DIRAC_STD] 11.2.7 Picture prediction mode. prediction_mode()
907 Picture prediction mode, not currently used. */
908 if (svq3_get_ue_golomb(gb)) {
909 av_log(s->avctx, AV_LOG_ERROR, "Unknown picture prediction mode\n");
913 /* [DIRAC_STD] 11.2.8 Reference picture weight. reference_picture_weights()
914 just data read, weight calculation will be done later on. */
915 s->weight_log2denom = 1;
920 s->weight_log2denom = svq3_get_ue_golomb(gb);
921 s->weight[0] = dirac_get_se_golomb(gb);
922 if (s->num_refs == 2)
923 s->weight[1] = dirac_get_se_golomb(gb);
929 * Dirac Specification ->
930 * 11.3 Wavelet transform data. wavelet_transform()
932 static int dirac_unpack_idwt_params(DiracContext *s)
934 GetBitContext *gb = &s->gb;
939 s->zero_res = s->num_refs ? get_bits1(gb) : 0;
943 /*[DIRAC_STD] 11.3.1 Transform parameters. transform_parameters() */
944 s->wavelet_idx = svq3_get_ue_golomb(gb);
945 if (s->wavelet_idx > 6)
948 s->wavelet_depth = svq3_get_ue_golomb(gb);
949 if (s->wavelet_depth > MAX_DWT_LEVELS) {
950 av_log(s->avctx, AV_LOG_ERROR, "too many dwt decompositions\n");
955 /* Codeblock parameters (core syntax only) */
957 for (i = 0; i <= s->wavelet_depth; i++) {
958 s->codeblock[i].width = svq3_get_ue_golomb(gb);
959 s->codeblock[i].height = svq3_get_ue_golomb(gb);
962 s->codeblock_mode = svq3_get_ue_golomb(gb);
963 if (s->codeblock_mode > 1) {
964 av_log(s->avctx, AV_LOG_ERROR, "unknown codeblock mode\n");
968 for (i = 0; i <= s->wavelet_depth; i++)
969 s->codeblock[i].width = s->codeblock[i].height = 1;
971 /* Slice parameters + quantization matrix*/
972 /*[DIRAC_STD] 11.3.4 Slice coding Parameters (low delay syntax only). slice_parameters() */
973 s->lowdelay.num_x = svq3_get_ue_golomb(gb);
974 s->lowdelay.num_y = svq3_get_ue_golomb(gb);
975 s->lowdelay.bytes.num = svq3_get_ue_golomb(gb);
976 s->lowdelay.bytes.den = svq3_get_ue_golomb(gb);
978 /* [DIRAC_STD] 11.3.5 Quantisation matrices (low-delay syntax). quant_matrix() */
980 av_log(s->avctx,AV_LOG_DEBUG,"Low Delay: Has Custom Quantization Matrix!\n");
981 /* custom quantization matrix */
982 s->lowdelay.quant[0][0] = svq3_get_ue_golomb(gb);
983 for (level = 0; level < s->wavelet_depth; level++) {
984 s->lowdelay.quant[level][1] = svq3_get_ue_golomb(gb);
985 s->lowdelay.quant[level][2] = svq3_get_ue_golomb(gb);
986 s->lowdelay.quant[level][3] = svq3_get_ue_golomb(gb);
989 /* default quantization matrix */
990 for (level = 0; level < s->wavelet_depth; level++)
991 for (i = 0; i < 4; i++) {
992 s->lowdelay.quant[level][i] = default_qmat[s->wavelet_idx][level][i];
993 /* haar with no shift differs for different depths */
994 if (s->wavelet_idx == 3)
995 s->lowdelay.quant[level][i] += 4*(s->wavelet_depth-1 - level);
1002 static inline int pred_sbsplit(uint8_t *sbsplit, int stride, int x, int y)
1004 static const uint8_t avgsplit[7] = { 0, 0, 1, 1, 1, 2, 2 };
1011 return sbsplit[-stride];
1013 return avgsplit[sbsplit[-1] + sbsplit[-stride] + sbsplit[-stride-1]];
1016 static inline int pred_block_mode(DiracBlock *block, int stride, int x, int y, int refmask)
1023 return block[-1].ref & refmask;
1025 return block[-stride].ref & refmask;
1027 /* return the majority */
1028 pred = (block[-1].ref & refmask) + (block[-stride].ref & refmask) + (block[-stride-1].ref & refmask);
1029 return (pred >> 1) & refmask;
1032 static inline void pred_block_dc(DiracBlock *block, int stride, int x, int y)
1036 memset(block->u.dc, 0, sizeof(block->u.dc));
1038 if (x && !(block[-1].ref & 3)) {
1039 for (i = 0; i < 3; i++)
1040 block->u.dc[i] += block[-1].u.dc[i];
1044 if (y && !(block[-stride].ref & 3)) {
1045 for (i = 0; i < 3; i++)
1046 block->u.dc[i] += block[-stride].u.dc[i];
1050 if (x && y && !(block[-1-stride].ref & 3)) {
1051 for (i = 0; i < 3; i++)
1052 block->u.dc[i] += block[-1-stride].u.dc[i];
1057 for (i = 0; i < 3; i++)
1058 block->u.dc[i] = (block->u.dc[i]+1)>>1;
1059 } else if (n == 3) {
1060 for (i = 0; i < 3; i++)
1061 block->u.dc[i] = divide3(block->u.dc[i]);
1065 static inline void pred_mv(DiracBlock *block, int stride, int x, int y, int ref)
1068 int refmask = ref+1;
1069 int mask = refmask | DIRAC_REF_MASK_GLOBAL; /* exclude gmc blocks */
1072 if (x && (block[-1].ref & mask) == refmask)
1073 pred[n++] = block[-1].u.mv[ref];
1075 if (y && (block[-stride].ref & mask) == refmask)
1076 pred[n++] = block[-stride].u.mv[ref];
1078 if (x && y && (block[-stride-1].ref & mask) == refmask)
1079 pred[n++] = block[-stride-1].u.mv[ref];
1083 block->u.mv[ref][0] = 0;
1084 block->u.mv[ref][1] = 0;
1087 block->u.mv[ref][0] = pred[0][0];
1088 block->u.mv[ref][1] = pred[0][1];
1091 block->u.mv[ref][0] = (pred[0][0] + pred[1][0] + 1) >> 1;
1092 block->u.mv[ref][1] = (pred[0][1] + pred[1][1] + 1) >> 1;
1095 block->u.mv[ref][0] = mid_pred(pred[0][0], pred[1][0], pred[2][0]);
1096 block->u.mv[ref][1] = mid_pred(pred[0][1], pred[1][1], pred[2][1]);
1101 static void global_mv(DiracContext *s, DiracBlock *block, int x, int y, int ref)
1103 int ez = s->globalmc[ref].zrs_exp;
1104 int ep = s->globalmc[ref].perspective_exp;
1105 int (*A)[2] = s->globalmc[ref].zrs;
1106 int *b = s->globalmc[ref].pan_tilt;
1107 int *c = s->globalmc[ref].perspective;
1109 int m = (1<<ep) - (c[0]*x + c[1]*y);
1110 int mx = m * ((A[0][0] * x + A[0][1]*y) + (1<<ez) * b[0]);
1111 int my = m * ((A[1][0] * x + A[1][1]*y) + (1<<ez) * b[1]);
1113 block->u.mv[ref][0] = (mx + (1<<(ez+ep))) >> (ez+ep);
1114 block->u.mv[ref][1] = (my + (1<<(ez+ep))) >> (ez+ep);
1117 static void decode_block_params(DiracContext *s, DiracArith arith[8], DiracBlock *block,
1118 int stride, int x, int y)
1122 block->ref = pred_block_mode(block, stride, x, y, DIRAC_REF_MASK_REF1);
1123 block->ref ^= dirac_get_arith_bit(arith, CTX_PMODE_REF1);
1125 if (s->num_refs == 2) {
1126 block->ref |= pred_block_mode(block, stride, x, y, DIRAC_REF_MASK_REF2);
1127 block->ref ^= dirac_get_arith_bit(arith, CTX_PMODE_REF2) << 1;
1131 pred_block_dc(block, stride, x, y);
1132 for (i = 0; i < 3; i++)
1133 block->u.dc[i] += dirac_get_arith_int(arith+1+i, CTX_DC_F1, CTX_DC_DATA);
1137 if (s->globalmc_flag) {
1138 block->ref |= pred_block_mode(block, stride, x, y, DIRAC_REF_MASK_GLOBAL);
1139 block->ref ^= dirac_get_arith_bit(arith, CTX_GLOBAL_BLOCK) << 2;
1142 for (i = 0; i < s->num_refs; i++)
1143 if (block->ref & (i+1)) {
1144 if (block->ref & DIRAC_REF_MASK_GLOBAL) {
1145 global_mv(s, block, x, y, i);
1147 pred_mv(block, stride, x, y, i);
1148 block->u.mv[i][0] += dirac_get_arith_int(arith + 4 + 2 * i, CTX_MV_F1, CTX_MV_DATA);
1149 block->u.mv[i][1] += dirac_get_arith_int(arith + 5 + 2 * i, CTX_MV_F1, CTX_MV_DATA);
1155 * Copies the current block to the other blocks covered by the current superblock split mode
1157 static void propagate_block_data(DiracBlock *block, int stride, int size)
1160 DiracBlock *dst = block;
1162 for (x = 1; x < size; x++)
1165 for (y = 1; y < size; y++) {
1167 for (x = 0; x < size; x++)
1173 * Dirac Specification ->
1174 * 12. Block motion data syntax
/**
 * Read the superblock split modes and the per-block motion data of the
 * current picture from s->gb.
 *
 * Results are written to s->sbsplit (one entry per superblock) and
 * s->blmotion (one entry per block, s->blwidth entries per row);
 * s->sbwidth, s->sbheight, s->blwidth and s->blheight are set on the way.
 *
 * @param s decoder context
 */
1176 static void dirac_unpack_block_motion_data(DiracContext *s)
1178 GetBitContext *gb = &s->gb;
1179 uint8_t *sbsplit = s->sbsplit;
1181 DiracArith arith[8];
1185 /* [DIRAC_STD] 11.2.4 and 12.2.1 Number of blocks and superblocks */
     /* a superblock spans 4x4 blocks, hence the factor 4 on the separation */
1186 s->sbwidth = DIVRNDUP(s->source.width, 4*s->plane[0].xbsep);
1187 s->sbheight = DIVRNDUP(s->source.height, 4*s->plane[0].ybsep);
1188 s->blwidth = 4 * s->sbwidth;
1189 s->blheight = 4 * s->sbheight;
1191 /* [DIRAC_STD] 12.3.1 Superblock splitting modes. superblock_split_modes()
1192 decode superblock split modes */
1193 ff_dirac_init_arith_decoder(arith, gb, svq3_get_ue_golomb(gb)); /* svq3_get_ue_golomb(gb) is the length */
1194 for (y = 0; y < s->sbheight; y++) {
1195 for (x = 0; x < s->sbwidth; x++) {
1196 int split = dirac_get_arith_uint(arith, CTX_SB_F1, CTX_SB_DATA);
     /* decoded value is a residual on the prediction; % 3 keeps the mode in 0..2 */
1197 sbsplit[x] = (split + pred_sbsplit(sbsplit+x, s->sbwidth, x, y)) % 3;
     /* advance to the next row of superblocks */
1199 sbsplit += s->sbwidth;
1202 /* setup arith decoding */
     /* layout of arith[]: [0] block modes, [1..3] DC values,
        [4+2*i] and [5+2*i] the two MV components of reference i;
        each decoder is initialised with a length read from the bitstream */
1203 ff_dirac_init_arith_decoder(arith, gb, svq3_get_ue_golomb(gb));
1204 for (i = 0; i < s->num_refs; i++) {
1205 ff_dirac_init_arith_decoder(arith + 4 + 2 * i, gb, svq3_get_ue_golomb(gb));
1206 ff_dirac_init_arith_decoder(arith + 5 + 2 * i, gb, svq3_get_ue_golomb(gb));
1208 for (i = 0; i < 3; i++)
1209 ff_dirac_init_arith_decoder(arith+1+i, gb, svq3_get_ue_golomb(gb));
1211 for (y = 0; y < s->sbheight; y++)
1212 for (x = 0; x < s->sbwidth; x++) {
     /* split mode 0/1/2 -> 1x1, 2x2 or 4x4 coded blocks per superblock,
        each covering step x step entries of s->blmotion (4, 2 or 1) */
1213 int blkcnt = 1 << s->sbsplit[y * s->sbwidth + x];
1214 int step = 4 >> s->sbsplit[y * s->sbwidth + x];
1216 for (q = 0; q < blkcnt; q++)
1217 for (p = 0; p < blkcnt; p++) {
1218 int bx = 4 * x + p*step;
1219 int by = 4 * y + q*step;
1220 DiracBlock *block = &s->blmotion[by*s->blwidth + bx];
     /* decode one block, then copy it over the area it stands for */
1221 decode_block_params(s, arith, block, s->blwidth, bx, by);
1222 propagate_block_data(block, s->blwidth, step);
/**
 * Weight of sample i along one dimension of an overlapped block.
 *
 * ROLLOFF(k) yields 3 (k == 0) or 5 (k != 0) when offset == 1, and
 * 1 + (6*k + offset - 1) / (2*offset - 1) otherwise. Samples with
 * i > blen-1 - 2*offset use the mirrored index blen-1 - i.
 *
 * NOTE(review): the ROLLOFF expansion is not wrapped in parentheses, so it
 * is only safe where it forms a complete expression, as in the return below.
 *
 * @param i      sample index within the block
 * @param blen   block length in this dimension (callers pass xblen / yblen)
 * @param offset overlap offset in this dimension (callers pass xoffset / yoffset)
 */
1227 static int weight(int i, int blen, int offset)
1229 #define ROLLOFF(i) offset == 1 ? ((i) ? 5 : 3) : \
1230 (1 + (6*(i) + offset - 1) / (2*offset - 1))
     /* trailing overlap region: same ramp, mirrored */
1234 else if (i > blen-1 - 2*offset)
1235 return ROLLOFF(blen-1 - i);
/**
 * Fill one row of an OBMC weight table.
 *
 * Every entry written here is wy times a horizontal weight, which is either
 * the constant 8 or weight(x, p->xblen, p->xoffset).
 *
 * @param p           plane supplying xblen and xoffset
 * @param obmc_weight row to fill
 * @param stride      row length; entries from xblen up to stride are handled
 *                    by the last loop
 * @param left        non-zero: the first xblen/2 entries use the constant 8
 * @param right       1: entries from xblen/2 up to xblen use the constant 8
 *                    (callers in this file pass 0 or 1)
 * @param wy          vertical weight applied to the whole row
 */
1239 static void init_obmc_weight_row(Plane *p, uint8_t *obmc_weight, int stride,
1240 int left, int right, int wy)
     /* flat left half, only when 'left' is set (otherwise x stays 0) */
1243 for (x = 0; left && x < p->xblen >> 1; x++)
1244 obmc_weight[x] = wy*8;
     /* ramped part: up to xblen, or up to xblen/2 when 'right' is 1 */
1245 for (; x < p->xblen >> right; x++)
1246 obmc_weight[x] = wy*weight(x, p->xblen, p->xoffset);
     /* flat right half; empty when the previous loop already reached xblen */
1247 for (; x < p->xblen; x++)
1248 obmc_weight[x] = wy*8;
     /* remainder of the row beyond the block width */
1249 for (; x < stride; x++)
/**
 * Fill a complete OBMC weight table, one row per block line.
 *
 * This is the vertical counterpart of init_obmc_weight_row(): each row is
 * generated with a vertical weight of either the constant 8 or
 * weight(y, p->yblen, p->yoffset).
 *
 * @param p           plane supplying yblen and yoffset
 * @param obmc_weight table to fill; advanced by stride after every row
 * @param stride      distance between rows of the table
 * @param left        forwarded to init_obmc_weight_row()
 * @param right       forwarded to init_obmc_weight_row()
 * @param top         non-zero: the first yblen/2 rows use the constant 8
 * @param bottom      1: rows from yblen/2 up to yblen use the constant 8
 */
1253 static void init_obmc_weight(Plane *p, uint8_t *obmc_weight, int stride,
1254 int left, int right, int top, int bottom)
     /* flat upper half, only when 'top' is set */
1257 for (y = 0; top && y < p->yblen >> 1; y++) {
1258 init_obmc_weight_row(p, obmc_weight, stride, left, right, 8);
1259 obmc_weight += stride;
     /* ramped rows: up to yblen, or up to yblen/2 when 'bottom' is 1 */
1261 for (; y < p->yblen >> bottom; y++) {
1262 int wy = weight(y, p->yblen, p->yoffset);
1263 init_obmc_weight_row(p, obmc_weight, stride, left, right, wy);
1264 obmc_weight += stride;
     /* flat lower half; empty when the previous loop already reached yblen */
1266 for (; y < p->yblen; y++) {
1267 init_obmc_weight_row(p, obmc_weight, stride, left, right, 8);
1268 obmc_weight += stride;
/**
 * (Re)build the three OBMC weight tables used for block row 'by' of plane p.
 *
 * s->obmc_weight[0] is built with left = 1, [1] with neither side flag and
 * [2] with right = 1; mc_row() uses them for the first, the middle and the
 * last block of a row respectively. All three share the same top/bottom
 * flags and a row stride of MAX_BLOCKSIZE.
 *
 * @param s  decoder context owning the tables
 * @param p  plane supplying the block geometry
 * @param by block row index, 0 .. s->blheight-1
 */
1272 static void init_obmc_weights(DiracContext *s, Plane *p, int by)
1275 int bottom = by == s->blheight-1;
1277 /* don't bother re-initing for rows 2 to blheight-2, the weights don't change */
     /* by == 1 is the first row after the top one, so the tables built for
        the top row have to be replaced once */
1278 if (top || bottom || by == 1) {
1279 init_obmc_weight(p, s->obmc_weight[0], MAX_BLOCKSIZE, 1, 0, top, bottom);
1280 init_obmc_weight(p, s->obmc_weight[1], MAX_BLOCKSIZE, 0, 0, top, bottom);
1281 init_obmc_weight(p, s->obmc_weight[2], MAX_BLOCKSIZE, 0, 1, top, bottom);
/* Lookup table read by mc_subpel() as epel_weights[my&3][mx&3]; each entry
   is a group of four bytes that is handed on through src[4]. */
1285 static const uint8_t epel_weights[4][4][4] = {
1305 * For block x,y, determine which of the hpel planes to do bilinear
1306 * interpolation from and set src[] to the location in each hpel plane
1309 * @return the index of the put_dirac_pixels_tab function to use
1310 * 0 for 1 plane (fpel,hpel), 1 for 2 planes (qpel), 2 for 4 planes (qpel), and 3 for epel
/**
 * Work out the source pointers needed to motion-compensate one block from
 * reference 'ref'.
 *
 * src[0..3] are set to positions inside the half-pel planes of
 * s->ref_pics[ref]; where epel weights are used, src[4] points at the
 * matching epel_weights entry. If the block lies too close to the right or
 * bottom border (or at a negative position), the planes in use are first
 * copied through ff_emulated_edge_mc() into s->edge_emu_buffer[] and src[]
 * is redirected to those copies.
 *
 * @param s     decoder context
 * @param block block supplying the motion vector u.mv[ref]
 * @param src   output array of five pointers, see above
 * @param x     block position inside the plane
 * @param y     see x
 * @param ref   index into s->ref_pics[]
 * @param plane plane index
 * @return (nplanes>>1) + epel, used by block_mc() as index into its
 *         put/avg function tables
 */
1312 static int mc_subpel(DiracContext *s, DiracBlock *block, const uint8_t *src[5],
1313 int x, int y, int ref, int plane)
1315 Plane *p = &s->plane[plane];
1316 uint8_t **ref_hpel = s->ref_pics[ref]->hpel[plane];
1317 int motion_x = block->u.mv[ref][0];
1318 int motion_y = block->u.mv[ref][1];
1319 int mx, my, i, epel, nplanes = 0;
     /* scale the vector down by the chroma subsampling shifts */
1322 motion_x >>= s->chroma_x_shift;
1323 motion_y >>= s->chroma_y_shift;
     /* split into fractional part (low mv_precision bits) and integer part.
        NOTE(review): -1 << n left-shifts a negative value, which is
        undefined behaviour in ISO C; an unsigned mask would avoid that */
1326 mx = motion_x & ~(-1 << s->mv_precision);
1327 my = motion_y & ~(-1 << s->mv_precision);
1328 motion_x >>= s->mv_precision;
1329 motion_y >>= s->mv_precision;
1330 /* normalize subpel coordinates to epel */
1331 /* TODO: template this function? */
1332 mx <<= 3 - s->mv_precision;
1333 my <<= 3 - s->mv_precision;
     /* single-plane case: pick the hpel plane from the subpel position */
1342 src[0] = ref_hpel[(my>>1)+(mx>>2)] + y*p->stride + x;
     /* multi-plane case: start from the same position in all four planes */
1346 for (i = 0; i < 4; i++)
1347 src[i] = ref_hpel[i] + y*p->stride + x;
1349 /* if we're interpolating in the right/bottom halves, adjust the planes as needed
1350 we increment x/y because the edge changes for half of the pixels */
1357 src[0] += p->stride;
1358 src[1] += p->stride;
1366 /* check if we really only need 2 planes since either mx or my is
1367 a hpel position. (epel weights of 0 handle this there) */
1369 /* mx == 0: average [0] and [2]
1370 mx == 4: average [1] and [3] */
1371 src[!mx] = src[2 + !!mx];
1373 } else if (!(my&3)) {
1374 src[0] = src[(my>>1) ];
1375 src[1] = src[(my>>1)+1];
1379 /* adjust the ordering if needed so the weights work */
1381 FFSWAP(const uint8_t *, src[0], src[1]);
1382 FFSWAP(const uint8_t *, src[2], src[3]);
1385 FFSWAP(const uint8_t *, src[0], src[2]);
1386 FFSWAP(const uint8_t *, src[1], src[3]);
1388 src[4] = epel_weights[my&3][mx&3];
1392 /* fixme: v/h _edge_pos */
     /* the unsigned casts make negative x / y take this branch as well */
1393 if ((unsigned)x > p->width +EDGE_WIDTH/2 - p->xblen ||
1394 (unsigned)y > p->height+EDGE_WIDTH/2 - p->yblen) {
1395 for (i = 0; i < nplanes; i++) {
1396 ff_emulated_edge_mc(s->edge_emu_buffer[i], src[i], p->stride,
1397 p->xblen, p->yblen, x, y,
1398 p->width+EDGE_WIDTH/2, p->height+EDGE_WIDTH/2);
1399 src[i] = s->edge_emu_buffer[i];
1402 return (nplanes>>1) + epel;
/**
 * Accumulate a constant (DC) prediction, weighted per sample, into dst.
 *
 * @param dst         accumulation buffer
 * @param dc          DC value of the block
 * @param stride      row stride; not referenced by the statements shown here
 * @param obmc_weight weight table, MAX_BLOCKSIZE entries per row
 * @param xblen       block width; the inner loop handles two samples per
 *                    iteration, so this assumes xblen is even
 * @param yblen       block height
 */
1405 static void add_dc(uint16_t *dst, int dc, int stride,
1406 uint8_t *obmc_weight, int xblen, int yblen)
1411 for (y = 0; y < yblen; y++) {
1412 for (x = 0; x < xblen; x += 2) {
1413 dst[x ] += dc * obmc_weight[x ];
1414 dst[x+1] += dc * obmc_weight[x+1];
     /* next row of the weight table */
1417 obmc_weight += MAX_BLOCKSIZE;
/**
 * Produce the weighted prediction of one block and accumulate it in mctmp.
 *
 * The low two bits of block->ref select the source: a DC fill via add_dc(),
 * a single reference ((block->ref&3)-1), or both references combined either
 * with s->biweight_func or with the averaging put/avg pair.
 *
 * @param s           decoder context (function pointers, mcscratch, weights)
 * @param block       block to predict
 * @param mctmp       accumulation buffer for the OBMC sum
 * @param obmc_weight weight table to apply to this block
 * @param plane       plane index
 * @param dstx        block position, forwarded to mc_subpel()
 * @param dsty        see dstx
 */
1421 static void block_mc(DiracContext *s, DiracBlock *block,
1422 uint16_t *mctmp, uint8_t *obmc_weight,
1423 int plane, int dstx, int dsty)
1425 Plane *p = &s->plane[plane];
1426 const uint8_t *src[5];
1429 switch (block->ref&3) {
     /* no reference: DC prediction */
1431 add_dc(mctmp, block->u.dc[plane], p->stride, obmc_weight, p->xblen, p->yblen);
     /* one reference: ref bits 1 or 2 map to reference index 0 or 1 */
1435 idx = mc_subpel(s, block, src, dstx, dsty, (block->ref&3)-1, plane);
1436 s->put_pixels_tab[idx](s->mcscratch, src, p->stride, p->yblen);
     /* single-reference weighting uses the sum of both weights */
1438 s->weight_func(s->mcscratch, p->stride, s->weight_log2denom,
1439 s->weight[0] + s->weight[1], p->yblen);
     /* two references: fetch ref 0 into mcscratch, then combine with ref 1 */
1442 idx = mc_subpel(s, block, src, dstx, dsty, 0, plane);
1443 s->put_pixels_tab[idx](s->mcscratch, src, p->stride, p->yblen);
1444 idx = mc_subpel(s, block, src, dstx, dsty, 1, plane);
1445 if (s->biweight_func) {
1446 /* fixme: +32 is a quick hack */
1447 s->put_pixels_tab[idx](s->mcscratch + 32, src, p->stride, p->yblen);
1448 s->biweight_func(s->mcscratch, s->mcscratch+32, p->stride, s->weight_log2denom,
1449 s->weight[0], s->weight[1], p->yblen);
     /* unweighted case: the avg function blends ref 1 into mcscratch */
1451 s->avg_pixels_tab[idx](s->mcscratch, src, p->stride, p->yblen);
     /* apply the OBMC window and add the result to the accumulator */
1454 s->add_obmc(mctmp, s->mcscratch, p->stride, obmc_weight, p->yblen);
/**
 * Motion-compensate one row of blocks into mctmp.
 *
 * The first block is placed at -p->xoffset and uses s->obmc_weight[0],
 * blocks 1 .. s->blwidth-2 use s->obmc_weight[1] and the block after the
 * loop uses s->obmc_weight[2].
 *
 * @param s     decoder context
 * @param block first block of the row
 * @param mctmp accumulation buffer for this row
 * @param plane plane index
 * @param dsty  vertical position of the row, forwarded to block_mc()
 */
1457 static void mc_row(DiracContext *s, DiracBlock *block, uint16_t *mctmp, int plane, int dsty)
1459 Plane *p = &s->plane[plane];
     /* dstx starts at the position of the second block */
1460 int x, dstx = p->xbsep - p->xoffset;
1462 block_mc(s, block, mctmp, s->obmc_weight[0], plane, -p->xoffset, dsty);
1465 for (x = 1; x < s->blwidth-1; x++) {
1466 block_mc(s, block+x, mctmp, s->obmc_weight[1], plane, dstx, dsty);
     /* x == s->blwidth-1 here: last block of the row */
1470 block_mc(s, block+x, mctmp, s->obmc_weight[2], plane, dstx, dsty);
/**
 * Select the DSP routines used for motion compensation of the current plane.
 *
 * Copies the put/avg function tables and the add_obmc pointer for table
 * index idx out of s->diracdsp, and sets s->weight_func / s->biweight_func
 * only when the reference weights are not the trivial ones.
 * (How idx is derived is not part of the lines documented here.)
 *
 * @param s      decoder context to update
 * @param width  plane width
 * @param height plane height
 * @param xblen  block width
 * @param yblen  block height
 */
1473 static void select_dsp_funcs(DiracContext *s, int width, int height, int xblen, int yblen)
1481 memcpy(s->put_pixels_tab, s->diracdsp.put_dirac_pixels_tab[idx], sizeof(s->put_pixels_tab));
1482 memcpy(s->avg_pixels_tab, s->diracdsp.avg_dirac_pixels_tab[idx], sizeof(s->avg_pixels_tab));
1483 s->add_obmc = s->diracdsp.add_dirac_obmc[idx];
     /* explicit weighting is needed unless log2denom <= 1 and both weights are 1 */
1484 if (s->weight_log2denom > 1 || s->weight[0] != 1 || s->weight[1] != 1) {
1485 s->weight_func = s->diracdsp.weight_dirac_pixels_tab[idx];
1486 s->biweight_func = s->diracdsp.biweight_dirac_pixels_tab[idx];
     /* NULL function pointers mean "no weighting"; block_mc() tests biweight_func */
1488 s->weight_func = NULL;
1489 s->biweight_func = NULL;
/**
 * Prepare one plane of a reference frame for motion compensation.
 *
 * hpel[plane][0] aliases the frame data itself and gets its edges drawn.
 * For sub-pixel vectors, three additional half-pel planes are allocated on
 * first use, filled by dirac_hpel_filter() and edge-extended; the
 * ref->interpolated[plane] flag records that this has been done.
 *
 * @param s      decoder context (DSP functions, mv_precision)
 * @param ref    reference frame to prepare
 * @param plane  plane index
 * @param width  plane width
 * @param height plane height
 */
1493 static void interpolate_refplane(DiracContext *s, DiracFrame *ref, int plane, int width, int height)
1495 /* chroma allocates an edge of 8 when subsampled
1496 which for 4:2:2 means an h edge of 16 and v edge of 8
1497 just use 8 for everything for the moment */
1498 int i, edge = EDGE_WIDTH/2;
1500 ref->hpel[plane][0] = ref->avframe.data[plane];
1501 s->dsp.draw_edges(ref->hpel[plane][0], ref->avframe.linesize[plane], width, height, edge, edge, EDGE_TOP | EDGE_BOTTOM); /* EDGE_TOP | EDGE_BOTTOM values just copied to make it build, this needs to be ensured */
1503 /* no need for hpel if we only have fpel vectors */
1504 if (!s->mv_precision)
1507 for (i = 1; i < 4; i++) {
     /* allocate lazily; an existing buffer is reused.
        NOTE(review): the av_malloc() result is used on the next statement
        without a NULL check */
1508 if (!ref->hpel_base[plane][i])
1509 ref->hpel_base[plane][i] = av_malloc((height+2*edge) * ref->avframe.linesize[plane] + 32);
1510 /* we need to be 16-byte aligned even for chroma */
     /* skip 'edge' rows plus 16 bytes so that there is room for the border */
1511 ref->hpel[plane][i] = ref->hpel_base[plane][i] + edge*ref->avframe.linesize[plane] + 16;
     /* filter only once per frame and plane */
1514 if (!ref->interpolated[plane]) {
1515 s->diracdsp.dirac_hpel_filter(ref->hpel[plane][1], ref->hpel[plane][2],
1516 ref->hpel[plane][3], ref->hpel[plane][0],
1517 ref->avframe.linesize[plane], width, height);
1518 s->dsp.draw_edges(ref->hpel[plane][1], ref->avframe.linesize[plane], width, height, edge, edge, EDGE_TOP | EDGE_BOTTOM);
1519 s->dsp.draw_edges(ref->hpel[plane][2], ref->avframe.linesize[plane], width, height, edge, edge, EDGE_TOP | EDGE_BOTTOM);
1520 s->dsp.draw_edges(ref->hpel[plane][3], ref->avframe.linesize[plane], width, height, edge, edge, EDGE_TOP | EDGE_BOTTOM);
1522 ref->interpolated[plane] = 1;
1526 * Dirac Specification ->
1527 * 13.0 Transform data syntax. transform_data()
/**
 * Decode the transform data of the current picture and reconstruct all
 * three planes into s->current_picture.
 *
 * For pictures without references the inverse wavelet transform output is
 * written directly with put_signed_rect_clamped(); otherwise each row of
 * blocks is motion-compensated into s->mctmp and the IDWT output is added
 * to it with add_rect_clamped().
 *
 * @param s decoder context
 * @return presumably 0 on success; the caller in this file treats any
 *         non-zero value as failure
 */
1529 static int dirac_decode_frame_internal(DiracContext *s)
1532 int y, i, comp, dsty;
1535 /* [DIRAC_STD] 13.5.1 low_delay_transform_data() */
1536 for (comp = 0; comp < 3; comp++) {
1537 Plane *p = &s->plane[comp];
     /* start from an all-zero coefficient buffer */
1538 memset(p->idwt_buf, 0, p->idwt_stride * p->idwt_height * sizeof(IDWTELEM));
1544 for (comp = 0; comp < 3; comp++) {
1545 Plane *p = &s->plane[comp];
1546 uint8_t *frame = s->current_picture->avframe.data[comp];
1548 /* FIXME: small resolutions */
     /* carve four edge-emulation buffers out of one allocation */
1549 for (i = 0; i < 4; i++)
1550 s->edge_emu_buffer[i] = s->edge_emu_buffer_base + i*FFALIGN(p->width, 16);
1552 if (!s->zero_res && !s->low_delay)
1554 memset(p->idwt_buf, 0, p->idwt_stride * p->idwt_height * sizeof(IDWTELEM));
1555 decode_component(s, comp); /* [DIRAC_STD] 13.4.1 core_transform_data() */
1557 if (ff_spatial_idwt_init2(&d, p->idwt_buf, p->idwt_width, p->idwt_height, p->idwt_stride,
1558 s->wavelet_idx+2, s->wavelet_depth, p->idwt_tmp))
1561 if (!s->num_refs) { /* intra */
     /* run the IDWT in slices of 16 lines and store each slice right away */
1562 for (y = 0; y < p->height; y += 16) {
1563 ff_spatial_idwt_slice2(&d, y+16); /* decode */
1564 s->diracdsp.put_signed_rect_clamped(frame + y*p->stride, p->stride,
1565 p->idwt_buf + y*p->idwt_stride, p->idwt_stride, p->width, 16);
1567 } else { /* inter */
     /* number of mctmp elements between two consecutive rows of blocks */
1568 int rowheight = p->ybsep*p->stride;
1570 select_dsp_funcs(s, p->width, p->height, p->xblen, p->yblen);
1572 for (i = 0; i < s->num_refs; i++)
1573 interpolate_refplane(s, s->ref_pics[i], comp, p->width, p->height);
     /* clear the first 4*yoffset*stride bytes of the accumulator */
1575 memset(s->mctmp, 0, 4*p->yoffset*p->stride);
1578 for (y = 0; y < s->blheight; y++) {
     /* first picture line touched by this row of blocks, clipped to 0 */
1580 start = FFMAX(dsty, 0);
1581 uint16_t *mctmp = s->mctmp + y*rowheight;
1582 DiracBlock *blocks = s->blmotion + y*s->blwidth;
1584 init_obmc_weights(s, p, y);
     /* last row, or a row reaching past the picture: stop at the bottom edge */
1586 if (y == s->blheight-1 || start+p->ybsep > p->height)
1587 h = p->height - start;
1589 h = p->ybsep - (start - dsty);
     /* clear the part of the accumulator that this row is the first to write */
1593 memset(mctmp+2*p->yoffset*p->stride, 0, 2*rowheight);
1594 mc_row(s, blocks, mctmp, comp, dsty);
     /* skip the lines above 'start' and the left overlap margin */
1596 mctmp += (start - dsty)*p->stride + p->xoffset;
1597 ff_spatial_idwt_slice2(&d, start + h); /* decode */
1598 s->diracdsp.add_rect_clamped(frame + start*p->stride, mctmp, p->stride,
1599 p->idwt_buf + start*p->idwt_stride, p->idwt_stride, p->width, h);
1611 * Dirac Specification ->
1612 * 11.1.1 Picture Header. picture_header()
/**
 * Parse the picture header and the data that precedes the transform data.
 *
 * Reads the picture number, resolves the s->num_refs reference pictures
 * into s->ref_pics[], handles retirement of a reference frame and insertion
 * of the current picture into s->ref_frames, then unpacks the prediction
 * parameters, the block motion data and the wavelet transform parameters.
 *
 * @param s decoder context; s->current_picture must already be set
 * @return presumably 0 on success; the caller in this file treats any
 *         non-zero value as failure
 */
1614 static int dirac_decode_picture_header(DiracContext *s)
1617 int i, j, refnum, refdist;
1618 GetBitContext *gb = &s->gb;
1620 /* [DIRAC_STD] 11.1.1 Picture Header. picture_header() PICTURE_NUM */
1621 picnum = s->current_picture->avframe.display_picture_number = get_bits_long(gb, 32);
1624 av_log(s->avctx,AV_LOG_DEBUG,"PICTURE_NUM: %d\n",picnum);
1626 /* if this is the first keyframe after a sequence header, start our
1627 reordering from here */
1628 if (s->frame_number < 0)
1629 s->frame_number = picnum;
1631 s->ref_pics[0] = s->ref_pics[1] = NULL;
1632 for (i = 0; i < s->num_refs; i++) {
     /* reference numbers are coded relative to the current picture number */
1633 refnum = picnum + dirac_get_se_golomb(gb);
1636 /* find the closest reference to the one we want */
1637 /* Jordi: this is needed if the referenced picture hasn't yet arrived */
     /* the search ends early once an exact match brings refdist to 0 */
1638 for (j = 0; j < MAX_REFERENCE_FRAMES && refdist; j++)
1639 if (s->ref_frames[j]
1640 && FFABS(s->ref_frames[j]->avframe.display_picture_number - refnum) < refdist) {
1641 s->ref_pics[i] = s->ref_frames[j];
1642 refdist = FFABS(s->ref_frames[j]->avframe.display_picture_number - refnum);
1645 if (!s->ref_pics[i] || refdist)
1646 av_log(s->avctx, AV_LOG_DEBUG, "Reference not found\n");
1648 /* if there were no references at all, allocate one */
1649 if (!s->ref_pics[i])
1650 for (j = 0; j < MAX_FRAMES; j++)
1651 if (!s->all_frames[j].avframe.data[0]) {
1652 s->ref_pics[i] = &s->all_frames[j];
     /* NOTE(review): the get_buffer() return value is not checked on this line */
1653 s->avctx->get_buffer(s->avctx, &s->ref_pics[i]->avframe);
1657 /* retire the reference frames that are not used anymore */
1658 if (s->current_picture->avframe.reference) {
     /* a retire number equal to picnum means "nothing to retire" */
1659 retire = picnum + dirac_get_se_golomb(gb);
1660 if (retire != picnum) {
1661 DiracFrame *retire_pic = remove_frame(s->ref_frames, retire);
     /* drop every reference flag except the delayed-output one */
1664 retire_pic->avframe.reference &= DELAYED_PIC_REF;
1666 av_log(s->avctx, AV_LOG_DEBUG, "Frame to retire not found\n");
1669 /* if reference array is full, remove the oldest as per the spec */
1670 while (add_frame(s->ref_frames, MAX_REFERENCE_FRAMES, s->current_picture)) {
1671 av_log(s->avctx, AV_LOG_ERROR, "Reference frame overflow\n");
1672 remove_frame(s->ref_frames, s->ref_frames[0]->avframe.display_picture_number)->avframe.reference &= DELAYED_PIC_REF;
1677 if (dirac_unpack_prediction_parameters(s)) /* [DIRAC_STD] 11.2 Picture Prediction Data. picture_prediction() */
1679 dirac_unpack_block_motion_data(s); /* [DIRAC_STD] 12. Block motion data syntax */
1681 if (dirac_unpack_idwt_params(s)) /* [DIRAC_STD] 11.3 Wavelet transform data */
/**
 * Output the delayed frame with the lowest picture number, if any.
 *
 * The chosen frame is removed from the NULL-terminated s->delay_frames
 * list by shifting the following entries down, its DELAYED_PIC_REF flag is
 * toggled and its AVFrame is copied to *picture.
 *
 * @param s         decoder context
 * @param picture   destination for the output frame
 * @param data_size set to sizeof(AVFrame) when a frame is returned
 */
1688 static int get_delayed_pic(DiracContext *s, AVFrame *picture, int *data_size)
1690 DiracFrame *out = s->delay_frames[0];
1693 /* find frame with lowest picture number */
1694 for (i = 1; s->delay_frames[i]; i++)
1695 if (s->delay_frames[i]->avframe.display_picture_number < out->avframe.display_picture_number) {
1696 out = s->delay_frames[i];
     /* close the gap; the terminating NULL is copied down as well */
1700 for (i = out_idx; s->delay_frames[i]; i++)
1701 s->delay_frames[i] = s->delay_frames[i+1];
1704 out->avframe.reference ^= DELAYED_PIC_REF;
1705 *data_size = sizeof(AVFrame);
1706 *(AVFrame *)picture = out->avframe;
1713 * Dirac Specification ->
1714 * 9.6 Parse Info Header Syntax. parse_info()
1715 * 4 byte start code + byte parse code + 4 byte size + 4 byte previous size
/* 4 + 1 + 4 + 4 bytes; also the minimum data unit size accepted by
   dirac_decode_data_unit() and the search margin in dirac_decode_frame() */
1717 #define DATA_UNIT_HEADER_SIZE 13
1719 /* [DIRAC_STD] dirac_decode_data_unit refers to the while loop defined in 9.3
1720 inside the function parse_sequence() */
/**
 * Decode a single Dirac data unit.
 *
 * The parse code at buf[4] selects the action: sequence header, end of
 * sequence, auxiliary data, or (bit 3 set) a picture, for which a free
 * frame is taken from s->all_frames, a buffer is requested and the picture
 * header and transform data are decoded.
 *
 * @param avctx codec context; avctx->priv_data is the DiracContext
 * @param buf   start of the data unit (the caller passes the position of
 *              the "BBCD" prefix)
 * @param size  size of the data unit in bytes, as read from its header
 * @return presumably 0 on success; the caller in this file treats any
 *         non-zero value as failure
 */
1721 static int dirac_decode_data_unit(AVCodecContext *avctx, const uint8_t *buf, int size)
1723 DiracContext *s = avctx->priv_data;
1724 DiracFrame *pic = NULL;
     /* NOTE(review): buf[4] is read before the size check below; this relies
        on the caller having at least DATA_UNIT_HEADER_SIZE bytes at buf */
1725 int i, parse_code = buf[4];
1727 if (size < DATA_UNIT_HEADER_SIZE)
     /* the bit reader covers the payload that follows the 13 byte header */
1730 init_get_bits(&s->gb, &buf[13], 8*(size - DATA_UNIT_HEADER_SIZE));
1732 if (parse_code == pc_seq_header) {
1733 if (s->seen_sequence_header)
1736 /* [DIRAC_STD] 10. Sequence header */
1737 if (avpriv_dirac_parse_sequence_header(avctx, &s->gb, &s->source))
1740 avcodec_get_chroma_sub_sample(avctx->pix_fmt, &s->chroma_x_shift, &s->chroma_y_shift);
1742 if (alloc_sequence_buffers(s))
1745 s->seen_sequence_header = 1;
1746 } else if (parse_code == pc_eos) { /* [DIRAC_STD] End of Sequence */
1747 free_sequence_buffers(s);
1748 s->seen_sequence_header = 0;
1749 } else if (parse_code == pc_aux_data) {
1750 if (buf[13] == 1) { /* encoder implementation/version */
1752 /* versions older than 1.0.8 don't store quant delta for
1753 subbands with only one codeblock */
     /* NOTE(review): sscanf() needs a terminated string; presumably this
        relies on input buffer padding - confirm */
1754 if (sscanf(buf+14, "Schroedinger %d.%d.%d", ver, ver+1, ver+2) == 3)
1755 if (ver[0] == 1 && ver[1] == 0 && ver[2] <= 7)
1756 s->old_delta_quant = 1;
1758 } else if (parse_code & 0x8) { /* picture data unit */
1759 if (!s->seen_sequence_header) {
1760 av_log(avctx, AV_LOG_DEBUG, "Dropping frame without sequence header\n");
1764 /* find an unused frame */
     /* there is no early exit in this loop, so the last free slot is used */
1765 for (i = 0; i < MAX_FRAMES; i++)
1766 if (s->all_frames[i].avframe.data[0] == NULL)
1767 pic = &s->all_frames[i];
1769 av_log(avctx, AV_LOG_ERROR, "framelist full\n");
1773 avcodec_get_frame_defaults(&pic->avframe);
1775 /* [DIRAC_STD] Defined in 9.6.1 ... */
1776 s->num_refs = parse_code & 0x03; /* [DIRAC_STD] num_refs() */
1777 s->is_arith = (parse_code & 0x48) == 0x08; /* [DIRAC_STD] using_ac() */
1778 s->low_delay = (parse_code & 0x88) == 0x88; /* [DIRAC_STD] is_low_delay() */
1779 pic->avframe.reference = (parse_code & 0x0C) == 0x0C; /* [DIRAC_STD] is_reference() */
1780 pic->avframe.key_frame = s->num_refs == 0; /* [DIRAC_STD] is_intra() */
1781 pic->avframe.pict_type = s->num_refs + 1; /* Definition of AVPictureType in avutil.h */
1783 if (avctx->get_buffer(avctx, &pic->avframe) < 0) {
1784 av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
     /* the plane strides follow the line sizes of the newly obtained buffer */
1787 s->current_picture = pic;
1788 s->plane[0].stride = pic->avframe.linesize[0];
1789 s->plane[1].stride = pic->avframe.linesize[1];
1790 s->plane[2].stride = pic->avframe.linesize[2];
1792 /* [DIRAC_STD] 11.1 Picture parse. picture_parse() */
1793 if (dirac_decode_picture_header(s))
1796 /* [DIRAC_STD] 13.0 Transform data syntax. transform_data() */
1797 if (dirac_decode_frame_internal(s))
/**
 * Decoder entry point: decode one packet and output at most one frame.
 *
 * Frames that are no longer referenced are released first. The packet is
 * then scanned for "BBCD" parse info prefixes and every data unit found is
 * passed to dirac_decode_data_unit(). If a picture was decoded, it is
 * either returned directly (its number equals s->frame_number) or queued
 * in s->delay_frames, in which case a previously delayed frame may be
 * returned instead.
 *
 * @param avctx     codec context; avctx->priv_data is the DiracContext
 * @param data      output AVFrame
 * @param data_size set to sizeof(AVFrame) when a frame is written to data
 * @param pkt       input packet
 */
1803 static int dirac_decode_frame(AVCodecContext *avctx, void *data, int *data_size, AVPacket *pkt)
1805 DiracContext *s = avctx->priv_data;
     /* NOTE(review): 'data' is written below as an AVFrame but read here as
        a DiracFrame; presumably avframe is the first member - confirm */
1806 DiracFrame *picture = data;
1807 uint8_t *buf = pkt->data;
1808 int buf_size = pkt->size;
1809 int i, data_unit_size, buf_idx = 0;
1811 /* release unused frames */
1812 for (i = 0; i < MAX_FRAMES; i++)
1813 if (s->all_frames[i].avframe.data[0] && !s->all_frames[i].avframe.reference) {
1814 avctx->release_buffer(avctx, &s->all_frames[i].avframe);
     /* the half-pel planes have to be recomputed when the slot is reused */
1815 memset(s->all_frames[i].interpolated, 0, sizeof(s->all_frames[i].interpolated));
1818 s->current_picture = NULL;
1821 /* end of stream, so flush delayed pics */
1823 return get_delayed_pic(s, (AVFrame *)data, data_size);
1826 /*[DIRAC_STD] Here starts the code from parse_info() defined in 9.6
1827 [DIRAC_STD] PARSE_INFO_PREFIX = "BBCD" as defined in ISO/IEC 646
1828 BBCD start code search */
1829 for (; buf_idx + DATA_UNIT_HEADER_SIZE < buf_size; buf_idx++) {
1830 if (buf[buf_idx ] == 'B' && buf[buf_idx+1] == 'B' &&
1831 buf[buf_idx+2] == 'C' && buf[buf_idx+3] == 'D')
1834 /* BBCD found or end of data */
1835 if (buf_idx + DATA_UNIT_HEADER_SIZE >= buf_size)
     /* the 32 bit size field follows the prefix and the parse code byte */
1838 data_unit_size = AV_RB32(buf+buf_idx+5);
     /* NOTE(review): data_unit_size is a signed int filled from 32 bits of
        the stream; a negative value or an overflowing sum would pass this
        test - consider comparing against buf_size - buf_idx instead */
1839 if (buf_idx + data_unit_size > buf_size || !data_unit_size) {
1840 if(buf_idx + data_unit_size > buf_size)
1841 av_log(s->avctx, AV_LOG_ERROR,
1842 "Data unit with size %d is larger than input buffer, discarding\n",
1847 /* [DIRAC_STD] dirac_decode_data_unit makes reference to the while defined in 9.3 inside the function parse_sequence() */
1848 if (dirac_decode_data_unit(avctx, buf+buf_idx, data_unit_size))
1850 av_log(s->avctx, AV_LOG_ERROR,"Error in dirac_decode_data_unit\n");
     /* continue the search right after this data unit */
1853 buf_idx += data_unit_size;
1856 if (!s->current_picture)
     /* picture arrived ahead of its display time: queue it and see whether
        the frame that is due now is already waiting */
1859 if (s->current_picture->avframe.display_picture_number > s->frame_number) {
1860 DiracFrame *delayed_frame = remove_frame(s->delay_frames, s->frame_number);
1862 s->current_picture->avframe.reference |= DELAYED_PIC_REF;
1864 if (add_frame(s->delay_frames, MAX_DELAY, s->current_picture)) {
1865 int min_num = s->delay_frames[0]->avframe.display_picture_number;
1866 /* Too many delayed frames, so we display the frame with the lowest pts */
1867 av_log(avctx, AV_LOG_ERROR, "Delay frame overflow\n");
1868 delayed_frame = s->delay_frames[0];
1870 for (i = 1; s->delay_frames[i]; i++)
1871 if (s->delay_frames[i]->avframe.display_picture_number < min_num)
1872 min_num = s->delay_frames[i]->avframe.display_picture_number;
     /* take the lowest numbered frame out, which frees a slot for the current one */
1874 delayed_frame = remove_frame(s->delay_frames, min_num);
1875 add_frame(s->delay_frames, MAX_DELAY, s->current_picture);
1878 if (delayed_frame) {
1879 delayed_frame->avframe.reference ^= DELAYED_PIC_REF;
1880 *(AVFrame*)data = delayed_frame->avframe;
1881 *data_size = sizeof(AVFrame);
1883 } else if (s->current_picture->avframe.display_picture_number == s->frame_number) {
1884 /* The right frame at the right time :-) */
1885 *(AVFrame*)data = s->current_picture->avframe;
1886 *data_size = sizeof(AVFrame);
     /* the next frame to output is the one following what was just returned */
1890 s->frame_number = picture->avframe.display_picture_number + 1;
/* Codec descriptor of the Dirac video decoder: ties the init, close,
   decode and flush callbacks of this file to CODEC_ID_DIRAC. */
1895 AVCodec ff_dirac_decoder = {
1897 .type = AVMEDIA_TYPE_VIDEO,
1898 .id = CODEC_ID_DIRAC,
1899 .priv_data_size = sizeof(DiracContext),
1900 .init = dirac_decode_init,
1902 .close = dirac_decode_end,
1903 .decode = dirac_decode_frame,
     /* frames may be output later than the packet that carried them */
1904 .capabilities = CODEC_CAP_DELAY,
1905 .flush = dirac_decode_flush,
1906 .long_name = NULL_IF_CONFIG_SMALL("BBC Dirac VC-2"),