2 * Copyright (C) 2007 Marco Gerards <marco@gnu.org>
3 * Copyright (C) 2009 David Conrad
4 * Copyright (C) 2011 Jordi Ortiz
6 * This file is part of FFmpeg.
8 * FFmpeg is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
13 * FFmpeg is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with FFmpeg; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
26 * @author Marco Gerards <marco@gnu.org>, David Conrad, Jordi Ortiz <nenjordi@gmail.com>
32 #include "bytestream.h"
34 #include "dirac_arith.h"
35 #include "mpeg12data.h"
/* Compile-time limits and flag values used by the decoder. */
41 * The spec limits the number of wavelet decompositions to 4 for both
42 * level 1 (VC-2) and 128 (long-gop default).
43 * 5 decompositions is the maximum before >16-bit buffers are needed.
44 * Schroedinger allows this for DD 9,7 and 13,7 wavelets only, limiting
45 * the others to 4 decompositions (or 3 for the fidelity filter).
47 * We use this instead of MAX_DECOMPOSITIONS to save some memory.
49 #define MAX_DWT_LEVELS 5
52 * The spec limits this to 3 for frame coding, but in practice can be as high as 6
54 #define MAX_REFERENCE_FRAMES 8
55 #define MAX_DELAY 5 /* limit for main profile for frame coding (TODO: field coding) */
/* size of the all_frames[] pool: every reference slot, every delay slot, plus one more */
56 #define MAX_FRAMES (MAX_REFERENCE_FRAMES + MAX_DELAY + 1)
57 #define MAX_QUANT 68 /* max quant for VC-2 */
58 #define MAX_BLOCKSIZE 32 /* maximum xblen/yblen we support */
61 * DiracBlock->ref flags, if set then the block does MC from the given ref
/* bit 0: first reference, bit 1: second reference, bit 2: global motion */
63 #define DIRAC_REF_MASK_REF1 1
64 #define DIRAC_REF_MASK_REF2 2
65 #define DIRAC_REF_MASK_GLOBAL 4
68 * Value of Picture.reference when Picture is not a reference picture, but
69 * is held for delayed output.
71 #define DELAYED_PIC_REF 4
/* every use of ff_emulated_edge_mc in this file resolves to the _8 variant */
73 #define ff_emulated_edge_mc ff_emulated_edge_mc_8 /* Fix: change the calls to this function regarding bit depth */
/**
 * Round size up to the next multiple of (1 << depth).
 *
 * Both parameters are parenthesized so that compound arguments (a ternary,
 * a bitwise-or, ...) expand with the intended precedence.
 * depth is evaluated three times: do not pass expressions with side effects.
 */
#define CALC_PADDING(size, depth) \
    ((((size) + (1 << (depth)) - 1) >> (depth)) << (depth))

/**
 * Integer division of a by b rounded up (meant for a >= 0 and b > 0).
 * b is evaluated twice: do not pass expressions with side effects.
 */
#define DIVRNDUP(a, b) (((a) + (b) - 1) / (b))
82 int interpolated[3]; /* 1 if hpel[] is valid */
84 uint8_t *hpel_base[3][4];
91 } u; /* anonymous unions aren't in C99 :( */
/* One wavelet subband of a plane (only some members are visible in this excerpt). */
95 typedef struct SubBand {
/* assigned in init_planes() to the same-orientation band at level-1 */
103 struct SubBand *parent;
/* points into the bitstream buffer at the first byte of this band's coded data
 * (set in decode_component()) */
107 const uint8_t *coeff_data;
/* Per-component (Y, U, V) decoding state (only some members are visible in this excerpt). */
110 typedef struct Plane {
/* base of the coefficient buffer allocation; idwt_buf points top_padding rows
 * past it (see alloc_sequence_buffers()) */
119 IDWTELEM *idwt_buf_base;
125 /* block separation (block n+1 starts after this many pixels in block n) */
128 /* amount of overspill on each edge (half of the overlap between blocks) */
/* indexed [level][orientation] */
132 SubBand band[MAX_DWT_LEVELS][4];
/* Decoder private context, stored in AVCodecContext.priv_data
 * (only some members are visible in this excerpt). */
135 typedef struct DiracContext {
136 AVCodecContext *avctx;
138 DiracDSPContext diracdsp;
140 dirac_source_params source;
141 int seen_sequence_header;
142 int frame_number; /* number of the next frame to display */
147 int zero_res; /* zero residue flag */
148 int is_arith; /* whether coeffs use arith or golomb coding */
149 int low_delay; /* use the low delay syntax */
150 int globalmc_flag; /* use global motion compensation */
151 int num_refs; /* number of reference pictures */
153 /* wavelet decoding */
154 unsigned wavelet_depth; /* depth of the IDWT */
/* index of the wavelet filter; values above 6 are rejected when parsed */
155 unsigned wavelet_idx;
158 * schroedinger older than 1.0.8 doesn't store
159 * quant delta if only one codebook exists in a band
161 unsigned old_delta_quant;
162 unsigned codeblock_mode;
/* indexed by level + (orientation != LL), hence MAX_DWT_LEVELS+1 entries */
167 } codeblock[MAX_DWT_LEVELS+1];
170 unsigned num_x; /* number of horizontal slices */
171 unsigned num_y; /* number of vertical slices */
172 AVRational bytes; /* average bytes per slice */
173 uint8_t quant[MAX_DWT_LEVELS][4]; /* [DIRAC_STD] E.1 */
177 int pan_tilt[2]; /* pan/tilt vector */
178 int zrs[2][2]; /* zoom/rotate/shear matrix */
179 int perspective[2]; /* perspective vector */
181 unsigned perspective_exp;
184 /* motion compensation */
185 uint8_t mv_precision; /* [DIRAC_STD] 11.2.5 motion vector precision (0..3) */
186 int16_t weight[2]; /* [DIRAC_STD] REF1_WT and REF2_WT */
187 unsigned weight_log2denom; /* [DIRAC_STD] REFS_WT_PRECISION */
189 int blwidth; /* number of blocks (horizontally) */
190 int blheight; /* number of blocks (vertically) */
191 int sbwidth; /* number of superblocks (horizontally) */
192 int sbheight; /* number of superblocks (vertically) */
/* per-block motion data, addressed as blmotion[by*blwidth + bx] */
195 DiracBlock *blmotion;
197 uint8_t *edge_emu_buffer[4];
198 uint8_t *edge_emu_buffer_base;
200 uint16_t *mctmp; /* buffer holding the MC data multiplied by OBMC weights */
203 DECLARE_ALIGNED(16, uint8_t, obmc_weight)[3][MAX_BLOCKSIZE*MAX_BLOCKSIZE];
205 void (*put_pixels_tab[4])(uint8_t *dst, const uint8_t *src[5], int stride, int h);
206 void (*avg_pixels_tab[4])(uint8_t *dst, const uint8_t *src[5], int stride, int h);
207 void (*add_obmc)(uint16_t *dst, const uint8_t *src, int stride, const uint8_t *obmc_weight, int yblen);
208 dirac_weight_func weight_func;
209 dirac_biweight_func biweight_func;
211 DiracFrame *current_picture;
212 DiracFrame *ref_pics[2];
/* NULL-terminated pointer lists (one spare slot each); the frames themselves
 * live in all_frames[] */
214 DiracFrame *ref_frames[MAX_REFERENCE_FRAMES+1];
215 DiracFrame *delay_frames[MAX_DELAY+1];
216 DiracFrame all_frames[MAX_FRAMES];
/* Parse codes identifying the kind of each data unit
 * (only the first enumerator is visible in this excerpt). */
220 * Dirac Specification ->
221 * Parse code values. 9.6.1 Table 9.1
223 enum dirac_parse_code {
224 pc_seq_header = 0x00,
/* Default low-delay quantisation matrices, indexed
 * [wavelet_idx][level][orientation]. One row per wavelet_idx 0..6; used by
 * dirac_unpack_idwt_params() when the stream carries no custom matrix, which
 * is only accepted for wavelet_depth <= 4. */
237 static const uint8_t default_qmat[][4][4] = {
238 { { 5, 3, 3, 0}, { 0, 4, 4, 1}, { 0, 5, 5, 2}, { 0, 6, 6, 3} },
239 { { 4, 2, 2, 0}, { 0, 4, 4, 2}, { 0, 5, 5, 3}, { 0, 7, 7, 5} },
240 { { 5, 3, 3, 0}, { 0, 4, 4, 1}, { 0, 5, 5, 2}, { 0, 6, 6, 3} },
241 { { 8, 4, 4, 0}, { 0, 4, 4, 0}, { 0, 4, 4, 0}, { 0, 4, 4, 0} },
242 { { 8, 4, 4, 0}, { 0, 4, 4, 0}, { 0, 4, 4, 0}, { 0, 4, 4, 0} },
243 { { 0, 4, 4, 8}, { 0, 8, 8, 12}, { 0, 13, 13, 17}, { 0, 17, 17, 21} },
244 { { 3, 1, 1, 0}, { 0, 4, 4, 2}, { 0, 6, 6, 5}, { 0, 9, 9, 7} },
/* Quantisation factor per quantiser index (0..MAX_QUANT). Callers multiply a
 * decoded magnitude by this value and shift right by 2.
 * NOTE(review): only the first 56 of the MAX_QUANT+1 entries are visible in
 * this excerpt. */
247 static const int qscale_tab[MAX_QUANT+1] = {
248 4, 5, 6, 7, 8, 10, 11, 13,
249 16, 19, 23, 27, 32, 38, 45, 54,
250 64, 76, 91, 108, 128, 152, 181, 215,
251 256, 304, 362, 431, 512, 609, 724, 861,
252 1024, 1218, 1448, 1722, 2048, 2435, 2896, 3444,
253 4096, 4871, 5793, 6889, 8192, 9742, 11585, 13777,
254 16384, 19484, 23170, 27554, 32768, 38968, 46341, 55109,
/* Dequantisation offset per quantiser index (0..MAX_QUANT); lowdelay_subband()
 * always uses this table.
 * NOTE(review): the condition under which codeblock() picks this table over
 * qoffset_inter_tab is not visible here, and only the first 56 of the
 * MAX_QUANT+1 entries are visible. */
258 static const int qoffset_intra_tab[MAX_QUANT+1] = {
259 1, 2, 3, 4, 4, 5, 6, 7,
260 8, 10, 12, 14, 16, 19, 23, 27,
261 32, 38, 46, 54, 64, 76, 91, 108,
262 128, 152, 181, 216, 256, 305, 362, 431,
263 512, 609, 724, 861, 1024, 1218, 1448, 1722,
264 2048, 2436, 2897, 3445, 4096, 4871, 5793, 6889,
265 8192, 9742, 11585, 13777, 16384, 19484, 23171, 27555,
/* Alternative dequantisation offset per quantiser index (0..MAX_QUANT),
 * selected in codeblock().
 * NOTE(review): the selecting condition is not visible here, and only the
 * first 56 of the MAX_QUANT+1 entries are visible. */
269 static const int qoffset_inter_tab[MAX_QUANT+1] = {
270 1, 2, 2, 3, 3, 4, 4, 5,
271 6, 7, 9, 10, 12, 14, 17, 20,
272 24, 29, 34, 41, 48, 57, 68, 81,
273 96, 114, 136, 162, 192, 228, 272, 323,
274 384, 457, 543, 646, 768, 913, 1086, 1292,
275 1536, 1827, 2172, 2583, 3072, 3653, 4344, 5166,
276 6144, 7307, 8689, 10333, 12288, 14613, 17378, 20666,
/**
 * Divide by 3 with rounding to nearest, using a multiply and a shift instead
 * of a division (magic number division by 3 from schroedinger).
 *
 * 21845 is 65536/3 rounded down and 10922 is 65536/6 rounded down, so the
 * expression evaluates (x+1)/3 + 1/6 == x/3 + 1/2 in 16.16 fixed point and
 * then drops the fraction.
 *
 * @param x value to divide; must be small enough that (x+1)*21845 does not
 *          overflow int. Negative inputs rely on an arithmetic right shift.
 * @return  x/3 rounded to the nearest integer
 */
static inline int divide3(int x)
{
    return ((x+1)*21845 + 10922) >> 16;
}
/*
 * Look up the frame whose display_picture_number equals picnum in the
 * NULL-terminated list framelist and close the gap it leaves by shifting the
 * following entries down one slot.
 * NOTE(review): the lines that record remove_idx, guard the shift and return
 * the result are not visible in this excerpt; presumably the removed frame
 * (or NULL when nothing matched) is returned - confirm.
 */
286 static DiracFrame *remove_frame(DiracFrame *framelist[], int picnum)
288 DiracFrame *remove_pic = NULL;
289 int i, remove_idx = -1;
/* scan up to the NULL terminator */
291 for (i = 0; framelist[i]; i++)
292 if (framelist[i]->avframe.display_picture_number == picnum) {
293 remove_pic = framelist[i];
/* copying framelist[i+1] also moves the NULL terminator down */
298 for (i = remove_idx; framelist[i]; i++)
299 framelist[i] = framelist[i+1];
/*
 * Store frame in one of the first maxframes slots of framelist.
 * NOTE(review): the slot-selection condition and the return statements are
 * not visible in this excerpt; presumably the first empty slot is used and
 * an error is returned when the list is full - confirm.
 */
304 static int add_frame(DiracFrame *framelist[], int maxframes, DiracFrame *frame)
307 for (i = 0; i < maxframes; i++)
309 framelist[i] = frame;
/*
 * Allocate the buffers whose size depends on the sequence dimensions: the
 * per-plane IDWT buffers, the superblock split map, the block motion array
 * and the motion compensation scratch buffers.
 * Returns AVERROR(ENOMEM) when one of the checked allocations fails.
 * NOTE(review): the success return is not visible in this excerpt.
 */
315 static int alloc_sequence_buffers(DiracContext *s)
/* counts computed with a divisor of 4, i.e. as if the block separation were 1
 * (compare dirac_unpack_block_motion_data(), which divides by 4*xbsep) */
317 int sbwidth = DIVRNDUP(s->source.width, 4);
318 int sbheight = DIVRNDUP(s->source.height, 4);
319 int i, w, h, top_padding;
321 /* todo: think more about this / use or set Plane here */
322 for (i = 0; i < 3; i++) {
/* planes 1 and 2 are scaled by the chroma shifts */
323 int max_xblen = MAX_BLOCKSIZE >> (i ? s->chroma_x_shift : 0);
324 int max_yblen = MAX_BLOCKSIZE >> (i ? s->chroma_y_shift : 0);
325 w = s->source.width >> (i ? s->chroma_x_shift : 0);
326 h = s->source.height >> (i ? s->chroma_y_shift : 0);
328 /* we allocate the max we support here since num decompositions can
329 * change from frame to frame. Stride is aligned to 16 for SIMD, and
330 * 1<<MAX_DWT_LEVELS top padding to avoid if(y>0) in arith decoding
331 * MAX_BLOCKSIZE padding for MC: blocks can spill up to half of that
333 top_padding = FFMAX(1<<MAX_DWT_LEVELS, max_yblen/2);
334 w = FFALIGN(CALC_PADDING(w, MAX_DWT_LEVELS), 8); /* FIXME: Should this be 16 for SSE??? */
335 h = top_padding + CALC_PADDING(h, MAX_DWT_LEVELS) + max_yblen/2;
/* NOTE(review): the element counts below are computed in int before the
 * multiplication by sizeof; very large dimensions could overflow - confirm
 * that the dimensions are bounded by the caller. */
337 s->plane[i].idwt_buf_base = av_mallocz((w+max_xblen)*h * sizeof(IDWTELEM));
338 s->plane[i].idwt_tmp = av_malloc((w+16) * sizeof(IDWTELEM));
/* idwt_buf is computed before the NULL check but not dereferenced here */
339 s->plane[i].idwt_buf = s->plane[i].idwt_buf_base + top_padding*w;
340 if (!s->plane[i].idwt_buf_base || !s->plane[i].idwt_tmp)
341 return AVERROR(ENOMEM);
/* NOTE(review): w is used below; the statement that resets it after the loop
 * is not visible in this excerpt (presumably w = s->source.width). */
345 h = s->source.height;
347 /* fixme: allocate using real stride here */
348 s->sbsplit = av_malloc(sbwidth * sbheight);
349 s->blmotion = av_malloc(sbwidth * sbheight * 16 * sizeof(*s->blmotion));
350 s->edge_emu_buffer_base = av_malloc((w+64)*MAX_BLOCKSIZE);
352 s->mctmp = av_malloc((w+64+MAX_BLOCKSIZE) * (h*MAX_BLOCKSIZE) * sizeof(*s->mctmp));
353 s->mcscratch = av_malloc((w+64)*MAX_BLOCKSIZE);
/* NOTE(review): edge_emu_buffer_base, mctmp and mcscratch are not covered by
 * the check below, so a failure of those allocations goes unnoticed here. */
355 if (!s->sbsplit || !s->blmotion)
356 return AVERROR(ENOMEM);
/*
 * Release every frame still held and free the buffers allocated by
 * alloc_sequence_buffers(). av_freep() also resets each pointer to NULL.
 */
360 static void free_sequence_buffers(DiracContext *s)
364 for (i = 0; i < MAX_FRAMES; i++) {
/* only frames that currently own a buffer are released */
365 if (s->all_frames[i].avframe.data[0]) {
366 s->avctx->release_buffer(s->avctx, &s->all_frames[i].avframe);
367 memset(s->all_frames[i].interpolated, 0, sizeof(s->all_frames[i].interpolated));
/* index 0 is skipped; presumably it aliases the frame's own data rather
 * than a separate allocation - confirm */
370 for (j = 0; j < 3; j++)
371 for (k = 1; k < 4; k++)
372 av_freep(&s->all_frames[i].hpel_base[j][k]);
/* empty both frame lists */
375 memset(s->ref_frames, 0, sizeof(s->ref_frames));
376 memset(s->delay_frames, 0, sizeof(s->delay_frames));
378 for (i = 0; i < 3; i++) {
379 av_freep(&s->plane[i].idwt_buf_base);
380 av_freep(&s->plane[i].idwt_tmp);
383 av_freep(&s->sbsplit);
384 av_freep(&s->blmotion);
385 av_freep(&s->edge_emu_buffer_base);
/* NOTE(review): no free of s->mctmp is visible in this excerpt - confirm it
 * is released on a line not shown. */
388 av_freep(&s->mcscratch);
/*
 * Codec init callback: resets the frame counter, rejects
 * CODEC_FLAG_EMU_EDGE and initialises the DSP function tables.
 * NOTE(review): the success return is not visible in this excerpt.
 */
391 static av_cold int dirac_decode_init(AVCodecContext *avctx)
393 DiracContext *s = avctx->priv_data;
/* same value dirac_decode_flush() resets frame_number to */
395 s->frame_number = -1;
397 if (avctx->flags&CODEC_FLAG_EMU_EDGE) {
398 av_log(avctx, AV_LOG_ERROR, "Edge emulation not supported!\n");
399 return AVERROR_PATCHWELCOME;
402 ff_dsputil_init(&s->dsp, avctx);
403 ff_diracdsp_init(&s->diracdsp);
/*
 * Flush callback: drop all frames and sequence buffers and return to the
 * state where no sequence header has been seen.
 */
408 static void dirac_decode_flush(AVCodecContext *avctx)
410 DiracContext *s = avctx->priv_data;
411 free_sequence_buffers(s);
412 s->seen_sequence_header = 0;
413 s->frame_number = -1;
/*
 * Close callback: releases everything through dirac_decode_flush().
 * NOTE(review): the return statement is not visible in this excerpt.
 */
416 static av_cold int dirac_decode_end(AVCodecContext *avctx)
418 dirac_decode_flush(avctx);
/* Select the sign context from the sign of x: CTX_SIGN_ZERO-1 for negative,
 * CTX_SIGN_ZERO for zero, CTX_SIGN_ZERO+1 for positive. x is evaluated twice. */
422 #define SIGN_CTX(x) (CTX_SIGN_ZERO + ((x) > 0) - ((x) < 0))
/*
 * Decode one arithmetic-coded coefficient at position (x, y) of subband b.
 * buf points at the coefficient itself, so negative offsets address the left
 * and upper neighbours. The context is derived from the parent band and the
 * neighbourhood, then the magnitude is dequantised and the sign applied.
 * NOTE(review): several guarding conditions, the zero-magnitude handling and
 * the final store are not visible in this excerpt.
 */
424 static inline void coeff_unpack_arith(DiracArith *c, int qfactor, int qoffset,
425 SubBand *b, IDWTELEM *buf, int x, int y)
429 int pred_ctx = CTX_ZPZN_F1;
431 /* Check if the parent subband has a 0 in the corresponding position */
/* the parent band has half the resolution, hence the >>1 on both coordinates */
433 pred_ctx += !!b->parent->ibuf[b->parent->stride * (y>>1) + (x>>1)] << 1;
/* for HL bands the sign is predicted from the coefficient above */
435 if (b->orientation == subband_hl)
436 sign_pred = buf[-b->stride];
438 /* Determine if the pixel has only zeros in its neighbourhood */
440 pred_ctx += !(buf[-1] | buf[-b->stride] | buf[-1-b->stride]);
441 if (b->orientation == subband_lh)
444 pred_ctx += !buf[-b->stride];
447 coeff = dirac_get_arith_uint(c, pred_ctx, CTX_COEFF_DATA);
/* dequantise: scale the magnitude, add the offset, round and drop 2 bits */
449 coeff = (coeff * qfactor + qoffset + 2) >> 2;
450 sign = dirac_get_arith_bit(c, SIGN_CTX(sign_pred));
/* branch-free negation: yields -coeff when sign is 1, coeff when sign is 0 */
451 coeff = (coeff ^ -sign) + sign;
/*
 * Read one Golomb-coded coefficient from gb: an unsigned magnitude, which is
 * dequantised with qfactor/qoffset, followed by a sign bit.
 * NOTE(review): the zero-magnitude handling and the return statement are not
 * visible in this excerpt.
 */
456 static inline int coeff_unpack_golomb(GetBitContext *gb, int qfactor, int qoffset)
460 coeff = svq3_get_ue_golomb(gb);
/* same dequantisation formula as coeff_unpack_arith() */
462 coeff = (coeff * qfactor + qoffset + 2) >> 2;
463 sign = get_bits1(gb);
/* branch-free negation when sign is 1 */
464 coeff = (coeff ^ -sign) + sign;
/*
 * Reads the optional zero-block flag and quantiser delta of one codeblock,
 * then decodes every coefficient of the rectangle row by row, using either
 * the arithmetic decoder c or the bit reader gb.
 * NOTE(review): the is_arith branches, the zero-block early exit, the quant
 * range check and the row advance are not visible in this excerpt.
 */
470 * Decode the coeffs in the rectangle defined by left, right, top, bottom
471 * [DIRAC_STD] 13.4.3.2 Codeblock unpacking loop. codeblock()
473 static inline void codeblock(DiracContext *s, SubBand *b,
474 GetBitContext *gb, DiracArith *c,
475 int left, int right, int top, int bottom,
476 int blockcnt_one, int is_arith)
478 int x, y, zero_block;
479 int qoffset, qfactor;
482 /* check for any coded coefficients in this codeblock */
485 zero_block = dirac_get_arith_bit(c, CTX_ZERO_BLOCK);
487 zero_block = get_bits1(gb);
/* a quantiser delta is only coded when codeblock_mode is set, and is skipped
 * for old-schroedinger streams whose band has a single codeblock */
493 if (s->codeblock_mode && !(s->old_delta_quant && blockcnt_one)) {
494 int quant = b->quant;
496 quant += dirac_get_arith_int(c, CTX_DELTA_Q_F, CTX_DELTA_Q_DATA);
498 quant += dirac_get_se_golomb(gb);
500 av_log(s->avctx, AV_LOG_ERROR, "Invalid quant\n");
/* clamp so the table lookups below stay within MAX_QUANT+1 entries */
506 b->quant = FFMIN(b->quant, MAX_QUANT);
508 qfactor = qscale_tab[b->quant];
509 /* TODO: context pointer? */
511 qoffset = qoffset_intra_tab[b->quant];
513 qoffset = qoffset_inter_tab[b->quant];
515 buf = b->ibuf + top * b->stride;
516 for (y = top; y < bottom; y++) {
517 for (x = left; x < right; x++) {
518 /* [DIRAC_STD] 13.4.4 Subband coefficients. coeff_unpack() */
520 coeff_unpack_arith(c, qfactor, qoffset, b, buf+x, x, y);
522 buf[x] = coeff_unpack_golomb(gb, qfactor, qoffset);
/*
 * Undo DC prediction in place: each coefficient has a prediction built from
 * already reconstructed neighbours added to it. The first column uses the
 * coefficient above; interior positions use the rounded mean (divide3) of
 * the left, upper and upper-left neighbours.
 * NOTE(review): the first-row update and the per-row pointer advance are not
 * visible in this excerpt.
 */
529 * Dirac Specification ->
530 * 13.3 intra_dc_prediction(band)
532 static inline void intra_dc_prediction(SubBand *b)
534 IDWTELEM *buf = b->ibuf;
537 for (x = 1; x < b->width; x++)
541 for (y = 1; y < b->height; y++) {
/* first column: only the coefficient above is available */
542 buf[0] += buf[-b->stride];
544 for (x = 1; x < b->width; x++) {
545 int pred = buf[x - 1] + buf[x - b->stride] + buf[x - b->stride-1];
546 buf[x] += divide3(pred);
/*
 * Decode all codeblocks of subband b from its coded data, then apply intra
 * DC prediction to the LL band of pictures without references.
 * is_arith is expected to be a compile-time constant at each call site so
 * that the always-inlined body specialises.
 * NOTE(review): the empty-band early exit and the left/top bookkeeping are
 * not visible in this excerpt.
 */
553 * Dirac Specification ->
554 * 13.4.2 Non-skipped subbands. subband_coeffs()
556 static av_always_inline void decode_subband_internal(DiracContext *s, SubBand *b, int is_arith)
558 int cb_x, cb_y, left, right, top, bottom;
/* the LL band uses codeblock[level]; every other orientation codeblock[level+1] */
561 int cb_width = s->codeblock[b->level + (b->orientation != subband_ll)].width;
562 int cb_height = s->codeblock[b->level + (b->orientation != subband_ll)].height;
/* true only when the band consists of a single codeblock (1x1) */
563 int blockcnt_one = (cb_width + cb_height) == 2;
568 init_get_bits(&gb, b->coeff_data, b->length*8);
571 ff_dirac_init_arith_decoder(&c, &gb, b->length);
574 for (cb_y = 0; cb_y < cb_height; cb_y++) {
/* codeblock edges are spread evenly over the band by integer division */
575 bottom = (b->height * (cb_y+1)) / cb_height;
577 for (cb_x = 0; cb_x < cb_width; cb_x++) {
578 right = (b->width * (cb_x+1)) / cb_width;
579 codeblock(s, b, &gb, &c, left, right, top, bottom, blockcnt_one, is_arith);
585 if (b->orientation == subband_ll && s->num_refs == 0)
586 intra_dc_prediction(b);
/*
 * avctx->execute() worker: decode one arithmetic-coded subband. b points to
 * a SubBand element (decode_component() passes a SubBand array).
 * NOTE(review): the return statement is not visible in this excerpt.
 */
589 static int decode_subband_arith(AVCodecContext *avctx, void *b)
591 DiracContext *s = avctx->priv_data;
592 decode_subband_internal(s, b, 1);
/*
 * avctx->execute() worker: decode one Golomb-coded subband. arg points to an
 * element of an array of SubBand pointers, hence the extra dereference.
 * NOTE(review): the declaration of b and the return statement are not
 * visible in this excerpt.
 */
596 static int decode_subband_golomb(AVCodecContext *avctx, void *arg)
598 DiracContext *s = avctx->priv_data;
600 decode_subband_internal(s, *b, 0);
/*
 * Parse the subband headers of component comp from s->gb, record where each
 * band's coded data lies, and dispatch the actual coefficient decoding
 * through avctx->execute().
 * NOTE(review): the conditions selecting the arith/golomb paths and guarding
 * the quant read are not visible in this excerpt.
 */
605 * Dirac Specification ->
606 * [DIRAC_STD] 13.4.1 core_transform_data()
608 static void decode_component(DiracContext *s, int comp)
610 AVCodecContext *avctx = s->avctx;
/* level 0 contributes 4 bands, every further level 3 */
611 SubBand *bands[3*MAX_DWT_LEVELS+1];
612 enum dirac_subband orientation;
613 int level, num_bands = 0;
615 /* Unpack all subbands at all levels. */
616 for (level = 0; level < s->wavelet_depth; level++) {
/* orientation 0 only exists at level 0 */
617 for (orientation = !!level; orientation < 4; orientation++) {
618 SubBand *b = &s->plane[comp].band[level][orientation];
619 bands[num_bands++] = b;
621 align_get_bits(&s->gb);
622 /* [DIRAC_STD] 13.4.2 subband() */
623 b->length = svq3_get_ue_golomb(&s->gb);
625 b->quant = svq3_get_ue_golomb(&s->gb);
626 align_get_bits(&s->gb);
627 b->coeff_data = s->gb.buffer + get_bits_count(&s->gb)/8;
/* clamp the coded length to what is actually left in the packet */
628 b->length = FFMIN(b->length, FFMAX(get_bits_left(&s->gb)/8, 0));
629 skip_bits_long(&s->gb, b->length*8);
632 /* arithmetic coding has inter-level dependencies, so we can only execute one level at a time */
634 avctx->execute(avctx, decode_subband_arith, &s->plane[comp].band[level][!!level],
635 NULL, 4-!!level, sizeof(SubBand));
637 /* golomb coding has no inter-level dependencies, so we can execute all subbands in parallel */
639 avctx->execute(avctx, decode_subband_golomb, bands, NULL, num_bands, sizeof(SubBand*));
/*
 * Decode the part of one subband that belongs to slice (slice_x, slice_y).
 * With b2 == NULL only b1 is filled (luma); otherwise coefficients for b1
 * and b2 are read alternately (the two chroma planes). Reading stops once
 * the bit position reaches bits_end.
 * NOTE(review): the early returns and the row advance are not visible in
 * this excerpt.
 */
642 /* [DIRAC_STD] 13.5.5.2 Luma slice subband data. luma_slice_band(level,orient,sx,sy) --> if b2 == NULL */
643 /* [DIRAC_STD] 13.5.5.3 Chroma slice subband data. chroma_slice_band(level,orient,sx,sy) --> if b2 != NULL */
644 static void lowdelay_subband(DiracContext *s, GetBitContext *gb, int quant,
645 int slice_x, int slice_y, int bits_end,
646 SubBand *b1, SubBand *b2)
/* slice rectangle inside the band; the slices tile the band by integer division */
648 int left = b1->width * slice_x / s->lowdelay.num_x;
649 int right = b1->width *(slice_x+1) / s->lowdelay.num_x;
650 int top = b1->height * slice_y / s->lowdelay.num_y;
651 int bottom = b1->height *(slice_y+1) / s->lowdelay.num_y;
/* clamp the index so the table lookups stay in range */
653 int qfactor = qscale_tab[FFMIN(quant, MAX_QUANT)];
654 int qoffset = qoffset_intra_tab[FFMIN(quant, MAX_QUANT)];
656 IDWTELEM *buf1 = b1->ibuf + top * b1->stride;
657 IDWTELEM *buf2 = b2 ? b2->ibuf + top * b2->stride : NULL;
659 /* we have to constantly check for overread since the spec explicitly
660 requires this, with the meaning that all remaining coeffs are set to 0 */
661 if (get_bits_count(gb) >= bits_end)
664 for (y = top; y < bottom; y++) {
665 for (x = left; x < right; x++) {
666 buf1[x] = coeff_unpack_golomb(gb, qfactor, qoffset);
667 if (get_bits_count(gb) >= bits_end)
670 buf2[x] = coeff_unpack_golomb(gb, qfactor, qoffset);
671 if (get_bits_count(gb) >= bits_end)
/* Per-slice work item handed to decode_lowdelay_slice(). The members are not
 * visible in this excerpt; the code uses gb, slice_x, slice_y and bytes. */
681 struct lowdelay_slice {
/*
 * avctx->execute() worker decoding one low-delay slice: a 7-bit quantiser
 * index, the length of the luma part, then the luma subbands followed by the
 * interleaved chroma subbands.
 * NOTE(review): the return statement is not visible in this excerpt.
 */
690 * Dirac Specification ->
691 * 13.5.2 Slices. slice(sx,sy)
693 static int decode_lowdelay_slice(AVCodecContext *avctx, void *arg)
695 DiracContext *s = avctx->priv_data;
696 struct lowdelay_slice *slice = arg;
697 GetBitContext *gb = &slice->gb;
698 enum dirac_subband orientation;
699 int level, quant, chroma_bits, chroma_end;
701 int quant_base = get_bits(gb, 7); /*[DIRAC_STD] qindex */
/* width of the luma-length field depends on the slice size in bits */
702 int length_bits = av_log2(8 * slice->bytes)+1;
703 int luma_bits = get_bits_long(gb, length_bits);
/* end of the luma data, limited to what the reader still has */
704 int luma_end = get_bits_count(gb) + FFMIN(luma_bits, get_bits_left(gb));
706 /* [DIRAC_STD] 13.5.5.2 luma_slice_band */
707 for (level = 0; level < s->wavelet_depth; level++)
708 for (orientation = !!level; orientation < 4; orientation++) {
/* per-band quantiser: slice index minus the matrix entry, floored at 0 */
709 quant = FFMAX(quant_base - s->lowdelay.quant[level][orientation], 0);
710 lowdelay_subband(s, gb, quant, slice->slice_x, slice->slice_y, luma_end,
711 &s->plane[0].band[level][orientation], NULL);
714 /* consume any unused bits from luma */
/* NOTE(review): the argument is (current position - luma_end); to move the
 * reader to luma_end one would expect luma_end - get_bits_count(gb).
 * Confirm the intended sign. */
715 skip_bits_long(gb, get_bits_count(gb) - luma_end);
/* chroma gets what remains of the slice after the 7-bit index, the length
 * field and the luma data */
717 chroma_bits = 8*slice->bytes - 7 - length_bits - luma_bits;
718 chroma_end = get_bits_count(gb) + FFMIN(chroma_bits, get_bits_left(gb));
719 /* [DIRAC_STD] 13.5.5.3 chroma_slice_band */
720 for (level = 0; level < s->wavelet_depth; level++)
721 for (orientation = !!level; orientation < 4; orientation++) {
722 quant = FFMAX(quant_base - s->lowdelay.quant[level][orientation], 0);
723 lowdelay_subband(s, gb, quant, slice->slice_x, slice->slice_y, chroma_end,
724 &s->plane[1].band[level][orientation],
725 &s->plane[2].band[level][orientation]);
/*
 * Split the remaining picture data into slices, decode them through
 * avctx->execute() and finally run intra DC prediction on the LL band of
 * all three planes.
 * NOTE(review): the declarations of buf and slice_num, the per-slice
 * advance of buf/bufsize/slice_num and the release of slices are not
 * visible in this excerpt.
 */
732 * Dirac Specification ->
733 * 13.5.1 low_delay_transform_data()
735 static void decode_lowdelay(DiracContext *s)
737 AVCodecContext *avctx = s->avctx;
738 int slice_x, slice_y, bytes, bufsize;
740 struct lowdelay_slice *slices;
/* NOTE(review): no NULL check on this allocation is visible before slices is
 * written in the loop below - confirm. */
743 slices = av_mallocz(s->lowdelay.num_x * s->lowdelay.num_y * sizeof(struct lowdelay_slice));
745 align_get_bits(&s->gb);
746 /*[DIRAC_STD] 13.5.2 Slices. slice(sx,sy) */
747 buf = s->gb.buffer + get_bits_count(&s->gb)/8;
/* bufsize is a bit count */
748 bufsize = get_bits_left(&s->gb);
/* stop creating slices as soon as the input is exhausted */
750 for (slice_y = 0; bufsize > 0 && slice_y < s->lowdelay.num_y; slice_y++)
751 for (slice_x = 0; bufsize > 0 && slice_x < s->lowdelay.num_x; slice_x++) {
/* difference of two truncated running totals, so the slice sizes follow the
 * rational average bytes.num/bytes.den */
752 bytes = (slice_num+1) * s->lowdelay.bytes.num / s->lowdelay.bytes.den
753 - slice_num * s->lowdelay.bytes.num / s->lowdelay.bytes.den;
755 slices[slice_num].bytes = bytes;
756 slices[slice_num].slice_x = slice_x;
757 slices[slice_num].slice_y = slice_y;
758 init_get_bits(&slices[slice_num].gb, buf, bufsize);
765 avctx->execute(avctx, decode_lowdelay_slice, slices, NULL, slice_num,
766 sizeof(struct lowdelay_slice)); /* [DIRAC_STD] 13.5.2 Slices */
767 intra_dc_prediction(&s->plane[0].band[0][0]); /* [DIRAC_STD] 13.3 intra_dc_prediction() */
768 intra_dc_prediction(&s->plane[1].band[0][0]); /* [DIRAC_STD] 13.3 intra_dc_prediction() */
769 intra_dc_prediction(&s->plane[2].band[0][0]); /* [DIRAC_STD] 13.3 intra_dc_prediction() */
/*
 * Derive the per-plane geometry for the current picture: plane and padded
 * IDWT dimensions, the layout of every subband inside the shared IDWT
 * buffer, and the block sizes of the chroma planes.
 * NOTE(review): several statements (updates of w/h per level, the remaining
 * SubBand fields, the conditions around the ibuf offset, parent and chroma
 * assignments) are not visible in this excerpt.
 */
773 static void init_planes(DiracContext *s)
775 int i, w, h, level, orientation;
777 for (i = 0; i < 3; i++) {
778 Plane *p = &s->plane[i];
/* planes 1 and 2 are scaled by the chroma shifts */
780 p->width = s->source.width >> (i ? s->chroma_x_shift : 0);
781 p->height = s->source.height >> (i ? s->chroma_y_shift : 0);
/* pad to a multiple of 1 << wavelet_depth */
782 p->idwt_width = w = CALC_PADDING(p->width , s->wavelet_depth);
783 p->idwt_height = h = CALC_PADDING(p->height, s->wavelet_depth);
784 p->idwt_stride = FFALIGN(p->idwt_width, 8);
/* walk from the highest level index down to level 0 */
786 for (level = s->wavelet_depth-1; level >= 0; level--) {
789 for (orientation = !!level; orientation < 4; orientation++) {
790 SubBand *b = &p->band[level][orientation];
/* every band addresses the same buffer with a level-dependent stride */
792 b->ibuf = p->idwt_buf;
794 b->stride = p->idwt_stride << (s->wavelet_depth - level);
797 b->orientation = orientation;
802 b->ibuf += b->stride>>1;
805 b->parent = &p->band[level-1][orientation];
/* block dimensions derived from the luma plane by the chroma shifts */
810 p->xblen = s->plane[0].xblen >> s->chroma_x_shift;
811 p->yblen = s->plane[0].yblen >> s->chroma_y_shift;
812 p->xbsep = s->plane[0].xbsep >> s->chroma_x_shift;
813 p->ybsep = s->plane[0].ybsep >> s->chroma_y_shift;
/* half of the overlap between neighbouring blocks */
816 p->xoffset = (p->xblen - p->xbsep)/2;
817 p->yoffset = (p->yblen - p->ybsep)/2;
/*
 * Reads block parameters, motion vector precision, global motion parameters,
 * the prediction mode and the reference weights from s->gb, rejecting values
 * the decoder does not support.
 * NOTE(review): the error/success return statements and several flag reads
 * guarding optional fields are not visible in this excerpt.
 */
822 * Unpack the motion compensation parameters
823 * Dirac Specification ->
824 * 11.2 Picture prediction data. picture_prediction()
826 static int dirac_unpack_prediction_parameters(DiracContext *s)
/* preset block length / separation, looked up with idx-1 below */
828 static const uint8_t default_blen[] = { 4, 12, 16, 24 };
829 static const uint8_t default_bsep[] = { 4, 8, 12, 16 };
831 GetBitContext *gb = &s->gb;
835 /* [DIRAC_STD] 11.2.2 Block parameters. block_parameters() */
836 /* Luma and Chroma are equal. 11.2.3 */
837 idx = svq3_get_ue_golomb(gb); /* [DIRAC_STD] index */
840 av_log(s->avctx, AV_LOG_ERROR, "Block prediction index too high\n");
/* explicit block parameters read from the stream */
845 s->plane[0].xblen = svq3_get_ue_golomb(gb);
846 s->plane[0].yblen = svq3_get_ue_golomb(gb);
847 s->plane[0].xbsep = svq3_get_ue_golomb(gb);
848 s->plane[0].ybsep = svq3_get_ue_golomb(gb);
850 /*[DIRAC_STD] preset_block_params(index). Table 11.1 */
851 s->plane[0].xblen = default_blen[idx-1];
852 s->plane[0].yblen = default_blen[idx-1];
853 s->plane[0].xbsep = default_bsep[idx-1];
854 s->plane[0].ybsep = default_bsep[idx-1];
856 /*[DIRAC_STD] 11.2.4 motion_data_dimensions()
857 Calculated in function dirac_unpack_block_motion_data */
/* sanity checks: separation must be non-zero, at least half the block length
 * and no larger than it; block length is capped at MAX_BLOCKSIZE */
859 if (!s->plane[0].xbsep || !s->plane[0].ybsep || s->plane[0].xbsep < s->plane[0].xblen/2 || s->plane[0].ybsep < s->plane[0].yblen/2) {
860 av_log(s->avctx, AV_LOG_ERROR, "Block separation too small\n");
863 if (s->plane[0].xbsep > s->plane[0].xblen || s->plane[0].ybsep > s->plane[0].yblen) {
864 av_log(s->avctx, AV_LOG_ERROR, "Block separation greater than size\n");
867 if (FFMAX(s->plane[0].xblen, s->plane[0].yblen) > MAX_BLOCKSIZE) {
868 av_log(s->avctx, AV_LOG_ERROR, "Unsupported large block size\n");
872 /*[DIRAC_STD] 11.2.5 Motion vector precision. motion_vector_precision()
873 Read motion vector precision */
874 s->mv_precision = svq3_get_ue_golomb(gb);
875 if (s->mv_precision > 3) {
876 av_log(s->avctx, AV_LOG_ERROR, "MV precision finer than eighth-pel\n");
880 /*[DIRAC_STD] 11.2.6 Global motion. global_motion()
881 Read the global motion compensation parameters */
882 s->globalmc_flag = get_bits1(gb);
883 if (s->globalmc_flag) {
/* start from all-zero parameters for both references */
884 memset(s->globalmc, 0, sizeof(s->globalmc));
885 /* [DIRAC_STD] pan_tilt(gparams) */
886 for (ref = 0; ref < s->num_refs; ref++) {
888 s->globalmc[ref].pan_tilt[0] = dirac_get_se_golomb(gb);
889 s->globalmc[ref].pan_tilt[1] = dirac_get_se_golomb(gb);
891 /* [DIRAC_STD] zoom_rotate_shear(gparams)
892 zoom/rotation/shear parameters */
/* NOTE(review): no upper bound on zrs_exp / perspective_exp is visible here,
 * yet global_mv() uses them as shift counts - confirm. */
894 s->globalmc[ref].zrs_exp = svq3_get_ue_golomb(gb);
895 s->globalmc[ref].zrs[0][0] = dirac_get_se_golomb(gb);
896 s->globalmc[ref].zrs[0][1] = dirac_get_se_golomb(gb);
897 s->globalmc[ref].zrs[1][0] = dirac_get_se_golomb(gb);
898 s->globalmc[ref].zrs[1][1] = dirac_get_se_golomb(gb);
/* fallback: identity matrix */
900 s->globalmc[ref].zrs[0][0] = 1;
901 s->globalmc[ref].zrs[1][1] = 1;
903 /* [DIRAC_STD] perspective(gparams) */
905 s->globalmc[ref].perspective_exp = svq3_get_ue_golomb(gb);
906 s->globalmc[ref].perspective[0] = dirac_get_se_golomb(gb);
907 s->globalmc[ref].perspective[1] = dirac_get_se_golomb(gb);
912 /*[DIRAC_STD] 11.2.7 Picture prediction mode. prediction_mode()
913 Picture prediction mode, not currently used. */
/* any non-zero mode is rejected */
914 if (svq3_get_ue_golomb(gb)) {
915 av_log(s->avctx, AV_LOG_ERROR, "Unknown picture prediction mode\n");
919 /* [DIRAC_STD] 11.2.8 Reference picture weight. reference_picture_weights()
920 just data read, weight calculation will be done later on. */
/* default denominator; the remaining defaults are not visible in this excerpt */
921 s->weight_log2denom = 1;
926 s->weight_log2denom = svq3_get_ue_golomb(gb);
927 s->weight[0] = dirac_get_se_golomb(gb);
928 if (s->num_refs == 2)
929 s->weight[1] = dirac_get_se_golomb(gb);
/*
 * Reads the wavelet transform parameters from s->gb: zero-residue flag,
 * wavelet index and depth, then either the codeblock layout (core syntax) or
 * the slice layout and quantisation matrix (low-delay syntax).
 * NOTE(review): the low_delay / flag conditions separating these branches,
 * the tail of the CHECKEDREAD macro and the final return are not visible in
 * this excerpt.
 */
935 * Dirac Specification ->
936 * 11.3 Wavelet transform data. wavelet_transform()
938 static int dirac_unpack_idwt_params(DiracContext *s)
940 GetBitContext *gb = &s->gb;
944 #define CHECKEDREAD(dst, cond, errmsg) \
945 tmp = svq3_get_ue_golomb(gb); \
947 av_log(s->avctx, AV_LOG_ERROR, errmsg); \
/* the flag is only coded for pictures that use references */
954 s->zero_res = s->num_refs ? get_bits1(gb) : 0;
958 /*[DIRAC_STD] 11.3.1 Transform parameters. transform_parameters() */
/* default_qmat has one row per wavelet_idx 0..6 */
959 CHECKEDREAD(s->wavelet_idx, tmp > 6, "wavelet_idx is too big\n")
961 CHECKEDREAD(s->wavelet_depth, tmp > MAX_DWT_LEVELS || tmp < 1, "invalid number of DWT decompositions\n")
964 /* Codeblock parameters (core syntax only) */
/* wavelet_depth+1 entries: one for the LL band plus one per level */
966 for (i = 0; i <= s->wavelet_depth; i++) {
967 CHECKEDREAD(s->codeblock[i].width , tmp < 1, "codeblock width invalid\n")
968 CHECKEDREAD(s->codeblock[i].height, tmp < 1, "codeblock height invalid\n")
971 CHECKEDREAD(s->codeblock_mode, tmp > 1, "unknown codeblock mode\n")
/* fallback: a single codeblock per subband */
973 for (i = 0; i <= s->wavelet_depth; i++)
974 s->codeblock[i].width = s->codeblock[i].height = 1;
976 /* Slice parameters + quantization matrix*/
977 /*[DIRAC_STD] 11.3.4 Slice coding Parameters (low delay syntax only). slice_parameters() */
/* NOTE(review): num_x and num_y are used as divisors in lowdelay_subband();
 * no check against zero is visible in this excerpt - confirm. */
978 s->lowdelay.num_x = svq3_get_ue_golomb(gb);
979 s->lowdelay.num_y = svq3_get_ue_golomb(gb);
980 s->lowdelay.bytes.num = svq3_get_ue_golomb(gb);
981 s->lowdelay.bytes.den = svq3_get_ue_golomb(gb);
/* den is used as a divisor in decode_lowdelay() */
983 if (s->lowdelay.bytes.den <= 0) {
984 av_log(s->avctx,AV_LOG_ERROR,"Invalid lowdelay.bytes.den\n");
985 return AVERROR_INVALIDDATA;
988 /* [DIRAC_STD] 11.3.5 Quantisation matrices (low-delay syntax). quant_matrix() */
990 av_log(s->avctx,AV_LOG_DEBUG,"Low Delay: Has Custom Quantization Matrix!\n");
991 /* custom quantization matrix */
/* one value for the LL band, then three per level */
992 s->lowdelay.quant[0][0] = svq3_get_ue_golomb(gb);
993 for (level = 0; level < s->wavelet_depth; level++) {
994 s->lowdelay.quant[level][1] = svq3_get_ue_golomb(gb);
995 s->lowdelay.quant[level][2] = svq3_get_ue_golomb(gb);
996 s->lowdelay.quant[level][3] = svq3_get_ue_golomb(gb);
/* default_qmat only provides 4 levels */
999 if (s->wavelet_depth > 4) {
1000 av_log(s->avctx,AV_LOG_ERROR,"Mandatory custom low delay matrix missing for depth %d\n", s->wavelet_depth);
1001 return AVERROR_INVALIDDATA;
1003 /* default quantization matrix */
1004 for (level = 0; level < s->wavelet_depth; level++)
1005 for (i = 0; i < 4; i++) {
1006 s->lowdelay.quant[level][i] = default_qmat[s->wavelet_idx][level][i];
1007 /* haar with no shift differs for different depths */
1008 if (s->wavelet_idx == 3)
1009 s->lowdelay.quant[level][i] += 4*(s->wavelet_depth-1 - level);
/*
 * Predict the split mode of the superblock sbsplit points at from its left,
 * upper and upper-left neighbours. With all three available the prediction
 * is their rounded mean, looked up in avgsplit by the sum.
 * NOTE(review): the border cases selecting the other returns are not visible
 * in this excerpt.
 */
1016 static inline int pred_sbsplit(uint8_t *sbsplit, int stride, int x, int y)
/* avgsplit[sum] is sum/3 rounded to nearest, for a sum of three values 0..2 */
1018 static const uint8_t avgsplit[7] = { 0, 0, 1, 1, 1, 2, 2 };
1025 return sbsplit[-stride];
1027 return avgsplit[sbsplit[-1] + sbsplit[-stride] + sbsplit[-stride-1]];
/*
 * Predict one flag bit (selected by refmask) of block->ref from the
 * neighbouring blocks: a single neighbour at the borders, otherwise the
 * majority of left, upper and upper-left.
 * NOTE(review): the border conditions are not visible in this excerpt.
 */
1030 static inline int pred_block_mode(DiracBlock *block, int stride, int x, int y, int refmask)
1037 return block[-1].ref & refmask;
1039 return block[-stride].ref & refmask;
1041 /* return the majority */
/* each term is 0 or refmask, so halving the sum and masking leaves refmask
 * exactly when at least two neighbours have the bit set */
1042 pred = (block[-1].ref & refmask) + (block[-stride].ref & refmask) + (block[-stride-1].ref & refmask);
1043 return (pred >> 1) & refmask;
/*
 * Predict the three DC values of an intra block as the mean of the DC values
 * of those left / upper / upper-left neighbours that are themselves intra
 * (neither reference bit set).
 * NOTE(review): the counter n and its increments are not visible in this
 * excerpt; presumably it counts the neighbours that contributed - confirm.
 */
1046 static inline void pred_block_dc(DiracBlock *block, int stride, int x, int y)
1050 memset(block->u.dc, 0, sizeof(block->u.dc));
/* ref & 3 == 0 means neither REF1 nor REF2 is used */
1052 if (x && !(block[-1].ref & 3)) {
1053 for (i = 0; i < 3; i++)
1054 block->u.dc[i] += block[-1].u.dc[i];
1058 if (y && !(block[-stride].ref & 3)) {
1059 for (i = 0; i < 3; i++)
1060 block->u.dc[i] += block[-stride].u.dc[i];
1064 if (x && y && !(block[-1-stride].ref & 3)) {
1065 for (i = 0; i < 3; i++)
1066 block->u.dc[i] += block[-1-stride].u.dc[i];
/* rounded halving of the accumulated sum */
1071 for (i = 0; i < 3; i++)
1072 block->u.dc[i] = (block->u.dc[i]+1)>>1;
1073 } else if (n == 3) {
1074 for (i = 0; i < 3; i++)
1075 block->u.dc[i] = divide3(block->u.dc[i]);
/*
 * Predict the motion vector of block for reference ref from the neighbours
 * that use the same reference without global motion: zero with no candidate,
 * a copy with one, the rounded mean with two, the component-wise median
 * (mid_pred) with three.
 * NOTE(review): the declarations of pred and n and the dispatch on n are not
 * visible in this excerpt.
 */
1079 static inline void pred_mv(DiracBlock *block, int stride, int x, int y, int ref)
/* ref 0 maps to DIRAC_REF_MASK_REF1, ref 1 to DIRAC_REF_MASK_REF2 */
1082 int refmask = ref+1;
1083 int mask = refmask | DIRAC_REF_MASK_GLOBAL; /* exclude gmc blocks */
1086 if (x && (block[-1].ref & mask) == refmask)
1087 pred[n++] = block[-1].u.mv[ref];
1089 if (y && (block[-stride].ref & mask) == refmask)
1090 pred[n++] = block[-stride].u.mv[ref];
1092 if (x && y && (block[-stride-1].ref & mask) == refmask)
1093 pred[n++] = block[-stride-1].u.mv[ref];
1097 block->u.mv[ref][0] = 0;
1098 block->u.mv[ref][1] = 0;
1101 block->u.mv[ref][0] = pred[0][0];
1102 block->u.mv[ref][1] = pred[0][1];
1105 block->u.mv[ref][0] = (pred[0][0] + pred[1][0] + 1) >> 1;
1106 block->u.mv[ref][1] = (pred[0][1] + pred[1][1] + 1) >> 1;
1109 block->u.mv[ref][0] = mid_pred(pred[0][0], pred[1][0], pred[2][0]);
1110 block->u.mv[ref][1] = mid_pred(pred[0][1], pred[1][1], pred[2][1]);
/*
 * Compute the motion vector of the block at (x, y) for reference ref from
 * the global motion model: the affine term A*(x,y) + 2^ez * b is scaled by
 * the perspective factor m and shifted back down by ez+ep bits.
 * NOTE(review): ez and ep come from the bitstream and are used as shift
 * counts, and the products are evaluated in int; no range check is visible
 * in this file excerpt - confirm that shifts stay below the width of int
 * and the products cannot overflow.
 */
1115 static void global_mv(DiracContext *s, DiracBlock *block, int x, int y, int ref)
1117 int ez = s->globalmc[ref].zrs_exp;
1118 int ep = s->globalmc[ref].perspective_exp;
1119 int (*A)[2] = s->globalmc[ref].zrs;
1120 int *b = s->globalmc[ref].pan_tilt;
1121 int *c = s->globalmc[ref].perspective;
1123 int m = (1<<ep) - (c[0]*x + c[1]*y);
1124 int mx = m * ((A[0][0] * x + A[0][1]*y) + (1<<ez) * b[0]);
1125 int my = m * ((A[1][0] * x + A[1][1]*y) + (1<<ez) * b[1]);
1127 block->u.mv[ref][0] = (mx + (1<<(ez+ep))) >> (ez+ep);
1128 block->u.mv[ref][1] = (my + (1<<(ez+ep))) >> (ez+ep);
/*
 * Decode the parameters of one block at (x, y): reference usage flags, then
 * either the DC values (no reference used) or the global-motion flag and the
 * motion vectors. Every value is coded as a difference to its prediction.
 * arith[0] carries the mode bits, arith[1..3] the three DC components and
 * arith[4 + 2*i], arith[5 + 2*i] the x/y vector components of reference i.
 * NOTE(review): the intra test, its early return and the else branch pairing
 * pred_mv() with the residual reads are not visible in this excerpt.
 */
1131 static void decode_block_params(DiracContext *s, DiracArith arith[8], DiracBlock *block,
1132 int stride, int x, int y)
/* predicted bit XOR decoded bit gives the actual flag */
1136 block->ref = pred_block_mode(block, stride, x, y, DIRAC_REF_MASK_REF1);
1137 block->ref ^= dirac_get_arith_bit(arith, CTX_PMODE_REF1);
1139 if (s->num_refs == 2) {
1140 block->ref |= pred_block_mode(block, stride, x, y, DIRAC_REF_MASK_REF2);
1141 block->ref ^= dirac_get_arith_bit(arith, CTX_PMODE_REF2) << 1;
1145 pred_block_dc(block, stride, x, y);
1146 for (i = 0; i < 3; i++)
1147 block->u.dc[i] += dirac_get_arith_int(arith+1+i, CTX_DC_F1, CTX_DC_DATA);
1151 if (s->globalmc_flag) {
1152 block->ref |= pred_block_mode(block, stride, x, y, DIRAC_REF_MASK_GLOBAL);
1153 block->ref ^= dirac_get_arith_bit(arith, CTX_GLOBAL_BLOCK) << 2;
/* i+1 is the mask bit of reference i */
1156 for (i = 0; i < s->num_refs; i++)
1157 if (block->ref & (i+1)) {
1158 if (block->ref & DIRAC_REF_MASK_GLOBAL) {
1159 global_mv(s, block, x, y, i);
1161 pred_mv(block, stride, x, y, i);
1162 block->u.mv[i][0] += dirac_get_arith_int(arith + 4 + 2 * i, CTX_MV_F1, CTX_MV_DATA);
1163 block->u.mv[i][1] += dirac_get_arith_int(arith + 5 + 2 * i, CTX_MV_F1, CTX_MV_DATA);
/*
 * Operates on a size x size square of blocks whose top-left element is
 * block; stride is the number of blocks per row.
 * NOTE(review): the assignments inside the loops and the row advance of dst
 * are not visible in this excerpt; presumably the first loop fills the rest
 * of the first row and the second fills the remaining rows - confirm.
 */
1169 * Copies the current block to the other blocks covered by the current superblock split mode
1171 static void propagate_block_data(DiracBlock *block, int stride, int size)
1174 DiracBlock *dst = block;
1176 for (x = 1; x < size; x++)
1179 for (y = 1; y < size; y++) {
1181 for (x = 0; x < size; x++)
/*
 * Decode the motion data of a picture: first the split mode of every
 * superblock, then the parameters of every block implied by those modes.
 * NOTE(review): the validity check on the decoded split value and the return
 * statements are not visible in this excerpt.
 */
1187 * Dirac Specification ->
1188 * 12. Block motion data syntax
1190 static int dirac_unpack_block_motion_data(DiracContext *s)
1192 GetBitContext *gb = &s->gb;
1193 uint8_t *sbsplit = s->sbsplit;
/* [0] modes, [1..3] DC components, [4..7] motion vector components */
1195 DiracArith arith[8];
1199 /* [DIRAC_STD] 11.2.4 and 12.2.1 Number of blocks and superblocks */
/* a superblock spans 4x4 blocks */
1200 s->sbwidth = DIVRNDUP(s->source.width, 4*s->plane[0].xbsep);
1201 s->sbheight = DIVRNDUP(s->source.height, 4*s->plane[0].ybsep);
1202 s->blwidth = 4 * s->sbwidth;
1203 s->blheight = 4 * s->sbheight;
1205 /* [DIRAC_STD] 12.3.1 Superblock splitting modes. superblock_split_modes()
1206 decode superblock split modes */
1207 ff_dirac_init_arith_decoder(arith, gb, svq3_get_ue_golomb(gb)); /* svq3_get_ue_golomb(gb) is the length */
1208 for (y = 0; y < s->sbheight; y++) {
1209 for (x = 0; x < s->sbwidth; x++) {
1210 unsigned int split = dirac_get_arith_uint(arith, CTX_SB_F1, CTX_SB_DATA);
/* the residual is added to the prediction modulo 3 (modes 0..2) */
1213 sbsplit[x] = (split + pred_sbsplit(sbsplit+x, s->sbwidth, x, y)) % 3;
1215 sbsplit += s->sbwidth;
1218 /* setup arith decoding */
/* each decoder is initialised with a length read from the stream */
1219 ff_dirac_init_arith_decoder(arith, gb, svq3_get_ue_golomb(gb));
1220 for (i = 0; i < s->num_refs; i++) {
1221 ff_dirac_init_arith_decoder(arith + 4 + 2 * i, gb, svq3_get_ue_golomb(gb));
1222 ff_dirac_init_arith_decoder(arith + 5 + 2 * i, gb, svq3_get_ue_golomb(gb));
1224 for (i = 0; i < 3; i++)
1225 ff_dirac_init_arith_decoder(arith+1+i, gb, svq3_get_ue_golomb(gb));
1227 for (y = 0; y < s->sbheight; y++)
1228 for (x = 0; x < s->sbwidth; x++) {
/* split mode 0/1/2 gives 1/2/4 coded blocks per side, each covering 4/2/1
 * block positions */
1229 int blkcnt = 1 << s->sbsplit[y * s->sbwidth + x];
1230 int step = 4 >> s->sbsplit[y * s->sbwidth + x];
1232 for (q = 0; q < blkcnt; q++)
1233 for (p = 0; p < blkcnt; p++) {
1234 int bx = 4 * x + p*step;
1235 int by = 4 * y + q*step;
1236 DiracBlock *block = &s->blmotion[by*s->blwidth + bx];
1237 decode_block_params(s, arith, block, s->blwidth, bx, by);
1238 propagate_block_data(block, s->blwidth, step);
/*
 * One-dimensional overlapped-block weight for position i inside a block of
 * length blen with overlap parameter offset.
 *
 * The visible branch handles the trailing roll-off: positions beyond
 * blen-1 - 2*offset use ROLLOFF() mirrored from the end of the block.
 *
 * NOTE(review): the leading branch and the final return are not visible in
 * this excerpt.
 * NOTE(review): ROLLOFF expands to an unparenthesised conditional expression
 * and reads `offset` from the enclosing scope, so it is only safe when it
 * forms a complete expression (as in the return below). No #undef is
 * visible, so the macro may stay defined for the rest of the file.
 */
1245 static int weight(int i, int blen, int offset)
1247 #define ROLLOFF(i) offset == 1 ? ((i) ? 5 : 3) : \
1248 (1 + (6*(i) + offset - 1) / (2*offset - 1))
1252 else if (i > blen-1 - 2*offset)
1253 return ROLLOFF(blen-1 - i);
/*
 * Fill one row of an OBMC weight table.
 *
 * @param p           plane supplying xblen and xoffset
 * @param obmc_weight destination row
 * @param stride      number of entries in the destination row
 * @param left        non-zero: the first half of the row gets the flat
 *                    weight instead of the roll-off
 * @param right       non-zero: the second half of the row gets the flat
 *                    weight instead of the roll-off
 * @param wy          vertical weight multiplied into every entry
 *
 * The flat horizontal weight is 8. The single index x runs through all four
 * loops, so each loop continues where the previous one stopped.
 */
1257 static void init_obmc_weight_row(Plane *p, uint8_t *obmc_weight, int stride,
1258 int left, int right, int wy)
/* left edge: flat weight over the first half of the block */
1261 for (x = 0; left && x < p->xblen >> 1; x++)
1262 obmc_weight[x] = wy*8;
/* roll-off region; stops at the half-way point when `right` is set */
1263 for (; x < p->xblen >> right; x++)
1264 obmc_weight[x] = wy*weight(x, p->xblen, p->xoffset);
/* right edge: flat weight for whatever remains of the block */
1265 for (; x < p->xblen; x++)
1266 obmc_weight[x] = wy*8;
/* entries from xblen up to stride; the loop body is not visible in this excerpt */
1267 for (; x < stride; x++)
/*
 * Fill a complete OBMC weight table of p->yblen rows, one row at a time via
 * init_obmc_weight_row().
 *
 * `top` and `bottom` play the same role vertically as `left` and `right` do
 * horizontally: when set, the corresponding half of the rows uses the flat
 * vertical weight 8 instead of weight(). The index y is shared by the three
 * loops, so each loop continues where the previous one stopped.
 */
1271 static void init_obmc_weight(Plane *p, uint8_t *obmc_weight, int stride,
1272 int left, int right, int top, int bottom)
/* top edge: flat vertical weight over the first half of the rows */
1275 for (y = 0; top && y < p->yblen >> 1; y++) {
1276 init_obmc_weight_row(p, obmc_weight, stride, left, right, 8);
1277 obmc_weight += stride;
/* roll-off rows; stops at the half-way point when `bottom` is set */
1279 for (; y < p->yblen >> bottom; y++) {
1280 int wy = weight(y, p->yblen, p->yoffset);
1281 init_obmc_weight_row(p, obmc_weight, stride, left, right, wy);
1282 obmc_weight += stride;
/* bottom edge: flat vertical weight for the remaining rows */
1284 for (; y < p->yblen; y++) {
1285 init_obmc_weight_row(p, obmc_weight, stride, left, right, 8);
1286 obmc_weight += stride;
/*
 * (Re)build the three OBMC weight tables used for block row `by`:
 * s->obmc_weight[0] is built with left = 1, s->obmc_weight[1] with neither
 * edge flag, and s->obmc_weight[2] with right = 1. All tables use a row
 * stride of MAX_BLOCKSIZE.
 *
 * NOTE(review): the definition of `top` is not visible in this excerpt.
 */
1290 static void init_obmc_weights(DiracContext *s, Plane *p, int by)
1293 int bottom = by == s->blheight-1;
1295 /* don't bother re-initing for rows 2 to blheight-2, the weights don't change */
1296 if (top || bottom || by == 1) {
1297 init_obmc_weight(p, s->obmc_weight[0], MAX_BLOCKSIZE, 1, 0, top, bottom);
1298 init_obmc_weight(p, s->obmc_weight[1], MAX_BLOCKSIZE, 0, 0, top, bottom);
1299 init_obmc_weight(p, s->obmc_weight[2], MAX_BLOCKSIZE, 0, 1, top, bottom);
/*
 * Weight table looked up in mc_subpel() as epel_weights[my&3][mx&3]; each
 * entry is an array of 4 bytes.
 * NOTE(review): the initialiser is not visible in this excerpt.
 */
1303 static const uint8_t epel_weights[4][4][4] = {
1323 * For block x,y, determine which of the hpel planes to do bilinear
1324 * interpolation from and set src[] to the location in each hpel plane
1327 * @return the index of the put_dirac_pixels_tab function to use
1328 * 0 for 1 plane (fpel,hpel), 1 for 2 planes (qpel), 2 for 4 planes (qpel), and 3 for epel
/*
 * Select the source plane(s) for motion compensating one block.
 *
 * @param s     decoder context
 * @param block block whose motion vector for `ref` is used
 * @param src   output: source pointers, one per plane to interpolate from;
 *              src[4] receives a pointer into epel_weights on the path shown
 * @param x     horizontal position of the block in the plane
 * @param y     vertical position of the block in the plane
 * @param ref   index of the reference picture
 * @param plane index of the colour plane
 *
 * NOTE(review): many lines (conditions, assignments to nplanes/epel, the
 * application of the integer motion to x/y) are not visible in this
 * excerpt; comments below describe only the statements that are shown.
 */
1330 static int mc_subpel(DiracContext *s, DiracBlock *block, const uint8_t *src[5],
1331 int x, int y, int ref, int plane)
1333 Plane *p = &s->plane[plane];
1334 uint8_t **ref_hpel = s->ref_pics[ref]->hpel[plane];
1335 int motion_x = block->u.mv[ref][0];
1336 int motion_y = block->u.mv[ref][1];
1337 int mx, my, i, epel, nplanes = 0;
/* scale the vector by the chroma subsampling shifts (the guarding condition is not visible here) */
1340 motion_x >>= s->chroma_x_shift;
1341 motion_y >>= s->chroma_y_shift;
/*
 * Split the vector: the low mv_precision bits are the sub-pel fraction
 * (mx, my), the remaining bits are the integer displacement.
 * NOTE(review): `-1 << s->mv_precision` left-shifts a negative value, which
 * is undefined behaviour in ISO C; `(1 << s->mv_precision) - 1` would build
 * the same mask without it.
 */
1344 mx = motion_x & ~(-1 << s->mv_precision);
1345 my = motion_y & ~(-1 << s->mv_precision);
1346 motion_x >>= s->mv_precision;
1347 motion_y >>= s->mv_precision;
1348 /* normalize subpel coordinates to epel */
1349 /* TODO: template this function? */
1350 mx <<= 3 - s->mv_precision;
1351 my <<= 3 - s->mv_precision;
/* single-plane case: the fraction selects one of the hpel planes directly */
1360 src[0] = ref_hpel[(my>>1)+(mx>>2)] + y*p->stride + x;
/* multi-plane case: start from the same position in all four hpel planes */
1364 for (i = 0; i < 4; i++)
1365 src[i] = ref_hpel[i] + y*p->stride + x;
1367 /* if we're interpolating in the right/bottom halves, adjust the planes as needed
1368 we increment x/y because the edge changes for half of the pixels */
1375 src[0] += p->stride;
1376 src[1] += p->stride;
1384 /* check if we really only need 2 planes since either mx or my is
1385 a hpel position. (epel weights of 0 handle this there) */
1387 /* mx == 0: average [0] and [2]
1388 mx == 4: average [1] and [3] */
1389 src[!mx] = src[2 + !!mx];
1391 } else if (!(my&3)) {
1392 src[0] = src[(my>>1) ];
1393 src[1] = src[(my>>1)+1];
1397 /* adjust the ordering if needed so the weights work */
1399 FFSWAP(const uint8_t *, src[0], src[1]);
1400 FFSWAP(const uint8_t *, src[2], src[3]);
1403 FFSWAP(const uint8_t *, src[0], src[2]);
1404 FFSWAP(const uint8_t *, src[1], src[3]);
1406 src[4] = epel_weights[my&3][mx&3];
/*
 * Blocks outside the limits tested below are copied into
 * s->edge_emu_buffer[] via ff_emulated_edge_mc() and src[] is redirected to
 * those copies.
 * NOTE(review): the (unsigned) casts make a negative x or y compare as a
 * very large value, so such blocks take this path as well.
 */
1410 /* fixme: v/h _edge_pos */
1411 if ((unsigned)x > FFMAX(p->width +EDGE_WIDTH/2 - p->xblen, 0) ||
1412 (unsigned)y > FFMAX(p->height+EDGE_WIDTH/2 - p->yblen, 0)) {
1413 for (i = 0; i < nplanes; i++) {
1414 ff_emulated_edge_mc(s->edge_emu_buffer[i], src[i], p->stride,
1415 p->xblen, p->yblen, x, y,
1416 p->width+EDGE_WIDTH/2, p->height+EDGE_WIDTH/2);
1417 src[i] = s->edge_emu_buffer[i];
/* function-table index derived from the plane count and the epel flag */
1420 return (nplanes>>1) + epel;
/*
 * Accumulate a constant DC value, scaled by the OBMC weights, into dst.
 *
 * @param dst         accumulation buffer
 * @param dc          DC value of the block for the current plane
 * @param stride      row stride parameter for dst (its use is not visible
 *                    in this excerpt)
 * @param obmc_weight weight table with MAX_BLOCKSIZE entries per row
 * @param xblen       block width; the inner loop handles two columns per
 *                    iteration, so it touches index xblen when xblen is odd
 * @param yblen       block height
 */
1423 static void add_dc(uint16_t *dst, int dc, int stride,
1424 uint8_t *obmc_weight, int xblen, int yblen)
1429 for (y = 0; y < yblen; y++) {
1430 for (x = 0; x < xblen; x += 2) {
1431 dst[x ] += dc * obmc_weight[x ];
1432 dst[x+1] += dc * obmc_weight[x+1];
/* next row of the weight table */
1435 obmc_weight += MAX_BLOCKSIZE;
/*
 * Motion compensate one block and accumulate the weighted result in mctmp.
 *
 * The low two bits of block->ref select the path. The visible statements
 * show three of them: a DC path via add_dc(), a single-reference path using
 * reference (block->ref&3)-1, and a two-reference path that combines
 * references 0 and 1.
 *
 * NOTE(review): the case labels, break/return statements and the null
 * check guarding s->weight_func are not visible in this excerpt.
 */
1439 static void block_mc(DiracContext *s, DiracBlock *block,
1440 uint16_t *mctmp, uint8_t *obmc_weight,
1441 int plane, int dstx, int dsty)
1443 Plane *p = &s->plane[plane];
1444 const uint8_t *src[5];
1447 switch (block->ref&3) {
/* DC path: add the weighted DC value directly */
1449 add_dc(mctmp, block->u.dc[plane], p->stride, obmc_weight, p->xblen, p->yblen);
/* single reference: fetch the prediction into mcscratch, then weight it */
1453 idx = mc_subpel(s, block, src, dstx, dsty, (block->ref&3)-1, plane);
1454 s->put_pixels_tab[idx](s->mcscratch, src, p->stride, p->yblen);
1456 s->weight_func(s->mcscratch, p->stride, s->weight_log2denom,
1457 s->weight[0] + s->weight[1], p->yblen);
/* two references: reference 0 goes to mcscratch, reference 1 is combined with it */
1460 idx = mc_subpel(s, block, src, dstx, dsty, 0, plane);
1461 s->put_pixels_tab[idx](s->mcscratch, src, p->stride, p->yblen);
1462 idx = mc_subpel(s, block, src, dstx, dsty, 1, plane);
1463 if (s->biweight_func) {
1464 /* fixme: +32 is a quick hack */
1465 s->put_pixels_tab[idx](s->mcscratch + 32, src, p->stride, p->yblen);
1466 s->biweight_func(s->mcscratch, s->mcscratch+32, p->stride, s->weight_log2denom,
1467 s->weight[0], s->weight[1], p->yblen);
/* no explicit weights: average reference 1 into mcscratch */
1469 s->avg_pixels_tab[idx](s->mcscratch, src, p->stride, p->yblen);
/* accumulate the OBMC-weighted prediction */
1472 s->add_obmc(mctmp, s->mcscratch, p->stride, obmc_weight, p->yblen);
/*
 * Motion compensate one row of blocks for the given plane.
 *
 * The first block uses weight table s->obmc_weight[0] and starts at
 * horizontal position -p->xoffset; the blocks with indices
 * 1..s->blwidth-2 use s->obmc_weight[1]; the block handled after the loop
 * uses s->obmc_weight[2].
 *
 * NOTE(review): the statements advancing mctmp and dstx between blocks are
 * not visible in this excerpt.
 */
1475 static void mc_row(DiracContext *s, DiracBlock *block, uint16_t *mctmp, int plane, int dsty)
1477 Plane *p = &s->plane[plane];
1478 int x, dstx = p->xbsep - p->xoffset;
1480 block_mc(s, block, mctmp, s->obmc_weight[0], plane, -p->xoffset, dsty);
1483 for (x = 1; x < s->blwidth-1; x++) {
1484 block_mc(s, block+x, mctmp, s->obmc_weight[1], plane, dstx, dsty);
/* x equals s->blwidth-1 here when the loop above ran to completion */
1488 block_mc(s, block+x, mctmp, s->obmc_weight[2], plane, dstx, dsty);
/*
 * Copy the DSP function pointers for table index idx into the context.
 *
 * The weighting functions are only installed when the picture uses
 * non-default weights (weight_log2denom > 1, or either weight differs from
 * 1); otherwise both are set to NULL.
 *
 * NOTE(review): the computation of idx is not visible in this excerpt.
 */
1491 static void select_dsp_funcs(DiracContext *s, int width, int height, int xblen, int yblen)
1499 memcpy(s->put_pixels_tab, s->diracdsp.put_dirac_pixels_tab[idx], sizeof(s->put_pixels_tab));
1500 memcpy(s->avg_pixels_tab, s->diracdsp.avg_dirac_pixels_tab[idx], sizeof(s->avg_pixels_tab));
1501 s->add_obmc = s->diracdsp.add_dirac_obmc[idx];
1502 if (s->weight_log2denom > 1 || s->weight[0] != 1 || s->weight[1] != 1) {
1503 s->weight_func = s->diracdsp.weight_dirac_pixels_tab[idx];
1504 s->biweight_func = s->diracdsp.biweight_dirac_pixels_tab[idx];
1506 s->weight_func = NULL;
1507 s->biweight_func = NULL;
/*
 * Prepare the half-pel planes of one reference frame plane.
 *
 * hpel[plane][0] is the decoded plane itself. Planes 1..3 are allocated on
 * first use and filled by dirac_hpel_filter() once per frame
 * (ref->interpolated[plane] records that this has been done). Edges are
 * drawn around every plane with a border of EDGE_WIDTH/2.
 */
1511 static void interpolate_refplane(DiracContext *s, DiracFrame *ref, int plane, int width, int height)
1513 /* chroma allocates an edge of 8 when subsampled
1514 which for 4:2:2 means an h edge of 16 and v edge of 8
1515 just use 8 for everything for the moment */
1516 int i, edge = EDGE_WIDTH/2;
1518 ref->hpel[plane][0] = ref->avframe.data[plane];
1519 s->dsp.draw_edges(ref->hpel[plane][0], ref->avframe.linesize[plane], width, height, edge, edge, EDGE_TOP | EDGE_BOTTOM); /* EDGE_TOP | EDGE_BOTTOM values just copied to make it build, this needs to be ensured */
1521 /* no need for hpel if we only have fpel vectors */
1522 if (!s->mv_precision)
/*
 * NOTE(review): the av_malloc() result is used on the next visible
 * statement without a visible NULL check; on allocation failure hpel[] would
 * point just past a NULL base. Confirm whether a check exists in the lines
 * not shown.
 */
1525 for (i = 1; i < 4; i++) {
1526 if (!ref->hpel_base[plane][i])
1527 ref->hpel_base[plane][i] = av_malloc((height+2*edge) * ref->avframe.linesize[plane] + 32);
1528 /* we need to be 16-byte aligned even for chroma */
1529 ref->hpel[plane][i] = ref->hpel_base[plane][i] + edge*ref->avframe.linesize[plane] + 16;
/* filter only once per reference plane */
1532 if (!ref->interpolated[plane]) {
1533 s->diracdsp.dirac_hpel_filter(ref->hpel[plane][1], ref->hpel[plane][2],
1534 ref->hpel[plane][3], ref->hpel[plane][0],
1535 ref->avframe.linesize[plane], width, height);
1536 s->dsp.draw_edges(ref->hpel[plane][1], ref->avframe.linesize[plane], width, height, edge, edge, EDGE_TOP | EDGE_BOTTOM);
1537 s->dsp.draw_edges(ref->hpel[plane][2], ref->avframe.linesize[plane], width, height, edge, edge, EDGE_TOP | EDGE_BOTTOM);
1538 s->dsp.draw_edges(ref->hpel[plane][3], ref->avframe.linesize[plane], width, height, edge, edge, EDGE_TOP | EDGE_BOTTOM);
1540 ref->interpolated[plane] = 1;
1544 * Dirac Specification ->
1545 * 13.0 Transform data syntax. transform_data()
/*
 * Reconstruct the current picture: decode the wavelet coefficients of each
 * of the three components, run the inverse wavelet transform slice by
 * slice and write the result to s->current_picture, adding the
 * motion-compensated prediction for inter pictures.
 *
 * NOTE(review): several lines (declarations of d, h and start, the
 * low-delay branch condition, return statements) are not visible in this
 * excerpt.
 */
1547 static int dirac_decode_frame_internal(DiracContext *s)
1550 int y, i, comp, dsty;
1553 /* [DIRAC_STD] 13.5.1 low_delay_transform_data() */
/* clear the coefficient buffers of all three components */
1554 for (comp = 0; comp < 3; comp++) {
1555 Plane *p = &s->plane[comp];
1556 memset(p->idwt_buf, 0, p->idwt_stride * p->idwt_height * sizeof(IDWTELEM));
1562 for (comp = 0; comp < 3; comp++) {
1563 Plane *p = &s->plane[comp];
1564 uint8_t *frame = s->current_picture->avframe.data[comp];
/* carve four edge-emulation buffers out of one base allocation */
1566 /* FIXME: small resolutions */
1567 for (i = 0; i < 4; i++)
1568 s->edge_emu_buffer[i] = s->edge_emu_buffer_base + i*FFALIGN(p->width, 16);
/* core syntax with residual data: clear and decode this component */
1570 if (!s->zero_res && !s->low_delay)
1572 memset(p->idwt_buf, 0, p->idwt_stride * p->idwt_height * sizeof(IDWTELEM));
1573 decode_component(s, comp); /* [DIRAC_STD] 13.4.1 core_transform_data() */
1575 if (ff_spatial_idwt_init2(&d, p->idwt_buf, p->idwt_width, p->idwt_height, p->idwt_stride,
1576 s->wavelet_idx+2, s->wavelet_depth, p->idwt_tmp))
1579 if (!s->num_refs) { /* intra */
/*
 * Transform and output 16 rows at a time.
 * NOTE(review): 16 rows are always written, also when fewer than 16 rows
 * remain before p->height; presumably the buffers are padded - confirm.
 */
1580 for (y = 0; y < p->height; y += 16) {
1581 ff_spatial_idwt_slice2(&d, y+16); /* decode */
1582 s->diracdsp.put_signed_rect_clamped(frame + y*p->stride, p->stride,
1583 p->idwt_buf + y*p->idwt_stride, p->idwt_stride, p->width, 16);
1585 } else { /* inter */
/* number of mctmp elements covered by one row of blocks */
1586 int rowheight = p->ybsep*p->stride;
1588 select_dsp_funcs(s, p->width, p->height, p->xblen, p->yblen);
1590 for (i = 0; i < s->num_refs; i++)
1591 interpolate_refplane(s, s->ref_pics[i], comp, p->width, p->height);
1593 memset(s->mctmp, 0, 4*p->yoffset*p->stride);
1596 for (y = 0; y < s->blheight; y++) {
/* first picture row written for this block row, clamped to the top of the picture */
1598 start = FFMAX(dsty, 0);
1599 uint16_t *mctmp = s->mctmp + y*rowheight;
1600 DiracBlock *blocks = s->blmotion + y*s->blwidth;
1602 init_obmc_weights(s, p, y);
/* h: number of picture rows that are output for this block row */
1604 if (y == s->blheight-1 || start+p->ybsep > p->height)
1605 h = p->height - start;
1607 h = p->ybsep - (start - dsty);
/* clear the part of the accumulator that this row starts writing to */
1611 memset(mctmp+2*p->yoffset*p->stride, 0, 2*rowheight);
1612 mc_row(s, blocks, mctmp, comp, dsty);
/* skip the rows above `start` and the xoffset columns at the left */
1614 mctmp += (start - dsty)*p->stride + p->xoffset;
1615 ff_spatial_idwt_slice2(&d, start + h); /* decode */
1616 s->diracdsp.add_rect_clamped(frame + start*p->stride, mctmp, p->stride,
1617 p->idwt_buf + start*p->idwt_stride, p->idwt_stride, p->width, h);
1629 * Dirac Specification ->
1630 * 11.1.1 Picture Header. picture_header()
/*
 * Parse the picture header: picture number, reference picture numbers and
 * the picture to retire, then the prediction, block motion and wavelet
 * parameters.
 *
 * NOTE(review): several lines (declarations of picnum and retire, the
 * initialisation of refdist, break/return statements and some conditions)
 * are not visible in this excerpt.
 */
1632 static int dirac_decode_picture_header(DiracContext *s)
1635 int i, j, refnum, refdist;
1636 GetBitContext *gb = &s->gb;
1638 /* [DIRAC_STD] 11.1.1 Picture Header. picture_header() PICTURE_NUM */
1639 picnum = s->current_picture->avframe.display_picture_number = get_bits_long(gb, 32);
1642 av_log(s->avctx,AV_LOG_DEBUG,"PICTURE_NUM: %d\n",picnum);
1644 /* if this is the first keyframe after a sequence header, start our
1645 reordering from here */
1646 if (s->frame_number < 0)
1647 s->frame_number = picnum;
1649 s->ref_pics[0] = s->ref_pics[1] = NULL;
1650 for (i = 0; i < s->num_refs; i++) {
/* reference numbers are coded relative to the current picture number */
1651 refnum = picnum + dirac_get_se_golomb(gb);
1654 /* find the closest reference to the one we want */
1655 /* Jordi: this is needed if the referenced picture hasn't yet arrived */
/* the search stops early once an exact match (refdist == 0) is found */
1656 for (j = 0; j < MAX_REFERENCE_FRAMES && refdist; j++)
1657 if (s->ref_frames[j]
1658 && FFABS(s->ref_frames[j]->avframe.display_picture_number - refnum) < refdist) {
1659 s->ref_pics[i] = s->ref_frames[j];
1660 refdist = FFABS(s->ref_frames[j]->avframe.display_picture_number - refnum);
1663 if (!s->ref_pics[i] || refdist)
1664 av_log(s->avctx, AV_LOG_DEBUG, "Reference not found\n");
/*
 * NOTE(review): the return value of get_buffer() below is ignored; if it
 * fails, the substitute reference has no picture data. If no free entry
 * exists in all_frames, s->ref_pics[i] stays NULL.
 */
1666 /* if there were no references at all, allocate one */
1667 if (!s->ref_pics[i])
1668 for (j = 0; j < MAX_FRAMES; j++)
1669 if (!s->all_frames[j].avframe.data[0]) {
1670 s->ref_pics[i] = &s->all_frames[j];
1671 s->avctx->get_buffer(s->avctx, &s->ref_pics[i]->avframe);
1676 /* retire the reference frames that are not used anymore */
1677 if (s->current_picture->avframe.reference) {
/* the picture to retire is also coded relative to the current picture number */
1678 retire = picnum + dirac_get_se_golomb(gb);
1679 if (retire != picnum) {
1680 DiracFrame *retire_pic = remove_frame(s->ref_frames, retire);
/* keep only the "still waiting for output" flag */
1683 retire_pic->avframe.reference &= DELAYED_PIC_REF;
1685 av_log(s->avctx, AV_LOG_DEBUG, "Frame to retire not found\n");
/*
 * NOTE(review): the result of remove_frame() is dereferenced directly in
 * the loop below, although the retire path above treats "not found" as
 * possible. Presumably ref_frames[0] is always present when add_frame()
 * reports a full list - confirm.
 */
1688 /* if reference array is full, remove the oldest as per the spec */
1689 while (add_frame(s->ref_frames, MAX_REFERENCE_FRAMES, s->current_picture)) {
1690 av_log(s->avctx, AV_LOG_ERROR, "Reference frame overflow\n");
1691 remove_frame(s->ref_frames, s->ref_frames[0]->avframe.display_picture_number)->avframe.reference &= DELAYED_PIC_REF;
1696 if (dirac_unpack_prediction_parameters(s)) /* [DIRAC_STD] 11.2 Picture Prediction Data. picture_prediction() */
1698 if (dirac_unpack_block_motion_data(s)) /* [DIRAC_STD] 12. Block motion data syntax */
1701 if (dirac_unpack_idwt_params(s)) /* [DIRAC_STD] 11.3 Wavelet transform data */
/*
 * Output the delayed frame with the lowest display picture number and
 * remove it from s->delay_frames, which is scanned as a NULL-terminated
 * list.
 *
 * @param s         decoder context
 * @param picture   receives a copy of the selected frame's AVFrame
 * @param data_size set to sizeof(AVFrame) when a frame is returned
 *
 * NOTE(review): the declaration and update of out_idx, any guard for an
 * empty list (out == NULL) and the return statement are not visible in this
 * excerpt.
 */
1708 static int get_delayed_pic(DiracContext *s, AVFrame *picture, int *data_size)
1710 DiracFrame *out = s->delay_frames[0];
1713 /* find frame with lowest picture number */
1714 for (i = 1; s->delay_frames[i]; i++)
1715 if (s->delay_frames[i]->avframe.display_picture_number < out->avframe.display_picture_number) {
1716 out = s->delay_frames[i];
/* close the gap: shift the following entries (and the NULL terminator) down by one */
1720 for (i = out_idx; s->delay_frames[i]; i++)
1721 s->delay_frames[i] = s->delay_frames[i+1];
/* toggle the "delayed for output" flag of the frame being returned */
1724 out->avframe.reference ^= DELAYED_PIC_REF;
1725 *data_size = sizeof(AVFrame);
1726 *(AVFrame *)picture = out->avframe;
1733 * Dirac Specification ->
1734 * 9.6 Parse Info Header Syntax. parse_info()
1735 * 4 byte start code + byte parse code + 4 byte size + 4 byte previous size
1737 #define DATA_UNIT_HEADER_SIZE 13
1739 /* [DIRAC_STD] dirac_decode_data_unit makes reference to the while defined in 9.3
1740 inside the function parse_sequence() */
/*
 * Decode one data unit. buf points at the start of the unit (its parse
 * info header) and size is the unit length in bytes. The parse code at
 * buf[4] selects the action: sequence header, end of sequence, auxiliary
 * data, or picture.
 *
 * NOTE(review): return statements, the declarations of tmp and ver and
 * some conditions are not visible in this excerpt.
 */
1741 static int dirac_decode_data_unit(AVCodecContext *avctx, const uint8_t *buf, int size)
1743 DiracContext *s = avctx->priv_data;
1744 DiracFrame *pic = NULL;
/*
 * NOTE(review): buf[4] is read here, before the size check below. The
 * caller visible in this file only passes units with a complete header,
 * but reading after the check would be safer.
 */
1745 int i, parse_code = buf[4];
1748 if (size < DATA_UNIT_HEADER_SIZE)
/* the payload starts right after the 13-byte parse info header */
1751 init_get_bits(&s->gb, &buf[13], 8*(size - DATA_UNIT_HEADER_SIZE));
1753 if (parse_code == pc_seq_header) {
1754 if (s->seen_sequence_header)
1757 /* [DIRAC_STD] 10. Sequence header */
1758 if (avpriv_dirac_parse_sequence_header(avctx, &s->gb, &s->source))
1761 avcodec_get_chroma_sub_sample(avctx->pix_fmt, &s->chroma_x_shift, &s->chroma_y_shift);
1763 if (alloc_sequence_buffers(s))
1766 s->seen_sequence_header = 1;
1767 } else if (parse_code == pc_eos) { /* [DIRAC_STD] End of Sequence */
1768 free_sequence_buffers(s);
1769 s->seen_sequence_header = 0;
1770 } else if (parse_code == pc_aux_data) {
/*
 * NOTE(review): buf[13] is the first payload byte; when size equals
 * DATA_UNIT_HEADER_SIZE it lies outside this data unit. The sscanf() below
 * reads a uint8_t buffer as a C string and relies on a terminating NUL
 * somewhere after buf+14, which the visible code does not establish.
 */
1771 if (buf[13] == 1) { /* encoder implementation/version */
1773 /* versions older than 1.0.8 don't store quant delta for
1774 subbands with only one codeblock */
1775 if (sscanf(buf+14, "Schroedinger %d.%d.%d", ver, ver+1, ver+2) == 3)
1776 if (ver[0] == 1 && ver[1] == 0 && ver[2] <= 7)
1777 s->old_delta_quant = 1;
1779 } else if (parse_code & 0x8) { /* picture data unit */
1780 if (!s->seen_sequence_header) {
1781 av_log(avctx, AV_LOG_DEBUG, "Dropping frame without sequence header\n");
/* no break is visible in this loop, so the last free entry found is the one used */
1785 /* find an unused frame */
1786 for (i = 0; i < MAX_FRAMES; i++)
1787 if (s->all_frames[i].avframe.data[0] == NULL)
1788 pic = &s->all_frames[i];
1790 av_log(avctx, AV_LOG_ERROR, "framelist full\n");
1794 avcodec_get_frame_defaults(&pic->avframe);
1796 /* [DIRAC_STD] Defined in 9.6.1 ... */
/* the low two bits of the parse code carry the number of references */
1797 tmp = parse_code & 0x03; /* [DIRAC_STD] num_refs() */
1799 av_log(avctx, AV_LOG_ERROR, "num_refs of 3\n");
1803 s->is_arith = (parse_code & 0x48) == 0x08; /* [DIRAC_STD] using_ac() */
1804 s->low_delay = (parse_code & 0x88) == 0x88; /* [DIRAC_STD] is_low_delay() */
1805 pic->avframe.reference = (parse_code & 0x0C) == 0x0C; /* [DIRAC_STD] is_reference() */
1806 pic->avframe.key_frame = s->num_refs == 0; /* [DIRAC_STD] is_intra() */
1807 pic->avframe.pict_type = s->num_refs + 1; /* Definition of AVPictureType in avutil.h */
1809 if (avctx->get_buffer(avctx, &pic->avframe) < 0) {
1810 av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
/* publish the new picture and take the plane strides from its buffer */
1813 s->current_picture = pic;
1814 s->plane[0].stride = pic->avframe.linesize[0];
1815 s->plane[1].stride = pic->avframe.linesize[1];
1816 s->plane[2].stride = pic->avframe.linesize[2];
1818 /* [DIRAC_STD] 11.1 Picture parse. picture_parse() */
1819 if (dirac_decode_picture_header(s))
1822 /* [DIRAC_STD] 13.0 Transform data syntax. transform_data() */
1823 if (dirac_decode_frame_internal(s))
/*
 * Decoder entry point (installed as .decode in ff_dirac_decoder).
 *
 * Releases frames that are no longer referenced, scans the packet for
 * "BBCD" data units and decodes each one, then decides which frame (if
 * any) to output so that frames leave in display order: a frame ahead of
 * s->frame_number is queued in s->delay_frames, a frame that matches is
 * output immediately.
 *
 * NOTE(review): the enclosing loop, break/continue/return statements and
 * some conditions are not visible in this excerpt.
 */
1829 static int dirac_decode_frame(AVCodecContext *avctx, void *data, int *data_size, AVPacket *pkt)
1831 DiracContext *s = avctx->priv_data;
1832 DiracFrame *picture = data;
1833 uint8_t *buf = pkt->data;
1834 int buf_size = pkt->size;
1835 int i, data_unit_size, buf_idx = 0;
1837 /* release unused frames */
1838 for (i = 0; i < MAX_FRAMES; i++)
1839 if (s->all_frames[i].avframe.data[0] && !s->all_frames[i].avframe.reference) {
1840 avctx->release_buffer(avctx, &s->all_frames[i].avframe);
/* the half-pel planes must be recomputed when this slot is reused */
1841 memset(s->all_frames[i].interpolated, 0, sizeof(s->all_frames[i].interpolated));
1844 s->current_picture = NULL;
1847 /* end of stream, so flush delayed pics */
1849 return get_delayed_pic(s, (AVFrame *)data, data_size);
1852 /*[DIRAC_STD] Here starts the code from parse_info() defined in 9.6
1853 [DIRAC_STD] PARSE_INFO_PREFIX = "BBCD" as defined in ISO/IEC 646
1854 BBCD start code search */
1855 for (; buf_idx + DATA_UNIT_HEADER_SIZE < buf_size; buf_idx++) {
1856 if (buf[buf_idx ] == 'B' && buf[buf_idx+1] == 'B' &&
1857 buf[buf_idx+2] == 'C' && buf[buf_idx+3] == 'D')
1860 /* BBCD found or end of data */
1861 if (buf_idx + DATA_UNIT_HEADER_SIZE >= buf_size)
/*
 * The unit size is a 32-bit big-endian field at offset 5 of the header.
 * NOTE(review): the value comes from the bitstream and is stored in an
 * int; `buf_idx + data_unit_size` can overflow (undefined behaviour) for
 * large values, and a value with the top bit set becomes negative and
 * passes the "larger than input buffer" test. Comparing
 * `data_unit_size > buf_size - buf_idx` on an unsigned size avoids both.
 */
1864 data_unit_size = AV_RB32(buf+buf_idx+5);
1865 if (buf_idx + data_unit_size > buf_size || !data_unit_size) {
1866 if(buf_idx + data_unit_size > buf_size)
1867 av_log(s->avctx, AV_LOG_ERROR,
1868 "Data unit with size %d is larger than input buffer, discarding\n",
1873 /* [DIRAC_STD] dirac_decode_data_unit makes reference to the while defined in 9.3 inside the function parse_sequence() */
1874 if (dirac_decode_data_unit(avctx, buf+buf_idx, data_unit_size))
1876 av_log(s->avctx, AV_LOG_ERROR,"Error in dirac_decode_data_unit\n");
/* continue the search after this data unit */
1879 buf_idx += data_unit_size;
/* no picture was decoded from this packet */
1882 if (!s->current_picture)
/* decoded picture is ahead of display order: queue it, output an older one if possible */
1885 if (s->current_picture->avframe.display_picture_number > s->frame_number) {
1886 DiracFrame *delayed_frame = remove_frame(s->delay_frames, s->frame_number);
/* keep the buffer alive while the picture waits for output */
1888 s->current_picture->avframe.reference |= DELAYED_PIC_REF;
1890 if (add_frame(s->delay_frames, MAX_DELAY, s->current_picture)) {
1891 int min_num = s->delay_frames[0]->avframe.display_picture_number;
1892 /* Too many delayed frames, so we display the frame with the lowest pts */
1893 av_log(avctx, AV_LOG_ERROR, "Delay frame overflow\n");
1894 delayed_frame = s->delay_frames[0];
1896 for (i = 1; s->delay_frames[i]; i++)
1897 if (s->delay_frames[i]->avframe.display_picture_number < min_num)
1898 min_num = s->delay_frames[i]->avframe.display_picture_number;
/* removing the lowest-numbered frame frees a slot, so the add is retried */
1900 delayed_frame = remove_frame(s->delay_frames, min_num);
1901 add_frame(s->delay_frames, MAX_DELAY, s->current_picture);
1904 if (delayed_frame) {
1905 delayed_frame->avframe.reference ^= DELAYED_PIC_REF;
1906 *(AVFrame*)data = delayed_frame->avframe;
1907 *data_size = sizeof(AVFrame);
1909 } else if (s->current_picture->avframe.display_picture_number == s->frame_number) {
1910 /* The right frame at the right time :-) */
1911 *(AVFrame*)data = s->current_picture->avframe;
1912 *data_size = sizeof(AVFrame);
/* `picture` aliases `data`, i.e. the frame that was just written to the output */
1916 s->frame_number = picture->avframe.display_picture_number + 1;
1921 AVCodec ff_dirac_decoder = {
1923 .type = AVMEDIA_TYPE_VIDEO,
1924 .id = AV_CODEC_ID_DIRAC,
1925 .priv_data_size = sizeof(DiracContext),
1926 .init = dirac_decode_init,
1927 .close = dirac_decode_end,
1928 .decode = dirac_decode_frame,
1929 .capabilities = CODEC_CAP_DELAY,
1930 .flush = dirac_decode_flush,
1931 .long_name = NULL_IF_CONFIG_SMALL("BBC Dirac VC-2"),