2 * Copyright (C) 2007 Marco Gerards <marco@gnu.org>
3 * Copyright (C) 2009 David Conrad
4 * Copyright (C) 2011 Jordi Ortiz
6 * This file is part of FFmpeg.
8 * FFmpeg is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
13 * FFmpeg is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with FFmpeg; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
26 * @author Marco Gerards <marco@gnu.org>, David Conrad, Jordi Ortiz <nenjordi@gmail.com>
32 #include "bytestream.h"
35 #include "dirac_arith.h"
36 #include "mpeg12data.h"
42 * The spec limits the number of wavelet decompositions to 4 for both
43 * level 1 (VC-2) and 128 (long-gop default).
44 * 5 decompositions is the maximum before >16-bit buffers are needed.
45 * Schroedinger allows this for DD 9,7 and 13,7 wavelets only, limiting
46 * the others to 4 decompositions (or 3 for the fidelity filter).
48 * We use this instead of MAX_DECOMPOSITIONS to save some memory.
50 #define MAX_DWT_LEVELS 5
53 * The spec limits this to 3 for frame coding, but in practice can be as high as 6
55 #define MAX_REFERENCE_FRAMES 8
56 #define MAX_DELAY 5 /* limit for main profile for frame coding (TODO: field coding) */
57 #define MAX_FRAMES (MAX_REFERENCE_FRAMES + MAX_DELAY + 1)
58 #define MAX_QUANT 68 /* max quant for VC-2 */
59 #define MAX_BLOCKSIZE 32 /* maximum xblen/yblen we support */
62 * DiracBlock->ref flags, if set then the block does MC from the given ref
64 #define DIRAC_REF_MASK_REF1 1
65 #define DIRAC_REF_MASK_REF2 2
66 #define DIRAC_REF_MASK_GLOBAL 4
69 * Value of Picture.reference when Picture is not a reference picture, but
70 * is held for delayed output.
72 #define DELAYED_PIC_REF 4
74 #define ff_emulated_edge_mc ff_emulated_edge_mc_8 /* Fix: change the calls to this function regarding bit depth */
/*
 * Round size up to the next multiple of (1 << depth).
 * Every use of an argument is parenthesised so that arguments built from
 * low-precedence operators (|, ?:, ...) expand as a single operand.
 * Each argument is evaluated more than once: do not pass expressions with
 * side effects.
 */
#define CALC_PADDING(size, depth) \
    ((((size) + (1 << (depth)) - 1) >> (depth)) << (depth))

/* Integer division a / b rounded up (rounds up for non-negative a, positive b). */
#define DIVRNDUP(a, b) (((a) + (b) - 1) / (b))
83 int interpolated[3]; /* 1 if hpel[] is valid */
85 uint8_t *hpel_base[3][4];
92 } u; /* anonymous unions aren't in C99 :( */
96 typedef struct SubBand {
104 struct SubBand *parent;
108 const uint8_t *coeff_data;
111 typedef struct Plane {
120 IDWTELEM *idwt_buf_base;
126 /* block separation (block n+1 starts after this many pixels in block n) */
129 /* amount of overspill on each edge (half of the overlap between blocks) */
133 SubBand band[MAX_DWT_LEVELS][4];
136 typedef struct DiracContext {
137 AVCodecContext *avctx;
139 DiracDSPContext diracdsp;
141 dirac_source_params source;
142 int seen_sequence_header;
143 int frame_number; /* number of the next frame to display */
148 int zero_res; /* zero residue flag */
149 int is_arith; /* whether coeffs use arith or golomb coding */
150 int low_delay; /* use the low delay syntax */
151 int globalmc_flag; /* use global motion compensation */
152 int num_refs; /* number of reference pictures */
154 /* wavelet decoding */
155 unsigned wavelet_depth; /* depth of the IDWT */
156 unsigned wavelet_idx;
159 * schroedinger older than 1.0.8 doesn't store
160 * quant delta if only one codebook exists in a band
162 unsigned old_delta_quant;
163 unsigned codeblock_mode;
168 } codeblock[MAX_DWT_LEVELS+1];
171 unsigned num_x; /* number of horizontal slices */
172 unsigned num_y; /* number of vertical slices */
173 AVRational bytes; /* average bytes per slice */
174 uint8_t quant[MAX_DWT_LEVELS][4]; /* [DIRAC_STD] E.1 */
178 int pan_tilt[2]; /* pan/tilt vector */
179 int zrs[2][2]; /* zoom/rotate/shear matrix */
180 int perspective[2]; /* perspective vector */
182 unsigned perspective_exp;
185 /* motion compensation */
186 uint8_t mv_precision; /* [DIRAC_STD] MV_PRECISION */
187 int16_t weight[2]; /* [DIRAC_STD] REF1_WT and REF2_WT */
188 unsigned weight_log2denom; /* [DIRAC_STD] REFS_WT_PRECISION */
190 int blwidth; /* number of blocks (horizontally) */
191 int blheight; /* number of blocks (vertically) */
192 int sbwidth; /* number of superblocks (horizontally) */
193 int sbheight; /* number of superblocks (vertically) */
196 DiracBlock *blmotion;
198 uint8_t *edge_emu_buffer[4];
199 uint8_t *edge_emu_buffer_base;
201 uint16_t *mctmp; /* buffer holding the MC data multiplied by OBMC weights */
204 DECLARE_ALIGNED(16, uint8_t, obmc_weight)[3][MAX_BLOCKSIZE*MAX_BLOCKSIZE];
206 void (*put_pixels_tab[4])(uint8_t *dst, const uint8_t *src[5], int stride, int h);
207 void (*avg_pixels_tab[4])(uint8_t *dst, const uint8_t *src[5], int stride, int h);
208 void (*add_obmc)(uint16_t *dst, const uint8_t *src, int stride, const uint8_t *obmc_weight, int yblen);
209 dirac_weight_func weight_func;
210 dirac_biweight_func biweight_func;
212 DiracFrame *current_picture;
213 DiracFrame *ref_pics[2];
215 DiracFrame *ref_frames[MAX_REFERENCE_FRAMES+1];
216 DiracFrame *delay_frames[MAX_DELAY+1];
217 DiracFrame all_frames[MAX_FRAMES];
221 * Dirac Specification ->
222 * Parse code values. 9.6.1 Table 9.1
224 enum dirac_parse_code {
225 pc_seq_header = 0x00,
/*
 * Default quantisation matrices for the low-delay syntax.
 * Indexed as default_qmat[wavelet_idx][level][orientation]: the values are
 * copied into lowdelay.quant[level][orientation] on the "default
 * quantization matrix" path of dirac_unpack_idwt_params(), and are later
 * subtracted from the slice quantiser index in decode_lowdelay_slice().
 * One row per accepted wavelet_idx (0..6); only four levels are provided,
 * which is why that path rejects wavelet_depth > 4.
 */
238 static const uint8_t default_qmat[][4][4] = {
239 { { 5, 3, 3, 0}, { 0, 4, 4, 1}, { 0, 5, 5, 2}, { 0, 6, 6, 3} },
240 { { 4, 2, 2, 0}, { 0, 4, 4, 2}, { 0, 5, 5, 3}, { 0, 7, 7, 5} },
241 { { 5, 3, 3, 0}, { 0, 4, 4, 1}, { 0, 5, 5, 2}, { 0, 6, 6, 3} },
242 { { 8, 4, 4, 0}, { 0, 4, 4, 0}, { 0, 4, 4, 0}, { 0, 4, 4, 0} },
243 { { 8, 4, 4, 0}, { 0, 4, 4, 0}, { 0, 4, 4, 0}, { 0, 4, 4, 0} },
244 { { 0, 4, 4, 8}, { 0, 8, 8, 12}, { 0, 13, 13, 17}, { 0, 17, 17, 21} },
245 { { 3, 1, 1, 0}, { 0, 4, 4, 2}, { 0, 6, 6, 5}, { 0, 9, 9, 7} },
248 static const int qscale_tab[MAX_QUANT+1] = {
249 4, 5, 6, 7, 8, 10, 11, 13,
250 16, 19, 23, 27, 32, 38, 45, 54,
251 64, 76, 91, 108, 128, 152, 181, 215,
252 256, 304, 362, 431, 512, 609, 724, 861,
253 1024, 1218, 1448, 1722, 2048, 2435, 2896, 3444,
254 4096, 4871, 5793, 6889, 8192, 9742, 11585, 13777,
255 16384, 19484, 23170, 27554, 32768, 38968, 46341, 55109,
259 static const int qoffset_intra_tab[MAX_QUANT+1] = {
260 1, 2, 3, 4, 4, 5, 6, 7,
261 8, 10, 12, 14, 16, 19, 23, 27,
262 32, 38, 46, 54, 64, 76, 91, 108,
263 128, 152, 181, 216, 256, 305, 362, 431,
264 512, 609, 724, 861, 1024, 1218, 1448, 1722,
265 2048, 2436, 2897, 3445, 4096, 4871, 5793, 6889,
266 8192, 9742, 11585, 13777, 16384, 19484, 23171, 27555,
270 static const int qoffset_inter_tab[MAX_QUANT+1] = {
271 1, 2, 2, 3, 3, 4, 4, 5,
272 6, 7, 9, 10, 12, 14, 17, 20,
273 24, 29, 34, 41, 48, 57, 68, 81,
274 96, 114, 136, 162, 192, 228, 272, 323,
275 384, 457, 543, 646, 768, 913, 1086, 1292,
276 1536, 1827, 2172, 2583, 3072, 3653, 4344, 5166,
277 6144, 7307, 8689, 10333, 12288, 14613, 17378, 20666,
/*
 * Approximate x / 3 with a multiply and a shift instead of a division
 * (constants taken from schroedinger): 21845 is 65536 / 3 truncated and
 * 10922 is the rounding bias added before the 16-bit shift.
 */
static inline int divide3(int x)
{
    const int recip  = 21845;
    const int bias   = 10922;
    const int scaled = (x + 1) * recip + bias;

    return scaled >> 16;
}
287 static DiracFrame *remove_frame(DiracFrame *framelist[], int picnum)
289 DiracFrame *remove_pic = NULL;
290 int i, remove_idx = -1;
292 for (i = 0; framelist[i]; i++)
293 if (framelist[i]->avframe.display_picture_number == picnum) {
294 remove_pic = framelist[i];
299 for (i = remove_idx; framelist[i]; i++)
300 framelist[i] = framelist[i+1];
305 static int add_frame(DiracFrame *framelist[], int maxframes, DiracFrame *frame)
308 for (i = 0; i < maxframes; i++)
310 framelist[i] = frame;
316 static int alloc_sequence_buffers(DiracContext *s)
318 int sbwidth = DIVRNDUP(s->source.width, 4);
319 int sbheight = DIVRNDUP(s->source.height, 4);
320 int i, w, h, top_padding;
322 /* todo: think more about this / use or set Plane here */
323 for (i = 0; i < 3; i++) {
324 int max_xblen = MAX_BLOCKSIZE >> (i ? s->chroma_x_shift : 0);
325 int max_yblen = MAX_BLOCKSIZE >> (i ? s->chroma_y_shift : 0);
326 w = s->source.width >> (i ? s->chroma_x_shift : 0);
327 h = s->source.height >> (i ? s->chroma_y_shift : 0);
329 /* we allocate the max we support here since num decompositions can
330 * change from frame to frame. Stride is aligned to 16 for SIMD, and
331 * 1<<MAX_DWT_LEVELS top padding to avoid if(y>0) in arith decoding
332 * MAX_BLOCKSIZE padding for MC: blocks can spill up to half of that
334 top_padding = FFMAX(1<<MAX_DWT_LEVELS, max_yblen/2);
335 w = FFALIGN(CALC_PADDING(w, MAX_DWT_LEVELS), 8); /* FIXME: Should this be 16 for SSE??? */
336 h = top_padding + CALC_PADDING(h, MAX_DWT_LEVELS) + max_yblen/2;
338 s->plane[i].idwt_buf_base = av_mallocz((w+max_xblen)*h * sizeof(IDWTELEM));
339 s->plane[i].idwt_tmp = av_malloc((w+16) * sizeof(IDWTELEM));
340 s->plane[i].idwt_buf = s->plane[i].idwt_buf_base + top_padding*w;
341 if (!s->plane[i].idwt_buf_base || !s->plane[i].idwt_tmp)
342 return AVERROR(ENOMEM);
346 h = s->source.height;
348 /* fixme: allocate using real stride here */
349 s->sbsplit = av_malloc(sbwidth * sbheight);
350 s->blmotion = av_malloc(sbwidth * sbheight * 16 * sizeof(*s->blmotion));
351 s->edge_emu_buffer_base = av_malloc((w+64)*MAX_BLOCKSIZE);
353 s->mctmp = av_malloc((w+64+MAX_BLOCKSIZE) * (h+MAX_BLOCKSIZE) * sizeof(*s->mctmp));
354 s->mcscratch = av_malloc((w+64)*MAX_BLOCKSIZE);
356 if (!s->sbsplit || !s->blmotion || !s->mctmp || !s->mcscratch)
357 return AVERROR(ENOMEM);
361 static void free_sequence_buffers(DiracContext *s)
365 for (i = 0; i < MAX_FRAMES; i++) {
366 if (s->all_frames[i].avframe.data[0]) {
367 s->avctx->release_buffer(s->avctx, &s->all_frames[i].avframe);
368 memset(s->all_frames[i].interpolated, 0, sizeof(s->all_frames[i].interpolated));
371 for (j = 0; j < 3; j++)
372 for (k = 1; k < 4; k++)
373 av_freep(&s->all_frames[i].hpel_base[j][k]);
376 memset(s->ref_frames, 0, sizeof(s->ref_frames));
377 memset(s->delay_frames, 0, sizeof(s->delay_frames));
379 for (i = 0; i < 3; i++) {
380 av_freep(&s->plane[i].idwt_buf_base);
381 av_freep(&s->plane[i].idwt_tmp);
384 av_freep(&s->sbsplit);
385 av_freep(&s->blmotion);
386 av_freep(&s->edge_emu_buffer_base);
389 av_freep(&s->mcscratch);
392 static av_cold int dirac_decode_init(AVCodecContext *avctx)
394 DiracContext *s = avctx->priv_data;
396 s->frame_number = -1;
398 if (avctx->flags&CODEC_FLAG_EMU_EDGE) {
399 av_log(avctx, AV_LOG_ERROR, "Edge emulation not supported!\n");
400 return AVERROR_PATCHWELCOME;
403 ff_dsputil_init(&s->dsp, avctx);
404 ff_diracdsp_init(&s->diracdsp);
409 static void dirac_decode_flush(AVCodecContext *avctx)
411 DiracContext *s = avctx->priv_data;
412 free_sequence_buffers(s);
413 s->seen_sequence_header = 0;
414 s->frame_number = -1;
417 static av_cold int dirac_decode_end(AVCodecContext *avctx)
419 dirac_decode_flush(avctx);
423 #define SIGN_CTX(x) (CTX_SIGN_ZERO + ((x) > 0) - ((x) < 0))
425 static inline void coeff_unpack_arith(DiracArith *c, int qfactor, int qoffset,
426 SubBand *b, IDWTELEM *buf, int x, int y)
430 int pred_ctx = CTX_ZPZN_F1;
432 /* Check if the parent subband has a 0 in the corresponding position */
434 pred_ctx += !!b->parent->ibuf[b->parent->stride * (y>>1) + (x>>1)] << 1;
436 if (b->orientation == subband_hl)
437 sign_pred = buf[-b->stride];
439 /* Determine if the pixel has only zeros in its neighbourhood */
441 pred_ctx += !(buf[-1] | buf[-b->stride] | buf[-1-b->stride]);
442 if (b->orientation == subband_lh)
445 pred_ctx += !buf[-b->stride];
448 coeff = dirac_get_arith_uint(c, pred_ctx, CTX_COEFF_DATA);
450 coeff = (coeff * qfactor + qoffset + 2) >> 2;
451 sign = dirac_get_arith_bit(c, SIGN_CTX(sign_pred));
452 coeff = (coeff ^ -sign) + sign;
457 static inline int coeff_unpack_golomb(GetBitContext *gb, int qfactor, int qoffset)
461 coeff = svq3_get_ue_golomb(gb);
463 coeff = (coeff * qfactor + qoffset + 2) >> 2;
464 sign = get_bits1(gb);
465 coeff = (coeff ^ -sign) + sign;
471 * Decode the coeffs in the rectangle defined by left, right, top, bottom
472 * [DIRAC_STD] 13.4.3.2 Codeblock unpacking loop. codeblock()
474 static inline void codeblock(DiracContext *s, SubBand *b,
475 GetBitContext *gb, DiracArith *c,
476 int left, int right, int top, int bottom,
477 int blockcnt_one, int is_arith)
479 int x, y, zero_block;
480 int qoffset, qfactor;
483 /* check for any coded coefficients in this codeblock */
486 zero_block = dirac_get_arith_bit(c, CTX_ZERO_BLOCK);
488 zero_block = get_bits1(gb);
494 if (s->codeblock_mode && !(s->old_delta_quant && blockcnt_one)) {
495 int quant = b->quant;
497 quant += dirac_get_arith_int(c, CTX_DELTA_Q_F, CTX_DELTA_Q_DATA);
499 quant += dirac_get_se_golomb(gb);
501 av_log(s->avctx, AV_LOG_ERROR, "Invalid quant\n");
507 b->quant = FFMIN(b->quant, MAX_QUANT);
509 qfactor = qscale_tab[b->quant];
510 /* TODO: context pointer? */
512 qoffset = qoffset_intra_tab[b->quant];
514 qoffset = qoffset_inter_tab[b->quant];
516 buf = b->ibuf + top * b->stride;
517 for (y = top; y < bottom; y++) {
518 for (x = left; x < right; x++) {
519 /* [DIRAC_STD] 13.4.4 Subband coefficients. coeff_unpack() */
521 coeff_unpack_arith(c, qfactor, qoffset, b, buf+x, x, y);
523 buf[x] = coeff_unpack_golomb(gb, qfactor, qoffset);
530 * Dirac Specification ->
531 * 13.3 intra_dc_prediction(band)
533 static inline void intra_dc_prediction(SubBand *b)
535 IDWTELEM *buf = b->ibuf;
538 for (x = 1; x < b->width; x++)
542 for (y = 1; y < b->height; y++) {
543 buf[0] += buf[-b->stride];
545 for (x = 1; x < b->width; x++) {
546 int pred = buf[x - 1] + buf[x - b->stride] + buf[x - b->stride-1];
547 buf[x] += divide3(pred);
554 * Dirac Specification ->
555 * 13.4.2 Non-skipped subbands. subband_coeffs()
557 static av_always_inline void decode_subband_internal(DiracContext *s, SubBand *b, int is_arith)
559 int cb_x, cb_y, left, right, top, bottom;
562 int cb_width = s->codeblock[b->level + (b->orientation != subband_ll)].width;
563 int cb_height = s->codeblock[b->level + (b->orientation != subband_ll)].height;
564 int blockcnt_one = (cb_width + cb_height) == 2;
569 init_get_bits(&gb, b->coeff_data, b->length*8);
572 ff_dirac_init_arith_decoder(&c, &gb, b->length);
575 for (cb_y = 0; cb_y < cb_height; cb_y++) {
576 bottom = (b->height * (cb_y+1)) / cb_height;
578 for (cb_x = 0; cb_x < cb_width; cb_x++) {
579 right = (b->width * (cb_x+1)) / cb_width;
580 codeblock(s, b, &gb, &c, left, right, top, bottom, blockcnt_one, is_arith);
586 if (b->orientation == subband_ll && s->num_refs == 0)
587 intra_dc_prediction(b);
590 static int decode_subband_arith(AVCodecContext *avctx, void *b)
592 DiracContext *s = avctx->priv_data;
593 decode_subband_internal(s, b, 1);
597 static int decode_subband_golomb(AVCodecContext *avctx, void *arg)
599 DiracContext *s = avctx->priv_data;
601 decode_subband_internal(s, *b, 0);
606 * Dirac Specification ->
607 * [DIRAC_STD] 13.4.1 core_transform_data()
609 static void decode_component(DiracContext *s, int comp)
611 AVCodecContext *avctx = s->avctx;
612 SubBand *bands[3*MAX_DWT_LEVELS+1];
613 enum dirac_subband orientation;
614 int level, num_bands = 0;
616 /* Unpack all subbands at all levels. */
617 for (level = 0; level < s->wavelet_depth; level++) {
618 for (orientation = !!level; orientation < 4; orientation++) {
619 SubBand *b = &s->plane[comp].band[level][orientation];
620 bands[num_bands++] = b;
622 align_get_bits(&s->gb);
623 /* [DIRAC_STD] 13.4.2 subband() */
624 b->length = svq3_get_ue_golomb(&s->gb);
626 b->quant = svq3_get_ue_golomb(&s->gb);
627 align_get_bits(&s->gb);
628 b->coeff_data = s->gb.buffer + get_bits_count(&s->gb)/8;
629 b->length = FFMIN(b->length, FFMAX(get_bits_left(&s->gb)/8, 0));
630 skip_bits_long(&s->gb, b->length*8);
633 /* arithmetic coding has inter-level dependencies, so we can only execute one level at a time */
635 avctx->execute(avctx, decode_subband_arith, &s->plane[comp].band[level][!!level],
636 NULL, 4-!!level, sizeof(SubBand));
638 /* golomb coding has no inter-level dependencies, so we can execute all subbands in parallel */
640 avctx->execute(avctx, decode_subband_golomb, bands, NULL, num_bands, sizeof(SubBand*));
643 /* [DIRAC_STD] 13.5.5.2 Luma slice subband data. luma_slice_band(level,orient,sx,sy) --> if b2 == NULL */
644 /* [DIRAC_STD] 13.5.5.3 Chroma slice subband data. chroma_slice_band(level,orient,sx,sy) --> if b2 != NULL */
645 static void lowdelay_subband(DiracContext *s, GetBitContext *gb, int quant,
646 int slice_x, int slice_y, int bits_end,
647 SubBand *b1, SubBand *b2)
649 int left = b1->width * slice_x / s->lowdelay.num_x;
650 int right = b1->width *(slice_x+1) / s->lowdelay.num_x;
651 int top = b1->height * slice_y / s->lowdelay.num_y;
652 int bottom = b1->height *(slice_y+1) / s->lowdelay.num_y;
654 int qfactor = qscale_tab[FFMIN(quant, MAX_QUANT)];
655 int qoffset = qoffset_intra_tab[FFMIN(quant, MAX_QUANT)];
657 IDWTELEM *buf1 = b1->ibuf + top * b1->stride;
658 IDWTELEM *buf2 = b2 ? b2->ibuf + top * b2->stride : NULL;
660 /* we have to constantly check for overread since the spec explicitly
661 requires this, with the meaning that all remaining coeffs are set to 0 */
662 if (get_bits_count(gb) >= bits_end)
665 for (y = top; y < bottom; y++) {
666 for (x = left; x < right; x++) {
667 buf1[x] = coeff_unpack_golomb(gb, qfactor, qoffset);
668 if (get_bits_count(gb) >= bits_end)
671 buf2[x] = coeff_unpack_golomb(gb, qfactor, qoffset);
672 if (get_bits_count(gb) >= bits_end)
682 struct lowdelay_slice {
691 * Dirac Specification ->
692 * 13.5.2 Slices. slice(sx,sy)
694 static int decode_lowdelay_slice(AVCodecContext *avctx, void *arg)
696 DiracContext *s = avctx->priv_data;
697 struct lowdelay_slice *slice = arg;
698 GetBitContext *gb = &slice->gb;
699 enum dirac_subband orientation;
700 int level, quant, chroma_bits, chroma_end;
702 int quant_base = get_bits(gb, 7); /*[DIRAC_STD] qindex */
703 int length_bits = av_log2(8 * slice->bytes)+1;
704 int luma_bits = get_bits_long(gb, length_bits);
705 int luma_end = get_bits_count(gb) + FFMIN(luma_bits, get_bits_left(gb));
707 /* [DIRAC_STD] 13.5.5.2 luma_slice_band */
708 for (level = 0; level < s->wavelet_depth; level++)
709 for (orientation = !!level; orientation < 4; orientation++) {
710 quant = FFMAX(quant_base - s->lowdelay.quant[level][orientation], 0);
711 lowdelay_subband(s, gb, quant, slice->slice_x, slice->slice_y, luma_end,
712 &s->plane[0].band[level][orientation], NULL);
715 /* consume any unused bits from luma */
716 skip_bits_long(gb, get_bits_count(gb) - luma_end);
718 chroma_bits = 8*slice->bytes - 7 - length_bits - luma_bits;
719 chroma_end = get_bits_count(gb) + FFMIN(chroma_bits, get_bits_left(gb));
720 /* [DIRAC_STD] 13.5.5.3 chroma_slice_band */
721 for (level = 0; level < s->wavelet_depth; level++)
722 for (orientation = !!level; orientation < 4; orientation++) {
723 quant = FFMAX(quant_base - s->lowdelay.quant[level][orientation], 0);
724 lowdelay_subband(s, gb, quant, slice->slice_x, slice->slice_y, chroma_end,
725 &s->plane[1].band[level][orientation],
726 &s->plane[2].band[level][orientation]);
733 * Dirac Specification ->
734 * 13.5.1 low_delay_transform_data()
736 static void decode_lowdelay(DiracContext *s)
738 AVCodecContext *avctx = s->avctx;
739 int slice_x, slice_y, bytes, bufsize;
741 struct lowdelay_slice *slices;
744 slices = av_mallocz(s->lowdelay.num_x * s->lowdelay.num_y * sizeof(struct lowdelay_slice));
746 align_get_bits(&s->gb);
747 /*[DIRAC_STD] 13.5.2 Slices. slice(sx,sy) */
748 buf = s->gb.buffer + get_bits_count(&s->gb)/8;
749 bufsize = get_bits_left(&s->gb);
751 for (slice_y = 0; bufsize > 0 && slice_y < s->lowdelay.num_y; slice_y++)
752 for (slice_x = 0; bufsize > 0 && slice_x < s->lowdelay.num_x; slice_x++) {
753 bytes = (slice_num+1) * s->lowdelay.bytes.num / s->lowdelay.bytes.den
754 - slice_num * s->lowdelay.bytes.num / s->lowdelay.bytes.den;
756 slices[slice_num].bytes = bytes;
757 slices[slice_num].slice_x = slice_x;
758 slices[slice_num].slice_y = slice_y;
759 init_get_bits(&slices[slice_num].gb, buf, bufsize);
766 avctx->execute(avctx, decode_lowdelay_slice, slices, NULL, slice_num,
767 sizeof(struct lowdelay_slice)); /* [DIRAC_STD] 13.5.2 Slices */
768 intra_dc_prediction(&s->plane[0].band[0][0]); /* [DIRAC_STD] 13.3 intra_dc_prediction() */
769 intra_dc_prediction(&s->plane[1].band[0][0]); /* [DIRAC_STD] 13.3 intra_dc_prediction() */
770 intra_dc_prediction(&s->plane[2].band[0][0]); /* [DIRAC_STD] 13.3 intra_dc_prediction() */
774 static void init_planes(DiracContext *s)
776 int i, w, h, level, orientation;
778 for (i = 0; i < 3; i++) {
779 Plane *p = &s->plane[i];
781 p->width = s->source.width >> (i ? s->chroma_x_shift : 0);
782 p->height = s->source.height >> (i ? s->chroma_y_shift : 0);
783 p->idwt_width = w = CALC_PADDING(p->width , s->wavelet_depth);
784 p->idwt_height = h = CALC_PADDING(p->height, s->wavelet_depth);
785 p->idwt_stride = FFALIGN(p->idwt_width, 8);
787 for (level = s->wavelet_depth-1; level >= 0; level--) {
790 for (orientation = !!level; orientation < 4; orientation++) {
791 SubBand *b = &p->band[level][orientation];
793 b->ibuf = p->idwt_buf;
795 b->stride = p->idwt_stride << (s->wavelet_depth - level);
798 b->orientation = orientation;
803 b->ibuf += b->stride>>1;
806 b->parent = &p->band[level-1][orientation];
811 p->xblen = s->plane[0].xblen >> s->chroma_x_shift;
812 p->yblen = s->plane[0].yblen >> s->chroma_y_shift;
813 p->xbsep = s->plane[0].xbsep >> s->chroma_x_shift;
814 p->ybsep = s->plane[0].ybsep >> s->chroma_y_shift;
817 p->xoffset = (p->xblen - p->xbsep)/2;
818 p->yoffset = (p->yblen - p->ybsep)/2;
823 * Unpack the motion compensation parameters
824 * Dirac Specification ->
825 * 11.2 Picture prediction data. picture_prediction()
827 static int dirac_unpack_prediction_parameters(DiracContext *s)
829 static const uint8_t default_blen[] = { 4, 12, 16, 24 };
830 static const uint8_t default_bsep[] = { 4, 8, 12, 16 };
832 GetBitContext *gb = &s->gb;
836 /* [DIRAC_STD] 11.2.2 Block parameters. block_parameters() */
837 /* Luma and Chroma are equal. 11.2.3 */
838 idx = svq3_get_ue_golomb(gb); /* [DIRAC_STD] index */
841 av_log(s->avctx, AV_LOG_ERROR, "Block prediction index too high\n");
846 s->plane[0].xblen = svq3_get_ue_golomb(gb);
847 s->plane[0].yblen = svq3_get_ue_golomb(gb);
848 s->plane[0].xbsep = svq3_get_ue_golomb(gb);
849 s->plane[0].ybsep = svq3_get_ue_golomb(gb);
851 /*[DIRAC_STD] preset_block_params(index). Table 11.1 */
852 s->plane[0].xblen = default_blen[idx-1];
853 s->plane[0].yblen = default_blen[idx-1];
854 s->plane[0].xbsep = default_bsep[idx-1];
855 s->plane[0].ybsep = default_bsep[idx-1];
857 /*[DIRAC_STD] 11.2.4 motion_data_dimensions()
858 Calculated in function dirac_unpack_block_motion_data */
860 if (!s->plane[0].xbsep || !s->plane[0].ybsep || s->plane[0].xbsep < s->plane[0].xblen/2 || s->plane[0].ybsep < s->plane[0].yblen/2) {
861 av_log(s->avctx, AV_LOG_ERROR, "Block separation too small\n");
864 if (s->plane[0].xbsep > s->plane[0].xblen || s->plane[0].ybsep > s->plane[0].yblen) {
865 av_log(s->avctx, AV_LOG_ERROR, "Block separation greater than size\n");
868 if (FFMAX(s->plane[0].xblen, s->plane[0].yblen) > MAX_BLOCKSIZE) {
869 av_log(s->avctx, AV_LOG_ERROR, "Unsupported large block size\n");
873 /*[DIRAC_STD] 11.2.5 Motion vector precision. motion_vector_precision()
874 Read motion vector precision */
875 s->mv_precision = svq3_get_ue_golomb(gb);
876 if (s->mv_precision > 3) {
877 av_log(s->avctx, AV_LOG_ERROR, "MV precision finer than eighth-pel\n");
881 /*[DIRAC_STD] 11.2.6 Global motion. global_motion()
882 Read the global motion compensation parameters */
883 s->globalmc_flag = get_bits1(gb);
884 if (s->globalmc_flag) {
885 memset(s->globalmc, 0, sizeof(s->globalmc));
886 /* [DIRAC_STD] pan_tilt(gparams) */
887 for (ref = 0; ref < s->num_refs; ref++) {
889 s->globalmc[ref].pan_tilt[0] = dirac_get_se_golomb(gb);
890 s->globalmc[ref].pan_tilt[1] = dirac_get_se_golomb(gb);
892 /* [DIRAC_STD] zoom_rotate_shear(gparams)
893 zoom/rotation/shear parameters */
895 s->globalmc[ref].zrs_exp = svq3_get_ue_golomb(gb);
896 s->globalmc[ref].zrs[0][0] = dirac_get_se_golomb(gb);
897 s->globalmc[ref].zrs[0][1] = dirac_get_se_golomb(gb);
898 s->globalmc[ref].zrs[1][0] = dirac_get_se_golomb(gb);
899 s->globalmc[ref].zrs[1][1] = dirac_get_se_golomb(gb);
901 s->globalmc[ref].zrs[0][0] = 1;
902 s->globalmc[ref].zrs[1][1] = 1;
904 /* [DIRAC_STD] perspective(gparams) */
906 s->globalmc[ref].perspective_exp = svq3_get_ue_golomb(gb);
907 s->globalmc[ref].perspective[0] = dirac_get_se_golomb(gb);
908 s->globalmc[ref].perspective[1] = dirac_get_se_golomb(gb);
913 /*[DIRAC_STD] 11.2.7 Picture prediction mode. prediction_mode()
914 Picture prediction mode, not currently used. */
915 if (svq3_get_ue_golomb(gb)) {
916 av_log(s->avctx, AV_LOG_ERROR, "Unknown picture prediction mode\n");
920 /* [DIRAC_STD] 11.2.8 Reference picture weight. reference_picture_weights()
921 just data read, weight calculation will be done later on. */
922 s->weight_log2denom = 1;
927 s->weight_log2denom = svq3_get_ue_golomb(gb);
928 s->weight[0] = dirac_get_se_golomb(gb);
929 if (s->num_refs == 2)
930 s->weight[1] = dirac_get_se_golomb(gb);
936 * Dirac Specification ->
937 * 11.3 Wavelet transform data. wavelet_transform()
939 static int dirac_unpack_idwt_params(DiracContext *s)
941 GetBitContext *gb = &s->gb;
945 #define CHECKEDREAD(dst, cond, errmsg) \
946 tmp = svq3_get_ue_golomb(gb); \
948 av_log(s->avctx, AV_LOG_ERROR, errmsg); \
955 s->zero_res = s->num_refs ? get_bits1(gb) : 0;
959 /*[DIRAC_STD] 11.3.1 Transform parameters. transform_parameters() */
960 CHECKEDREAD(s->wavelet_idx, tmp > 6, "wavelet_idx is too big\n")
962 CHECKEDREAD(s->wavelet_depth, tmp > MAX_DWT_LEVELS || tmp < 1, "invalid number of DWT decompositions\n")
965 /* Codeblock parameters (core syntax only) */
967 for (i = 0; i <= s->wavelet_depth; i++) {
968 CHECKEDREAD(s->codeblock[i].width , tmp < 1, "codeblock width invalid\n")
969 CHECKEDREAD(s->codeblock[i].height, tmp < 1, "codeblock height invalid\n")
972 CHECKEDREAD(s->codeblock_mode, tmp > 1, "unknown codeblock mode\n")
974 for (i = 0; i <= s->wavelet_depth; i++)
975 s->codeblock[i].width = s->codeblock[i].height = 1;
977 /* Slice parameters + quantization matrix*/
978 /*[DIRAC_STD] 11.3.4 Slice coding Parameters (low delay syntax only). slice_parameters() */
979 s->lowdelay.num_x = svq3_get_ue_golomb(gb);
980 s->lowdelay.num_y = svq3_get_ue_golomb(gb);
981 s->lowdelay.bytes.num = svq3_get_ue_golomb(gb);
982 s->lowdelay.bytes.den = svq3_get_ue_golomb(gb);
984 if (s->lowdelay.bytes.den <= 0) {
985 av_log(s->avctx,AV_LOG_ERROR,"Invalid lowdelay.bytes.den\n");
986 return AVERROR_INVALIDDATA;
989 /* [DIRAC_STD] 11.3.5 Quantisation matrices (low-delay syntax). quant_matrix() */
991 av_log(s->avctx,AV_LOG_DEBUG,"Low Delay: Has Custom Quantization Matrix!\n");
992 /* custom quantization matrix */
993 s->lowdelay.quant[0][0] = svq3_get_ue_golomb(gb);
994 for (level = 0; level < s->wavelet_depth; level++) {
995 s->lowdelay.quant[level][1] = svq3_get_ue_golomb(gb);
996 s->lowdelay.quant[level][2] = svq3_get_ue_golomb(gb);
997 s->lowdelay.quant[level][3] = svq3_get_ue_golomb(gb);
1000 if (s->wavelet_depth > 4) {
1001 av_log(s->avctx,AV_LOG_ERROR,"Mandatory custom low delay matrix missing for depth %d\n", s->wavelet_depth);
1002 return AVERROR_INVALIDDATA;
1004 /* default quantization matrix */
1005 for (level = 0; level < s->wavelet_depth; level++)
1006 for (i = 0; i < 4; i++) {
1007 s->lowdelay.quant[level][i] = default_qmat[s->wavelet_idx][level][i];
1008 /* haar with no shift differs for different depths */
1009 if (s->wavelet_idx == 3)
1010 s->lowdelay.quant[level][i] += 4*(s->wavelet_depth-1 - level);
1017 static inline int pred_sbsplit(uint8_t *sbsplit, int stride, int x, int y)
1019 static const uint8_t avgsplit[7] = { 0, 0, 1, 1, 1, 2, 2 };
1026 return sbsplit[-stride];
1028 return avgsplit[sbsplit[-1] + sbsplit[-stride] + sbsplit[-stride-1]];
1031 static inline int pred_block_mode(DiracBlock *block, int stride, int x, int y, int refmask)
1038 return block[-1].ref & refmask;
1040 return block[-stride].ref & refmask;
1042 /* return the majority */
1043 pred = (block[-1].ref & refmask) + (block[-stride].ref & refmask) + (block[-stride-1].ref & refmask);
1044 return (pred >> 1) & refmask;
1047 static inline void pred_block_dc(DiracBlock *block, int stride, int x, int y)
1051 memset(block->u.dc, 0, sizeof(block->u.dc));
1053 if (x && !(block[-1].ref & 3)) {
1054 for (i = 0; i < 3; i++)
1055 block->u.dc[i] += block[-1].u.dc[i];
1059 if (y && !(block[-stride].ref & 3)) {
1060 for (i = 0; i < 3; i++)
1061 block->u.dc[i] += block[-stride].u.dc[i];
1065 if (x && y && !(block[-1-stride].ref & 3)) {
1066 for (i = 0; i < 3; i++)
1067 block->u.dc[i] += block[-1-stride].u.dc[i];
1072 for (i = 0; i < 3; i++)
1073 block->u.dc[i] = (block->u.dc[i]+1)>>1;
1074 } else if (n == 3) {
1075 for (i = 0; i < 3; i++)
1076 block->u.dc[i] = divide3(block->u.dc[i]);
1080 static inline void pred_mv(DiracBlock *block, int stride, int x, int y, int ref)
1083 int refmask = ref+1;
1084 int mask = refmask | DIRAC_REF_MASK_GLOBAL; /* exclude gmc blocks */
1087 if (x && (block[-1].ref & mask) == refmask)
1088 pred[n++] = block[-1].u.mv[ref];
1090 if (y && (block[-stride].ref & mask) == refmask)
1091 pred[n++] = block[-stride].u.mv[ref];
1093 if (x && y && (block[-stride-1].ref & mask) == refmask)
1094 pred[n++] = block[-stride-1].u.mv[ref];
1098 block->u.mv[ref][0] = 0;
1099 block->u.mv[ref][1] = 0;
1102 block->u.mv[ref][0] = pred[0][0];
1103 block->u.mv[ref][1] = pred[0][1];
1106 block->u.mv[ref][0] = (pred[0][0] + pred[1][0] + 1) >> 1;
1107 block->u.mv[ref][1] = (pred[0][1] + pred[1][1] + 1) >> 1;
1110 block->u.mv[ref][0] = mid_pred(pred[0][0], pred[1][0], pred[2][0]);
1111 block->u.mv[ref][1] = mid_pred(pred[0][1], pred[1][1], pred[2][1]);
1116 static void global_mv(DiracContext *s, DiracBlock *block, int x, int y, int ref)
1118 int ez = s->globalmc[ref].zrs_exp;
1119 int ep = s->globalmc[ref].perspective_exp;
1120 int (*A)[2] = s->globalmc[ref].zrs;
1121 int *b = s->globalmc[ref].pan_tilt;
1122 int *c = s->globalmc[ref].perspective;
1124 int m = (1<<ep) - (c[0]*x + c[1]*y);
1125 int mx = m * ((A[0][0] * x + A[0][1]*y) + (1<<ez) * b[0]);
1126 int my = m * ((A[1][0] * x + A[1][1]*y) + (1<<ez) * b[1]);
1128 block->u.mv[ref][0] = (mx + (1<<(ez+ep))) >> (ez+ep);
1129 block->u.mv[ref][1] = (my + (1<<(ez+ep))) >> (ez+ep);
1132 static void decode_block_params(DiracContext *s, DiracArith arith[8], DiracBlock *block,
1133 int stride, int x, int y)
1137 block->ref = pred_block_mode(block, stride, x, y, DIRAC_REF_MASK_REF1);
1138 block->ref ^= dirac_get_arith_bit(arith, CTX_PMODE_REF1);
1140 if (s->num_refs == 2) {
1141 block->ref |= pred_block_mode(block, stride, x, y, DIRAC_REF_MASK_REF2);
1142 block->ref ^= dirac_get_arith_bit(arith, CTX_PMODE_REF2) << 1;
1146 pred_block_dc(block, stride, x, y);
1147 for (i = 0; i < 3; i++)
1148 block->u.dc[i] += dirac_get_arith_int(arith+1+i, CTX_DC_F1, CTX_DC_DATA);
1152 if (s->globalmc_flag) {
1153 block->ref |= pred_block_mode(block, stride, x, y, DIRAC_REF_MASK_GLOBAL);
1154 block->ref ^= dirac_get_arith_bit(arith, CTX_GLOBAL_BLOCK) << 2;
1157 for (i = 0; i < s->num_refs; i++)
1158 if (block->ref & (i+1)) {
1159 if (block->ref & DIRAC_REF_MASK_GLOBAL) {
1160 global_mv(s, block, x, y, i);
1162 pred_mv(block, stride, x, y, i);
1163 block->u.mv[i][0] += dirac_get_arith_int(arith + 4 + 2 * i, CTX_MV_F1, CTX_MV_DATA);
1164 block->u.mv[i][1] += dirac_get_arith_int(arith + 5 + 2 * i, CTX_MV_F1, CTX_MV_DATA);
1170 * Copies the current block to the other blocks covered by the current superblock split mode
1172 static void propagate_block_data(DiracBlock *block, int stride, int size)
1175 DiracBlock *dst = block;
1177 for (x = 1; x < size; x++)
1180 for (y = 1; y < size; y++) {
1182 for (x = 0; x < size; x++)
1188 * Dirac Specification ->
1189 * 12. Block motion data syntax
/**
 * Unpack the block motion data of the current picture.
 *
 * Derives the superblock and block grid size from the source size and the
 * block separation of plane 0, decodes one split mode per superblock into
 * s->sbsplit, then decodes the parameters of every coded block into
 * s->blmotion and propagates each one over the blocks it stands for.
 *
 * A superblock spans 4x4 blocks. Split mode m gives (1<<m) x (1<<m) coded
 * blocks per superblock, each covering (4>>m) x (4>>m) blocks.
 *
 * @param s decoder context
 * @return  tested for non-zero by dirac_decode_picture_header()
 */
1191 static int dirac_unpack_block_motion_data(DiracContext *s)
1193 GetBitContext *gb = &s->gb;
1194 uint8_t *sbsplit = s->sbsplit;
/* arith[0]: split modes, then block modes; [1..3]: DC; [4..7]: vectors */
1196 DiracArith arith[8];
1200 /* [DIRAC_STD] 11.2.4 and 12.2.1 Number of blocks and superblocks */
/* NOTE(review): divides by 4*xbsep and 4*ybsep; assumes both were validated
   as non-zero before this point - confirm */
1201 s->sbwidth = DIVRNDUP(s->source.width, 4*s->plane[0].xbsep);
1202 s->sbheight = DIVRNDUP(s->source.height, 4*s->plane[0].ybsep);
1203 s->blwidth = 4 * s->sbwidth;
1204 s->blheight = 4 * s->sbheight;
1206 /* [DIRAC_STD] 12.3.1 Superblock splitting modes. superblock_split_modes()
1207 decode superblock split modes */
1208 ff_dirac_init_arith_decoder(arith, gb, svq3_get_ue_golomb(gb)); /* svq3_get_ue_golomb(gb) is the length */
1209 for (y = 0; y < s->sbheight; y++) {
1210 for (x = 0; x < s->sbwidth; x++) {
1211 unsigned int split = dirac_get_arith_uint(arith, CTX_SB_F1, CTX_SB_DATA);
/* the decoded value is a residual to the predicted split mode, modulo 3 */
1214 sbsplit[x] = (split + pred_sbsplit(sbsplit+x, s->sbwidth, x, y)) % 3;
/* advance to the next row of superblocks */
1216 sbsplit += s->sbwidth;
1219 /* setup arith decoding */
1220 ff_dirac_init_arith_decoder(arith, gb, svq3_get_ue_golomb(gb));
/* per reference: one decoder for the x and one for the y vector residuals */
1221 for (i = 0; i < s->num_refs; i++) {
1222 ff_dirac_init_arith_decoder(arith + 4 + 2 * i, gb, svq3_get_ue_golomb(gb));
1223 ff_dirac_init_arith_decoder(arith + 5 + 2 * i, gb, svq3_get_ue_golomb(gb));
/* one decoder per component for the DC values */
1225 for (i = 0; i < 3; i++)
1226 ff_dirac_init_arith_decoder(arith+1+i, gb, svq3_get_ue_golomb(gb));
1228 for (y = 0; y < s->sbheight; y++)
1229 for (x = 0; x < s->sbwidth; x++) {
/* coded blocks per superblock side, and block distance between them */
1230 int blkcnt = 1 << s->sbsplit[y * s->sbwidth + x];
1231 int step = 4 >> s->sbsplit[y * s->sbwidth + x];
1233 for (q = 0; q < blkcnt; q++)
1234 for (p = 0; p < blkcnt; p++) {
/* position of the coded block in the block grid */
1235 int bx = 4 * x + p*step;
1236 int by = 4 * y + q*step;
1237 DiracBlock *block = &s->blmotion[by*s->blwidth + bx];
1238 decode_block_params(s, arith, block, s->blwidth, bx, by);
1239 propagate_block_data(block, s->blwidth, step);
/**
 * Weight of sample i along one axis of an overlapped block.
 *
 * ROLLOFF(d) is the ramp value at distance d from a block end: for
 * offset == 1 it is 3 at d == 0 and 5 otherwise; for other offsets it is
 * 1 + (6*d + offset - 1) / (2*offset - 1).
 * The branch shown below mirrors the ramp for the trailing samples
 * (i > blen-1 - 2*offset).
 * NOTE(review): the leading ramp and the value used between the two ramps
 * are presumably produced by the other branches - confirm.
 *
 * @param i      sample position within the block
 * @param blen   block length along this axis
 * @param offset block offset along this axis (callers pass xoffset / yoffset)
 */
1246 static int weight(int i, int blen, int offset)
/* NOTE(review): the expansion is not wrapped in parentheses; only safe as a
   full return expression */
1248 #define ROLLOFF(i) offset == 1 ? ((i) ? 5 : 3) : \
1249 (1 + (6*(i) + offset - 1) / (2*offset - 1))
/* trailing ramp: same rolloff, measured from the far end of the block */
1253 else if (i > blen-1 - 2*offset)
1254 return ROLLOFF(blen-1 - i);
/**
 * Fill one row of an OBMC weight table.
 *
 * Each entry is the vertical weight wy times a horizontal weight. The
 * horizontal weight is weight() inside the block and the constant 8 where
 * the rolloff is skipped: the first half of the row when left is set, and
 * columns xblen >> right up to xblen (non-empty only when right is non-zero).
 *
 * @param p           plane supplying xblen and xoffset
 * @param obmc_weight row to write
 * @param stride      row length; columns xblen..stride-1 are visited by the
 *                    final loop
 * @param left        non-zero: no rolloff in the left half
 * @param right       shift applied to xblen; 1 means no rolloff in the right half
 * @param wy          vertical weight of this row
 */
1258 static void init_obmc_weight_row(Plane *p, uint8_t *obmc_weight, int stride,
1259 int left, int right, int wy)
/* left half without rolloff; skipped entirely when left is 0 */
1262 for (x = 0; left && x < p->xblen >> 1; x++)
1263 obmc_weight[x] = wy*8;
/* columns with the regular horizontal rolloff */
1264 for (; x < p->xblen >> right; x++)
1265 obmc_weight[x] = wy*weight(x, p->xblen, p->xoffset);
/* right half without rolloff; empty when right is 0 */
1266 for (; x < p->xblen; x++)
1267 obmc_weight[x] = wy*8;
/* NOTE(review): columns past the block up to stride - presumably cleared;
   confirm the loop body */
1268 for (; x < stride; x++)
/**
 * Fill a whole OBMC weight table, one row per block line.
 *
 * Rows get the vertical weight from weight(), except where the vertical
 * rolloff is skipped and the constant 8 is used instead: the upper half when
 * top is set, and rows yblen >> bottom up to yblen (non-empty only when
 * bottom is non-zero). left and right are forwarded unchanged to
 * init_obmc_weight_row().
 *
 * @param p           plane supplying yblen and yoffset
 * @param obmc_weight table to write, yblen rows of stride entries
 * @param stride      distance between table rows
 * @param left        see init_obmc_weight_row()
 * @param right       see init_obmc_weight_row()
 * @param top         non-zero: no rolloff in the upper half
 * @param bottom      shift applied to yblen; 1 means no rolloff in the lower half
 */
1272 static void init_obmc_weight(Plane *p, uint8_t *obmc_weight, int stride,
1273 int left, int right, int top, int bottom)
/* upper half without rolloff; skipped entirely when top is 0 */
1276 for (y = 0; top && y < p->yblen >> 1; y++) {
1277 init_obmc_weight_row(p, obmc_weight, stride, left, right, 8);
1278 obmc_weight += stride;
/* rows with the regular vertical rolloff */
1280 for (; y < p->yblen >> bottom; y++) {
1281 int wy = weight(y, p->yblen, p->yoffset);
1282 init_obmc_weight_row(p, obmc_weight, stride, left, right, wy);
1283 obmc_weight += stride;
/* lower half without rolloff; empty when bottom is 0 */
1285 for (; y < p->yblen; y++) {
1286 init_obmc_weight_row(p, obmc_weight, stride, left, right, 8);
1287 obmc_weight += stride;
/**
 * Prepare the three OBMC weight tables used for block row by.
 *
 * s->obmc_weight[0] is built with left = 1, [1] with no edge flag and [2]
 * with right = 1; mc_row() uses them for the first block, the middle blocks
 * and the last block of a row respectively. All tables use a row pitch of
 * MAX_BLOCKSIZE.
 *
 * @param s  decoder context holding the tables
 * @param p  plane whose block geometry is used
 * @param by block row index
 */
1291 static void init_obmc_weights(DiracContext *s, Plane *p, int by)
/* last block row of the picture */
1294 int bottom = by == s->blheight-1;
1296 /* don't bother re-initing for rows 2 to blheight-2, the weights don't change */
1297 if (top || bottom || by == 1) {
1298 init_obmc_weight(p, s->obmc_weight[0], MAX_BLOCKSIZE, 1, 0, top, bottom);
1299 init_obmc_weight(p, s->obmc_weight[1], MAX_BLOCKSIZE, 0, 0, top, bottom);
1300 init_obmc_weight(p, s->obmc_weight[2], MAX_BLOCKSIZE, 0, 1, top, bottom);
/* Interpolation weight sets; mc_subpel() selects epel_weights[my&3][mx&3]
   and hands that 4-entry row on through src[4]. */
1304 static const uint8_t epel_weights[4][4][4] = {
1324 * For block x,y, determine which of the hpel planes to do bilinear
1325 * interpolation from and set src[] to the location in each hpel plane
1328 * @return the index of the put_dirac_pixels_tab function to use
1329 * 0 for 1 plane (fpel,hpel), 1 for 2 planes (qpel), 2 for 4 planes (qpel), and 3 for epel
/*
 * Parameters:
 *   s      decoder context (mv_precision, chroma shifts, reference pictures,
 *          edge emulation buffers)
 *   block  block whose vector for reference ref is used
 *   src    out: src[0..3] source pointers into the hpel planes, src[4] the
 *          selected epel weight row
 *   x, y   position of the block in the plane, before motion is applied
 *   ref    index into s->ref_pics and block->u.mv
 *   plane  plane index
 *
 * The vector is split into an integer part and a fractional part of
 * mv_precision bits; the fraction is then scaled to 1/8 pel units.
 */
1331 static int mc_subpel(DiracContext *s, DiracBlock *block, const uint8_t *src[5],
1332 int x, int y, int ref, int plane)
1334 Plane *p = &s->plane[plane];
1335 uint8_t **ref_hpel = s->ref_pics[ref]->hpel[plane];
1336 int motion_x = block->u.mv[ref][0];
1337 int motion_y = block->u.mv[ref][1];
1338 int mx, my, i, epel, nplanes = 0;
/* NOTE(review): scales the vector by the chroma subsampling; presumably
   applied to chroma planes only - confirm the guarding condition */
1341 motion_x >>= s->chroma_x_shift;
1342 motion_y >>= s->chroma_y_shift;
/* fractional part: the low mv_precision bits. The mask is built from a
   positive value because left-shifting -1 is undefined behavior in C;
   (1 << n) - 1 yields the same bits as ~(-1 << n) did. */
1345 mx = motion_x & ((1 << s->mv_precision) - 1);
1346 my = motion_y & ((1 << s->mv_precision) - 1);
/* integer part */
1347 motion_x >>= s->mv_precision;
1348 motion_y >>= s->mv_precision;
1349 /* normalize subpel coordinates to epel */
1350 /* TODO: template this function? */
1351 mx <<= 3 - s->mv_precision;
1352 my <<= 3 - s->mv_precision;
/* single plane selected by the half-pel bits of the vector */
1361 src[0] = ref_hpel[(my>>1)+(mx>>2)] + y*p->stride + x;
/* otherwise start from the same position in all four hpel planes */
1365 for (i = 0; i < 4; i++)
1366 src[i] = ref_hpel[i] + y*p->stride + x;
1368 /* if we're interpolating in the right/bottom halves, adjust the planes as needed
1369 we increment x/y because the edge changes for half of the pixels */
1376 src[0] += p->stride;
1377 src[1] += p->stride;
1385 /* check if we really only need 2 planes since either mx or my is
1386 a hpel position. (epel weights of 0 handle this there) */
1388 /* mx == 0: average [0] and [2]
1389 mx == 4: average [1] and [3] */
1390 src[!mx] = src[2 + !!mx];
1392 } else if (!(my&3)) {
1393 src[0] = src[(my>>1) ];
1394 src[1] = src[(my>>1)+1];
1398 /* adjust the ordering if needed so the weights work */
1400 FFSWAP(const uint8_t *, src[0], src[1]);
1401 FFSWAP(const uint8_t *, src[2], src[3]);
1404 FFSWAP(const uint8_t *, src[0], src[2]);
1405 FFSWAP(const uint8_t *, src[1], src[3]);
/* weight row for the eighth-pel interpolation, passed on via src[4] */
1407 src[4] = epel_weights[my&3][mx&3];
1411 /* fixme: v/h _edge_pos */
/* block reaches outside the padded reference: read through edge emulation */
1412 if (x + p->xblen > p->width +EDGE_WIDTH/2 ||
1413 y + p->yblen > p->height+EDGE_WIDTH/2 ||
1415 for (i = 0; i < nplanes; i++) {
1416 ff_emulated_edge_mc(s->edge_emu_buffer[i], src[i], p->stride,
1417 p->xblen, p->yblen, x, y,
1418 p->width+EDGE_WIDTH/2, p->height+EDGE_WIDTH/2);
1419 src[i] = s->edge_emu_buffer[i];
/* index into put_pixels_tab / avg_pixels_tab, see block_mc() */
1422 return (nplanes>>1) + epel;
/**
 * Accumulate a weighted constant into the motion compensation buffer.
 *
 * Adds dc * obmc_weight[x] to every sample of an xblen x yblen area. Two
 * samples are handled per inner iteration, so dst[x+1] and obmc_weight[x+1]
 * are touched even when x+1 == xblen; xblen is assumed to be even.
 *
 * @param dst         accumulation buffer
 * @param dc          DC value of the block for this plane
 * @param stride      NOTE(review): presumably the row pitch of dst - confirm
 * @param obmc_weight weight table with a row pitch of MAX_BLOCKSIZE
 * @param xblen       block width
 * @param yblen       block height
 */
1425 static void add_dc(uint16_t *dst, int dc, int stride,
1426 uint8_t *obmc_weight, int xblen, int yblen)
1431 for (y = 0; y < yblen; y++) {
1432 for (x = 0; x < xblen; x += 2) {
1433 dst[x ] += dc * obmc_weight[x ];
1434 dst[x+1] += dc * obmc_weight[x+1];
/* next row of the weight table */
1437 obmc_weight += MAX_BLOCKSIZE;
/**
 * Motion-compensate one block and accumulate it, OBMC-weighted, into mctmp.
 *
 * The low two bits of block->ref select the mode: a DC block goes through
 * add_dc(), a single-reference block is fetched with mc_subpel() into
 * s->mcscratch and optionally weighted, a two-reference block is fetched
 * from both references and combined by biweighting or averaging. The
 * prediction in s->mcscratch is then added with s->add_obmc().
 * NOTE(review): the switch labels and any early exit after add_dc() are not
 * next to this comment - confirm which statements belong to which case.
 *
 * @param s           decoder context
 * @param block       block to compensate
 * @param mctmp       accumulation buffer position for this block
 * @param obmc_weight weight table for this block position
 * @param plane       plane index
 * @param dstx        block x position in the plane
 * @param dsty        block y position in the plane
 */
1441 static void block_mc(DiracContext *s, DiracBlock *block,
1442 uint16_t *mctmp, uint8_t *obmc_weight,
1443 int plane, int dstx, int dsty)
1445 Plane *p = &s->plane[plane];
/* filled by mc_subpel(): up to four source planes plus the weight row */
1446 const uint8_t *src[5];
1449 switch (block->ref&3) {
/* no reference: weighted DC value */
1451 add_dc(mctmp, block->u.dc[plane], p->stride, obmc_weight, p->xblen, p->yblen);
/* exactly one reference: (ref&3)-1 maps mask 1/2 to reference index 0/1 */
1455 idx = mc_subpel(s, block, src, dstx, dsty, (block->ref&3)-1, plane);
1456 s->put_pixels_tab[idx](s->mcscratch, src, p->stride, p->yblen);
/* NOTE(review): weight_func is NULL when select_dsp_funcs() found default
   weights; presumably guarded - confirm */
1458 s->weight_func(s->mcscratch, p->stride, s->weight_log2denom,
1459 s->weight[0] + s->weight[1], p->yblen);
/* both references: fetch reference 0 first, then combine with reference 1 */
1462 idx = mc_subpel(s, block, src, dstx, dsty, 0, plane);
1463 s->put_pixels_tab[idx](s->mcscratch, src, p->stride, p->yblen);
1464 idx = mc_subpel(s, block, src, dstx, dsty, 1, plane);
1465 if (s->biweight_func) {
1466 /* fixme: +32 is a quick hack */
1467 s->put_pixels_tab[idx](s->mcscratch + 32, src, p->stride, p->yblen);
1468 s->biweight_func(s->mcscratch, s->mcscratch+32, p->stride, s->weight_log2denom,
1469 s->weight[0], s->weight[1], p->yblen);
/* default weights: plain average of the two predictions */
1471 s->avg_pixels_tab[idx](s->mcscratch, src, p->stride, p->yblen);
/* add the prediction to the accumulator with the overlap weights */
1474 s->add_obmc(mctmp, s->mcscratch, p->stride, obmc_weight, p->yblen);
/**
 * Motion-compensate one row of blocks.
 *
 * The first block is placed at x = -xoffset and uses the left-edge weights
 * (obmc_weight[0]), blocks 1..blwidth-2 use the interior weights
 * (obmc_weight[1]) and the block handled after the loop uses the right-edge
 * weights (obmc_weight[2]).
 * NOTE(review): mctmp and dstx are presumably advanced by xbsep between
 * blocks - confirm.
 *
 * @param s     decoder context
 * @param block first block of the row
 * @param mctmp accumulation buffer for this row
 * @param plane plane index
 * @param dsty  y position of the block row in the plane
 */
1477 static void mc_row(DiracContext *s, DiracBlock *block, uint16_t *mctmp, int plane, int dsty)
1479 Plane *p = &s->plane[plane];
/* x position of block 1: one block separation after -xoffset */
1480 int x, dstx = p->xbsep - p->xoffset;
1482 block_mc(s, block, mctmp, s->obmc_weight[0], plane, -p->xoffset, dsty);
1485 for (x = 1; x < s->blwidth-1; x++) {
1486 block_mc(s, block+x, mctmp, s->obmc_weight[1], plane, dstx, dsty);
/* x == blwidth-1 here: last block of the row */
1490 block_mc(s, block+x, mctmp, s->obmc_weight[2], plane, dstx, dsty);
/**
 * Select the DSP routines used for motion compensation.
 *
 * Copies the put / avg pixel function tables and the OBMC add function for
 * variant idx from s->diracdsp into the context. The weighting functions are
 * set only when the reference weights differ from the default
 * (weight_log2denom > 1 or either weight != 1); otherwise they are NULL.
 * NOTE(review): how idx is derived from the parameters is not next to this
 * comment - confirm before relying on a particular mapping.
 *
 * @param s      decoder context to update
 * @param width  plane width
 * @param height plane height
 * @param xblen  block width
 * @param yblen  block height
 */
1493 static void select_dsp_funcs(DiracContext *s, int width, int height, int xblen, int yblen)
1501 memcpy(s->put_pixels_tab, s->diracdsp.put_dirac_pixels_tab[idx], sizeof(s->put_pixels_tab));
1502 memcpy(s->avg_pixels_tab, s->diracdsp.avg_dirac_pixels_tab[idx], sizeof(s->avg_pixels_tab));
1503 s->add_obmc = s->diracdsp.add_dirac_obmc[idx];
/* non-default reference weights need the explicit weighting routines */
1504 if (s->weight_log2denom > 1 || s->weight[0] != 1 || s->weight[1] != 1) {
1505 s->weight_func = s->diracdsp.weight_dirac_pixels_tab[idx];
1506 s->biweight_func = s->diracdsp.biweight_dirac_pixels_tab[idx];
/* NULL tells block_mc() to use the unweighted path */
1508 s->weight_func = NULL;
1509 s->biweight_func = NULL;
/**
 * Prepare the half-pel planes of one plane of a reference picture.
 *
 * hpel[plane][0] is the decoded plane itself. When sub-pel vectors are in
 * use, planes 1..3 are allocated on first use, filled by
 * dirac_hpel_filter() and edge-extended; ref->interpolated[plane] records
 * that this has been done.
 *
 * @param s      decoder context
 * @param ref    reference picture to prepare
 * @param plane  plane index
 * @param width  plane width
 * @param height plane height
 */
1513 static void interpolate_refplane(DiracContext *s, DiracFrame *ref, int plane, int width, int height)
1515 /* chroma allocates an edge of 8 when subsampled
1516 which for 4:2:2 means an h edge of 16 and v edge of 8
1517 just use 8 for everything for the moment */
1518 int i, edge = EDGE_WIDTH/2;
1520 ref->hpel[plane][0] = ref->avframe.data[plane];
1521 s->dsp.draw_edges(ref->hpel[plane][0], ref->avframe.linesize[plane], width, height, edge, edge, EDGE_TOP | EDGE_BOTTOM); /* EDGE_TOP | EDGE_BOTTOM values just copied to make it build, this needs to be ensured */
1523 /* no need for hpel if we only have fpel vectors */
1524 if (!s->mv_precision)
1527 for (i = 1; i < 4; i++) {
/* allocated once per plane: height plus both edges, 32 spare bytes */
1528 if (!ref->hpel_base[plane][i])
/* NOTE(review): the av_malloc() result is offset on the next statement
   without a NULL check; an allocation failure ends in a NULL-based pointer */
1529 ref->hpel_base[plane][i] = av_malloc((height+2*edge) * ref->avframe.linesize[plane] + 32);
1530 /* we need to be 16-byte aligned even for chroma */
1531 ref->hpel[plane][i] = ref->hpel_base[plane][i] + edge*ref->avframe.linesize[plane] + 16;
/* filter only once per picture and plane */
1534 if (!ref->interpolated[plane]) {
1535 s->diracdsp.dirac_hpel_filter(ref->hpel[plane][1], ref->hpel[plane][2],
1536 ref->hpel[plane][3], ref->hpel[plane][0],
1537 ref->avframe.linesize[plane], width, height);
1538 s->dsp.draw_edges(ref->hpel[plane][1], ref->avframe.linesize[plane], width, height, edge, edge, EDGE_TOP | EDGE_BOTTOM);
1539 s->dsp.draw_edges(ref->hpel[plane][2], ref->avframe.linesize[plane], width, height, edge, edge, EDGE_TOP | EDGE_BOTTOM);
1540 s->dsp.draw_edges(ref->hpel[plane][3], ref->avframe.linesize[plane], width, height, edge, edge, EDGE_TOP | EDGE_BOTTOM);
1542 ref->interpolated[plane] = 1;
1546 * Dirac Specification ->
1547 * 13.0 Transform data syntax. transform_data()
/**
 * Reconstruct the current picture from its transform data and, for inter
 * pictures, its motion-compensated prediction.
 *
 * Per component: the coefficient buffer is decoded (decode_component()),
 * the inverse wavelet transform is set up, and the picture is produced in
 * horizontal bands. Intra pictures are written 16 lines at a time; inter
 * pictures are processed one block row at a time, adding the IDWT output to
 * the OBMC accumulator in s->mctmp.
 *
 * @param s decoder context
 * @return  tested for non-zero by dirac_decode_data_unit()
 */
1549 static int dirac_decode_frame_internal(DiracContext *s)
1552 int y, i, comp, dsty;
1555 /* [DIRAC_STD] 13.5.1 low_delay_transform_data() */
/* clear the coefficient buffers of all three components */
1556 for (comp = 0; comp < 3; comp++) {
1557 Plane *p = &s->plane[comp];
1558 memset(p->idwt_buf, 0, p->idwt_stride * p->idwt_height * sizeof(IDWTELEM));
1564 for (comp = 0; comp < 3; comp++) {
1565 Plane *p = &s->plane[comp];
1566 uint8_t *frame = s->current_picture->avframe.data[comp];
1568 /* FIXME: small resolutions */
/* four edge emulation buffers carved out of one allocation */
1569 for (i = 0; i < 4; i++)
1570 s->edge_emu_buffer[i] = s->edge_emu_buffer_base + i*FFALIGN(p->width, 16);
1572 if (!s->zero_res && !s->low_delay)
1574 memset(p->idwt_buf, 0, p->idwt_stride * p->idwt_height * sizeof(IDWTELEM));
1575 decode_component(s, comp); /* [DIRAC_STD] 13.4.1 core_transform_data() */
1577 if (ff_spatial_idwt_init2(&d, p->idwt_buf, p->idwt_width, p->idwt_height, p->idwt_stride,
1578 s->wavelet_idx+2, s->wavelet_depth, p->idwt_tmp))
1581 if (!s->num_refs) { /* intra */
/* transform and output in bands of 16 lines */
1582 for (y = 0; y < p->height; y += 16) {
1583 ff_spatial_idwt_slice2(&d, y+16); /* decode */
1584 s->diracdsp.put_signed_rect_clamped(frame + y*p->stride, p->stride,
1585 p->idwt_buf + y*p->idwt_stride, p->idwt_stride, p->width, 16);
1587 } else { /* inter */
/* distance in mctmp between two block rows */
1588 int rowheight = p->ybsep*p->stride;
1590 select_dsp_funcs(s, p->width, p->height, p->xblen, p->yblen);
1592 for (i = 0; i < s->num_refs; i++)
1593 interpolate_refplane(s, s->ref_pics[i], comp, p->width, p->height);
1595 memset(s->mctmp, 0, 4*p->yoffset*p->stride);
1598 for (y = 0; y < s->blheight; y++) {
/* first picture line written for this block row, clipped at the top */
1600 start = FFMAX(dsty, 0);
1601 uint16_t *mctmp = s->mctmp + y*rowheight;
1602 DiracBlock *blocks = s->blmotion + y*s->blwidth;
1604 init_obmc_weights(s, p, y);
/* number of lines to output: up to the picture bottom for the last row */
1606 if (y == s->blheight-1 || start+p->ybsep > p->height)
1607 h = p->height - start;
1609 h = p->ybsep - (start - dsty);
/* clear the accumulator lines that this row touches for the first time */
1613 memset(mctmp+2*p->yoffset*p->stride, 0, 2*rowheight);
1614 mc_row(s, blocks, mctmp, comp, dsty);
/* skip the overlap border so mctmp lines up with picture position start */
1616 mctmp += (start - dsty)*p->stride + p->xoffset;
1617 ff_spatial_idwt_slice2(&d, start + h); /* decode */
1618 s->diracdsp.add_rect_clamped(frame + start*p->stride, mctmp, p->stride,
1619 p->idwt_buf + start*p->idwt_stride, p->idwt_stride, p->width, h);
1631 * Dirac Specification ->
1632 * 11.1.1 Picture Header. picture_header()
/**
 * Parse the picture header and the data that precedes the transform data.
 *
 * Reads the 32-bit picture number, resolves the reference pictures (each
 * coded as a signed offset from the picture number), retires a reference
 * picture when requested, inserts the current picture into the reference
 * list, and calls the prediction parameter, block motion and wavelet
 * parameter parsers.
 *
 * @param s decoder context; s->current_picture must be set
 * @return  tested for non-zero by dirac_decode_data_unit()
 */
1634 static int dirac_decode_picture_header(DiracContext *s)
1637 int i, j, refnum, refdist;
1638 GetBitContext *gb = &s->gb;
1640 /* [DIRAC_STD] 11.1.1 Picture Header. picture_header() PICTURE_NUM */
1641 picnum = s->current_picture->avframe.display_picture_number = get_bits_long(gb, 32);
1644 av_log(s->avctx,AV_LOG_DEBUG,"PICTURE_NUM: %d\n",picnum);
1646 /* if this is the first keyframe after a sequence header, start our
1647 reordering from here */
1648 if (s->frame_number < 0)
1649 s->frame_number = picnum;
1651 s->ref_pics[0] = s->ref_pics[1] = NULL;
1652 for (i = 0; i < s->num_refs; i++) {
/* references are coded relative to the current picture number */
1653 refnum = picnum + dirac_get_se_golomb(gb);
1656 /* find the closest reference to the one we want */
1657 /* Jordi: this is needed if the referenced picture hasn't yet arrived */
/* the search stops early once an exact match makes refdist 0 */
1658 for (j = 0; j < MAX_REFERENCE_FRAMES && refdist; j++)
1659 if (s->ref_frames[j]
1660 && FFABS(s->ref_frames[j]->avframe.display_picture_number - refnum) < refdist) {
1661 s->ref_pics[i] = s->ref_frames[j];
1662 refdist = FFABS(s->ref_frames[j]->avframe.display_picture_number - refnum);
1665 if (!s->ref_pics[i] || refdist)
1666 av_log(s->avctx, AV_LOG_DEBUG, "Reference not found\n");
1668 /* if there were no references at all, allocate one */
1669 if (!s->ref_pics[i])
1670 for (j = 0; j < MAX_FRAMES; j++)
1671 if (!s->all_frames[j].avframe.data[0]) {
1672 s->ref_pics[i] = &s->all_frames[j];
/* NOTE(review): the ff_get_buffer() result is ignored here, while
   dirac_decode_data_unit() checks it; a failed allocation leaves a
   reference without picture data */
1673 ff_get_buffer(s->avctx, &s->ref_pics[i]->avframe);
1678 /* retire the reference frames that are not used anymore */
1679 if (s->current_picture->avframe.reference) {
/* the picture to retire is also coded relative to the picture number */
1680 retire = picnum + dirac_get_se_golomb(gb);
1681 if (retire != picnum) {
1682 DiracFrame *retire_pic = remove_frame(s->ref_frames, retire);
/* keep only the delayed-output flag so the frame is not released early */
1685 retire_pic->avframe.reference &= DELAYED_PIC_REF;
1687 av_log(s->avctx, AV_LOG_DEBUG, "Frame to retire not found\n");
1690 /* if reference array is full, remove the oldest as per the spec */
1691 while (add_frame(s->ref_frames, MAX_REFERENCE_FRAMES, s->current_picture)) {
1692 av_log(s->avctx, AV_LOG_ERROR, "Reference frame overflow\n");
/* NOTE(review): the remove_frame() result is dereferenced without a check,
   unlike the retire path above - confirm it cannot be NULL here */
1693 remove_frame(s->ref_frames, s->ref_frames[0]->avframe.display_picture_number)->avframe.reference &= DELAYED_PIC_REF;
1698 if (dirac_unpack_prediction_parameters(s)) /* [DIRAC_STD] 11.2 Picture Prediction Data. picture_prediction() */
1700 if (dirac_unpack_block_motion_data(s)) /* [DIRAC_STD] 12. Block motion data syntax */
1703 if (dirac_unpack_idwt_params(s)) /* [DIRAC_STD] 11.3 Wavelet transform data */
/**
 * Output the delayed picture with the lowest picture number.
 *
 * Scans the NULL-terminated s->delay_frames list, removes the selected entry
 * by shifting the following entries down, toggles its DELAYED_PIC_REF flag
 * and copies its AVFrame to the caller.
 * NOTE(review): the scan starts at index 1 and compares against
 * delay_frames[0]; an empty list relies on delay_frames[1] also being NULL -
 * confirm.
 *
 * @param s         decoder context
 * @param picture   receives a copy of the selected frame
 * @param got_frame NOTE(review): presumably set when a picture is output -
 *                  confirm
 */
1710 static int get_delayed_pic(DiracContext *s, AVFrame *picture, int *got_frame)
1712 DiracFrame *out = s->delay_frames[0];
1715 /* find frame with lowest picture number */
1716 for (i = 1; s->delay_frames[i]; i++)
1717 if (s->delay_frames[i]->avframe.display_picture_number < out->avframe.display_picture_number) {
1718 out = s->delay_frames[i];
/* close the gap; the terminating NULL is copied down as well */
1722 for (i = out_idx; s->delay_frames[i]; i++)
1723 s->delay_frames[i] = s->delay_frames[i+1];
/* flip the delayed-output flag of the picture being output */
1726 out->avframe.reference ^= DELAYED_PIC_REF;
1728 *(AVFrame *)picture = out->avframe;
1735 * Dirac Specification ->
1736 * 9.6 Parse Info Header Syntax. parse_info()
1737 * 4 byte start code + byte parse code + 4 byte size + 4 byte previous size
1739 #define DATA_UNIT_HEADER_SIZE 13
1741 /* [DIRAC_STD] dirac_decode_data_unit makes reference to the while defined in 9.3
1742 inside the function parse_sequence() */
/**
 * Decode one data unit, dispatching on its parse code (byte 4 of the unit).
 *
 * Sequence headers (re)allocate the sequence buffers, end-of-sequence frees
 * them, auxiliary data is inspected for an encoder version string, and
 * picture units (parse code bit 3 set) are decoded into an unused frame of
 * s->all_frames, which becomes s->current_picture.
 *
 * @param avctx codec context
 * @param buf   start of the data unit, including the parse info header
 * @param size  size of the data unit in bytes
 * @return      tested for non-zero by dirac_decode_frame()
 */
1743 static int dirac_decode_data_unit(AVCodecContext *avctx, const uint8_t *buf, int size)
1745 DiracContext *s = avctx->priv_data;
1746 DiracFrame *pic = NULL;
/* NOTE(review): buf[4] is read before size is checked below; safe only
   because the caller guarantees more than DATA_UNIT_HEADER_SIZE bytes */
1747 int i, parse_code = buf[4];
1750 if (size < DATA_UNIT_HEADER_SIZE)
/* the payload starts right after the 13-byte parse info header */
1753 init_get_bits(&s->gb, &buf[13], 8*(size - DATA_UNIT_HEADER_SIZE));
1755 if (parse_code == pc_seq_header) {
1756 if (s->seen_sequence_header)
1759 /* [DIRAC_STD] 10. Sequence header */
1760 if (avpriv_dirac_parse_sequence_header(avctx, &s->gb, &s->source))
1763 avcodec_get_chroma_sub_sample(avctx->pix_fmt, &s->chroma_x_shift, &s->chroma_y_shift);
1765 if (alloc_sequence_buffers(s))
1768 s->seen_sequence_header = 1;
1769 } else if (parse_code == pc_eos) { /* [DIRAC_STD] End of Sequence */
1770 free_sequence_buffers(s);
1771 s->seen_sequence_header = 0;
1772 } else if (parse_code == pc_aux_data) {
1773 if (buf[13] == 1) { /* encoder implementation/version */
1775 /* versions older than 1.0.8 don't store quant delta for
1776 subbands with only one codeblock */
/* NOTE(review): sscanf() reads packet bytes as a C string; this relies on
   the buffer being NUL-terminated or padded past size - confirm */
1777 if (sscanf(buf+14, "Schroedinger %d.%d.%d", ver, ver+1, ver+2) == 3)
1778 if (ver[0] == 1 && ver[1] == 0 && ver[2] <= 7)
1779 s->old_delta_quant = 1;
1781 } else if (parse_code & 0x8) { /* picture data unit */
1782 if (!s->seen_sequence_header) {
1783 av_log(avctx, AV_LOG_DEBUG, "Dropping frame without sequence header\n");
1787 /* find an unused frame */
/* no early exit: the last unused slot wins */
1788 for (i = 0; i < MAX_FRAMES; i++)
1789 if (s->all_frames[i].avframe.data[0] == NULL)
1790 pic = &s->all_frames[i];
1792 av_log(avctx, AV_LOG_ERROR, "framelist full\n");
1796 avcodec_get_frame_defaults(&pic->avframe);
1798 /* [DIRAC_STD] Defined in 9.6.1 ... */
1799 tmp = parse_code & 0x03; /* [DIRAC_STD] num_refs() */
1801 av_log(avctx, AV_LOG_ERROR, "num_refs of 3\n");
1805 s->is_arith = (parse_code & 0x48) == 0x08; /* [DIRAC_STD] using_ac() */
1806 s->low_delay = (parse_code & 0x88) == 0x88; /* [DIRAC_STD] is_low_delay() */
1807 pic->avframe.reference = (parse_code & 0x0C) == 0x0C; /* [DIRAC_STD] is_reference() */
1808 pic->avframe.key_frame = s->num_refs == 0; /* [DIRAC_STD] is_intra() */
1809 pic->avframe.pict_type = s->num_refs + 1; /* Definition of AVPictureType in avutil.h */
1811 if (ff_get_buffer(avctx, &pic->avframe) < 0) {
1812 av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
/* the planes take their strides from the freshly allocated frame */
1815 s->current_picture = pic;
1816 s->plane[0].stride = pic->avframe.linesize[0];
1817 s->plane[1].stride = pic->avframe.linesize[1];
1818 s->plane[2].stride = pic->avframe.linesize[2];
1820 /* [DIRAC_STD] 11.1 Picture parse. picture_parse() */
1821 if (dirac_decode_picture_header(s))
1824 /* [DIRAC_STD] 13.0 Transform data syntax. transform_data() */
1825 if (dirac_decode_frame_internal(s))
/**
 * Decode one packet: release unused frames, parse every data unit in the
 * packet and output a picture in display order.
 *
 * Data units are located by their "BBCD" parse info prefix; the 32-bit size
 * stored at offset 5 of the unit is validated against the remaining packet
 * bytes before the unit is handed to dirac_decode_data_unit(). After
 * parsing, the current picture is output directly when its picture number
 * equals s->frame_number, or queued in s->delay_frames when it is ahead of
 * it.
 *
 * @param avctx     codec context
 * @param data      output frame, written as an AVFrame
 * @param got_frame output flag; forwarded to get_delayed_pic() when flushing
 * @param pkt       input packet
 */
1831 static int dirac_decode_frame(AVCodecContext *avctx, void *data, int *got_frame, AVPacket *pkt)
1833 DiracContext *s = avctx->priv_data;
1834 DiracFrame *picture = data;
1835 uint8_t *buf = pkt->data;
1836 int buf_size = pkt->size;
1837 int i, data_unit_size, buf_idx = 0;
1839 /* release unused frames */
1840 for (i = 0; i < MAX_FRAMES; i++)
1841 if (s->all_frames[i].avframe.data[0] && !s->all_frames[i].avframe.reference) {
1842 avctx->release_buffer(avctx, &s->all_frames[i].avframe);
1843 memset(s->all_frames[i].interpolated, 0, sizeof(s->all_frames[i].interpolated));
1846 s->current_picture = NULL;
1849 /* end of stream, so flush delayed pics */
1851 return get_delayed_pic(s, (AVFrame *)data, got_frame);
1854 /*[DIRAC_STD] Here starts the code from parse_info() defined in 9.6
1855 [DIRAC_STD] PARSE_INFO_PREFIX = "BBCD" as defined in ISO/IEC 646
1856 BBCD start code search */
1857 for (; buf_idx + DATA_UNIT_HEADER_SIZE < buf_size; buf_idx++) {
1858 if (buf[buf_idx ] == 'B' && buf[buf_idx+1] == 'B' &&
1859 buf[buf_idx+2] == 'C' && buf[buf_idx+3] == 'D')
1862 /* BBCD found or end of data */
1863 if (buf_idx + DATA_UNIT_HEADER_SIZE >= buf_size)
/* The size comes straight from the bitstream. It is compared against the
   bytes left in the packet instead of being added to buf_idx: the addition
   could overflow a signed int. Sizes of 2^31 and above arrive as negative
   values and are rejected together with zero. buf_size - buf_idx is
   positive here because of the end-of-data check above. */
1866 data_unit_size = AV_RB32(buf+buf_idx+5);
1867 if (data_unit_size <= 0 || data_unit_size > buf_size - buf_idx) {
1868 if(data_unit_size < 0 || data_unit_size > buf_size - buf_idx)
1869 av_log(s->avctx, AV_LOG_ERROR,
1870 "Data unit with size %d is larger than input buffer, discarding\n",
1875 /* [DIRAC_STD] dirac_decode_data_unit makes reference to the while defined in 9.3 inside the function parse_sequence() */
1876 if (dirac_decode_data_unit(avctx, buf+buf_idx, data_unit_size))
1878 av_log(s->avctx, AV_LOG_ERROR,"Error in dirac_decode_data_unit\n");
/* continue with the unit that follows this one */
1881 buf_idx += data_unit_size;
/* no picture unit was decoded from this packet */
1884 if (!s->current_picture)
/* picture is ahead of the expected output number: queue it */
1887 if (s->current_picture->avframe.display_picture_number > s->frame_number) {
1888 DiracFrame *delayed_frame = remove_frame(s->delay_frames, s->frame_number);
1890 s->current_picture->avframe.reference |= DELAYED_PIC_REF;
1892 if (add_frame(s->delay_frames, MAX_DELAY, s->current_picture)) {
1893 int min_num = s->delay_frames[0]->avframe.display_picture_number;
1894 /* Too many delayed frames, so we display the frame with the lowest pts */
1895 av_log(avctx, AV_LOG_ERROR, "Delay frame overflow\n");
1896 delayed_frame = s->delay_frames[0];
1898 for (i = 1; s->delay_frames[i]; i++)
1899 if (s->delay_frames[i]->avframe.display_picture_number < min_num)
1900 min_num = s->delay_frames[i]->avframe.display_picture_number;
/* make room by outputting the lowest-numbered picture, then queue again */
1902 delayed_frame = remove_frame(s->delay_frames, min_num);
1903 add_frame(s->delay_frames, MAX_DELAY, s->current_picture);
1906 if (delayed_frame) {
1907 delayed_frame->avframe.reference ^= DELAYED_PIC_REF;
1908 *(AVFrame*)data = delayed_frame->avframe;
1911 } else if (s->current_picture->avframe.display_picture_number == s->frame_number) {
1912 /* The right frame at the right time :-) */
1913 *(AVFrame*)data = s->current_picture->avframe;
/* the next picture expected for output follows the one just written to data */
1918 s->frame_number = picture->avframe.display_picture_number + 1;
/* Codec registration entry for the Dirac video decoder. CODEC_CAP_DELAY is
   set because dirac_decode_frame() holds pictures back in s->delay_frames
   and hands them out through get_delayed_pic() at end of stream. */
1923 AVCodec ff_dirac_decoder = {
1925 .type = AVMEDIA_TYPE_VIDEO,
1926 .id = AV_CODEC_ID_DIRAC,
1927 .priv_data_size = sizeof(DiracContext),
1928 .init = dirac_decode_init,
1929 .close = dirac_decode_end,
1930 .decode = dirac_decode_frame,
1931 .capabilities = CODEC_CAP_DELAY,
1932 .flush = dirac_decode_flush,
1933 .long_name = NULL_IF_CONFIG_SMALL("BBC Dirac VC-2"),