2 * Copyright (C) 2007 Marco Gerards <marco@gnu.org>
3 * Copyright (C) 2009 David Conrad
4 * Copyright (C) 2011 Jordi Ortiz
6 * This file is part of FFmpeg.
8 * FFmpeg is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
13 * FFmpeg is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with FFmpeg; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
26 * @author Marco Gerards <marco@gnu.org>, David Conrad, Jordi Ortiz <nenjordi@gmail.com>
32 #include "bytestream.h"
35 #include "dirac_arith.h"
36 #include "mpeg12data.h"
37 #include "dirac_dwt.h"
40 #include "videodsp.h" // for ff_emulated_edge_mc_8
43 * The spec limits the number of wavelet decompositions to 4 for both
44 * level 1 (VC-2) and 128 (long-gop default).
45 * 5 decompositions is the maximum before >16-bit buffers are needed.
46 * Schroedinger allows this for DD 9,7 and 13,7 wavelets only, limiting
47 * the others to 4 decompositions (or 3 for the fidelity filter).
49 * We use this instead of MAX_DECOMPOSITIONS to save some memory.
/* Upper bound on wavelet decomposition levels; sizes Plane.band[], DiracContext codeblock[] (plus one) and the low-delay quant[] matrix. */
51 #define MAX_DWT_LEVELS 5
54 * The spec limits this to 3 for frame coding, but in practice can be as high as 6
56 #define MAX_REFERENCE_FRAMES 8
57 #define MAX_DELAY 5 /* limit for main profile for frame coding (TODO: field coding) */
/* Capacity of DiracContext.all_frames: all reference slots, all delayed-output slots, plus one. */
58 #define MAX_FRAMES (MAX_REFERENCE_FRAMES + MAX_DELAY + 1)
/* Largest quantiser index; the qscale/qoffset tables are declared with MAX_QUANT+1 entries. */
59 #define MAX_QUANT 68 /* max quant for VC-2 */
60 #define MAX_BLOCKSIZE 32 /* maximum xblen/yblen we support */
63 * DiracBlock->ref flags, if set then the block does MC from the given ref
/* Bit flags stored in DiracBlock.ref; REF1|REF2 together form the mask value 3 tested by pred_block_dc(). */
65 #define DIRAC_REF_MASK_REF1 1
66 #define DIRAC_REF_MASK_REF2 2
67 #define DIRAC_REF_MASK_GLOBAL 4
70 * Value of Picture.reference when Picture is not a reference picture, but
71 * is held for delayed output.
73 #define DELAYED_PIC_REF 4
/* Alias that binds the generic name to the 8-bit edge emulation routine. */
75 #define ff_emulated_edge_mc ff_emulated_edge_mc_8 /* Fix: change the calls to this function regarding bit depth */
/*
 * CALC_PADDING(size, depth): round size up to the next multiple of
 * (1 << depth).  Both arguments are fully parenthesised so that expression
 * arguments (for example "a | b" or "n + 1") group as the caller expects.
 * depth is evaluated more than once; do not pass an expression with side
 * effects.
 */
#define CALC_PADDING(size, depth) \
    ((((size) + (1 << (depth)) - 1) >> (depth)) << (depth))

/* DIVRNDUP(a, b): integer division of a by b, rounded up (for positive operands). */
#define DIVRNDUP(a, b) (((a) + (b) - 1) / (b))
84 int interpolated[3]; /* 1 if hpel[] is valid */
86 uint8_t *hpel_base[3][4];
93 } u; /* anonymous unions aren't in C99 :( */
/* One wavelet subband of a plane (only some members are visible in this excerpt). */
97 typedef struct SubBand {
/* Band of the same orientation one level lower; init_planes() assigns it from band[level-1][orientation]. */
105 struct SubBand *parent;
/* Start of the coded coefficient bytes of this band inside the input bitstream buffer (set in decode_component()). */
109 const uint8_t *coeff_data;
/* Per-component (plane) decoding state (only some members are visible in this excerpt). */
112 typedef struct Plane {
/* Start of the coefficient allocation made in alloc_sequence_buffers(); idwt_buf points top_padding rows into it. */
121 IDWTELEM *idwt_buf_base;
127 /* block separation (block n+1 starts after this many pixels in block n) */
130 /* amount of overspill on each edge (half of the overlap between blocks) */
/* Subbands indexed as band[level][orientation]. */
134 SubBand band[MAX_DWT_LEVELS][4];
/* Decoder state shared by the functions in this file (only part of the definition is visible in this excerpt). */
137 typedef struct DiracContext {
138 AVCodecContext *avctx;
140 DiracDSPContext diracdsp;
142 dirac_source_params source;
143 int seen_sequence_header;
144 int frame_number; /* number of the next frame to display */
149 int zero_res; /* zero residue flag */
150 int is_arith; /* whether coeffs use arith or golomb coding */
151 int low_delay; /* use the low delay syntax */
152 int globalmc_flag; /* use global motion compensation */
153 int num_refs; /* number of reference pictures */
155 /* wavelet decoding */
156 unsigned wavelet_depth; /* depth of the IDWT */
157 unsigned wavelet_idx;
160 * schroedinger older than 1.0.8 doesn't store
161 * quant delta if only one codebook exists in a band
163 unsigned old_delta_quant;
164 unsigned codeblock_mode;
169 } codeblock[MAX_DWT_LEVELS+1];
172 unsigned num_x; /* number of horizontal slices */
173 unsigned num_y; /* number of vertical slices */
174 AVRational bytes; /* average bytes per slice */
175 uint8_t quant[MAX_DWT_LEVELS][4]; /* [DIRAC_STD] E.1 */
179 int pan_tilt[2]; /* pan/tilt vector */
180 int zrs[2][2]; /* zoom/rotate/shear matrix */
181 int perspective[2]; /* perspective vector */
183 unsigned perspective_exp;
186 /* motion compensation */
187 uint8_t mv_precision; /* motion vector precision; values above 3 are rejected as finer than eighth-pel */
188 int16_t weight[2]; /* [DIRAC_STD] REF1_WT and REF2_WT */
189 unsigned weight_log2denom; /* [DIRAC_STD] REFS_WT_PRECISION */
191 int blwidth; /* number of blocks (horizontally) */
192 int blheight; /* number of blocks (vertically) */
193 int sbwidth; /* number of superblocks (horizontally) */
194 int sbheight; /* number of superblocks (vertically) */
197 DiracBlock *blmotion;
199 uint8_t *edge_emu_buffer[4];
200 uint8_t *edge_emu_buffer_base;
202 uint16_t *mctmp; /* buffer holding the MC data multiplied by OBMC weights */
205 DECLARE_ALIGNED(16, uint8_t, obmc_weight)[3][MAX_BLOCKSIZE*MAX_BLOCKSIZE];
207 void (*put_pixels_tab[4])(uint8_t *dst, const uint8_t *src[5], int stride, int h);
208 void (*avg_pixels_tab[4])(uint8_t *dst, const uint8_t *src[5], int stride, int h);
209 void (*add_obmc)(uint16_t *dst, const uint8_t *src, int stride, const uint8_t *obmc_weight, int yblen);
210 dirac_weight_func weight_func;
211 dirac_biweight_func biweight_func;
213 DiracFrame *current_picture;
214 DiracFrame *ref_pics[2];
/* The two pointer lists below have one slot more than their MAX_* limit; remove_frame() walks them as NULL-terminated lists. */
216 DiracFrame *ref_frames[MAX_REFERENCE_FRAMES+1];
217 DiracFrame *delay_frames[MAX_DELAY+1];
/* Frame storage; the AVFrame of each entry is allocated in dirac_decode_init() and freed in dirac_decode_end(). */
218 DiracFrame all_frames[MAX_FRAMES];
222 * Dirac Specification ->
223 * Parse code values. 9.6.1 Table 9.1
/* Parse code values; only the first enumerator is visible in this excerpt. */
225 enum dirac_parse_code {
226 pc_seq_header = 0x00,
/*
 * Default low-delay quantisation matrix values, read as
 * default_qmat[wavelet_idx][level][orientation] by dirac_unpack_idwt_params()
 * when the stream carries no custom matrix.  Seven rows are listed, matching
 * the wavelet_idx range 0..6 accepted by that function.
 */
239 static const uint8_t default_qmat[][4][4] = {
240 { { 5, 3, 3, 0}, { 0, 4, 4, 1}, { 0, 5, 5, 2}, { 0, 6, 6, 3} },
241 { { 4, 2, 2, 0}, { 0, 4, 4, 2}, { 0, 5, 5, 3}, { 0, 7, 7, 5} },
242 { { 5, 3, 3, 0}, { 0, 4, 4, 1}, { 0, 5, 5, 2}, { 0, 6, 6, 3} },
243 { { 8, 4, 4, 0}, { 0, 4, 4, 0}, { 0, 4, 4, 0}, { 0, 4, 4, 0} },
244 { { 8, 4, 4, 0}, { 0, 4, 4, 0}, { 0, 4, 4, 0}, { 0, 4, 4, 0} },
245 { { 0, 4, 4, 8}, { 0, 8, 8, 12}, { 0, 13, 13, 17}, { 0, 17, 17, 21} },
246 { { 3, 1, 1, 0}, { 0, 4, 4, 2}, { 0, 6, 6, 5}, { 0, 9, 9, 7} },
/*
 * Quantisation factor for each quantiser index 0..MAX_QUANT.  Callers clamp
 * the index with FFMIN(..., MAX_QUANT) before the lookup and apply the factor
 * as (coeff * qfactor + qoffset + 2) >> 2.  The tail of the table is not
 * visible in this excerpt.
 */
249 static const int qscale_tab[MAX_QUANT+1] = {
250 4, 5, 6, 7, 8, 10, 11, 13,
251 16, 19, 23, 27, 32, 38, 45, 54,
252 64, 76, 91, 108, 128, 152, 181, 215,
253 256, 304, 362, 431, 512, 609, 724, 861,
254 1024, 1218, 1448, 1722, 2048, 2435, 2896, 3444,
255 4096, 4871, 5793, 6889, 8192, 9742, 11585, 13777,
256 16384, 19484, 23170, 27554, 32768, 38968, 46341, 55109,
/*
 * Dequantisation offset for each quantiser index 0..MAX_QUANT; used by
 * lowdelay_subband() and by one branch of codeblock() (presumably the intra
 * case, going by the name - the selecting condition is not visible here).
 * The tail of the table is not visible in this excerpt.
 */
260 static const int qoffset_intra_tab[MAX_QUANT+1] = {
261 1, 2, 3, 4, 4, 5, 6, 7,
262 8, 10, 12, 14, 16, 19, 23, 27,
263 32, 38, 46, 54, 64, 76, 91, 108,
264 128, 152, 181, 216, 256, 305, 362, 431,
265 512, 609, 724, 861, 1024, 1218, 1448, 1722,
266 2048, 2436, 2897, 3445, 4096, 4871, 5793, 6889,
267 8192, 9742, 11585, 13777, 16384, 19484, 23171, 27555,
/*
 * Dequantisation offset for each quantiser index 0..MAX_QUANT; used by the
 * other branch of codeblock() (presumably the inter case, going by the name -
 * the selecting condition is not visible here).  The tail of the table is not
 * visible in this excerpt.
 */
271 static const int qoffset_inter_tab[MAX_QUANT+1] = {
272 1, 2, 2, 3, 3, 4, 4, 5,
273 6, 7, 9, 10, 12, 14, 17, 20,
274 24, 29, 34, 41, 48, 57, 68, 81,
275 96, 114, 136, 162, 192, 228, 272, 323,
276 384, 457, 543, 646, 768, 913, 1086, 1292,
277 1536, 1827, 2172, 2583, 3072, 3653, 4344, 5166,
278 6144, 7307, 8689, 10333, 12288, 14613, 17378, 20666,
/**
 * Approximate x / 3 with a multiply and shift (magic-number division taken
 * from schroedinger): 21845 is roughly 65536 / 3 and 10922 is the rounding term.
 *
 * The multiply-add is carried out in unsigned arithmetic so that a large
 * magnitude of x wraps instead of triggering signed-overflow undefined
 * behaviour; results for inputs that did not overflow before are unchanged.
 * The final shift is applied to the signed value so negative inputs still
 * round the same way as before (arithmetic right shift is assumed, as in the
 * original expression).
 *
 * @param x value to divide
 * @return approximately x / 3
 */
static inline int divide3(int x)
{
    return (int)((x + 1U) * 21845 + 10922) >> 16;
}
/*
 * Look up the frame whose display_picture_number equals picnum in the
 * NULL-terminated list framelist and unlink it from the list.
 * Lines of this function are missing from the excerpt; presumably remove_idx
 * records the match position and remove_pic is returned - confirm.
 */
288 static DiracFrame *remove_frame(DiracFrame *framelist[], int picnum)
290 DiracFrame *remove_pic = NULL;
291 int i, remove_idx = -1;
/* Scan the whole list; the loop has no break, so the last matching entry is the one kept in remove_pic. */
293 for (i = 0; framelist[i]; i++)
294 if (framelist[i]->avframe->display_picture_number == picnum) {
295 remove_pic = framelist[i];
/* Close the gap by moving every later entry down one slot; copying framelist[i+1] also moves the NULL terminator. */
300 for (i = remove_idx; framelist[i]; i++)
301 framelist[i] = framelist[i+1];
/*
 * Store frame in framelist, examining at most maxframes slots.
 * The slot-selection test and the return statements are on lines not visible
 * in this excerpt; presumably the first empty (NULL) slot is used and failure
 * is reported when none is free - confirm.
 */
306 static int add_frame(DiracFrame *framelist[], int maxframes, DiracFrame *frame)
309 for (i = 0; i < maxframes; i++)
311 framelist[i] = frame;
/**
 * Allocate the per-sequence work buffers: for each of the three planes an
 * IDWT coefficient buffer and a temporary buffer, then the superblock split
 * map, the block motion array and the motion-compensation scratch buffers.
 *
 * @return AVERROR(ENOMEM) when one of the checked allocations fails; the
 *         success return is on a line not visible in this excerpt.
 */
317 static int alloc_sequence_buffers(DiracContext *s)
/* Picture size in 4-pixel units, rounded up; these size sbsplit and (times 16) blmotion below. */
319 int sbwidth = DIVRNDUP(s->source.width, 4);
320 int sbheight = DIVRNDUP(s->source.height, 4);
321 int i, w, h, top_padding;
323 /* todo: think more about this / use or set Plane here */
324 for (i = 0; i < 3; i++) {
/* Planes 1 and 2 are reduced by the chroma shifts; plane 0 uses the full source size. */
325 int max_xblen = MAX_BLOCKSIZE >> (i ? s->chroma_x_shift : 0);
326 int max_yblen = MAX_BLOCKSIZE >> (i ? s->chroma_y_shift : 0);
327 w = s->source.width >> (i ? s->chroma_x_shift : 0);
328 h = s->source.height >> (i ? s->chroma_y_shift : 0);
330 /* we allocate the max we support here since num decompositions can
331 * change from frame to frame. Stride is aligned to 16 for SIMD, and
332 * 1<<MAX_DWT_LEVELS top padding to avoid if(y>0) in arith decoding
333 * MAX_BLOCKSIZE padding for MC: blocks can spill up to half of that
335 top_padding = FFMAX(1<<MAX_DWT_LEVELS, max_yblen/2);
336 w = FFALIGN(CALC_PADDING(w, MAX_DWT_LEVELS), 8); /* FIXME: Should this be 16 for SSE??? */
337 h = top_padding + CALC_PADDING(h, MAX_DWT_LEVELS) + max_yblen/2;
/* The coefficient buffer is zero-initialised; idwt_buf is offset by top_padding rows of width w into it. */
339 s->plane[i].idwt_buf_base = av_mallocz((w+max_xblen)*h * sizeof(IDWTELEM));
340 s->plane[i].idwt_tmp = av_malloc((w+16) * sizeof(IDWTELEM));
341 s->plane[i].idwt_buf = s->plane[i].idwt_buf_base + top_padding*w;
342 if (!s->plane[i].idwt_buf_base || !s->plane[i].idwt_tmp)
343 return AVERROR(ENOMEM);
/* NOTE(review): the value of w used below depends on a line not visible in this excerpt - confirm. */
347 h = s->source.height;
349 /* fixme: allocate using real stride here */
350 s->sbsplit = av_malloc(sbwidth * sbheight);
351 s->blmotion = av_malloc(sbwidth * sbheight * 16 * sizeof(*s->blmotion));
352 s->edge_emu_buffer_base = av_malloc((w+64)*MAX_BLOCKSIZE);
354 s->mctmp = av_malloc((w+64+MAX_BLOCKSIZE) * (h+MAX_BLOCKSIZE) * sizeof(*s->mctmp));
355 s->mcscratch = av_malloc((w+64)*MAX_BLOCKSIZE);
/* NOTE(review): edge_emu_buffer_base is allocated above but is not part of this NULL check - confirm it is validated elsewhere. */
357 if (!s->sbsplit || !s->blmotion || !s->mctmp || !s->mcscratch)
358 return AVERROR(ENOMEM);
/**
 * Release everything tied to the current sequence: unreference frames that
 * hold data, free the half-pel planes, clear the reference and delay lists,
 * and free the buffers allocated by alloc_sequence_buffers().
 * Some statements of this function are not visible in this excerpt.
 */
362 static void free_sequence_buffers(DiracContext *s)
366 for (i = 0; i < MAX_FRAMES; i++) {
/* Only frames that currently hold picture data are unreferenced. */
367 if (s->all_frames[i].avframe->data[0]) {
368 av_frame_unref(s->all_frames[i].avframe);
369 memset(s->all_frames[i].interpolated, 0, sizeof(s->all_frames[i].interpolated));
/* k starts at 1: hpel_base[j][0] is not freed here (presumably it is not a separate allocation - confirm). */
372 for (j = 0; j < 3; j++)
373 for (k = 1; k < 4; k++)
374 av_freep(&s->all_frames[i].hpel_base[j][k]);
/* The lists only hold pointers into all_frames, so zeroing them is enough. */
377 memset(s->ref_frames, 0, sizeof(s->ref_frames));
378 memset(s->delay_frames, 0, sizeof(s->delay_frames));
380 for (i = 0; i < 3; i++) {
381 av_freep(&s->plane[i].idwt_buf_base);
382 av_freep(&s->plane[i].idwt_tmp);
385 av_freep(&s->sbsplit);
386 av_freep(&s->blmotion);
387 av_freep(&s->edge_emu_buffer_base);
390 av_freep(&s->mcscratch);
/**
 * Decoder init callback: reset the frame counter, reject the unsupported
 * EMU_EDGE flag, initialise the DSP contexts and allocate one AVFrame for
 * every entry of all_frames.
 *
 * @return AVERROR_PATCHWELCOME when CODEC_FLAG_EMU_EDGE is set,
 *         AVERROR(ENOMEM) when a frame allocation fails; the success return
 *         is on a line not visible in this excerpt.
 */
393 static av_cold int dirac_decode_init(AVCodecContext *avctx)
395 DiracContext *s = avctx->priv_data;
399 s->frame_number = -1;
401 if (avctx->flags&CODEC_FLAG_EMU_EDGE) {
402 av_log(avctx, AV_LOG_ERROR, "Edge emulation not supported!\n");
403 return AVERROR_PATCHWELCOME;
406 ff_dsputil_init(&s->dsp, avctx);
407 ff_diracdsp_init(&s->diracdsp);
409 for (i = 0; i < MAX_FRAMES; i++) {
410 s->all_frames[i].avframe = av_frame_alloc();
411 if (!s->all_frames[i].avframe) {
/* Roll back: free the frames allocated so far, walking i downwards (the enclosing loop header is not visible here). */
413 av_frame_free(&s->all_frames[--i].avframe);
414 return AVERROR(ENOMEM);
/**
 * Flush callback: free all sequence buffers and return to the state in which
 * no sequence header has been seen and no frame has been output.
 */
421 static void dirac_decode_flush(AVCodecContext *avctx)
423 DiracContext *s = avctx->priv_data;
424 free_sequence_buffers(s);
/* A new sequence header must be seen before decoding continues (flag cleared). */
425 s->seen_sequence_header = 0;
426 s->frame_number = -1;
/**
 * Close callback: flush the decoder state, then free the AVFrames allocated
 * in dirac_decode_init().  The return statement is not visible in this excerpt.
 */
429 static av_cold int dirac_decode_end(AVCodecContext *avctx)
431 DiracContext *s = avctx->priv_data;
434 dirac_decode_flush(avctx);
435 for (i = 0; i < MAX_FRAMES; i++)
436 av_frame_free(&s->all_frames[i].avframe);
/* Map a predicted sign value to a context: CTX_SIGN_ZERO for 0, one above it for positive, one below it for negative. */
441 #define SIGN_CTX(x) (CTX_SIGN_ZERO + ((x) > 0) - ((x) < 0))
/**
 * Decode one arithmetic-coded coefficient at position (x, y) of subband b.
 * buf points at the coefficient position inside the band; already decoded
 * neighbours (left, above, above-left) and the parent band select the
 * context.  Several guarding conditions and the final store are on lines
 * not visible in this excerpt.
 */
443 static inline void coeff_unpack_arith(DiracArith *c, int qfactor, int qoffset,
444 SubBand *b, IDWTELEM *buf, int x, int y)
448 int pred_ctx = CTX_ZPZN_F1;
450 /* Check if the parent subband has a 0 in the corresponding position */
/* The parent band has half the resolution, hence the (x>>1, y>>1) lookup; a non-zero parent adds 2 to the context. */
452 pred_ctx += !!b->parent->ibuf[b->parent->stride * (y>>1) + (x>>1)] << 1;
454 if (b->orientation == subband_hl)
455 sign_pred = buf[-b->stride];
457 /* Determine if the pixel has only zeros in its neighbourhood */
459 pred_ctx += !(buf[-1] | buf[-b->stride] | buf[-1-b->stride]);
460 if (b->orientation == subband_lh)
463 pred_ctx += !buf[-b->stride];
466 coeff = dirac_get_arith_uint(c, pred_ctx, CTX_COEFF_DATA);
/* Dequantise the magnitude, then read the sign using a context chosen from the predicted sign. */
468 coeff = (coeff * qfactor + qoffset + 2) >> 2;
469 sign = dirac_get_arith_bit(c, SIGN_CTX(sign_pred));
/* With sign equal to 1 this negates coeff (XOR with all ones, plus 1); with sign equal to 0 it leaves coeff unchanged. */
470 coeff = (coeff ^ -sign) + sign;
/**
 * Read one exp-Golomb coded coefficient from gb and dequantise it with
 * qfactor and qoffset.  The condition guarding the dequantisation and the
 * return statement are on lines not visible in this excerpt; callers use the
 * returned value as the coefficient.
 */
475 static inline int coeff_unpack_golomb(GetBitContext *gb, int qfactor, int qoffset)
479 coeff = svq3_get_ue_golomb(gb);
481 coeff = (coeff * qfactor + qoffset + 2) >> 2;
482 sign = get_bits1(gb);
/* With sign equal to 1 this negates coeff; with sign equal to 0 it leaves coeff unchanged. */
483 coeff = (coeff ^ -sign) + sign;
489 * Decode the coeffs in the rectangle defined by left, right, top, bottom
490 * [DIRAC_STD] 13.4.3.2 Codeblock unpacking loop. codeblock()
/**
 * Decode the coefficients of one codeblock of subband b, i.e. the rectangle
 * [left, right) x [top, bottom).  Data comes from the arithmetic decoder c
 * or from the bit reader gb; the branch conditions selecting between them
 * are on lines not visible in this excerpt (is_arith is the obvious
 * selector - confirm).
 */
492 static inline void codeblock(DiracContext *s, SubBand *b,
493 GetBitContext *gb, DiracArith *c,
494 int left, int right, int top, int bottom,
495 int blockcnt_one, int is_arith)
497 int x, y, zero_block;
498 int qoffset, qfactor;
501 /* check for any coded coefficients in this codeblock */
504 zero_block = dirac_get_arith_bit(c, CTX_ZERO_BLOCK);
506 zero_block = get_bits1(gb);
/* A quantiser delta is read only in a non-zero codeblock mode, and not when old_delta_quant is set and the band has a single codeblock. */
512 if (s->codeblock_mode && !(s->old_delta_quant && blockcnt_one)) {
513 int quant = b->quant;
515 quant += dirac_get_arith_int(c, CTX_DELTA_Q_F, CTX_DELTA_Q_DATA);
517 quant += dirac_get_se_golomb(gb);
519 av_log(s->avctx, AV_LOG_ERROR, "Invalid quant\n");
/* Clamp so the table lookups below stay inside their MAX_QUANT+1 entries. */
525 b->quant = FFMIN(b->quant, MAX_QUANT);
527 qfactor = qscale_tab[b->quant];
528 /* TODO: context pointer? */
530 qoffset = qoffset_intra_tab[b->quant];
532 qoffset = qoffset_inter_tab[b->quant];
/* buf addresses the first row of the rectangle; the per-row advance is on a line not visible here. */
534 buf = b->ibuf + top * b->stride;
535 for (y = top; y < bottom; y++) {
536 for (x = left; x < right; x++) {
537 /* [DIRAC_STD] 13.4.4 Subband coefficients. coeff_unpack() */
539 coeff_unpack_arith(c, qfactor, qoffset, b, buf+x, x, y);
541 buf[x] = coeff_unpack_golomb(gb, qfactor, qoffset);
548 * Dirac Specification ->
549 * 13.3 intra_dc_prediction(band)
/**
 * Undo intra DC prediction in place on subband b: each coefficient has a
 * prediction formed from already reconstructed neighbours added to it.
 * The statements for the first row and the row-pointer advances are on lines
 * not visible in this excerpt.
 */
551 static inline void intra_dc_prediction(SubBand *b)
553 IDWTELEM *buf = b->ibuf;
556 for (x = 1; x < b->width; x++)
560 for (y = 1; y < b->height; y++) {
/* First column: only the coefficient directly above is available as predictor. */
561 buf[0] += buf[-b->stride];
563 for (x = 1; x < b->width; x++) {
/* Interior: predictor is the divide3() of the sum of left, above and above-left. */
564 int pred = buf[x - 1] + buf[x - b->stride] + buf[x - b->stride-1];
565 buf[x] += divide3(pred);
572 * Dirac Specification ->
573 * 13.4.2 Non-skipped subbands. subband_coeffs()
/**
 * Decode all codeblocks of subband b from its coeff_data/length byte range,
 * using arithmetic or Golomb coding according to is_arith, then apply intra
 * DC prediction to the LL band of pictures without references.
 * Some statements (for example the left/top updates) are not visible in this
 * excerpt.
 */
575 static av_always_inline void decode_subband_internal(DiracContext *s, SubBand *b, int is_arith)
577 int cb_x, cb_y, left, right, top, bottom;
/* Codeblock counts come from codeblock[level] for the LL band and codeblock[level+1] for every other orientation. */
580 int cb_width = s->codeblock[b->level + (b->orientation != subband_ll)].width;
581 int cb_height = s->codeblock[b->level + (b->orientation != subband_ll)].height;
/* True only for a 1x1 codeblock grid, given that both counts are at least 1. */
582 int blockcnt_one = (cb_width + cb_height) == 2;
587 init_get_bits8(&gb, b->coeff_data, b->length);
590 ff_dirac_init_arith_decoder(&c, &gb, b->length);
/* Split the band into cb_width x cb_height rectangles with proportionally placed boundaries. */
593 for (cb_y = 0; cb_y < cb_height; cb_y++) {
594 bottom = (b->height * (cb_y+1)) / cb_height;
596 for (cb_x = 0; cb_x < cb_width; cb_x++) {
597 right = (b->width * (cb_x+1)) / cb_width;
598 codeblock(s, b, &gb, &c, left, right, top, bottom, blockcnt_one, is_arith);
604 if (b->orientation == subband_ll && s->num_refs == 0)
605 intra_dc_prediction(b);
/*
 * avctx->execute() worker for arithmetic-coded subbands: b is passed straight
 * through as the SubBand pointer.  The return statement is not visible in
 * this excerpt.
 */
608 static int decode_subband_arith(AVCodecContext *avctx, void *b)
610 DiracContext *s = avctx->priv_data;
611 decode_subband_internal(s, b, 1);
/*
 * avctx->execute() worker for Golomb-coded subbands.  decode_component()
 * passes an array of SubBand pointers, so arg addresses one pointer, which is
 * dereferenced below (the declaration of b is on a line not visible here).
 */
615 static int decode_subband_golomb(AVCodecContext *avctx, void *arg)
617 DiracContext *s = avctx->priv_data;
619 decode_subband_internal(s, *b, 0);
624 * Dirac Specification ->
625 * [DIRAC_STD] 13.4.1 core_transform_data()
/**
 * Locate and decode every subband of component comp.  For each band the
 * length (and quantiser) are read from s->gb, coeff_data is pointed at the
 * payload and the payload is skipped; the decoding itself is dispatched
 * through avctx->execute().  The conditions choosing between the arithmetic
 * and Golomb dispatch are on lines not visible in this excerpt.
 */
627 static void decode_component(DiracContext *s, int comp)
629 AVCodecContext *avctx = s->avctx;
/* Room for 3 bands per level plus the single LL band of level 0. */
630 SubBand *bands[3*MAX_DWT_LEVELS+1];
631 enum dirac_subband orientation;
632 int level, num_bands = 0;
634 /* Unpack all subbands at all levels. */
635 for (level = 0; level < s->wavelet_depth; level++) {
/* Orientation 0 exists only at level 0; higher levels start at orientation 1. */
636 for (orientation = !!level; orientation < 4; orientation++) {
637 SubBand *b = &s->plane[comp].band[level][orientation];
638 bands[num_bands++] = b;
640 align_get_bits(&s->gb);
641 /* [DIRAC_STD] 13.4.2 subband() */
642 b->length = svq3_get_ue_golomb(&s->gb);
644 b->quant = svq3_get_ue_golomb(&s->gb);
645 align_get_bits(&s->gb);
646 b->coeff_data = s->gb.buffer + get_bits_count(&s->gb)/8;
/* Clamp the declared length to the bytes actually left in the buffer before skipping the payload. */
647 b->length = FFMIN(b->length, FFMAX(get_bits_left(&s->gb)/8, 0));
648 skip_bits_long(&s->gb, b->length*8);
651 /* arithmetic coding has inter-level dependencies, so we can only execute one level at a time */
653 avctx->execute(avctx, decode_subband_arith, &s->plane[comp].band[level][!!level],
654 NULL, 4-!!level, sizeof(SubBand));
656 /* golomb coding has no inter-level dependencies, so we can execute all subbands in parallel */
658 avctx->execute(avctx, decode_subband_golomb, bands, NULL, num_bands, sizeof(SubBand*));
661 /* [DIRAC_STD] 13.5.5.2 Luma slice subband data. luma_slice_band(level,orient,sx,sy) --> if b2 == NULL */
662 /* [DIRAC_STD] 13.5.5.3 Chroma slice subband data. chroma_slice_band(level,orient,sx,sy) --> if b2 != NULL */
/*
 * Decode the part of band b1 (and of b2 when it is given) that belongs to
 * slice (slice_x, slice_y), reading Golomb-coded coefficients from gb until
 * the bit position bits_end is reached.  When b2 is non-NULL both bands are
 * filled in the same loop.  The early exits taken after the bits_end tests
 * are on lines not visible in this excerpt.
 */
663 static void lowdelay_subband(DiracContext *s, GetBitContext *gb, int quant,
664 int slice_x, int slice_y, int bits_end,
665 SubBand *b1, SubBand *b2)
/* Slice rectangle inside the band: the band is divided proportionally among num_x by num_y slices. */
667 int left = b1->width * slice_x / s->lowdelay.num_x;
668 int right = b1->width *(slice_x+1) / s->lowdelay.num_x;
669 int top = b1->height * slice_y / s->lowdelay.num_y;
670 int bottom = b1->height *(slice_y+1) / s->lowdelay.num_y;
/* quant is clamped to MAX_QUANT for the table lookups; the intra offset table is always used here. */
672 int qfactor = qscale_tab[FFMIN(quant, MAX_QUANT)];
673 int qoffset = qoffset_intra_tab[FFMIN(quant, MAX_QUANT)];
675 IDWTELEM *buf1 = b1->ibuf + top * b1->stride;
676 IDWTELEM *buf2 = b2 ? b2->ibuf + top * b2->stride : NULL;
678 /* we have to constantly check for overread since the spec explicitly
679 requires this, with the meaning that all remaining coeffs are set to 0 */
680 if (get_bits_count(gb) >= bits_end)
683 for (y = top; y < bottom; y++) {
684 for (x = left; x < right; x++) {
685 buf1[x] = coeff_unpack_golomb(gb, qfactor, qoffset);
686 if (get_bits_count(gb) >= bits_end)
689 buf2[x] = coeff_unpack_golomb(gb, qfactor, qoffset);
690 if (get_bits_count(gb) >= bits_end)
/* Per-slice job description handed to decode_lowdelay_slice(); the code in this file uses its members gb, slice_x, slice_y and bytes (member list not visible in this excerpt). */
700 struct lowdelay_slice {
709 * Dirac Specification ->
710 * 13.5.2 Slices. slice(sx,sy)
/**
 * avctx->execute() worker that decodes one low-delay slice: a 7-bit
 * quantiser index, a luma length field, the luma subbands and then the two
 * chroma planes together.  arg is a struct lowdelay_slice.  The return
 * statement is not visible in this excerpt.
 */
712 static int decode_lowdelay_slice(AVCodecContext *avctx, void *arg)
714 DiracContext *s = avctx->priv_data;
715 struct lowdelay_slice *slice = arg;
716 GetBitContext *gb = &slice->gb;
717 enum dirac_subband orientation;
718 int level, quant, chroma_bits, chroma_end;
720 int quant_base = get_bits(gb, 7); /*[DIRAC_STD] qindex */
/* Width of the luma length field, derived from the slice size in bits. */
721 int length_bits = av_log2(8 * slice->bytes)+1;
722 int luma_bits = get_bits_long(gb, length_bits);
/* End position of the luma data, limited to what is left in the reader. */
723 int luma_end = get_bits_count(gb) + FFMIN(luma_bits, get_bits_left(gb));
725 /* [DIRAC_STD] 13.5.5.2 luma_slice_band */
726 for (level = 0; level < s->wavelet_depth; level++)
727 for (orientation = !!level; orientation < 4; orientation++) {
/* Per-band quantiser: slice index minus the matrix entry, floored at 0. */
728 quant = FFMAX(quant_base - s->lowdelay.quant[level][orientation], 0);
729 lowdelay_subband(s, gb, quant, slice->slice_x, slice->slice_y, luma_end,
730 &s->plane[0].band[level][orientation], NULL);
733 /* consume any unused bits from luma */
/* NOTE(review): the amount passed is (current position - luma_end); moving the reader to luma_end would need luma_end - get_bits_count(gb). Confirm the intended sign. */
734 skip_bits_long(gb, get_bits_count(gb) - luma_end);
/* Chroma gets the rest of the slice: total bits minus the 7-bit index, the length field and the luma bits. */
736 chroma_bits = 8*slice->bytes - 7 - length_bits - luma_bits;
737 chroma_end = get_bits_count(gb) + FFMIN(chroma_bits, get_bits_left(gb));
738 /* [DIRAC_STD] 13.5.5.3 chroma_slice_band */
739 for (level = 0; level < s->wavelet_depth; level++)
740 for (orientation = !!level; orientation < 4; orientation++) {
741 quant = FFMAX(quant_base - s->lowdelay.quant[level][orientation], 0);
742 lowdelay_subband(s, gb, quant, slice->slice_x, slice->slice_y, chroma_end,
743 &s->plane[1].band[level][orientation],
744 &s->plane[2].band[level][orientation]);
751 * Dirac Specification ->
752 * 13.5.1 low_delay_transform_data()
/**
 * Decode the transform data of a low-delay picture: build one job per slice,
 * run the jobs through avctx->execute(), then apply intra DC prediction to
 * the lowest band of each plane.  The statements advancing buf, bufsize and
 * slice_num and the release of slices are on lines not visible in this
 * excerpt.
 */
754 static void decode_lowdelay(DiracContext *s)
756 AVCodecContext *avctx = s->avctx;
757 int slice_x, slice_y, bytes, bufsize;
759 struct lowdelay_slice *slices;
/* NOTE(review): no NULL check of slices is visible before slices[slice_num] is written below - confirm. */
762 slices = av_mallocz(s->lowdelay.num_x * s->lowdelay.num_y * sizeof(struct lowdelay_slice));
764 align_get_bits(&s->gb);
765 /*[DIRAC_STD] 13.5.2 Slices. slice(sx,sy) */
766 buf = s->gb.buffer + get_bits_count(&s->gb)/8;
767 bufsize = get_bits_left(&s->gb);
/* Both loops stop early once the input is exhausted, so fewer than num_x*num_y jobs may be set up. */
769 for (slice_y = 0; bufsize > 0 && slice_y < s->lowdelay.num_y; slice_y++)
770 for (slice_x = 0; bufsize > 0 && slice_x < s->lowdelay.num_x; slice_x++) {
/* Size of this slice: difference of the cumulative rational byte positions of slice_num+1 and slice_num. */
771 bytes = (slice_num+1) * s->lowdelay.bytes.num / s->lowdelay.bytes.den
772 - slice_num * s->lowdelay.bytes.num / s->lowdelay.bytes.den;
774 slices[slice_num].bytes = bytes;
775 slices[slice_num].slice_x = slice_x;
776 slices[slice_num].slice_y = slice_y;
777 init_get_bits(&slices[slice_num].gb, buf, bufsize);
784 avctx->execute(avctx, decode_lowdelay_slice, slices, NULL, slice_num,
785 sizeof(struct lowdelay_slice)); /* [DIRAC_STD] 13.5.2 Slices */
786 intra_dc_prediction(&s->plane[0].band[0][0]); /* [DIRAC_STD] 13.3 intra_dc_prediction() */
787 intra_dc_prediction(&s->plane[1].band[0][0]); /* [DIRAC_STD] 13.3 intra_dc_prediction() */
788 intra_dc_prediction(&s->plane[2].band[0][0]); /* [DIRAC_STD] 13.3 intra_dc_prediction() */
/**
 * Set up the three Plane structures for the current picture: dimensions,
 * padded IDWT dimensions and stride, the layout of every subband inside the
 * shared idwt_buf, and the block sizes used for motion compensation.
 * Several statements and conditions are on lines not visible in this excerpt.
 */
792 static void init_planes(DiracContext *s)
794 int i, w, h, level, orientation;
796 for (i = 0; i < 3; i++) {
797 Plane *p = &s->plane[i];
/* Planes 1 and 2 are reduced by the chroma shifts. */
799 p->width = s->source.width >> (i ? s->chroma_x_shift : 0);
800 p->height = s->source.height >> (i ? s->chroma_y_shift : 0);
/* IDWT dimensions are the plane dimensions rounded up to a multiple of 1 << wavelet_depth. */
801 p->idwt_width = w = CALC_PADDING(p->width , s->wavelet_depth);
802 p->idwt_height = h = CALC_PADDING(p->height, s->wavelet_depth);
803 p->idwt_stride = FFALIGN(p->idwt_width, 8);
805 for (level = s->wavelet_depth-1; level >= 0; level--) {
808 for (orientation = !!level; orientation < 4; orientation++) {
809 SubBand *b = &p->band[level][orientation];
/* All bands live interleaved in the same buffer; lower levels use a larger stride. */
811 b->ibuf = p->idwt_buf;
813 b->stride = p->idwt_stride << (s->wavelet_depth - level);
816 b->orientation = orientation;
/* Offsets the band start by half a band stride (the guarding condition is not visible here). */
821 b->ibuf += b->stride>>1;
824 b->parent = &p->band[level-1][orientation];
/* Block sizes are derived from the plane 0 values by the chroma shifts (the guarding condition is not visible here). */
829 p->xblen = s->plane[0].xblen >> s->chroma_x_shift;
830 p->yblen = s->plane[0].yblen >> s->chroma_y_shift;
831 p->xbsep = s->plane[0].xbsep >> s->chroma_x_shift;
832 p->ybsep = s->plane[0].ybsep >> s->chroma_y_shift;
/* Overspill on each edge: half of the overlap between neighbouring blocks. */
835 p->xoffset = (p->xblen - p->xbsep)/2;
836 p->yoffset = (p->yblen - p->ybsep)/2;
841 * Unpack the motion compensation parameters
842 * Dirac Specification ->
843 * 11.2 Picture prediction data. picture_prediction()
/**
 * Parse the picture prediction parameters from s->gb: block sizes and
 * separations, motion vector precision, optional global motion parameters,
 * the prediction mode and the reference picture weights.  Inconsistent or
 * unsupported values are logged; the error returns and several conditions
 * are on lines not visible in this excerpt.
 */
845 static int dirac_unpack_prediction_parameters(DiracContext *s)
/* Preset block length and separation values, looked up with idx-1. */
847 static const uint8_t default_blen[] = { 4, 12, 16, 24 };
848 static const uint8_t default_bsep[] = { 4, 8, 12, 16 };
850 GetBitContext *gb = &s->gb;
854 /* [DIRAC_STD] 11.2.2 Block parameters. block_parameters() */
855 /* Luma and Chroma are equal. 11.2.3 */
856 idx = svq3_get_ue_golomb(gb); /* [DIRAC_STD] index */
859 av_log(s->avctx, AV_LOG_ERROR, "Block prediction index too high\n");
/* Explicitly coded block parameters (the condition selecting this path is not visible here). */
864 s->plane[0].xblen = svq3_get_ue_golomb(gb);
865 s->plane[0].yblen = svq3_get_ue_golomb(gb);
866 s->plane[0].xbsep = svq3_get_ue_golomb(gb);
867 s->plane[0].ybsep = svq3_get_ue_golomb(gb);
869 /*[DIRAC_STD] preset_block_params(index). Table 11.1 */
870 s->plane[0].xblen = default_blen[idx-1];
871 s->plane[0].yblen = default_blen[idx-1];
872 s->plane[0].xbsep = default_bsep[idx-1];
873 s->plane[0].ybsep = default_bsep[idx-1];
875 /*[DIRAC_STD] 11.2.4 motion_data_dimensions()
876 Calculated in function dirac_unpack_block_motion_data */
/* Sanity checks: separation must be non-zero, at least half the block length, no larger than the block length, and blocks no larger than MAX_BLOCKSIZE. */
878 if (!s->plane[0].xbsep || !s->plane[0].ybsep || s->plane[0].xbsep < s->plane[0].xblen/2 || s->plane[0].ybsep < s->plane[0].yblen/2) {
879 av_log(s->avctx, AV_LOG_ERROR, "Block separation too small\n");
882 if (s->plane[0].xbsep > s->plane[0].xblen || s->plane[0].ybsep > s->plane[0].yblen) {
883 av_log(s->avctx, AV_LOG_ERROR, "Block separation greater than size\n");
886 if (FFMAX(s->plane[0].xblen, s->plane[0].yblen) > MAX_BLOCKSIZE) {
887 av_log(s->avctx, AV_LOG_ERROR, "Unsupported large block size\n");
891 /*[DIRAC_STD] 11.2.5 Motion vector precision. motion_vector_precision()
892 Read motion vector precision */
893 s->mv_precision = svq3_get_ue_golomb(gb);
894 if (s->mv_precision > 3) {
895 av_log(s->avctx, AV_LOG_ERROR, "MV precision finer than eighth-pel\n");
899 /*[DIRAC_STD] 11.2.6 Global motion. global_motion()
900 Read the global motion compensation parameters */
901 s->globalmc_flag = get_bits1(gb);
902 if (s->globalmc_flag) {
/* Start from all-zero parameters; fields not present in the stream keep their defaults. */
903 memset(s->globalmc, 0, sizeof(s->globalmc));
904 /* [DIRAC_STD] pan_tilt(gparams) */
905 for (ref = 0; ref < s->num_refs; ref++) {
907 s->globalmc[ref].pan_tilt[0] = dirac_get_se_golomb(gb);
908 s->globalmc[ref].pan_tilt[1] = dirac_get_se_golomb(gb);
910 /* [DIRAC_STD] zoom_rotate_shear(gparams)
911 zoom/rotation/shear parameters */
/* NOTE(review): the exponents read here and below are stored without a visible range check and are later used as shift counts in global_mv(). */
913 s->globalmc[ref].zrs_exp = svq3_get_ue_golomb(gb);
914 s->globalmc[ref].zrs[0][0] = dirac_get_se_golomb(gb);
915 s->globalmc[ref].zrs[0][1] = dirac_get_se_golomb(gb);
916 s->globalmc[ref].zrs[1][0] = dirac_get_se_golomb(gb);
917 s->globalmc[ref].zrs[1][1] = dirac_get_se_golomb(gb);
/* Default matrix when none is coded: identity. */
919 s->globalmc[ref].zrs[0][0] = 1;
920 s->globalmc[ref].zrs[1][1] = 1;
922 /* [DIRAC_STD] perspective(gparams) */
924 s->globalmc[ref].perspective_exp = svq3_get_ue_golomb(gb);
925 s->globalmc[ref].perspective[0] = dirac_get_se_golomb(gb);
926 s->globalmc[ref].perspective[1] = dirac_get_se_golomb(gb);
931 /*[DIRAC_STD] 11.2.7 Picture prediction mode. prediction_mode()
932 Picture prediction mode, not currently used. */
933 if (svq3_get_ue_golomb(gb)) {
934 av_log(s->avctx, AV_LOG_ERROR, "Unknown picture prediction mode\n");
938 /* [DIRAC_STD] 11.2.8 Reference picture weight. reference_picture_weights()
939 just data read, weight calculation will be done later on. */
/* Default denominator; the default weights and the flag test for custom weights are on lines not visible here. */
940 s->weight_log2denom = 1;
945 s->weight_log2denom = svq3_get_ue_golomb(gb);
946 s->weight[0] = dirac_get_se_golomb(gb);
947 if (s->num_refs == 2)
948 s->weight[1] = dirac_get_se_golomb(gb);
954 * Dirac Specification ->
955 * 11.3 Wavelet transform data. wavelet_transform()
/**
 * Parse the wavelet transform parameters from s->gb: the zero-residue flag,
 * wavelet index and depth, then either the codeblock layout or the low-delay
 * slice parameters and quantisation matrix.  The conditions selecting
 * between those paths and some returns are on lines not visible in this
 * excerpt.
 *
 * CHECKEDREAD reads an unsigned Golomb value into tmp and logs errmsg when
 * cond holds; the remaining lines of the macro are not visible here.
 */
957 static int dirac_unpack_idwt_params(DiracContext *s)
959 GetBitContext *gb = &s->gb;
963 #define CHECKEDREAD(dst, cond, errmsg) \
964 tmp = svq3_get_ue_golomb(gb); \
966 av_log(s->avctx, AV_LOG_ERROR, errmsg); \
973 s->zero_res = s->num_refs ? get_bits1(gb) : 0;
977 /*[DIRAC_STD] 11.3.1 Transform parameters. transform_parameters() */
/* wavelet_idx above 6 is rejected; it later indexes default_qmat[]. */
978 CHECKEDREAD(s->wavelet_idx, tmp > 6, "wavelet_idx is too big\n")
/* The depth must lie in 1..MAX_DWT_LEVELS. */
980 CHECKEDREAD(s->wavelet_depth, tmp > MAX_DWT_LEVELS || tmp < 1, "invalid number of DWT decompositions\n")
983 /* Codeblock parameters (core syntax only) */
/* wavelet_depth + 1 entries are filled, matching the codeblock[MAX_DWT_LEVELS+1] array. */
985 for (i = 0; i <= s->wavelet_depth; i++) {
986 CHECKEDREAD(s->codeblock[i].width , tmp < 1, "codeblock width invalid\n")
987 CHECKEDREAD(s->codeblock[i].height, tmp < 1, "codeblock height invalid\n")
990 CHECKEDREAD(s->codeblock_mode, tmp > 1, "unknown codeblock mode\n")
/* Fallback layout: a single codeblock per band. */
992 for (i = 0; i <= s->wavelet_depth; i++)
993 s->codeblock[i].width = s->codeblock[i].height = 1;
995 /* Slice parameters + quantization matrix*/
996 /*[DIRAC_STD] 11.3.4 Slice coding Parameters (low delay syntax only). slice_parameters() */
997 s->lowdelay.num_x = svq3_get_ue_golomb(gb);
998 s->lowdelay.num_y = svq3_get_ue_golomb(gb);
999 s->lowdelay.bytes.num = svq3_get_ue_golomb(gb);
1000 s->lowdelay.bytes.den = svq3_get_ue_golomb(gb);
/* The denominator is used as a divisor in decode_lowdelay(), so it must be positive. */
1002 if (s->lowdelay.bytes.den <= 0) {
1003 av_log(s->avctx,AV_LOG_ERROR,"Invalid lowdelay.bytes.den\n");
1004 return AVERROR_INVALIDDATA;
1007 /* [DIRAC_STD] 11.3.5 Quantisation matrices (low-delay syntax). quant_matrix() */
1008 if (get_bits1(gb)) {
1009 av_log(s->avctx,AV_LOG_DEBUG,"Low Delay: Has Custom Quantization Matrix!\n");
1010 /* custom quantization matrix */
/* One value for the level 0 LL band, then three values (orientations 1..3) per level. */
1011 s->lowdelay.quant[0][0] = svq3_get_ue_golomb(gb);
1012 for (level = 0; level < s->wavelet_depth; level++) {
1013 s->lowdelay.quant[level][1] = svq3_get_ue_golomb(gb);
1014 s->lowdelay.quant[level][2] = svq3_get_ue_golomb(gb);
1015 s->lowdelay.quant[level][3] = svq3_get_ue_golomb(gb);
/* default_qmat only has 4 levels per wavelet, so deeper transforms need a custom matrix. */
1018 if (s->wavelet_depth > 4) {
1019 av_log(s->avctx,AV_LOG_ERROR,"Mandatory custom low delay matrix missing for depth %d\n", s->wavelet_depth);
1020 return AVERROR_INVALIDDATA;
1022 /* default quantization matrix */
1023 for (level = 0; level < s->wavelet_depth; level++)
1024 for (i = 0; i < 4; i++) {
1025 s->lowdelay.quant[level][i] = default_qmat[s->wavelet_idx][level][i];
1026 /* haar with no shift differs for different depths */
1027 if (s->wavelet_idx == 3)
1028 s->lowdelay.quant[level][i] += 4*(s->wavelet_depth-1 - level);
/*
 * Predict the split mode of the superblock at (x, y) from its already decoded
 * neighbours; sbsplit points at the current entry and stride is the row
 * length.  The edge-case conditions are on lines not visible in this excerpt.
 */
1035 static inline int pred_sbsplit(uint8_t *sbsplit, int stride, int x, int y)
/* Lookup table indexed by the sum of three neighbour split values (0..6). */
1037 static const uint8_t avgsplit[7] = { 0, 0, 1, 1, 1, 2, 2 };
1044 return sbsplit[-stride];
1046 return avgsplit[sbsplit[-1] + sbsplit[-stride] + sbsplit[-stride-1]];
/*
 * Predict the refmask bit of the ref flags of the block at (x, y) from the
 * left, above and above-left neighbours.  The edge-case conditions are on
 * lines not visible in this excerpt.
 */
1049 static inline int pred_block_mode(DiracBlock *block, int stride, int x, int y, int refmask)
1056 return block[-1].ref & refmask;
1058 return block[-stride].ref & refmask;
1060 /* return the majority */
/* Each term is 0 or refmask; for the single-bit masks used by the callers, halving the sum and masking yields the bit when at least two neighbours have it. */
1061 pred = (block[-1].ref & refmask) + (block[-stride].ref & refmask) + (block[-stride-1].ref & refmask);
1062 return (pred >> 1) & refmask;
/*
 * Predict the three DC values of the block at (x, y) from those neighbours
 * (left, above, above-left) that use no reference picture, i.e. whose ref
 * has neither of the two reference bits set.  The counter n and the test
 * for the two-neighbour case are on lines not visible in this excerpt.
 */
1065 static inline void pred_block_dc(DiracBlock *block, int stride, int x, int y)
1069 memset(block->u.dc, 0, sizeof(block->u.dc));
/* Accumulate the DC of every usable neighbour. */
1071 if (x && !(block[-1].ref & 3)) {
1072 for (i = 0; i < 3; i++)
1073 block->u.dc[i] += block[-1].u.dc[i];
1077 if (y && !(block[-stride].ref & 3)) {
1078 for (i = 0; i < 3; i++)
1079 block->u.dc[i] += block[-stride].u.dc[i];
1083 if (x && y && !(block[-1-stride].ref & 3)) {
1084 for (i = 0; i < 3; i++)
1085 block->u.dc[i] += block[-1-stride].u.dc[i];
/* Turn the sum into a mean: rounded halving here, divide3() for three contributions. */
1090 for (i = 0; i < 3; i++)
1091 block->u.dc[i] = (block->u.dc[i]+1)>>1;
1092 } else if (n == 3) {
1093 for (i = 0; i < 3; i++)
1094 block->u.dc[i] = divide3(block->u.dc[i]);
/*
 * Predict the motion vector of the block at (x, y) for reference ref from
 * the neighbours that use the same reference without global motion.
 * The declaration of pred and n and the case labels selecting among the
 * assignments below are on lines not visible in this excerpt; the four
 * groups match 0, 1, 2 and 3 collected candidates.
 */
1098 static inline void pred_mv(DiracBlock *block, int stride, int x, int y, int ref)
/* ref 0 maps to mask bit 1 and ref 1 to mask bit 2. */
1101 int refmask = ref+1;
1102 int mask = refmask | DIRAC_REF_MASK_GLOBAL; /* exclude gmc blocks */
1105 if (x && (block[-1].ref & mask) == refmask)
1106 pred[n++] = block[-1].u.mv[ref];
1108 if (y && (block[-stride].ref & mask) == refmask)
1109 pred[n++] = block[-stride].u.mv[ref];
1111 if (x && y && (block[-stride-1].ref & mask) == refmask)
1112 pred[n++] = block[-stride-1].u.mv[ref];
/* No candidate: zero vector. */
1116 block->u.mv[ref][0] = 0;
1117 block->u.mv[ref][1] = 0;
/* One candidate: copy it. */
1120 block->u.mv[ref][0] = pred[0][0];
1121 block->u.mv[ref][1] = pred[0][1];
/* Two candidates: rounded mean. */
1124 block->u.mv[ref][0] = (pred[0][0] + pred[1][0] + 1) >> 1;
1125 block->u.mv[ref][1] = (pred[0][1] + pred[1][1] + 1) >> 1;
/* Three candidates: component-wise median. */
1128 block->u.mv[ref][0] = mid_pred(pred[0][0], pred[1][0], pred[2][0]);
1129 block->u.mv[ref][1] = mid_pred(pred[0][1], pred[1][1], pred[2][1]);
1134 static void global_mv(DiracContext *s, DiracBlock *block, int x, int y, int ref)
1136 int ez = s->globalmc[ref].zrs_exp;
1137 int ep = s->globalmc[ref].perspective_exp;
1138 int (*A)[2] = s->globalmc[ref].zrs;
1139 int *b = s->globalmc[ref].pan_tilt;
1140 int *c = s->globalmc[ref].perspective;
1142 int m = (1<<ep) - (c[0]*x + c[1]*y);
1143 int mx = m * ((A[0][0] * x + A[0][1]*y) + (1<<ez) * b[0]);
1144 int my = m * ((A[1][0] * x + A[1][1]*y) + (1<<ez) * b[1]);
1146 block->u.mv[ref][0] = (mx + (1<<(ez+ep))) >> (ez+ep);
1147 block->u.mv[ref][1] = (my + (1<<(ez+ep))) >> (ez+ep);
/**
 * Decode the prediction mode and the motion data of the block at (x, y).
 * arith is an array of arithmetic decoders: arith[0] carries the mode bits,
 * arith[1..3] the three DC components, arith[4 + 2*i] and arith[5 + 2*i] the
 * x and y motion vector residuals of reference i.  The condition separating
 * the DC path from the motion vector path is on lines not visible in this
 * excerpt.
 */
1150 static void decode_block_params(DiracContext *s, DiracArith arith[8], DiracBlock *block,
1151 int stride, int x, int y)
/* Each flag bit is the neighbour prediction XORed with one decoded bit. */
1155 block->ref = pred_block_mode(block, stride, x, y, DIRAC_REF_MASK_REF1);
1156 block->ref ^= dirac_get_arith_bit(arith, CTX_PMODE_REF1);
1158 if (s->num_refs == 2) {
1159 block->ref |= pred_block_mode(block, stride, x, y, DIRAC_REF_MASK_REF2);
1160 block->ref ^= dirac_get_arith_bit(arith, CTX_PMODE_REF2) << 1;
/* DC path: predicted DC plus a decoded residual per component. */
1164 pred_block_dc(block, stride, x, y);
1165 for (i = 0; i < 3; i++)
1166 block->u.dc[i] += dirac_get_arith_int(arith+1+i, CTX_DC_F1, CTX_DC_DATA);
1170 if (s->globalmc_flag) {
1171 block->ref |= pred_block_mode(block, stride, x, y, DIRAC_REF_MASK_GLOBAL);
1172 block->ref ^= dirac_get_arith_bit(arith, CTX_GLOBAL_BLOCK) << 2;
/* For every reference the block uses: global blocks get a computed vector, others a predicted vector plus decoded residuals. */
1175 for (i = 0; i < s->num_refs; i++)
1176 if (block->ref & (i+1)) {
1177 if (block->ref & DIRAC_REF_MASK_GLOBAL) {
1178 global_mv(s, block, x, y, i);
1180 pred_mv(block, stride, x, y, i);
1181 block->u.mv[i][0] += dirac_get_arith_int(arith + 4 + 2 * i, CTX_MV_F1, CTX_MV_DATA);
1182 block->u.mv[i][1] += dirac_get_arith_int(arith + 5 + 2 * i, CTX_MV_F1, CTX_MV_DATA);
1188 * Copies the current block to the other blocks covered by the current superblock split mode
/*
 * block   first (top-left) block of the area, already decoded
 * stride  distance between consecutive block rows; presumably in DiracBlock
 *         units (the caller passes s->blwidth) — confirm
 * size    side length of the square area in blocks (the caller passes the
 *         split-mode step)
 *
 * The first loop walks the rest of the first row (x = 1..size-1); the nested
 * loops walk every column (x = 0..size-1) of the remaining rows
 * (y = 1..size-1).
 */
1190 static void propagate_block_data(DiracBlock *block, int stride, int size)
1193 DiracBlock *dst = block;
1195 for (x = 1; x < size; x++)
1198 for (y = 1; y < size; y++) {
1200 for (x = 0; x < size; x++)
1206 * Dirac Specification ->
1207 * 12. Block motion data syntax
/*
 * Read the block motion data of the current picture from s->gb: first the
 * superblock split modes (into s->sbsplit), then the parameters of every
 * coded block (into s->blmotion).
 *
 * Also sets s->sbwidth/sbheight and s->blwidth/blheight as a side effect.
 */
1209 static int dirac_unpack_block_motion_data(DiracContext *s)
1211 GetBitContext *gb = &s->gb;
1212 uint8_t *sbsplit = s->sbsplit;
1214 DiracArith arith[8];
1218 /* [DIRAC_STD] 11.2.4 and 12.2.1 Number of blocks and superblocks */
/* a superblock spans 4x4 blocks, hence the factor 4 in both directions */
1219 s->sbwidth = DIVRNDUP(s->source.width, 4*s->plane[0].xbsep);
1220 s->sbheight = DIVRNDUP(s->source.height, 4*s->plane[0].ybsep);
1221 s->blwidth = 4 * s->sbwidth;
1222 s->blheight = 4 * s->sbheight;
1224 /* [DIRAC_STD] 12.3.1 Superblock splitting modes. superblock_split_modes()
1225 decode superblock split modes */
/* NOTE(review): here and below the length read from the bitstream is handed
 * straight to ff_dirac_init_arith_decoder(); confirm the callee bounds it
 * against the data actually left in gb. */
1226 ff_dirac_init_arith_decoder(arith, gb, svq3_get_ue_golomb(gb)); /* svq3_get_ue_golomb(gb) is the length */
1227 for (y = 0; y < s->sbheight; y++) {
1228 for (x = 0; x < s->sbwidth; x++) {
1229 unsigned int split = dirac_get_arith_uint(arith, CTX_SB_F1, CTX_SB_DATA);
/* the decoded value is a residual on top of the predicted split mode;
 * the % 3 keeps the stored mode in 0..2 */
1232 sbsplit[x] = (split + pred_sbsplit(sbsplit+x, s->sbwidth, x, y)) % 3;
1234 sbsplit += s->sbwidth;
1237 /* setup arith decoding */
/* decoder layout matches decode_block_params(): [0] mode bits,
 * [4+2i]/[5+2i] vector x/y of reference i, [1..3] DC of each component.
 * The init calls consume the bitstream in exactly this order. */
1238 ff_dirac_init_arith_decoder(arith, gb, svq3_get_ue_golomb(gb));
1239 for (i = 0; i < s->num_refs; i++) {
1240 ff_dirac_init_arith_decoder(arith + 4 + 2 * i, gb, svq3_get_ue_golomb(gb));
1241 ff_dirac_init_arith_decoder(arith + 5 + 2 * i, gb, svq3_get_ue_golomb(gb));
1243 for (i = 0; i < 3; i++)
1244 ff_dirac_init_arith_decoder(arith+1+i, gb, svq3_get_ue_golomb(gb));
1246 for (y = 0; y < s->sbheight; y++)
1247 for (x = 0; x < s->sbwidth; x++) {
/* split mode 0/1/2 -> 1/2/4 coded blocks per side, each standing for a
 * square of 4/2/1 blocks */
1248 int blkcnt = 1 << s->sbsplit[y * s->sbwidth + x];
1249 int step = 4 >> s->sbsplit[y * s->sbwidth + x];
1251 for (q = 0; q < blkcnt; q++)
1252 for (p = 0; p < blkcnt; p++) {
1253 int bx = 4 * x + p*step;
1254 int by = 4 * y + q*step;
1255 DiracBlock *block = &s->blmotion[by*s->blwidth + bx];
/* decode the top-left block of the square, then hand it to
 * propagate_block_data() for the rest of the step x step square */
1256 decode_block_params(s, arith, block, s->blwidth, bx, by);
1257 propagate_block_data(block, s->blwidth, step);
/*
 * Overlapped-block weight of sample position i along one axis.
 *
 * i       sample index within the block (callers pass 0..blen-1)
 * blen    block length along this axis
 * offset  callers pass the plane's xoffset / yoffset
 */
1264 static int weight(int i, int blen, int offset)
/* ROLLOFF(i): for offset == 1 it yields 3 at i == 0 and 5 otherwise; for any
 * other offset it yields 1 + (6*i + offset - 1) / (2*offset - 1).
 * NOTE(review): the expansion is not wrapped in parentheses as a whole, so it
 * is only safe where it forms the entire expression (as in `return`). */
1266 #define ROLLOFF(i) offset == 1 ? ((i) ? 5 : 3) : \
1267 (1 + (6*(i) + offset - 1) / (2*offset - 1))
/* trailing edge: mirror the index so the ramp falls off symmetrically */
1271 else if (i > blen-1 - 2*offset)
1272 return ROLLOFF(blen-1 - i);
/*
 * Fill one row of an OBMC weight table.
 *
 * p            plane supplying xblen and xoffset
 * obmc_weight  row to fill
 * stride       number of entries in the row; must be at least p->xblen
 * left, right  non-zero when the block touches the left / right picture
 *              edge; that half of the row gets the flat weight instead of
 *              the ramp
 * wy           vertical weight of this row, multiplied into every entry
 */
1276 static void init_obmc_weight_row(Plane *p, uint8_t *obmc_weight, int stride,
1277 int left, int right, int wy)
/* left edge: flat weight for the first half of the block.
 * (`x < p->xblen >> 1` parses as x < (xblen >> 1).) */
1280 for (x = 0; left && x < p->xblen >> 1; x++)
1281 obmc_weight[x] = wy*8;
/* ramp region; with right set it stops at xblen/2 */
1282 for (; x < p->xblen >> right; x++)
1283 obmc_weight[x] = wy*weight(x, p->xblen, p->xoffset);
/* right edge: flat weight for whatever is left of the block */
1284 for (; x < p->xblen; x++)
1285 obmc_weight[x] = wy*8;
/* remaining entries between xblen and stride */
1286 for (; x < stride; x++)
/*
 * Build a full two-dimensional OBMC weight table, one row at a time, by
 * applying the same edge/ramp/edge scheme vertically that
 * init_obmc_weight_row() applies horizontally.
 *
 * p                         plane supplying yblen and yoffset
 * obmc_weight               table to fill, `stride` entries per row
 * left, right, top, bottom  non-zero when the block touches that picture edge
 */
1290 static void init_obmc_weight(Plane *p, uint8_t *obmc_weight, int stride,
1291 int left, int right, int top, int bottom)
/* top edge: upper half of the block uses the flat vertical weight 8 */
1294 for (y = 0; top && y < p->yblen >> 1; y++) {
1295 init_obmc_weight_row(p, obmc_weight, stride, left, right, 8);
1296 obmc_weight += stride;
/* ramp rows; with bottom set this stops at yblen/2 */
1298 for (; y < p->yblen >> bottom; y++) {
1299 int wy = weight(y, p->yblen, p->yoffset);
1300 init_obmc_weight_row(p, obmc_weight, stride, left, right, wy);
1301 obmc_weight += stride;
/* bottom edge: remaining rows use the flat vertical weight 8 */
1303 for (; y < p->yblen; y++) {
1304 init_obmc_weight_row(p, obmc_weight, stride, left, right, 8);
1305 obmc_weight += stride;
/*
 * Prepare the three weight tables used while motion-compensating block
 * row `by` of plane p:
 *   s->obmc_weight[0]  leftmost block of the row  (left = 1)
 *   s->obmc_weight[1]  interior blocks
 *   s->obmc_weight[2]  rightmost block of the row (right = 1)
 * Each table uses MAX_BLOCKSIZE entries per row.
 */
1309 static void init_obmc_weights(DiracContext *s, Plane *p, int by)
1312 int bottom = by == s->blheight-1;
1314 /* don't bother re-initing for rows 2 to blheight-2, the weights don't change */
1315 if (top || bottom || by == 1) {
1316 init_obmc_weight(p, s->obmc_weight[0], MAX_BLOCKSIZE, 1, 0, top, bottom);
1317 init_obmc_weight(p, s->obmc_weight[1], MAX_BLOCKSIZE, 0, 0, top, bottom);
1318 init_obmc_weight(p, s->obmc_weight[2], MAX_BLOCKSIZE, 0, 1, top, bottom);
/* Weight sets for eighth-pel interpolation. mc_subpel() looks one up as
 * epel_weights[my&3][mx&3] and passes the resulting 4-byte row on in src[4]. */
1322 static const uint8_t epel_weights[4][4][4] = {
1342 * For block x,y, determine which of the hpel planes to do bilinear
1343 * interpolation from and set src[] to the location in each hpel plane
1346 * @return the index of the put_dirac_pixels_tab function to use
1347 * 0 for 1 plane (fpel,hpel), 1 for 2 planes (qpel), 2 for 4 planes (qpel), and 3 for epel
/*
 * s      decoder context (reference pictures, mv_precision, chroma shifts,
 *        edge emulation buffers)
 * block  block whose vector block->u.mv[ref] is used
 * src    out: up to four source pointers; src[4] receives the epel weight
 *        row when eighth-pel interpolation is needed
 * x, y   destination position of the block inside the plane
 * ref    index into s->ref_pics
 * plane  component index
 */
1349 static int mc_subpel(DiracContext *s, DiracBlock *block, const uint8_t *src[5],
1350 int x, int y, int ref, int plane)
1352 Plane *p = &s->plane[plane];
1353 uint8_t **ref_hpel = s->ref_pics[ref]->hpel[plane];
1354 int motion_x = block->u.mv[ref][0];
1355 int motion_y = block->u.mv[ref][1];
1356 int mx, my, i, epel, nplanes = 0;
/* scale the vector by the chroma subsampling shifts (presumably only for
 * the chroma planes — confirm against the surrounding condition) */
1359 motion_x >>= s->chroma_x_shift;
1360 motion_y >>= s->chroma_y_shift;
/* split the vector into its fractional part (low mv_precision bits, kept in
 * mx/my) and its integer part (left in motion_x/motion_y).
 * NOTE(review): `-1 << n` left-shifts a negative value, which is undefined
 * behaviour in C; `(1 << n) - 1` would build the same mask without it. */
1363 mx = motion_x & ~(-1 << s->mv_precision);
1364 my = motion_y & ~(-1 << s->mv_precision);
1365 motion_x >>= s->mv_precision;
1366 motion_y >>= s->mv_precision;
1367 /* normalize subpel coordinates to epel */
1368 /* TODO: template this function? */
1369 mx <<= 3 - s->mv_precision;
1370 my <<= 3 - s->mv_precision;
/* single-plane case: with mx, my in eighth-pel units (0..7), (my>>1)+(mx>>2)
 * selects one of the four half-pel planes */
1379 src[0] = ref_hpel[(my>>1)+(mx>>2)] + y*p->stride + x;
/* multi-plane case: start from the same position in all four hpel planes */
1383 for (i = 0; i < 4; i++)
1384 src[i] = ref_hpel[i] + y*p->stride + x;
1386 /* if we're interpolating in the right/bottom halves, adjust the planes as needed
1387 we increment x/y because the edge changes for half of the pixels */
1394 src[0] += p->stride;
1395 src[1] += p->stride;
1403 /* check if we really only need 2 planes since either mx or my is
1404 a hpel position. (epel weights of 0 handle this there) */
1406 /* mx == 0: average [0] and [2]
1407 mx == 4: average [1] and [3] */
1408 src[!mx] = src[2 + !!mx];
1410 } else if (!(my&3)) {
1411 src[0] = src[(my>>1) ];
1412 src[1] = src[(my>>1)+1];
1416 /* adjust the ordering if needed so the weights work */
1418 FFSWAP(const uint8_t *, src[0], src[1]);
1419 FFSWAP(const uint8_t *, src[2], src[3]);
1422 FFSWAP(const uint8_t *, src[0], src[2]);
1423 FFSWAP(const uint8_t *, src[1], src[3]);
1425 src[4] = epel_weights[my&3][mx&3];
1429 /* fixme: v/h _edge_pos */
/* block reaches outside the padded reference area: build each needed source
 * block in its own edge_emu_buffer and point src[] at the copy */
1430 if (x + p->xblen > p->width +EDGE_WIDTH/2 ||
1431 y + p->yblen > p->height+EDGE_WIDTH/2 ||
1433 for (i = 0; i < nplanes; i++) {
1434 ff_emulated_edge_mc(s->edge_emu_buffer[i], src[i],
1435 p->stride, p->stride,
1436 p->xblen, p->yblen, x, y,
1437 p->width+EDGE_WIDTH/2, p->height+EDGE_WIDTH/2);
1438 src[i] = s->edge_emu_buffer[i];
/* function-table index: nplanes 1/2/4 -> 0/1/2, plus epel */
1441 return (nplanes>>1) + epel;
/*
 * Accumulate a constant (DC) prediction, weighted by the OBMC table, into
 * the 16-bit motion-compensation buffer.
 *
 * dst           accumulation buffer for the block
 * dc            DC value of the block for this plane
 * stride        row pitch of dst
 * obmc_weight   weight table, MAX_BLOCKSIZE entries per row
 * xblen, yblen  block dimensions
 */
1444 static void add_dc(uint16_t *dst, int dc, int stride,
1445 uint8_t *obmc_weight, int xblen, int yblen)
1450 for (y = 0; y < yblen; y++) {
/* two samples per iteration, so xblen is assumed to be even */
1451 for (x = 0; x < xblen; x += 2) {
1452 dst[x ] += dc * obmc_weight[x ];
1453 dst[x+1] += dc * obmc_weight[x+1];
1456 obmc_weight += MAX_BLOCKSIZE;
/*
 * Produce the prediction for one block and accumulate it, OBMC-weighted,
 * into mctmp.
 *
 * s            decoder context
 * block        block to predict
 * mctmp        16-bit accumulation buffer positioned at this block
 * obmc_weight  weight table to apply
 * plane        component index
 * dstx, dsty   position of the block inside the plane
 */
1460 static void block_mc(DiracContext *s, DiracBlock *block,
1461 uint16_t *mctmp, uint8_t *obmc_weight,
1462 int plane, int dstx, int dsty)
1464 Plane *p = &s->plane[plane];
1465 const uint8_t *src[5];
/* the low two bits of block->ref say which references are used */
1468 switch (block->ref&3) {
/* no reference: DC block, accumulated directly by add_dc() */
1470 add_dc(mctmp, block->u.dc[plane], p->stride, obmc_weight, p->xblen, p->yblen);
/* exactly one reference: (block->ref&3)-1 turns mask 1/2 into index 0/1 */
1474 idx = mc_subpel(s, block, src, dstx, dsty, (block->ref&3)-1, plane);
1475 s->put_pixels_tab[idx](s->mcscratch, src, p->stride, p->yblen);
/* single-reference weighting uses the sum of both weights */
1477 s->weight_func(s->mcscratch, p->stride, s->weight_log2denom,
1478 s->weight[0] + s->weight[1], p->yblen);
/* both references: fetch reference 0 into the scratch buffer first */
1481 idx = mc_subpel(s, block, src, dstx, dsty, 0, plane);
1482 s->put_pixels_tab[idx](s->mcscratch, src, p->stride, p->yblen);
1483 idx = mc_subpel(s, block, src, dstx, dsty, 1, plane);
/* weighted bi-prediction needs reference 1 stored separately (at
 * mcscratch + 32) before the two are combined */
1484 if (s->biweight_func) {
1485 /* fixme: +32 is a quick hack */
1486 s->put_pixels_tab[idx](s->mcscratch + 32, src, p->stride, p->yblen);
1487 s->biweight_func(s->mcscratch, s->mcscratch+32, p->stride, s->weight_log2denom,
1488 s->weight[0], s->weight[1], p->yblen);
/* unweighted case: average reference 1 into the scratch buffer */
1490 s->avg_pixels_tab[idx](s->mcscratch, src, p->stride, p->yblen);
/* accumulate the prediction held in mcscratch, OBMC-weighted */
1493 s->add_obmc(mctmp, s->mcscratch, p->stride, obmc_weight, p->yblen);
/*
 * Motion-compensate one row of blocks of one plane into mctmp.
 *
 * s      decoder context
 * block  first block of the row
 * mctmp  accumulation buffer positioned at the start of the row
 * plane  component index
 * dsty   vertical position of the row inside the plane
 */
1496 static void mc_row(DiracContext *s, DiracBlock *block, uint16_t *mctmp, int plane, int dsty)
1498 Plane *p = &s->plane[plane];
1499 int x, dstx = p->xbsep - p->xoffset;
/* leftmost block: starts xoffset samples left of the picture and uses the
 * left-edge weight table */
1501 block_mc(s, block, mctmp, s->obmc_weight[0], plane, -p->xoffset, dsty);
/* interior blocks */
1504 for (x = 1; x < s->blwidth-1; x++) {
1505 block_mc(s, block+x, mctmp, s->obmc_weight[1], plane, dstx, dsty);
/* rightmost block: right-edge weight table */
1509 block_mc(s, block+x, mctmp, s->obmc_weight[2], plane, dstx, dsty);
/*
 * Copy the put/avg pixel function tables and pick the OBMC and weighting
 * functions for index `idx` of the diracdsp tables into the context.
 *
 * NOTE(review): width and height are not referenced by any statement
 * visible here.
 */
1512 static void select_dsp_funcs(DiracContext *s, int width, int height, int xblen, int yblen)
1520 memcpy(s->put_pixels_tab, s->diracdsp.put_dirac_pixels_tab[idx], sizeof(s->put_pixels_tab));
1521 memcpy(s->avg_pixels_tab, s->diracdsp.avg_dirac_pixels_tab[idx], sizeof(s->avg_pixels_tab));
1522 s->add_obmc = s->diracdsp.add_dirac_obmc[idx];
/* explicit weighting is needed unless both weights are 1 and the
 * denominator exponent is at most 1 */
1523 if (s->weight_log2denom > 1 || s->weight[0] != 1 || s->weight[1] != 1) {
1524 s->weight_func = s->diracdsp.weight_dirac_pixels_tab[idx];
1525 s->biweight_func = s->diracdsp.biweight_dirac_pixels_tab[idx];
/* otherwise leave both NULL; block_mc() tests biweight_func before using it */
1527 s->weight_func = NULL;
1528 s->biweight_func = NULL;
/*
 * Prepare one plane of a reference frame for motion compensation: pad the
 * full-pel plane and, when sub-pel vectors are in use, allocate and compute
 * the three half-pel planes.
 *
 * s              decoder context
 * ref            reference frame to prepare
 * plane          component index
 * width, height  dimensions of the plane
 */
1532 static void interpolate_refplane(DiracContext *s, DiracFrame *ref, int plane, int width, int height)
1534 /* chroma allocates an edge of 8 when subsampled
1535 which for 4:2:2 means an h edge of 16 and v edge of 8
1536 just use 8 for everything for the moment */
1537 int i, edge = EDGE_WIDTH/2;
/* hpel[plane][0] is the decoded picture itself */
1539 ref->hpel[plane][0] = ref->avframe->data[plane];
1540 s->dsp.draw_edges(ref->hpel[plane][0], ref->avframe->linesize[plane], width, height, edge, edge, EDGE_TOP | EDGE_BOTTOM); /* EDGE_TOP | EDGE_BOTTOM values just copied to make it build, this needs to be ensured */
1542 /* no need for hpel if we only have fpel vectors */
1543 if (!s->mv_precision)
/* the three half-pel planes are allocated on first use and then kept.
 * NOTE(review): the av_malloc() result is not checked before it is offset and
 * stored in hpel[]; an allocation failure leaves a bogus pointer behind.
 * The int size expression can also overflow for very large planes. */
1546 for (i = 1; i < 4; i++) {
1547 if (!ref->hpel_base[plane][i])
1548 ref->hpel_base[plane][i] = av_malloc((height+2*edge) * ref->avframe->linesize[plane] + 32);
1549 /* we need to be 16-byte aligned even for chroma */
1550 ref->hpel[plane][i] = ref->hpel_base[plane][i] + edge*ref->avframe->linesize[plane] + 16;
/* filter and pad only once per frame and plane */
1553 if (!ref->interpolated[plane]) {
1554 s->diracdsp.dirac_hpel_filter(ref->hpel[plane][1], ref->hpel[plane][2],
1555 ref->hpel[plane][3], ref->hpel[plane][0],
1556 ref->avframe->linesize[plane], width, height);
1557 s->dsp.draw_edges(ref->hpel[plane][1], ref->avframe->linesize[plane], width, height, edge, edge, EDGE_TOP | EDGE_BOTTOM);
1558 s->dsp.draw_edges(ref->hpel[plane][2], ref->avframe->linesize[plane], width, height, edge, edge, EDGE_TOP | EDGE_BOTTOM);
1559 s->dsp.draw_edges(ref->hpel[plane][3], ref->avframe->linesize[plane], width, height, edge, edge, EDGE_TOP | EDGE_BOTTOM);
1561 ref->interpolated[plane] = 1;
1565 * Dirac Specification ->
1566 * 13.0 Transform data syntax. transform_data()
/*
 * Decode the transform data of the current picture and reconstruct all
 * three components into s->current_picture: inverse wavelet transform and,
 * for inter pictures, motion compensation.
 */
1568 static int dirac_decode_frame_internal(DiracContext *s)
1571 int y, i, comp, dsty;
1574 /* [DIRAC_STD] 13.5.1 low_delay_transform_data() */
/* clear the coefficient buffer of every component */
1575 for (comp = 0; comp < 3; comp++) {
1576 Plane *p = &s->plane[comp];
1577 memset(p->idwt_buf, 0, p->idwt_stride * p->idwt_height * sizeof(IDWTELEM));
/* reconstruct the components one after another */
1583 for (comp = 0; comp < 3; comp++) {
1584 Plane *p = &s->plane[comp];
1585 uint8_t *frame = s->current_picture->avframe->data[comp];
1587 /* FIXME: small resolutions */
/* carve four edge emulation buffers out of one allocation, spaced by the
 * 16-aligned plane width */
1588 for (i = 0; i < 4; i++)
1589 s->edge_emu_buffer[i] = s->edge_emu_buffer_base + i*FFALIGN(p->width, 16);
/* core syntax with residual data: clear this component's buffer and decode
 * its coefficients here */
1591 if (!s->zero_res && !s->low_delay)
1593 memset(p->idwt_buf, 0, p->idwt_stride * p->idwt_height * sizeof(IDWTELEM));
1594 decode_component(s, comp); /* [DIRAC_STD] 13.4.1 core_transform_data() */
1596 if (ff_spatial_idwt_init2(&d, p->idwt_buf, p->idwt_width, p->idwt_height, p->idwt_stride,
1597 s->wavelet_idx+2, s->wavelet_depth, p->idwt_tmp))
1600 if (!s->num_refs) { /* intra */
/* run the IDWT in slices of 16 rows and write each slice out clamped */
1601 for (y = 0; y < p->height; y += 16) {
1602 ff_spatial_idwt_slice2(&d, y+16); /* decode */
1603 s->diracdsp.put_signed_rect_clamped(frame + y*p->stride, p->stride,
1604 p->idwt_buf + y*p->idwt_stride, p->idwt_stride, p->width, 16);
1606 } else { /* inter */
/* number of samples between the starts of two consecutive block rows */
1607 int rowheight = p->ybsep*p->stride;
1609 select_dsp_funcs(s, p->width, p->height, p->xblen, p->yblen);
1611 for (i = 0; i < s->num_refs; i++)
1612 interpolate_refplane(s, s->ref_pics[i], comp, p->width, p->height);
/* memset counts bytes and mctmp holds uint16_t, so this clears the first
 * 2*yoffset rows of the accumulation buffer */
1614 memset(s->mctmp, 0, 4*p->yoffset*p->stride);
1617 for (y = 0; y < s->blheight; y++) {
/* first picture row this block row contributes to, clipped at the top */
1619 start = FFMAX(dsty, 0);
1620 uint16_t *mctmp = s->mctmp + y*rowheight;
1621 DiracBlock *blocks = s->blmotion + y*s->blwidth;
1623 init_obmc_weights(s, p, y);
/* h: number of picture rows that can be finished after this block row */
1625 if (y == s->blheight-1 || start+p->ybsep > p->height)
1626 h = p->height - start;
1628 h = p->ybsep - (start - dsty);
/* clear the next ybsep rows (2*rowheight bytes) before accumulating */
1632 memset(mctmp+2*p->yoffset*p->stride, 0, 2*rowheight);
1633 mc_row(s, blocks, mctmp, comp, dsty);
/* skip the rows and columns of the buffer that fall outside the picture,
 * then add the residual and store the clamped result */
1635 mctmp += (start - dsty)*p->stride + p->xoffset;
1636 ff_spatial_idwt_slice2(&d, start + h); /* decode */
1637 s->diracdsp.add_rect_clamped(frame + start*p->stride, mctmp, p->stride,
1638 p->idwt_buf + start*p->idwt_stride, p->idwt_stride, p->width, h);
1650 * Dirac Specification ->
1651 * 11.1.1 Picture Header. picture_header()
/*
 * Parse the picture header from s->gb: picture number, reference picture
 * numbers and the picture to retire. Then update the reference list and
 * unpack the prediction parameters, block motion data and wavelet
 * parameters.
 */
1653 static int dirac_decode_picture_header(DiracContext *s)
1656 int i, j, refnum, refdist;
1657 GetBitContext *gb = &s->gb;
1659 /* [DIRAC_STD] 11.1.1 Picture Header. picture_header() PICTURE_NUM */
1660 picnum = s->current_picture->avframe->display_picture_number = get_bits_long(gb, 32);
1663 av_log(s->avctx,AV_LOG_DEBUG,"PICTURE_NUM: %d\n",picnum);
1665 /* if this is the first keyframe after a sequence header, start our
1666 reordering from here */
1667 if (s->frame_number < 0)
1668 s->frame_number = picnum;
1670 s->ref_pics[0] = s->ref_pics[1] = NULL;
1671 for (i = 0; i < s->num_refs; i++) {
/* references are coded as signed offsets from the current picture number */
1672 refnum = picnum + dirac_get_se_golomb(gb);
1675 /* find the closest reference to the one we want */
1676 /* Jordi: this is needed if the referenced picture hasn't yet arrived */
/* the loop stops early once an exact match (refdist == 0) is found */
1677 for (j = 0; j < MAX_REFERENCE_FRAMES && refdist; j++)
1678 if (s->ref_frames[j]
1679 && FFABS(s->ref_frames[j]->avframe->display_picture_number - refnum) < refdist) {
1680 s->ref_pics[i] = s->ref_frames[j];
1681 refdist = FFABS(s->ref_frames[j]->avframe->display_picture_number - refnum);
1684 if (!s->ref_pics[i] || refdist)
1685 av_log(s->avctx, AV_LOG_DEBUG, "Reference not found\n");
1687 /* if there were no references at all, allocate one */
/* NOTE(review): the return value of ff_get_buffer() is ignored here; if it
 * fails, ref_pics[i] still points at a frame without data. If every slot of
 * all_frames is in use, ref_pics[i] stays NULL and is dereferenced later. */
1688 if (!s->ref_pics[i])
1689 for (j = 0; j < MAX_FRAMES; j++)
1690 if (!s->all_frames[j].avframe->data[0]) {
1691 s->ref_pics[i] = &s->all_frames[j];
1692 ff_get_buffer(s->avctx, s->ref_pics[i]->avframe, AV_GET_BUFFER_FLAG_REF);
1697 /* retire the reference frames that are not used anymore */
1698 if (s->current_picture->avframe->reference) {
/* like the references, coded as a signed offset; an offset of zero means
 * nothing is retired */
1699 retire = picnum + dirac_get_se_golomb(gb);
1700 if (retire != picnum) {
1701 DiracFrame *retire_pic = remove_frame(s->ref_frames, retire);
/* keep only the "waiting for output" bit */
1704 retire_pic->avframe->reference &= DELAYED_PIC_REF;
1706 av_log(s->avctx, AV_LOG_DEBUG, "Frame to retire not found\n");
1709 /* if reference array is full, remove the oldest as per the spec */
/* NOTE(review): the result of remove_frame() is dereferenced without a NULL
 * check; this relies on ref_frames[0] being present whenever add_frame()
 * reports the list as full. */
1710 while (add_frame(s->ref_frames, MAX_REFERENCE_FRAMES, s->current_picture)) {
1711 av_log(s->avctx, AV_LOG_ERROR, "Reference frame overflow\n");
1712 remove_frame(s->ref_frames, s->ref_frames[0]->avframe->display_picture_number)->avframe->reference &= DELAYED_PIC_REF;
1717 if (dirac_unpack_prediction_parameters(s)) /* [DIRAC_STD] 11.2 Picture Prediction Data. picture_prediction() */
1719 if (dirac_unpack_block_motion_data(s)) /* [DIRAC_STD] 12. Block motion data syntax */
1722 if (dirac_unpack_idwt_params(s)) /* [DIRAC_STD] 11.3 Wavelet transform data */
/*
 * Output the delayed frame with the lowest picture number, if any.
 *
 * s          decoder context; s->delay_frames is a NULL-terminated list
 * picture    receives a new reference to the chosen frame
 * got_frame  output flag for the caller
 */
1729 static int get_delayed_pic(DiracContext *s, AVFrame *picture, int *got_frame)
1731 DiracFrame *out = s->delay_frames[0];
1735 /* find frame with lowest picture number */
1736 for (i = 1; s->delay_frames[i]; i++)
1737 if (s->delay_frames[i]->avframe->display_picture_number < out->avframe->display_picture_number) {
1738 out = s->delay_frames[i];
/* close the gap: shift the following entries, terminator included, down
 * by one */
1742 for (i = out_idx; s->delay_frames[i]; i++)
1743 s->delay_frames[i] = s->delay_frames[i+1];
/* toggle the "waiting for output" bit on the frame being returned */
1746 out->avframe->reference ^= DELAYED_PIC_REF;
1748 if((ret = av_frame_ref(picture, out->avframe)) < 0)
1756 * Dirac Specification ->
1757 * 9.6 Parse Info Header Syntax. parse_info()
1758 * 4 byte start code + byte parse code + 4 byte size + 4 byte previous size
/* 4 (start code) + 1 (parse code) + 4 (size) + 4 (previous size) bytes.
 * dirac_decode_data_unit() starts its bit reader at the literal &buf[13],
 * which has to stay in step with this value. */
1760 #define DATA_UNIT_HEADER_SIZE 13
1762 /* [DIRAC_STD] dirac_decode_data_unit makes reference to the while defined in 9.3
1763 inside the function parse_sequence() */
/*
 * Decode a single data unit.
 *
 * avctx  codec context
 * buf    start of the data unit, i.e. of its parse-info header
 * size   size of the data unit in bytes, header included
 *
 * buf[4] is the parse code. It selects between sequence header, end of
 * sequence, auxiliary data and picture data handling.
 */
1764 static int dirac_decode_data_unit(AVCodecContext *avctx, const uint8_t *buf, int size)
1766 DiracContext *s = avctx->priv_data;
1767 DiracFrame *pic = NULL;
/* NOTE(review): buf[4] is read here, before `size` is checked below; this
 * relies on the caller guaranteeing that a full header is available. */
1768 int ret, i, parse_code = buf[4];
1771 if (size < DATA_UNIT_HEADER_SIZE)
/* the bit reader covers only the payload after the 13-byte header */
1774 init_get_bits(&s->gb, &buf[13], 8*(size - DATA_UNIT_HEADER_SIZE));
1776 if (parse_code == pc_seq_header) {
1777 if (s->seen_sequence_header)
1780 /* [DIRAC_STD] 10. Sequence header */
1781 if (avpriv_dirac_parse_sequence_header(avctx, &s->gb, &s->source))
1784 avcodec_get_chroma_sub_sample(avctx->pix_fmt, &s->chroma_x_shift, &s->chroma_y_shift);
1786 if (alloc_sequence_buffers(s))
1789 s->seen_sequence_header = 1;
1790 } else if (parse_code == pc_eos) { /* [DIRAC_STD] End of Sequence */
1791 free_sequence_buffers(s);
1792 s->seen_sequence_header = 0;
1793 } else if (parse_code == pc_aux_data) {
1794 if (buf[13] == 1) { /* encoder implementation/version */
1796 /* versions older than 1.0.8 don't store quant delta for
1797 subbands with only one codeblock */
/* NOTE(review): sscanf() reads until it hits a mismatch or a NUL byte and
 * knows nothing about `size`; this relies on the packet being followed by
 * zero padding. buf[13] above is likewise read even when size == 13. */
1798 if (sscanf(buf+14, "Schroedinger %d.%d.%d", ver, ver+1, ver+2) == 3)
1799 if (ver[0] == 1 && ver[1] == 0 && ver[2] <= 7)
1800 s->old_delta_quant = 1;
1802 } else if (parse_code & 0x8) { /* picture data unit */
1803 if (!s->seen_sequence_header) {
1804 av_log(avctx, AV_LOG_DEBUG, "Dropping frame without sequence header\n");
1808 /* find an unused frame */
/* no break follows the assignment in the visible lines, so the last free
 * slot is the one that gets used */
1809 for (i = 0; i < MAX_FRAMES; i++)
1810 if (s->all_frames[i].avframe->data[0] == NULL)
1811 pic = &s->all_frames[i];
1813 av_log(avctx, AV_LOG_ERROR, "framelist full\n");
1817 av_frame_unref(pic->avframe);
1819 /* [DIRAC_STD] Defined in 9.6.1 ... */
/* the low two bits of the parse code give the number of references; the
 * value 3 is rejected below */
1820 tmp = parse_code & 0x03; /* [DIRAC_STD] num_refs() */
1822 av_log(avctx, AV_LOG_ERROR, "num_refs of 3\n");
1826 s->is_arith = (parse_code & 0x48) == 0x08; /* [DIRAC_STD] using_ac() */
1827 s->low_delay = (parse_code & 0x88) == 0x88; /* [DIRAC_STD] is_low_delay() */
1828 pic->avframe->reference = (parse_code & 0x0C) == 0x0C; /* [DIRAC_STD] is_reference() */
1829 pic->avframe->key_frame = s->num_refs == 0; /* [DIRAC_STD] is_intra() */
1830 pic->avframe->pict_type = s->num_refs + 1; /* Definition of AVPictureType in avutil.h */
1832 if ((ret = ff_get_buffer(avctx, pic->avframe, (parse_code & 0x0C) == 0x0C ? AV_GET_BUFFER_FLAG_REF : 0)) < 0)
/* the plane strides follow the line sizes of the newly allocated frame */
1834 s->current_picture = pic;
1835 s->plane[0].stride = pic->avframe->linesize[0];
1836 s->plane[1].stride = pic->avframe->linesize[1];
1837 s->plane[2].stride = pic->avframe->linesize[2];
1839 /* [DIRAC_STD] 11.1 Picture parse. picture_parse() */
1840 if (dirac_decode_picture_header(s))
1843 /* [DIRAC_STD] 13.0 Transform data syntax. transform_data() */
1844 if (dirac_decode_frame_internal(s))
/*
 * Decoder entry point: split one packet into Dirac data units, decode them,
 * and output at most one picture in display order.
 *
 * avctx      codec context
 * data       output AVFrame
 * got_frame  output flag for the caller
 * pkt        input packet
 */
1850 static int dirac_decode_frame(AVCodecContext *avctx, void *data, int *got_frame, AVPacket *pkt)
1852 DiracContext *s = avctx->priv_data;
1853 AVFrame *picture = data;
1854 uint8_t *buf = pkt->data;
1855 int buf_size = pkt->size;
1856 int i, data_unit_size, buf_idx = 0;
1859 /* release unused frames */
/* a frame is unused once its reference field is 0, i.e. it is neither a
 * reference picture nor waiting for output; its hpel state is reset too */
1860 for (i = 0; i < MAX_FRAMES; i++)
1861 if (s->all_frames[i].avframe->data[0] && !s->all_frames[i].avframe->reference) {
1862 av_frame_unref(s->all_frames[i].avframe);
1863 memset(s->all_frames[i].interpolated, 0, sizeof(s->all_frames[i].interpolated));
1866 s->current_picture = NULL;
1869 /* end of stream, so flush delayed pics */
1871 return get_delayed_pic(s, (AVFrame *)data, got_frame);
1874 /*[DIRAC_STD] Here starts the code from parse_info() defined in 9.6
1875 [DIRAC_STD] PARSE_INFO_PREFIX = "BBCD" as defined in ISO/IEC 646
1876 BBCD start code search */
/* the loop bound keeps a complete parse-info header inside the buffer */
1877 for (; buf_idx + DATA_UNIT_HEADER_SIZE < buf_size; buf_idx++) {
1878 if (buf[buf_idx ] == 'B' && buf[buf_idx+1] == 'B' &&
1879 buf[buf_idx+2] == 'C' && buf[buf_idx+3] == 'D')
1882 /* BBCD found or end of data */
1883 if (buf_idx + DATA_UNIT_HEADER_SIZE >= buf_size)
/* size field of the header: 32-bit big-endian, right after the parse code.
 * NOTE(review): the value is stored in a signed int. A field of 0x80000000 or
 * above becomes negative, passes the `> buf_size` test below and would later
 * move buf_idx backwards; for large positive values buf_idx + data_unit_size
 * can overflow. Comparing against buf_size - buf_idx with an unsigned size
 * would avoid both — confirm how the rest of the loop reacts. */
1886 data_unit_size = AV_RB32(buf+buf_idx+5);
1887 if (buf_idx + data_unit_size > buf_size || !data_unit_size) {
1888 if(buf_idx + data_unit_size > buf_size)
1889 av_log(s->avctx, AV_LOG_ERROR,
1890 "Data unit with size %d is larger than input buffer, discarding\n",
1895 /* [DIRAC_STD] dirac_decode_data_unit makes reference to the while defined in 9.3 inside the function parse_sequence() */
1896 if (dirac_decode_data_unit(avctx, buf+buf_idx, data_unit_size))
1898 av_log(s->avctx, AV_LOG_ERROR,"Error in dirac_decode_data_unit\n");
1901 buf_idx += data_unit_size;
/* the packet contained no picture data unit */
1904 if (!s->current_picture)
/* the decoded picture is ahead of the next one due for display: queue it
 * and check whether the expected picture is already waiting */
1907 if (s->current_picture->avframe->display_picture_number > s->frame_number) {
1908 DiracFrame *delayed_frame = remove_frame(s->delay_frames, s->frame_number);
1910 s->current_picture->avframe->reference |= DELAYED_PIC_REF;
1912 if (add_frame(s->delay_frames, MAX_DELAY, s->current_picture)) {
1913 int min_num = s->delay_frames[0]->avframe->display_picture_number;
1914 /* Too many delayed frames, so we display the frame with the lowest pts */
1915 av_log(avctx, AV_LOG_ERROR, "Delay frame overflow\n");
1916 delayed_frame = s->delay_frames[0];
1918 for (i = 1; s->delay_frames[i]; i++)
1919 if (s->delay_frames[i]->avframe->display_picture_number < min_num)
1920 min_num = s->delay_frames[i]->avframe->display_picture_number;
/* take the lowest-numbered frame out of the queue, then retry adding the
 * current picture */
1922 delayed_frame = remove_frame(s->delay_frames, min_num);
1923 add_frame(s->delay_frames, MAX_DELAY, s->current_picture);
1926 if (delayed_frame) {
1927 delayed_frame->avframe->reference ^= DELAYED_PIC_REF;
1928 if((ret=av_frame_ref(data, delayed_frame->avframe)) < 0)
1932 } else if (s->current_picture->avframe->display_picture_number == s->frame_number) {
1933 /* The right frame at the right time :-) */
1934 if((ret=av_frame_ref(data, s->current_picture->avframe)) < 0)
/* the next picture due is the one after the picture just output */
1940 s->frame_number = picture->display_picture_number + 1;
/* Codec registration entry for the Dirac decoder. CODEC_CAP_DELAY matches
 * the reordering in dirac_decode_frame(), which can hold pictures back and
 * flushes them through get_delayed_pic(). */
1945 AVCodec ff_dirac_decoder = {
1947 .long_name = NULL_IF_CONFIG_SMALL("BBC Dirac VC-2"),
1948 .type = AVMEDIA_TYPE_VIDEO,
1949 .id = AV_CODEC_ID_DIRAC,
1950 .priv_data_size = sizeof(DiracContext),
1951 .init = dirac_decode_init,
1952 .close = dirac_decode_end,
1953 .decode = dirac_decode_frame,
1954 .capabilities = CODEC_CAP_DELAY,
1955 .flush = dirac_decode_flush,