2 * Copyright (C) 2007 Marco Gerards <marco@gnu.org>
3 * Copyright (C) 2009 David Conrad
4 * Copyright (C) 2011 Jordi Ortiz
6 * This file is part of FFmpeg.
8 * FFmpeg is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
13 * FFmpeg is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with FFmpeg; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
26 * @author Marco Gerards <marco@gnu.org>, David Conrad, Jordi Ortiz <nenjordi@gmail.com>
32 #include "bytestream.h"
35 #include "dirac_arith.h"
36 #include "mpeg12data.h"
37 #include "dirac_dwt.h"
40 #include "videodsp.h" // for ff_emulated_edge_mc_8
43 * The spec limits the number of wavelet decompositions to 4 for both
44 * level 1 (VC-2) and 128 (long-gop default).
45 * 5 decompositions is the maximum before >16-bit buffers are needed.
46 * Schroedinger allows this for DD 9,7 and 13,7 wavelets only, limiting
47 * the others to 4 decompositions (or 3 for the fidelity filter).
49 * We use this instead of MAX_DECOMPOSITIONS to save some memory.
51 #define MAX_DWT_LEVELS 5
54 * The spec limits this to 3 for frame coding, but in practice can be as high as 6
56 #define MAX_REFERENCE_FRAMES 8
57 #define MAX_DELAY 5 /* limit for main profile for frame coding (TODO: field coding) */
58 #define MAX_FRAMES (MAX_REFERENCE_FRAMES + MAX_DELAY + 1)
59 #define MAX_QUANT 68 /* max quant for VC-2 */
60 #define MAX_BLOCKSIZE 32 /* maximum xblen/yblen we support */
63 * DiracBlock->ref flags, if set then the block does MC from the given ref
65 #define DIRAC_REF_MASK_REF1 1
66 #define DIRAC_REF_MASK_REF2 2
67 #define DIRAC_REF_MASK_GLOBAL 4
70 * Value of Picture.reference when Picture is not a reference picture, but
71 * is held for delayed output.
73 #define DELAYED_PIC_REF 4
75 #define ff_emulated_edge_mc ff_emulated_edge_mc_8 /* Fix: change the calls to this function regarding bit depth */
/*
 * Round size up to the next multiple of (1 << depth).
 *
 * Every use of an argument is parenthesised so that arguments containing
 * low-precedence operators (for example `w & mask` or `c ? a : b`) expand
 * to the intended expression.
 * Both arguments are evaluated more than once: do not pass expressions
 * with side effects.
 */
#define CALC_PADDING(size, depth) \
    ((((size) + (1 << (depth)) - 1) >> (depth)) << (depth))
80 #define DIVRNDUP(a, b) (((a) + (b) - 1) / (b))
84 int interpolated[3]; /* 1 if hpel[] is valid */
86 uint8_t *hpel_base[3][4];
93 } u; /* anonymous unions aren't in C99 :( */
97 typedef struct SubBand {
105 struct SubBand *parent;
109 const uint8_t *coeff_data;
112 typedef struct Plane {
121 IDWTELEM *idwt_buf_base;
127 /* block separation (block n+1 starts after this many pixels in block n) */
130 /* amount of overspill on each edge (half of the overlap between blocks) */
134 SubBand band[MAX_DWT_LEVELS][4];
/*
 * Decoder state for one Dirac stream: picture-level syntax flags, wavelet
 * and codeblock parameters, low-delay slice parameters, global/block motion
 * data, motion-compensation scratch buffers and the frame pools.
 * NOTE(review): several members and the headers of the nested structs are
 * not visible in this listing.
 */
137 typedef struct DiracContext {
138 AVCodecContext *avctx;
140 DiracDSPContext diracdsp;
142 dirac_source_params source;
143 int seen_sequence_header;
144 int frame_number; /* number of the next frame to display */
149 int zero_res; /* zero residue flag */
150 int is_arith; /* whether coeffs use arith or golomb coding */
151 int low_delay; /* use the low delay syntax */
152 int globalmc_flag; /* use global motion compensation */
153 int num_refs; /* number of reference pictures */
155 /* wavelet decoding */
156 unsigned wavelet_depth; /* depth of the IDWT */
157 unsigned wavelet_idx;
160 * schroedinger older than 1.0.8 doesn't store
161 * quant delta if only one codebook exists in a band
163 unsigned old_delta_quant;
164 unsigned codeblock_mode;
169 } codeblock[MAX_DWT_LEVELS+1];
172 unsigned num_x; /* number of horizontal slices */
173 unsigned num_y; /* number of vertical slices */
174 AVRational bytes; /* average bytes per slice */
175 uint8_t quant[MAX_DWT_LEVELS][4]; /* [DIRAC_STD] E.1 */
179 int pan_tilt[2]; /* pan/tilt vector */
180 int zrs[2][2]; /* zoom/rotate/shear matrix */
181 int perspective[2]; /* perspective vector */
183 unsigned perspective_exp;
186 /* motion compensation */
187 uint8_t mv_precision; /* motion vector precision read by [DIRAC_STD] 11.2.5 motion_vector_precision(); values above 3 (eighth-pel) are rejected */
188 int16_t weight[2]; /* [DIRAC_STD] REF1_WT and REF2_WT */
189 unsigned weight_log2denom; /* [DIRAC_STD] REFS_WT_PRECISION */
191 int blwidth; /* number of blocks (horizontally) */
192 int blheight; /* number of blocks (vertically) */
193 int sbwidth; /* number of superblocks (horizontally) */
194 int sbheight; /* number of superblocks (vertically) */
197 DiracBlock *blmotion;
199 uint8_t *edge_emu_buffer[4];
200 uint8_t *edge_emu_buffer_base;
202 uint16_t *mctmp; /* buffer holding the MC data multiplied by OBMC weights */
205 DECLARE_ALIGNED(16, uint8_t, obmc_weight)[3][MAX_BLOCKSIZE*MAX_BLOCKSIZE];
207 void (*put_pixels_tab[4])(uint8_t *dst, const uint8_t *src[5], int stride, int h);
208 void (*avg_pixels_tab[4])(uint8_t *dst, const uint8_t *src[5], int stride, int h);
209 void (*add_obmc)(uint16_t *dst, const uint8_t *src, int stride, const uint8_t *obmc_weight, int yblen);
210 dirac_weight_func weight_func;
211 dirac_biweight_func biweight_func;
213 DiracFrame *current_picture;
214 DiracFrame *ref_pics[2];
216 DiracFrame *ref_frames[MAX_REFERENCE_FRAMES+1];
217 DiracFrame *delay_frames[MAX_DELAY+1];
218 DiracFrame all_frames[MAX_FRAMES];
222 * Dirac Specification ->
223 * Parse code values. 9.6.1 Table 9.1
225 enum dirac_parse_code {
226 pc_seq_header = 0x00,
/*
 * Default low-delay quantisation matrices, read as
 * default_qmat[wavelet_idx][level][orientation] by
 * dirac_unpack_idwt_params() when the stream carries no custom matrix.
 * Seven rows are visible, one for each accepted wavelet_idx value (0..6);
 * each row covers four decomposition levels.
 */
239 static const uint8_t default_qmat[][4][4] = {
240 { { 5, 3, 3, 0}, { 0, 4, 4, 1}, { 0, 5, 5, 2}, { 0, 6, 6, 3} },
241 { { 4, 2, 2, 0}, { 0, 4, 4, 2}, { 0, 5, 5, 3}, { 0, 7, 7, 5} },
242 { { 5, 3, 3, 0}, { 0, 4, 4, 1}, { 0, 5, 5, 2}, { 0, 6, 6, 3} },
243 { { 8, 4, 4, 0}, { 0, 4, 4, 0}, { 0, 4, 4, 0}, { 0, 4, 4, 0} },
244 { { 8, 4, 4, 0}, { 0, 4, 4, 0}, { 0, 4, 4, 0}, { 0, 4, 4, 0} },
245 { { 0, 4, 4, 8}, { 0, 8, 8, 12}, { 0, 13, 13, 17}, { 0, 17, 17, 21} },
246 { { 3, 1, 1, 0}, { 0, 4, 4, 2}, { 0, 6, 6, 5}, { 0, 9, 9, 7} },
/*
 * Quantisation scale factors, looked up by quantiser index in codeblock()
 * and lowdelay_subband(). The visible entries double every four indices.
 * NOTE(review): the array is declared with MAX_QUANT+1 elements but only
 * the first 56 initialisers are visible in this listing.
 */
249 static const int qscale_tab[MAX_QUANT+1] = {
250 4, 5, 6, 7, 8, 10, 11, 13,
251 16, 19, 23, 27, 32, 38, 45, 54,
252 64, 76, 91, 108, 128, 152, 181, 215,
253 256, 304, 362, 431, 512, 609, 724, 861,
254 1024, 1218, 1448, 1722, 2048, 2435, 2896, 3444,
255 4096, 4871, 5793, 6889, 8192, 9742, 11585, 13777,
256 16384, 19484, 23170, 27554, 32768, 38968, 46341, 55109,
/*
 * Dequantisation offsets indexed by quantiser index; used by
 * lowdelay_subband() and as one of the two offset tables in codeblock().
 * NOTE(review): presumably selected for pictures without references - the
 * selecting condition in codeblock() is not visible here. Only the first
 * 56 initialisers are visible in this listing.
 */
260 static const int qoffset_intra_tab[MAX_QUANT+1] = {
261 1, 2, 3, 4, 4, 5, 6, 7,
262 8, 10, 12, 14, 16, 19, 23, 27,
263 32, 38, 46, 54, 64, 76, 91, 108,
264 128, 152, 181, 216, 256, 305, 362, 431,
265 512, 609, 724, 861, 1024, 1218, 1448, 1722,
266 2048, 2436, 2897, 3445, 4096, 4871, 5793, 6889,
267 8192, 9742, 11585, 13777, 16384, 19484, 23171, 27555,
/*
 * Second dequantisation offset table indexed by quantiser index; the
 * alternative to qoffset_intra_tab in codeblock().
 * NOTE(review): presumably selected for pictures with references - the
 * selecting condition is not visible here. Only the first 56 initialisers
 * are visible in this listing.
 */
271 static const int qoffset_inter_tab[MAX_QUANT+1] = {
272 1, 2, 2, 3, 3, 4, 4, 5,
273 6, 7, 9, 10, 12, 14, 17, 20,
274 24, 29, 34, 41, 48, 57, 68, 81,
275 96, 114, 136, 162, 192, 228, 272, 323,
276 384, 457, 543, 646, 768, 913, 1086, 1292,
277 1536, 1827, 2172, 2583, 3072, 3653, 4344, 5166,
278 6144, 7307, 8689, 10333, 12288, 14613, 17378, 20666,
/**
 * Divide by 3 with a multiply-and-shift (magic numbers from schroedinger).
 *
 * Computes ((x + 1) * 21845 + 10922) >> 16, where 21845 is 65535 / 3.
 * For small non-negative x this yields x / 3 rounded to the nearest integer
 * (e.g. 2 -> 1, 4 -> 1, 5 -> 2). The expression is kept exactly as in
 * schroedinger so results stay bit-identical.
 *
 * @param x value to divide; (x + 1) * 21845 must fit in an int
 * @return  approximately x / 3
 */
static inline int divide3(int x)
{
    return ((x+1)*21845 + 10922) >> 16;
}
/*
 * Look up the frame whose display_picture_number equals picnum in the
 * NULL-terminated array framelist and close the gap it leaves by shifting
 * every later entry (including the terminating NULL) down by one slot.
 * NOTE(review): the statements that record remove_idx, guard the shift and
 * return the result are not visible in this listing.
 */
288 static DiracFrame *remove_frame(DiracFrame *framelist[], int picnum)
290 DiracFrame *remove_pic = NULL;
291 int i, remove_idx = -1;
293 for (i = 0; framelist[i]; i++)
294 if (framelist[i]->avframe->display_picture_number == picnum) {
295 remove_pic = framelist[i];
300 for (i = remove_idx; framelist[i]; i++)
301 framelist[i] = framelist[i+1];
/*
 * Store frame in one of the first maxframes slots of framelist.
 * NOTE(review): the condition that picks the slot and the return values
 * are not visible in this listing.
 */
306 static int add_frame(DiracFrame *framelist[], int maxframes, DiracFrame *frame)
309 for (i = 0; i < maxframes; i++)
311 framelist[i] = frame;
/*
 * Allocate the per-sequence buffers: for each of the three planes an IDWT
 * coefficient buffer (with top padding) and an IDWT scratch line, then the
 * superblock split map, the block motion array and the motion-compensation
 * scratch buffers.
 *
 * Returns AVERROR(ENOMEM) when one of the checked allocations fails.
 *
 * NOTE(review): within the visible lines edge_emu_buffer_base is allocated
 * but is not part of the final NULL check.
 * NOTE(review): idwt_buf is derived from idwt_buf_base before the NULL
 * check, i.e. pointer arithmetic may be done on a NULL pointer.
 * NOTE(review): the size products are computed in int with no visible
 * overflow check - confirm the dimensions are bounded by the caller.
 */
317 static int alloc_sequence_buffers(DiracContext *s)
319 int sbwidth = DIVRNDUP(s->source.width, 4);
320 int sbheight = DIVRNDUP(s->source.height, 4);
321 int i, w, h, top_padding;
323 /* todo: think more about this / use or set Plane here */
324 for (i = 0; i < 3; i++) {
325 int max_xblen = MAX_BLOCKSIZE >> (i ? s->chroma_x_shift : 0);
326 int max_yblen = MAX_BLOCKSIZE >> (i ? s->chroma_y_shift : 0);
327 w = s->source.width >> (i ? s->chroma_x_shift : 0);
328 h = s->source.height >> (i ? s->chroma_y_shift : 0);
330 /* we allocate the max we support here since num decompositions can
331 * change from frame to frame. Stride is aligned to 16 for SIMD, and
332 * 1<<MAX_DWT_LEVELS top padding to avoid if(y>0) in arith decoding
333 * MAX_BLOCKSIZE padding for MC: blocks can spill up to half of that
335 top_padding = FFMAX(1<<MAX_DWT_LEVELS, max_yblen/2);
336 w = FFALIGN(CALC_PADDING(w, MAX_DWT_LEVELS), 8); /* FIXME: Should this be 16 for SSE??? */
337 h = top_padding + CALC_PADDING(h, MAX_DWT_LEVELS) + max_yblen/2;
339 s->plane[i].idwt_buf_base = av_mallocz((w+max_xblen)*h * sizeof(IDWTELEM));
340 s->plane[i].idwt_tmp = av_malloc((w+16) * sizeof(IDWTELEM));
341 s->plane[i].idwt_buf = s->plane[i].idwt_buf_base + top_padding*w;
342 if (!s->plane[i].idwt_buf_base || !s->plane[i].idwt_tmp)
343 return AVERROR(ENOMEM);
347 h = s->source.height;
349 /* fixme: allocate using real stride here */
350 s->sbsplit = av_malloc(sbwidth * sbheight);
351 s->blmotion = av_malloc(sbwidth * sbheight * 16 * sizeof(*s->blmotion));
352 s->edge_emu_buffer_base = av_malloc((w+64)*MAX_BLOCKSIZE);
354 s->mctmp = av_malloc((w+64+MAX_BLOCKSIZE) * (h+MAX_BLOCKSIZE) * sizeof(*s->mctmp));
355 s->mcscratch = av_malloc((w+64)*MAX_BLOCKSIZE);
357 if (!s->sbsplit || !s->blmotion || !s->mctmp || !s->mcscratch)
358 return AVERROR(ENOMEM);
/*
 * Release everything tied to the current sequence: unreference every frame
 * in all_frames that still holds data and clear its interpolated[] flags,
 * free the hpel_base planes with index 1..3 of each frame, empty the
 * reference and delay lists, and free the per-plane IDWT buffers and the
 * motion buffers. av_freep() also resets each freed pointer.
 * NOTE(review): no free of mctmp is visible in this listing - confirm it
 * is released on one of the lines not shown.
 */
362 static void free_sequence_buffers(DiracContext *s)
366 for (i = 0; i < MAX_FRAMES; i++) {
367 if (s->all_frames[i].avframe->data[0]) {
368 av_frame_unref(s->all_frames[i].avframe);
369 memset(s->all_frames[i].interpolated, 0, sizeof(s->all_frames[i].interpolated));
372 for (j = 0; j < 3; j++)
373 for (k = 1; k < 4; k++)
374 av_freep(&s->all_frames[i].hpel_base[j][k]);
377 memset(s->ref_frames, 0, sizeof(s->ref_frames));
378 memset(s->delay_frames, 0, sizeof(s->delay_frames));
380 for (i = 0; i < 3; i++) {
381 av_freep(&s->plane[i].idwt_buf_base);
382 av_freep(&s->plane[i].idwt_tmp);
385 av_freep(&s->sbsplit);
386 av_freep(&s->blmotion);
387 av_freep(&s->edge_emu_buffer_base);
390 av_freep(&s->mcscratch);
/*
 * Codec init callback: resets frame_number to -1, initialises the DSP
 * contexts and allocates one AVFrame for every slot of all_frames.
 * If an allocation fails, the frames allocated so far are freed again
 * (walking i back down) and AVERROR(ENOMEM) is returned.
 * NOTE(review): the loop construct around the rollback and the success
 * return are not visible in this listing.
 */
393 static av_cold int dirac_decode_init(AVCodecContext *avctx)
395 DiracContext *s = avctx->priv_data;
399 s->frame_number = -1;
401 ff_dsputil_init(&s->dsp, avctx);
402 ff_diracdsp_init(&s->diracdsp);
404 for (i = 0; i < MAX_FRAMES; i++) {
405 s->all_frames[i].avframe = av_frame_alloc();
406 if (!s->all_frames[i].avframe) {
408 av_frame_free(&s->all_frames[--i].avframe);
409 return AVERROR(ENOMEM);
/*
 * Flush callback: drops all sequence buffers and returns the decoder to
 * the "no sequence header seen yet" state, with frame_number back at -1.
 */
416 static void dirac_decode_flush(AVCodecContext *avctx)
418 DiracContext *s = avctx->priv_data;
419 free_sequence_buffers(s);
420 s->seen_sequence_header = 0;
421 s->frame_number = -1;
/*
 * Close callback: flushes the decoder (which frees the sequence buffers)
 * and then frees the AVFrame of every slot in all_frames.
 * The flush must come first because free_sequence_buffers() dereferences
 * each avframe.
 */
424 static av_cold int dirac_decode_end(AVCodecContext *avctx)
426 DiracContext *s = avctx->priv_data;
429 dirac_decode_flush(avctx);
430 for (i = 0; i < MAX_FRAMES; i++)
431 av_frame_free(&s->all_frames[i].avframe);
/*
 * Map the sign of x onto an arithmetic-coding context index:
 * CTX_SIGN_ZERO - 1 for negative x, CTX_SIGN_ZERO for zero and
 * CTX_SIGN_ZERO + 1 for positive x. x is evaluated twice.
 */
436 #define SIGN_CTX(x) (CTX_SIGN_ZERO + ((x) > 0) - ((x) < 0))
/*
 * Decode one arithmetic-coded coefficient at position (x, y) of subband b;
 * buf points at that coefficient.
 *
 * The magnitude context starts at CTX_ZPZN_F1 and is adjusted by whether
 * the co-located parent coefficient is non-zero and whether the left,
 * upper and upper-left neighbours are all zero. The sign context is taken
 * from a neighbouring coefficient, depending on the band orientation.
 * The magnitude is dequantised as (coeff * qfactor + qoffset + 2) >> 2;
 * (coeff ^ -sign) + sign then negates it when the sign bit is 1.
 *
 * NOTE(review): the guards around the parent/neighbour accesses and the
 * final store are not visible in this listing.
 */
438 static inline void coeff_unpack_arith(DiracArith *c, int qfactor, int qoffset,
439 SubBand *b, IDWTELEM *buf, int x, int y)
443 int pred_ctx = CTX_ZPZN_F1;
445 /* Check if the parent subband has a 0 in the corresponding position */
447 pred_ctx += !!b->parent->ibuf[b->parent->stride * (y>>1) + (x>>1)] << 1;
449 if (b->orientation == subband_hl)
450 sign_pred = buf[-b->stride];
452 /* Determine if the pixel has only zeros in its neighbourhood */
454 pred_ctx += !(buf[-1] | buf[-b->stride] | buf[-1-b->stride]);
455 if (b->orientation == subband_lh)
458 pred_ctx += !buf[-b->stride];
461 coeff = dirac_get_arith_uint(c, pred_ctx, CTX_COEFF_DATA);
463 coeff = (coeff * qfactor + qoffset + 2) >> 2;
464 sign = dirac_get_arith_bit(c, SIGN_CTX(sign_pred));
465 coeff = (coeff ^ -sign) + sign;
/*
 * Read one Golomb-coded coefficient from gb: an unsigned magnitude that is
 * dequantised as (coeff * qfactor + qoffset + 2) >> 2, followed by a sign
 * bit; (coeff ^ -sign) + sign negates the value when the sign bit is 1.
 * NOTE(review): the condition under which the sign is read and the return
 * statement are not visible in this listing.
 */
470 static inline int coeff_unpack_golomb(GetBitContext *gb, int qfactor, int qoffset)
474 coeff = svq3_get_ue_golomb(gb);
476 coeff = (coeff * qfactor + qoffset + 2) >> 2;
477 sign = get_bits1(gb);
478 coeff = (coeff ^ -sign) + sign;
484 * Decode the coeffs in the rectangle defined by left, right, top, bottom
485 * [DIRAC_STD] 13.4.3.2 Codeblock unpacking loop. codeblock()
/*
 * Decode the coefficients of subband b inside the rectangle
 * [left, right) x [top, bottom).
 *
 * Reads the zero-block flag, optionally a quantiser delta (skipped when
 * codeblock_mode is 0, or when old_delta_quant is set and the band has a
 * single codeblock), clamps b->quant to MAX_QUANT so the table lookups
 * stay in range, and then unpacks every coefficient with either the
 * arithmetic or the Golomb reader.
 *
 * NOTE(review): the is_arith / zero_block / quant-range conditions and the
 * per-row pointer advance are not visible in this listing.
 */
487 static inline void codeblock(DiracContext *s, SubBand *b,
488 GetBitContext *gb, DiracArith *c,
489 int left, int right, int top, int bottom,
490 int blockcnt_one, int is_arith)
492 int x, y, zero_block;
493 int qoffset, qfactor;
496 /* check for any coded coefficients in this codeblock */
499 zero_block = dirac_get_arith_bit(c, CTX_ZERO_BLOCK);
501 zero_block = get_bits1(gb);
507 if (s->codeblock_mode && !(s->old_delta_quant && blockcnt_one)) {
508 int quant = b->quant;
510 quant += dirac_get_arith_int(c, CTX_DELTA_Q_F, CTX_DELTA_Q_DATA);
512 quant += dirac_get_se_golomb(gb);
514 av_log(s->avctx, AV_LOG_ERROR, "Invalid quant\n");
520 b->quant = FFMIN(b->quant, MAX_QUANT);
522 qfactor = qscale_tab[b->quant];
523 /* TODO: context pointer? */
525 qoffset = qoffset_intra_tab[b->quant];
527 qoffset = qoffset_inter_tab[b->quant];
529 buf = b->ibuf + top * b->stride;
530 for (y = top; y < bottom; y++) {
531 for (x = left; x < right; x++) {
532 /* [DIRAC_STD] 13.4.4 Subband coefficients. coeff_unpack() */
534 coeff_unpack_arith(c, qfactor, qoffset, b, buf+x, x, y);
536 buf[x] = coeff_unpack_golomb(gb, qfactor, qoffset);
543 * Dirac Specification ->
544 * 13.3 intra_dc_prediction(band)
/*
 * Undo the DC prediction of band b in place: a prediction built from
 * already reconstructed neighbours is added to each coefficient.
 * From the second row on, the first column is predicted from the
 * coefficient above, and every other position from
 * divide3(left + above + above-left).
 * NOTE(review): the body of the first-row loop and the per-row pointer
 * advance are not visible in this listing.
 */
546 static inline void intra_dc_prediction(SubBand *b)
548 IDWTELEM *buf = b->ibuf;
551 for (x = 1; x < b->width; x++)
555 for (y = 1; y < b->height; y++) {
556 buf[0] += buf[-b->stride];
558 for (x = 1; x < b->width; x++) {
559 int pred = buf[x - 1] + buf[x - b->stride] + buf[x - b->stride-1];
560 buf[x] += divide3(pred);
567 * Dirac Specification ->
568 * 13.4.2 Non-skipped subbands. subband_coeffs()
/*
 * Decode every codeblock of subband b.
 *
 * The codeblock grid size comes from s->codeblock[], indexed by b->level
 * (plus one for every band except LL). The band is split so that codeblock
 * (cb_x, cb_y) ends at width*(cb_x+1)/cb_width and
 * height*(cb_y+1)/cb_height. A 1x1 grid sets blockcnt_one, which
 * codeblock() uses for the old schroedinger quant-delta quirk.
 * After decoding, the LL band of a picture without references gets
 * intra_dc_prediction().
 *
 * NOTE(review): the early-out for empty bands, the is_arith test and the
 * left/top updates are not visible in this listing.
 */
570 static av_always_inline void decode_subband_internal(DiracContext *s, SubBand *b, int is_arith)
572 int cb_x, cb_y, left, right, top, bottom;
575 int cb_width = s->codeblock[b->level + (b->orientation != subband_ll)].width;
576 int cb_height = s->codeblock[b->level + (b->orientation != subband_ll)].height;
577 int blockcnt_one = (cb_width + cb_height) == 2;
582 init_get_bits8(&gb, b->coeff_data, b->length);
585 ff_dirac_init_arith_decoder(&c, &gb, b->length);
588 for (cb_y = 0; cb_y < cb_height; cb_y++) {
589 bottom = (b->height * (cb_y+1)) / cb_height;
591 for (cb_x = 0; cb_x < cb_width; cb_x++) {
592 right = (b->width * (cb_x+1)) / cb_width;
593 codeblock(s, b, &gb, &c, left, right, top, bottom, blockcnt_one, is_arith);
599 if (b->orientation == subband_ll && s->num_refs == 0)
600 intra_dc_prediction(b);
/*
 * avctx->execute() worker for arithmetic-coded subbands: b is passed
 * straight through as the SubBand pointer and decoded with is_arith = 1.
 */
603 static int decode_subband_arith(AVCodecContext *avctx, void *b)
605 DiracContext *s = avctx->priv_data;
606 decode_subband_internal(s, b, 1);
/*
 * avctx->execute() worker for Golomb-coded subbands, decoded with
 * is_arith = 0. The subband is reached through one extra dereference
 * (*b), matching the array of SubBand pointers decode_component() passes.
 * NOTE(review): the declaration of b from arg is not visible here.
 */
610 static int decode_subband_golomb(AVCodecContext *avctx, void *arg)
612 DiracContext *s = avctx->priv_data;
614 decode_subband_internal(s, *b, 0);
619 * Dirac Specification ->
620 * [DIRAC_STD] 13.4.1 core_transform_data()
/*
 * Parse and decode all subbands of component comp.
 *
 * For each level and orientation (level 0 has four bands, later levels
 * three, hence the 3*MAX_DWT_LEVELS+1 pointer array) the band header is
 * read, coeff_data is pointed at the byte-aligned payload, the length is
 * clamped to the bytes left in the buffer and the payload is skipped.
 * Decoding is then handed to avctx->execute(): per level for arithmetic
 * coding, or once for all collected bands for Golomb coding.
 *
 * NOTE(review): the conditions selecting the arith/golomb paths and
 * guarding the quant read are not visible in this listing.
 */
622 static void decode_component(DiracContext *s, int comp)
624 AVCodecContext *avctx = s->avctx;
625 SubBand *bands[3*MAX_DWT_LEVELS+1];
626 enum dirac_subband orientation;
627 int level, num_bands = 0;
629 /* Unpack all subbands at all levels. */
630 for (level = 0; level < s->wavelet_depth; level++) {
631 for (orientation = !!level; orientation < 4; orientation++) {
632 SubBand *b = &s->plane[comp].band[level][orientation];
633 bands[num_bands++] = b;
635 align_get_bits(&s->gb);
636 /* [DIRAC_STD] 13.4.2 subband() */
637 b->length = svq3_get_ue_golomb(&s->gb);
639 b->quant = svq3_get_ue_golomb(&s->gb);
640 align_get_bits(&s->gb);
641 b->coeff_data = s->gb.buffer + get_bits_count(&s->gb)/8;
642 b->length = FFMIN(b->length, FFMAX(get_bits_left(&s->gb)/8, 0));
643 skip_bits_long(&s->gb, b->length*8);
646 /* arithmetic coding has inter-level dependencies, so we can only execute one level at a time */
648 avctx->execute(avctx, decode_subband_arith, &s->plane[comp].band[level][!!level],
649 NULL, 4-!!level, sizeof(SubBand));
651 /* golomb coding has no inter-level dependencies, so we can execute all subbands in parallel */
653 avctx->execute(avctx, decode_subband_golomb, bands, NULL, num_bands, sizeof(SubBand*));
656 /* [DIRAC_STD] 13.5.5.2 Luma slice subband data. luma_slice_band(level,orient,sx,sy) --> if b2 == NULL */
657 /* [DIRAC_STD] 13.5.5.3 Chroma slice subband data. chroma_slice_band(level,orient,sx,sy) --> if b2 != NULL */
/*
 * Decode the part of one subband that belongs to slice (slice_x, slice_y).
 *
 * The slice rectangle is the band size scaled by the slice index over the
 * slice counts (lowdelay.num_x / num_y, which are used as divisors here).
 * quant is clamped to MAX_QUANT for the table lookups. When b2 is given,
 * a coefficient for b1 and one for b2 are read alternately at each
 * position (the chroma case); otherwise only b1 is filled. Reading stops
 * as soon as the bit position reaches bits_end.
 *
 * NOTE(review): the statements executed when the overread checks fire and
 * the per-row pointer advances are not visible in this listing.
 */
658 static void lowdelay_subband(DiracContext *s, GetBitContext *gb, int quant,
659 int slice_x, int slice_y, int bits_end,
660 SubBand *b1, SubBand *b2)
662 int left = b1->width * slice_x / s->lowdelay.num_x;
663 int right = b1->width *(slice_x+1) / s->lowdelay.num_x;
664 int top = b1->height * slice_y / s->lowdelay.num_y;
665 int bottom = b1->height *(slice_y+1) / s->lowdelay.num_y;
667 int qfactor = qscale_tab[FFMIN(quant, MAX_QUANT)];
668 int qoffset = qoffset_intra_tab[FFMIN(quant, MAX_QUANT)];
670 IDWTELEM *buf1 = b1->ibuf + top * b1->stride;
671 IDWTELEM *buf2 = b2 ? b2->ibuf + top * b2->stride : NULL;
673 /* we have to constantly check for overread since the spec explicitly
674 requires this, with the meaning that all remaining coeffs are set to 0 */
675 if (get_bits_count(gb) >= bits_end)
678 for (y = top; y < bottom; y++) {
679 for (x = left; x < right; x++) {
680 buf1[x] = coeff_unpack_golomb(gb, qfactor, qoffset);
681 if (get_bits_count(gb) >= bits_end)
684 buf2[x] = coeff_unpack_golomb(gb, qfactor, qoffset);
685 if (get_bits_count(gb) >= bits_end)
695 struct lowdelay_slice {
704 * Dirac Specification ->
705 * 13.5.2 Slices. slice(sx,sy)
/*
 * avctx->execute() worker that decodes one low-delay slice.
 *
 * Slice layout as read here: a 7-bit base quantiser index, then the luma
 * length in bits (stored in av_log2(8 * bytes) + 1 bits), then the luma
 * coefficients, then the chroma coefficients in the remaining bits.
 * Both end positions are clamped to the bits actually left in the reader.
 * The per-band quantiser is quant_base minus the matrix entry, floored
 * at 0.
 */
707 static int decode_lowdelay_slice(AVCodecContext *avctx, void *arg)
709 DiracContext *s = avctx->priv_data;
710 struct lowdelay_slice *slice = arg;
711 GetBitContext *gb = &slice->gb;
712 enum dirac_subband orientation;
713 int level, quant, chroma_bits, chroma_end;
715 int quant_base = get_bits(gb, 7); /*[DIRAC_STD] qindex */
716 int length_bits = av_log2(8 * slice->bytes)+1;
717 int luma_bits = get_bits_long(gb, length_bits);
718 int luma_end = get_bits_count(gb) + FFMIN(luma_bits, get_bits_left(gb));
720 /* [DIRAC_STD] 13.5.5.2 luma_slice_band */
721 for (level = 0; level < s->wavelet_depth; level++)
722 for (orientation = !!level; orientation < 4; orientation++) {
723 quant = FFMAX(quant_base - s->lowdelay.quant[level][orientation], 0);
724 lowdelay_subband(s, gb, quant, slice->slice_x, slice->slice_y, luma_end,
725 &s->plane[0].band[level][orientation], NULL);
728 /* consume any unused bits from luma */
/* NOTE(review): the argument is get_bits_count(gb) - luma_end, which is
 * negative whenever luma decoding stopped before luma_end; skipping
 * forward to luma_end would need luma_end - get_bits_count(gb).
 * Confirm the intended sign against the reference decoder. */
729 skip_bits_long(gb, get_bits_count(gb) - luma_end);
731 chroma_bits = 8*slice->bytes - 7 - length_bits - luma_bits;
732 chroma_end = get_bits_count(gb) + FFMIN(chroma_bits, get_bits_left(gb));
733 /* [DIRAC_STD] 13.5.5.3 chroma_slice_band */
734 for (level = 0; level < s->wavelet_depth; level++)
735 for (orientation = !!level; orientation < 4; orientation++) {
736 quant = FFMAX(quant_base - s->lowdelay.quant[level][orientation], 0);
737 lowdelay_subband(s, gb, quant, slice->slice_x, slice->slice_y, chroma_end,
738 &s->plane[1].band[level][orientation],
739 &s->plane[2].band[level][orientation]);
746 * Dirac Specification ->
747 * 13.5.1 low_delay_transform_data()
/*
 * Decode the low-delay transform data of the current picture.
 *
 * One lowdelay_slice descriptor is set up per slice. The size of slice n
 * is the difference of two running totals,
 * (n+1)*num/den - n*num/den, so the average matches lowdelay.bytes.
 * Setup stops early once no input is left. The slices are decoded through
 * avctx->execute(), after which DC prediction is undone on the level-0 LL
 * band of all three planes.
 *
 * NOTE(review): no NULL check of the av_mallocz() result is visible, and
 * num_x * num_y * sizeof(...) has no visible overflow check.
 * NOTE(review): the slice_num/buf/bufsize updates and the release of
 * slices are not visible in this listing.
 */
749 static void decode_lowdelay(DiracContext *s)
751 AVCodecContext *avctx = s->avctx;
752 int slice_x, slice_y, bytes, bufsize;
754 struct lowdelay_slice *slices;
757 slices = av_mallocz(s->lowdelay.num_x * s->lowdelay.num_y * sizeof(struct lowdelay_slice));
759 align_get_bits(&s->gb);
760 /*[DIRAC_STD] 13.5.2 Slices. slice(sx,sy) */
761 buf = s->gb.buffer + get_bits_count(&s->gb)/8;
762 bufsize = get_bits_left(&s->gb);
764 for (slice_y = 0; bufsize > 0 && slice_y < s->lowdelay.num_y; slice_y++)
765 for (slice_x = 0; bufsize > 0 && slice_x < s->lowdelay.num_x; slice_x++) {
766 bytes = (slice_num+1) * s->lowdelay.bytes.num / s->lowdelay.bytes.den
767 - slice_num * s->lowdelay.bytes.num / s->lowdelay.bytes.den;
769 slices[slice_num].bytes = bytes;
770 slices[slice_num].slice_x = slice_x;
771 slices[slice_num].slice_y = slice_y;
772 init_get_bits(&slices[slice_num].gb, buf, bufsize);
779 avctx->execute(avctx, decode_lowdelay_slice, slices, NULL, slice_num,
780 sizeof(struct lowdelay_slice)); /* [DIRAC_STD] 13.5.2 Slices */
781 intra_dc_prediction(&s->plane[0].band[0][0]); /* [DIRAC_STD] 13.3 intra_dc_prediction() */
782 intra_dc_prediction(&s->plane[1].band[0][0]); /* [DIRAC_STD] 13.3 intra_dc_prediction() */
783 intra_dc_prediction(&s->plane[2].band[0][0]); /* [DIRAC_STD] 13.3 intra_dc_prediction() */
/*
 * Derive the per-plane geometry for the current picture.
 *
 * Plane sizes are the source size shifted by the chroma shifts for planes
 * 1 and 2. The IDWT size is the plane size rounded up to a multiple of
 * 1 << wavelet_depth, with the stride aligned to 8. Every subband of every
 * level is pointed into the shared idwt_buf, with its stride scaled by the
 * level, and linked to the same-orientation band one level below as its
 * parent. Chroma block sizes and separations are the luma values shifted
 * by the chroma shifts; xoffset/yoffset are half the block overlap.
 *
 * NOTE(review): the band width/height updates and the conditions on
 * orientation, level and plane index are not visible in this listing.
 */
787 static void init_planes(DiracContext *s)
789 int i, w, h, level, orientation;
791 for (i = 0; i < 3; i++) {
792 Plane *p = &s->plane[i];
794 p->width = s->source.width >> (i ? s->chroma_x_shift : 0);
795 p->height = s->source.height >> (i ? s->chroma_y_shift : 0);
796 p->idwt_width = w = CALC_PADDING(p->width , s->wavelet_depth);
797 p->idwt_height = h = CALC_PADDING(p->height, s->wavelet_depth);
798 p->idwt_stride = FFALIGN(p->idwt_width, 8);
800 for (level = s->wavelet_depth-1; level >= 0; level--) {
803 for (orientation = !!level; orientation < 4; orientation++) {
804 SubBand *b = &p->band[level][orientation];
806 b->ibuf = p->idwt_buf;
808 b->stride = p->idwt_stride << (s->wavelet_depth - level);
811 b->orientation = orientation;
816 b->ibuf += b->stride>>1;
819 b->parent = &p->band[level-1][orientation];
824 p->xblen = s->plane[0].xblen >> s->chroma_x_shift;
825 p->yblen = s->plane[0].yblen >> s->chroma_y_shift;
826 p->xbsep = s->plane[0].xbsep >> s->chroma_x_shift;
827 p->ybsep = s->plane[0].ybsep >> s->chroma_y_shift;
830 p->xoffset = (p->xblen - p->xbsep)/2;
831 p->yoffset = (p->yblen - p->ybsep)/2;
836 * Unpack the motion compensation parameters
837 * Dirac Specification ->
838 * 11.2 Picture prediction data. picture_prediction()
/*
 * Parse the picture prediction parameters from s->gb.
 *
 * In order: luma block size/separation (explicit values or one of the
 * four presets in default_blen/default_bsep), motion vector precision,
 * the optional global motion parameters for each reference, the picture
 * prediction mode (anything non-zero is rejected) and the reference
 * picture weights.
 *
 * Block parameters are rejected when a separation is zero, smaller than
 * half the block length or larger than the block length, or when a block
 * length exceeds MAX_BLOCKSIZE. A motion vector precision above 3 is
 * rejected as well.
 *
 * NOTE(review): the idx range test, the error returns and the flag bits
 * gating the zoom/rotate/shear, perspective and weight reads are not
 * visible in this listing.
 */
840 static int dirac_unpack_prediction_parameters(DiracContext *s)
842 static const uint8_t default_blen[] = { 4, 12, 16, 24 };
843 static const uint8_t default_bsep[] = { 4, 8, 12, 16 };
845 GetBitContext *gb = &s->gb;
849 /* [DIRAC_STD] 11.2.2 Block parameters. block_parameters() */
850 /* Luma and Chroma are equal. 11.2.3 */
851 idx = svq3_get_ue_golomb(gb); /* [DIRAC_STD] index */
854 av_log(s->avctx, AV_LOG_ERROR, "Block prediction index too high\n");
859 s->plane[0].xblen = svq3_get_ue_golomb(gb);
860 s->plane[0].yblen = svq3_get_ue_golomb(gb);
861 s->plane[0].xbsep = svq3_get_ue_golomb(gb);
862 s->plane[0].ybsep = svq3_get_ue_golomb(gb);
864 /*[DIRAC_STD] preset_block_params(index). Table 11.1 */
865 s->plane[0].xblen = default_blen[idx-1];
866 s->plane[0].yblen = default_blen[idx-1];
867 s->plane[0].xbsep = default_bsep[idx-1];
868 s->plane[0].ybsep = default_bsep[idx-1];
870 /*[DIRAC_STD] 11.2.4 motion_data_dimensions()
871 Calculated in function dirac_unpack_block_motion_data */
873 if (!s->plane[0].xbsep || !s->plane[0].ybsep || s->plane[0].xbsep < s->plane[0].xblen/2 || s->plane[0].ybsep < s->plane[0].yblen/2) {
874 av_log(s->avctx, AV_LOG_ERROR, "Block separation too small\n");
877 if (s->plane[0].xbsep > s->plane[0].xblen || s->plane[0].ybsep > s->plane[0].yblen) {
878 av_log(s->avctx, AV_LOG_ERROR, "Block separation greater than size\n");
881 if (FFMAX(s->plane[0].xblen, s->plane[0].yblen) > MAX_BLOCKSIZE) {
882 av_log(s->avctx, AV_LOG_ERROR, "Unsupported large block size\n");
886 /*[DIRAC_STD] 11.2.5 Motion vector precision. motion_vector_precision()
887 Read motion vector precision */
888 s->mv_precision = svq3_get_ue_golomb(gb);
889 if (s->mv_precision > 3) {
890 av_log(s->avctx, AV_LOG_ERROR, "MV precision finer than eighth-pel\n");
894 /*[DIRAC_STD] 11.2.6 Global motion. global_motion()
895 Read the global motion compensation parameters */
896 s->globalmc_flag = get_bits1(gb);
897 if (s->globalmc_flag) {
898 memset(s->globalmc, 0, sizeof(s->globalmc));
899 /* [DIRAC_STD] pan_tilt(gparams) */
900 for (ref = 0; ref < s->num_refs; ref++) {
902 s->globalmc[ref].pan_tilt[0] = dirac_get_se_golomb(gb);
903 s->globalmc[ref].pan_tilt[1] = dirac_get_se_golomb(gb);
905 /* [DIRAC_STD] zoom_rotate_shear(gparams)
906 zoom/rotation/shear parameters */
908 s->globalmc[ref].zrs_exp = svq3_get_ue_golomb(gb);
909 s->globalmc[ref].zrs[0][0] = dirac_get_se_golomb(gb);
910 s->globalmc[ref].zrs[0][1] = dirac_get_se_golomb(gb);
911 s->globalmc[ref].zrs[1][0] = dirac_get_se_golomb(gb);
912 s->globalmc[ref].zrs[1][1] = dirac_get_se_golomb(gb);
914 s->globalmc[ref].zrs[0][0] = 1;
915 s->globalmc[ref].zrs[1][1] = 1;
917 /* [DIRAC_STD] perspective(gparams) */
919 s->globalmc[ref].perspective_exp = svq3_get_ue_golomb(gb);
920 s->globalmc[ref].perspective[0] = dirac_get_se_golomb(gb);
921 s->globalmc[ref].perspective[1] = dirac_get_se_golomb(gb);
926 /*[DIRAC_STD] 11.2.7 Picture prediction mode. prediction_mode()
927 Picture prediction mode, not currently used. */
928 if (svq3_get_ue_golomb(gb)) {
929 av_log(s->avctx, AV_LOG_ERROR, "Unknown picture prediction mode\n");
933 /* [DIRAC_STD] 11.2.8 Reference picture weight. reference_picture_weights()
934 just data read, weight calculation will be done later on. */
935 s->weight_log2denom = 1;
940 s->weight_log2denom = svq3_get_ue_golomb(gb);
941 s->weight[0] = dirac_get_se_golomb(gb);
942 if (s->num_refs == 2)
943 s->weight[1] = dirac_get_se_golomb(gb);
949 * Dirac Specification ->
950 * 11.3 Wavelet transform data. wavelet_transform()
/*
 * Parse the wavelet transform parameters from s->gb.
 *
 * Reads the zero-residue flag (only for pictures with references), the
 * wavelet index (0..6) and depth (1..MAX_DWT_LEVELS), then either the
 * codeblock layout (core syntax) or the slice parameters and quantisation
 * matrix (low-delay syntax). Without a custom matrix the entries come from
 * default_qmat, which has only four levels, so depths above 4 are
 * rejected; wavelet index 3 gets an extra depth-dependent offset.
 *
 * NOTE(review): lowdelay.num_x and num_y are not validated in the visible
 * lines, yet lowdelay_subband() divides by them - confirm zero is rejected
 * elsewhere.
 * NOTE(review): the rest of the CHECKEDREAD macro and the conditions that
 * select the core/low-delay branches are not visible in this listing.
 */
952 static int dirac_unpack_idwt_params(DiracContext *s)
954 GetBitContext *gb = &s->gb;
958 #define CHECKEDREAD(dst, cond, errmsg) \
959 tmp = svq3_get_ue_golomb(gb); \
961 av_log(s->avctx, AV_LOG_ERROR, errmsg); \
968 s->zero_res = s->num_refs ? get_bits1(gb) : 0;
972 /*[DIRAC_STD] 11.3.1 Transform parameters. transform_parameters() */
973 CHECKEDREAD(s->wavelet_idx, tmp > 6, "wavelet_idx is too big\n")
975 CHECKEDREAD(s->wavelet_depth, tmp > MAX_DWT_LEVELS || tmp < 1, "invalid number of DWT decompositions\n")
978 /* Codeblock parameters (core syntax only) */
980 for (i = 0; i <= s->wavelet_depth; i++) {
981 CHECKEDREAD(s->codeblock[i].width , tmp < 1, "codeblock width invalid\n")
982 CHECKEDREAD(s->codeblock[i].height, tmp < 1, "codeblock height invalid\n")
985 CHECKEDREAD(s->codeblock_mode, tmp > 1, "unknown codeblock mode\n")
987 for (i = 0; i <= s->wavelet_depth; i++)
988 s->codeblock[i].width = s->codeblock[i].height = 1;
990 /* Slice parameters + quantization matrix*/
991 /*[DIRAC_STD] 11.3.4 Slice coding Parameters (low delay syntax only). slice_parameters() */
992 s->lowdelay.num_x = svq3_get_ue_golomb(gb);
993 s->lowdelay.num_y = svq3_get_ue_golomb(gb);
994 s->lowdelay.bytes.num = svq3_get_ue_golomb(gb);
995 s->lowdelay.bytes.den = svq3_get_ue_golomb(gb);
997 if (s->lowdelay.bytes.den <= 0) {
998 av_log(s->avctx,AV_LOG_ERROR,"Invalid lowdelay.bytes.den\n");
999 return AVERROR_INVALIDDATA;
1002 /* [DIRAC_STD] 11.3.5 Quantisation matrices (low-delay syntax). quant_matrix() */
1003 if (get_bits1(gb)) {
1004 av_log(s->avctx,AV_LOG_DEBUG,"Low Delay: Has Custom Quantization Matrix!\n");
1005 /* custom quantization matrix */
1006 s->lowdelay.quant[0][0] = svq3_get_ue_golomb(gb);
1007 for (level = 0; level < s->wavelet_depth; level++) {
1008 s->lowdelay.quant[level][1] = svq3_get_ue_golomb(gb);
1009 s->lowdelay.quant[level][2] = svq3_get_ue_golomb(gb);
1010 s->lowdelay.quant[level][3] = svq3_get_ue_golomb(gb);
1013 if (s->wavelet_depth > 4) {
1014 av_log(s->avctx,AV_LOG_ERROR,"Mandatory custom low delay matrix missing for depth %d\n", s->wavelet_depth);
1015 return AVERROR_INVALIDDATA;
1017 /* default quantization matrix */
1018 for (level = 0; level < s->wavelet_depth; level++)
1019 for (i = 0; i < 4; i++) {
1020 s->lowdelay.quant[level][i] = default_qmat[s->wavelet_idx][level][i];
1021 /* haar with no shift differs for different depths */
1022 if (s->wavelet_idx == 3)
1023 s->lowdelay.quant[level][i] += 4*(s->wavelet_depth-1 - level);
/*
 * Predict the split mode of the superblock at (x, y) from its already
 * decoded neighbours; sbsplit points at the current entry and stride is
 * the row length. The general case sums the left, upper and upper-left
 * modes and maps the sum (0..6) through avgsplit[].
 * NOTE(review): the border conditions that select the single-neighbour
 * returns are not visible in this listing.
 */
1030 static inline int pred_sbsplit(uint8_t *sbsplit, int stride, int x, int y)
1032 static const uint8_t avgsplit[7] = { 0, 0, 1, 1, 1, 2, 2 };
1039 return sbsplit[-stride];
1041 return avgsplit[sbsplit[-1] + sbsplit[-stride] + sbsplit[-stride-1]];
/*
 * Predict one reference flag (selected by refmask) of the block at (x, y)
 * from its left, upper and upper-left neighbours. With all three available
 * the masked flags are summed and (pred >> 1) & refmask yields the
 * majority vote.
 * NOTE(review): the border conditions that select the single-neighbour
 * returns are not visible in this listing.
 */
1044 static inline int pred_block_mode(DiracBlock *block, int stride, int x, int y, int refmask)
1051 return block[-1].ref & refmask;
1053 return block[-stride].ref & refmask;
1055 /* return the majority */
1056 pred = (block[-1].ref & refmask) + (block[-stride].ref & refmask) + (block[-stride-1].ref & refmask);
1057 return (pred >> 1) & refmask;
/*
 * Predict the three DC values of an intra block at (x, y).
 * The DC values of the left, upper and upper-left neighbours are summed,
 * counting only neighbours that exist and use no reference
 * ((ref & 3) == 0). The sum is then averaged: rounded halving for one
 * count, divide3() when n == 3.
 * NOTE(review): the counter n and the condition of the halving branch are
 * not visible in this listing.
 */
1060 static inline void pred_block_dc(DiracBlock *block, int stride, int x, int y)
1064 memset(block->u.dc, 0, sizeof(block->u.dc));
1066 if (x && !(block[-1].ref & 3)) {
1067 for (i = 0; i < 3; i++)
1068 block->u.dc[i] += block[-1].u.dc[i];
1072 if (y && !(block[-stride].ref & 3)) {
1073 for (i = 0; i < 3; i++)
1074 block->u.dc[i] += block[-stride].u.dc[i];
1078 if (x && y && !(block[-1-stride].ref & 3)) {
1079 for (i = 0; i < 3; i++)
1080 block->u.dc[i] += block[-1-stride].u.dc[i];
1085 for (i = 0; i < 3; i++)
1086 block->u.dc[i] = (block->u.dc[i]+1)>>1;
1087 } else if (n == 3) {
1088 for (i = 0; i < 3; i++)
1089 block->u.dc[i] = divide3(block->u.dc[i]);
/*
 * Predict the motion vector for reference ref of the block at (x, y).
 * Candidates are the left, upper and upper-left neighbours that use this
 * reference and are not global-motion blocks. The visible assignments
 * cover four outcomes: zero vector, a copy of the single candidate, the
 * rounded mean of two candidates, and the component-wise median
 * (mid_pred) of three.
 * NOTE(review): the dispatch on the candidate count n is not visible in
 * this listing.
 */
1093 static inline void pred_mv(DiracBlock *block, int stride, int x, int y, int ref)
1096 int refmask = ref+1;
1097 int mask = refmask | DIRAC_REF_MASK_GLOBAL; /* exclude gmc blocks */
1100 if (x && (block[-1].ref & mask) == refmask)
1101 pred[n++] = block[-1].u.mv[ref];
1103 if (y && (block[-stride].ref & mask) == refmask)
1104 pred[n++] = block[-stride].u.mv[ref];
1106 if (x && y && (block[-stride-1].ref & mask) == refmask)
1107 pred[n++] = block[-stride-1].u.mv[ref];
1111 block->u.mv[ref][0] = 0;
1112 block->u.mv[ref][1] = 0;
1115 block->u.mv[ref][0] = pred[0][0];
1116 block->u.mv[ref][1] = pred[0][1];
1119 block->u.mv[ref][0] = (pred[0][0] + pred[1][0] + 1) >> 1;
1120 block->u.mv[ref][1] = (pred[0][1] + pred[1][1] + 1) >> 1;
1123 block->u.mv[ref][0] = mid_pred(pred[0][0], pred[1][0], pred[2][0]);
1124 block->u.mv[ref][1] = mid_pred(pred[0][1], pred[1][1], pred[2][1]);
/*
 * Compute the global-motion vector of the block at (x, y) for reference
 * ref from the parsed parameters: zoom/rotate/shear matrix A with exponent
 * ez, pan/tilt vector b, and perspective vector c with exponent ep.
 * The result is m * (A * (x, y) + 2^ez * b) with
 * m = 2^ep - (c . (x, y)), scaled down by ez + ep bits.
 *
 * NOTE(review): ez and ep come straight from the bitstream with no bound
 * visible here; 1 << ez, 1 << ep or a shift by ez + ep of 31 or more is
 * undefined behaviour - confirm they are range-checked elsewhere.
 */
1129 static void global_mv(DiracContext *s, DiracBlock *block, int x, int y, int ref)
1131 int ez = s->globalmc[ref].zrs_exp;
1132 int ep = s->globalmc[ref].perspective_exp;
1133 int (*A)[2] = s->globalmc[ref].zrs;
1134 int *b = s->globalmc[ref].pan_tilt;
1135 int *c = s->globalmc[ref].perspective;
1137 int m = (1<<ep) - (c[0]*x + c[1]*y);
1138 int mx = m * ((A[0][0] * x + A[0][1]*y) + (1<<ez) * b[0]);
1139 int my = m * ((A[1][0] * x + A[1][1]*y) + (1<<ez) * b[1]);
1141 block->u.mv[ref][0] = (mx + (1<<(ez+ep))) >> (ez+ep);
1142 block->u.mv[ref][1] = (my + (1<<(ez+ep))) >> (ez+ep);
/*
 * Decode the prediction parameters of the block at (x, y).
 *
 * Each reference flag is predicted from the neighbours and XORed with a
 * decoded bit. DC values are predicted and corrected with decoded
 * residuals. When global motion is enabled the global flag is decoded the
 * same way. For every reference in use, the vector is either computed by
 * global_mv() or predicted by pred_mv() and corrected with two decoded
 * residuals.
 *
 * Arithmetic decoder layout as used here: arith[0] for flags,
 * arith[1..3] for the three DC components, and arith[4 + 2*i] /
 * arith[5 + 2*i] for the x / y vector residuals of reference i.
 *
 * NOTE(review): the condition that separates the DC path from the motion
 * path is not visible in this listing.
 */
1145 static void decode_block_params(DiracContext *s, DiracArith arith[8], DiracBlock *block,
1146 int stride, int x, int y)
1150 block->ref = pred_block_mode(block, stride, x, y, DIRAC_REF_MASK_REF1);
1151 block->ref ^= dirac_get_arith_bit(arith, CTX_PMODE_REF1);
1153 if (s->num_refs == 2) {
1154 block->ref |= pred_block_mode(block, stride, x, y, DIRAC_REF_MASK_REF2);
1155 block->ref ^= dirac_get_arith_bit(arith, CTX_PMODE_REF2) << 1;
1159 pred_block_dc(block, stride, x, y);
1160 for (i = 0; i < 3; i++)
1161 block->u.dc[i] += dirac_get_arith_int(arith+1+i, CTX_DC_F1, CTX_DC_DATA);
1165 if (s->globalmc_flag) {
1166 block->ref |= pred_block_mode(block, stride, x, y, DIRAC_REF_MASK_GLOBAL);
1167 block->ref ^= dirac_get_arith_bit(arith, CTX_GLOBAL_BLOCK) << 2;
1170 for (i = 0; i < s->num_refs; i++)
1171 if (block->ref & (i+1)) {
1172 if (block->ref & DIRAC_REF_MASK_GLOBAL) {
1173 global_mv(s, block, x, y, i);
1175 pred_mv(block, stride, x, y, i);
1176 block->u.mv[i][0] += dirac_get_arith_int(arith + 4 + 2 * i, CTX_MV_F1, CTX_MV_DATA);
1177 block->u.mv[i][1] += dirac_get_arith_int(arith + 5 + 2 * i, CTX_MV_F1, CTX_MV_DATA);
1183 * Copies the current block to the other blocks covered by the current superblock split mode
/*
 * Replicate the data of block over a size x size square of blocks whose
 * rows are stride entries apart: first along the top row (x from 1), then
 * row by row for y from 1.
 * NOTE(review): the copy statements themselves are not visible in this
 * listing.
 */
1185 static void propagate_block_data(DiracBlock *block, int stride, int size)
1188 DiracBlock *dst = block;
1190 for (x = 1; x < size; x++)
1193 for (y = 1; y < size; y++) {
1195 for (x = 0; x < size; x++)
/*
 * Unpack the block motion data of the current picture:
 *  1. derive the superblock and block grid from the source dimensions,
 *  2. decode every superblock's split mode into s->sbsplit,
 *  3. decode the parameters of each coded block into s->blmotion and copy
 *     them over the area that block covers.
 */
1201 * Dirac Specification ->
1202 * 12. Block motion data syntax
1204 static int dirac_unpack_block_motion_data(DiracContext *s)
1206 GetBitContext *gb = &s->gb;
1207 uint8_t *sbsplit = s->sbsplit;
1209 DiracArith arith[8];
/* a superblock is 4x4 blocks, i.e. 4*xbsep by 4*ybsep samples of plane 0;
   the grid is rounded up so it covers the whole picture */
1213 /* [DIRAC_STD] 11.2.4 and 12.2.1 Number of blocks and superblocks */
1214 s->sbwidth = DIVRNDUP(s->source.width, 4*s->plane[0].xbsep);
1215 s->sbheight = DIVRNDUP(s->source.height, 4*s->plane[0].ybsep);
1216 s->blwidth = 4 * s->sbwidth;
1217 s->blheight = 4 * s->sbheight;
1219 /* [DIRAC_STD] 12.3.1 Superblock splitting modes. superblock_split_modes()
1220 decode superblock split modes */
1221 ff_dirac_init_arith_decoder(arith, gb, svq3_get_ue_golomb(gb)); /* svq3_get_ue_golomb(gb) is the length */
1222 for (y = 0; y < s->sbheight; y++) {
1223 for (x = 0; x < s->sbwidth; x++) {
1224 unsigned int split = dirac_get_arith_uint(arith, CTX_SB_F1, CTX_SB_DATA);
/* the coded value is a residual: add the prediction and wrap to 0..2 */
1227 sbsplit[x] = (split + pred_sbsplit(sbsplit+x, s->sbwidth, x, y)) % 3;
/* advance to the next row of split modes */
1229 sbsplit += s->sbwidth;
/* one decoder per coded element, each preceded by its length in the bitstream:
   arith[0] block modes, arith[4+2*i] / arith[5+2*i] x / y vectors of
   reference i, arith[1..3] DC values */
1232 /* setup arith decoding */
1233 ff_dirac_init_arith_decoder(arith, gb, svq3_get_ue_golomb(gb));
1234 for (i = 0; i < s->num_refs; i++) {
1235 ff_dirac_init_arith_decoder(arith + 4 + 2 * i, gb, svq3_get_ue_golomb(gb));
1236 ff_dirac_init_arith_decoder(arith + 5 + 2 * i, gb, svq3_get_ue_golomb(gb));
1238 for (i = 0; i < 3; i++)
1239 ff_dirac_init_arith_decoder(arith+1+i, gb, svq3_get_ue_golomb(gb));
1241 for (y = 0; y < s->sbheight; y++)
1242 for (x = 0; x < s->sbwidth; x++) {
/* split mode 0/1/2 -> 1/2/4 coded blocks per side, each covering 4/2/1 grid blocks */
1243 int blkcnt = 1 << s->sbsplit[y * s->sbwidth + x];
1244 int step = 4 >> s->sbsplit[y * s->sbwidth + x];
1246 for (q = 0; q < blkcnt; q++)
1247 for (p = 0; p < blkcnt; p++) {
/* grid position of the coded block inside superblock (x, y) */
1248 int bx = 4 * x + p*step;
1249 int by = 4 * y + q*step;
1250 DiracBlock *block = &s->blmotion[by*s->blwidth + bx];
1251 decode_block_params(s, arith, block, s->blwidth, bx, by);
/* replicate the decoded parameters over the step x step area */
1252 propagate_block_data(block, s->blwidth, step);
/**
 * One-dimensional OBMC weight of sample @p i in a block of length @p blen.
 *
 * Samples closer than 2*offset to either end of the block lie in the region
 * shared with the neighbouring block and get a ramp ("roll-off") weight that
 * depends on their distance from that end; all other samples get the full
 * weight of 8.
 *
 * The ramp is computed directly instead of through a function-like macro, so
 * nothing is left defined for the rest of the file and no identifier is
 * captured implicitly.
 *
 * @param i      sample position inside the block, 0 <= i < blen
 * @param blen   block length including the overlap
 * @param offset overlap parameter; the ramp is 2*offset samples long
 * @return the weight, in the range 1..8
 */
static int weight(int i, int blen, int offset)
{
    int dist; /* distance from the nearest block end, only used on the ramps */

    if (i < 2*offset)
        dist = i;
    else if (i > blen-1 - 2*offset)
        dist = blen-1 - i;
    else
        return 8;

    /* a two-sample ramp is special-cased to the weights 3, 5 */
    if (offset == 1)
        return dist ? 5 : 3;

    return 1 + (6*dist + offset - 1) / (2*offset - 1);
}
/**
 * Fill one row of an OBMC weight table.
 *
 * Each entry is the product of the vertical weight @p wy and a horizontal
 * weight: the roll-off from weight() normally, or the flat value 8 on a
 * half that has no neighbouring block.
 *
 * @param p           plane supplying xblen and xoffset
 * @param obmc_weight row to fill
 * @param stride      row length of the table; entries from xblen up to
 *                    stride are covered by the last loop
 * @param left        non-zero: the left half of the row gets flat weights
 * @param right       non-zero: the right half of the row gets flat weights
 * @param wy          vertical weight of this row
 */
1271 static void init_obmc_weight_row(Plane *p, uint8_t *obmc_weight, int stride,
1272 int left, int right, int wy)
/* flat left half, only when 'left' is set (otherwise x stays 0) */
1275 for (x = 0; left && x < p->xblen >> 1; x++)
1276 obmc_weight[x] = wy*8;
/* roll-off region: up to xblen normally, only up to xblen/2 when 'right' is set */
1277 for (; x < p->xblen >> right; x++)
1278 obmc_weight[x] = wy*weight(x, p->xblen, p->xoffset);
/* flat right half; runs zero times unless the previous loop stopped early */
1279 for (; x < p->xblen; x++)
1280 obmc_weight[x] = wy*8;
/* entries beyond the block width */
1281 for (; x < stride; x++)
/**
 * Fill a complete OBMC weight table of yblen rows, one row at a time.
 *
 * The vertical weight of a row is weight(y, yblen, yoffset), or the flat
 * value 8 in the upper half when @p top is set and in the lower half when
 * @p bottom is set.
 *
 * @param p                plane supplying the block geometry
 * @param obmc_weight      table to fill, rows @p stride entries apart
 * @param stride           distance between table rows
 * @param left,right       forwarded to init_obmc_weight_row()
 * @param top,bottom       non-zero: that half of the table uses the flat
 *                         vertical weight
 */
1285 static void init_obmc_weight(Plane *p, uint8_t *obmc_weight, int stride,
1286 int left, int right, int top, int bottom)
/* flat upper half, only when 'top' is set (otherwise y stays 0) */
1289 for (y = 0; top && y < p->yblen >> 1; y++) {
1290 init_obmc_weight_row(p, obmc_weight, stride, left, right, 8);
1291 obmc_weight += stride;
/* roll-off rows: up to yblen normally, only up to yblen/2 when 'bottom' is set */
1293 for (; y < p->yblen >> bottom; y++) {
1294 int wy = weight(y, p->yblen, p->yoffset);
1295 init_obmc_weight_row(p, obmc_weight, stride, left, right, wy);
1296 obmc_weight += stride;
/* flat lower half; runs zero times unless the previous loop stopped early */
1298 for (; y < p->yblen; y++) {
1299 init_obmc_weight_row(p, obmc_weight, stride, left, right, 8);
1300 obmc_weight += stride;
/**
 * Prepare the three OBMC weight tables used for block row @p by of plane
 * @p p: s->obmc_weight[0] is built with the left flag set, [1] with neither
 * horizontal flag and [2] with the right flag set. mc_row() uses them for
 * the first, the middle and the last block of a row respectively.
 */
1304 static void init_obmc_weights(DiracContext *s, Plane *p, int by)
/* last block row of the picture */
1307 int bottom = by == s->blheight-1;
1309 /* don't bother re-initing for rows 2 to blheight-2, the weights don't change */
1310 if (top || bottom || by == 1) {
1311 init_obmc_weight(p, s->obmc_weight[0], MAX_BLOCKSIZE, 1, 0, top, bottom);
1312 init_obmc_weight(p, s->obmc_weight[1], MAX_BLOCKSIZE, 0, 0, top, bottom);
1313 init_obmc_weight(p, s->obmc_weight[2], MAX_BLOCKSIZE, 0, 1, top, bottom);
/* Interpolation weights for eighth-pel motion compensation. mc_subpel()
   indexes the table as epel_weights[my&3][mx&3] and hands the selected
   four-entry row to the pixel functions through src[4]. */
1317 static const uint8_t epel_weights[4][4][4] = {
/*
 * s, block - decoder context and the block whose vector u.mv[ref] is used
 * src      - output: up to four source pointers in src[0..3]; src[4]
 *            receives the epel weight row when one is needed
 * x, y     - block position in the plane before the vector is applied
 * ref      - index into s->ref_pics[]
 * plane    - plane index; selects s->plane[] and the reference's hpel planes
 */
1337 * For block x,y, determine which of the hpel planes to do bilinear
1338 * interpolation from and set src[] to the location in each hpel plane
1341 * @return the index of the put_dirac_pixels_tab function to use
1342 * 0 for 1 plane (fpel,hpel), 1 for 2 planes (qpel), 2 for 4 planes (qpel), and 3 for epel
1344 static int mc_subpel(DiracContext *s, DiracBlock *block, const uint8_t *src[5],
1345 int x, int y, int ref, int plane)
1347 Plane *p = &s->plane[plane];
1348 uint8_t **ref_hpel = s->ref_pics[ref]->hpel[plane];
1349 int motion_x = block->u.mv[ref][0];
1350 int motion_y = block->u.mv[ref][1];
1351 int mx, my, i, epel, nplanes = 0;
/* scale the vector down by the chroma subsampling shifts */
1354 motion_x >>= s->chroma_x_shift;
1355 motion_y >>= s->chroma_y_shift;
/* split the vector: mx/my keep the low mv_precision (fractional) bits,
   motion_x/motion_y the whole-sample part.
   NOTE(review): left-shifting -1 is undefined behaviour in C; an unsigned
   mask such as ((1U << n) - 1) would be the portable spelling. */
1358 mx = motion_x & ~(-1 << s->mv_precision);
1359 my = motion_y & ~(-1 << s->mv_precision);
1360 motion_x >>= s->mv_precision;
1361 motion_y >>= s->mv_precision;
1362 /* normalize subpel coordinates to epel */
1363 /* TODO: template this function? */
1364 mx <<= 3 - s->mv_precision;
1365 my <<= 3 - s->mv_precision;
/* single-plane case: bit 2 of mx and of my (the half-pel bits in eighth-pel
   units) select one of the four hpel planes */
1374 src[0] = ref_hpel[(my>>1)+(mx>>2)] + y*p->stride + x;
/* multi-plane case: start from the same position in all four hpel planes */
1378 for (i = 0; i < 4; i++)
1379 src[i] = ref_hpel[i] + y*p->stride + x;
1381 /* if we're interpolating in the right/bottom halves, adjust the planes as needed
1382 we increment x/y because the edge changes for half of the pixels */
1389 src[0] += p->stride;
1390 src[1] += p->stride;
1398 /* check if we really only need 2 planes since either mx or my is
1399 a hpel position. (epel weights of 0 handle this there) */
1401 /* mx == 0: average [0] and [2]
1402 mx == 4: average [1] and [3] */
1403 src[!mx] = src[2 + !!mx];
1405 } else if (!(my&3)) {
/* my is 0 or 4 here, so this keeps planes 0,1 or planes 2,3 */
1406 src[0] = src[(my>>1) ];
1407 src[1] = src[(my>>1)+1];
1411 /* adjust the ordering if needed so the weights work */
1413 FFSWAP(const uint8_t *, src[0], src[1]);
1414 FFSWAP(const uint8_t *, src[2], src[3]);
1417 FFSWAP(const uint8_t *, src[0], src[2]);
1418 FFSWAP(const uint8_t *, src[1], src[3]);
/* the weight row travels to the pixel function as a fifth "source" pointer */
1420 src[4] = epel_weights[my&3][mx&3];
/* blocks reaching past the padded reference area are first copied into the
   edge emulation buffers, one buffer per plane in use */
1424 /* fixme: v/h _edge_pos */
1425 if (x + p->xblen > p->width +EDGE_WIDTH/2 ||
1426 y + p->yblen > p->height+EDGE_WIDTH/2 ||
1428 for (i = 0; i < nplanes; i++) {
1429 ff_emulated_edge_mc(s->edge_emu_buffer[i], src[i],
1430 p->stride, p->stride,
1431 p->xblen, p->yblen, x, y,
1432 p->width+EDGE_WIDTH/2, p->height+EDGE_WIDTH/2);
1433 src[i] = s->edge_emu_buffer[i];
/* nplanes 1/2/4 maps to index 0/1/2; epel adds one more */
1436 return (nplanes>>1) + epel;
/**
 * Accumulate a constant (DC) block, weighted by the OBMC table, into the
 * motion compensation buffer.
 *
 * @param dst         accumulation buffer, updated in place
 * @param dc          DC value that is multiplied by each weight
 * @param stride      row distance of dst (the caller passes the plane stride)
 * @param obmc_weight weight table with rows MAX_BLOCKSIZE entries apart
 * @param xblen       block width; samples are processed in pairs, so the
 *                    loop touches an even number of samples per row
 * @param yblen       block height
 */
1439 static void add_dc(uint16_t *dst, int dc, int stride,
1440 uint8_t *obmc_weight, int xblen, int yblen)
1445 for (y = 0; y < yblen; y++) {
/* two samples per iteration */
1446 for (x = 0; x < xblen; x += 2) {
1447 dst[x ] += dc * obmc_weight[x ];
1448 dst[x+1] += dc * obmc_weight[x+1];
/* next row of the weight table */
1451 obmc_weight += MAX_BLOCKSIZE;
/**
 * Motion-compensate one block and accumulate it, OBMC-weighted, into
 * @p mctmp.
 *
 * The low two bits of block->ref choose the path: a DC fill through
 * add_dc(), prediction from a single reference, or prediction from both
 * references. Reference predictions are built in s->mcscratch and then
 * added with s->add_obmc.
 *
 * @param mctmp       accumulation buffer positioned at this block
 * @param obmc_weight weight table for this block position
 * @param plane       plane index
 * @param dstx,dsty   block position in the plane, forwarded to mc_subpel()
 */
1455 static void block_mc(DiracContext *s, DiracBlock *block,
1456 uint16_t *mctmp, uint8_t *obmc_weight,
1457 int plane, int dstx, int dsty)
1459 Plane *p = &s->plane[plane];
1460 const uint8_t *src[5];
1463 switch (block->ref&3) {
/* DC block: weighted constant, no reference picture involved */
1465 add_dc(mctmp, block->u.dc[plane], p->stride, obmc_weight, p->xblen, p->yblen);
/* one reference: its index is the mode value minus one */
1469 idx = mc_subpel(s, block, src, dstx, dsty, (block->ref&3)-1, plane);
1470 s->put_pixels_tab[idx](s->mcscratch, src, p->stride, p->yblen);
/* a single prediction is scaled by the sum of both reference weights */
1472 s->weight_func(s->mcscratch, p->stride, s->weight_log2denom,
1473 s->weight[0] + s->weight[1], p->yblen);
/* two references: reference 0 goes to the scratch buffer first */
1476 idx = mc_subpel(s, block, src, dstx, dsty, 0, plane);
1477 s->put_pixels_tab[idx](s->mcscratch, src, p->stride, p->yblen);
1478 idx = mc_subpel(s, block, src, dstx, dsty, 1, plane);
/* explicit weights: reference 1 is written 32 bytes further on and both
   predictions are combined by biweight_func */
1479 if (s->biweight_func) {
1480 /* fixme: +32 is a quick hack */
1481 s->put_pixels_tab[idx](s->mcscratch + 32, src, p->stride, p->yblen);
1482 s->biweight_func(s->mcscratch, s->mcscratch+32, p->stride, s->weight_log2denom,
1483 s->weight[0], s->weight[1], p->yblen);
/* default weights: average reference 1 into the scratch buffer */
1485 s->avg_pixels_tab[idx](s->mcscratch, src, p->stride, p->yblen);
/* accumulate the weighted prediction */
1488 s->add_obmc(mctmp, s->mcscratch, p->stride, obmc_weight, p->yblen);
/**
 * Motion-compensate one row of blocks into @p mctmp.
 *
 * The first block uses weight table 0 and starts at x = -xoffset, blocks
 * 1 .. blwidth-2 use table 1 and the final block uses table 2.
 *
 * @param block first block of the row
 * @param mctmp accumulation buffer for this row
 * @param plane plane index
 * @param dsty  vertical position of the row in the plane
 */
1491 static void mc_row(DiracContext *s, DiracBlock *block, uint16_t *mctmp, int plane, int dsty)
1493 Plane *p = &s->plane[plane];
/* dstx starts at the position of the second block */
1494 int x, dstx = p->xbsep - p->xoffset;
/* first block of the row */
1496 block_mc(s, block, mctmp, s->obmc_weight[0], plane, -p->xoffset, dsty);
/* blocks between the first and the last */
1499 for (x = 1; x < s->blwidth-1; x++) {
1500 block_mc(s, block+x, mctmp, s->obmc_weight[1], plane, dstx, dsty);
/* last block of the row; x is blwidth-1 when the loop above has run */
1504 block_mc(s, block+x, mctmp, s->obmc_weight[2], plane, dstx, dsty);
/**
 * Select the DSP routines for the current plane: copy the put/avg pixel
 * function tables and the OBMC add function for table index idx into the
 * context, and enable the explicit weighting functions only when the
 * picture's weights differ from the default.
 */
1507 static void select_dsp_funcs(DiracContext *s, int width, int height, int xblen, int yblen)
1515 memcpy(s->put_pixels_tab, s->diracdsp.put_dirac_pixels_tab[idx], sizeof(s->put_pixels_tab));
1516 memcpy(s->avg_pixels_tab, s->diracdsp.avg_dirac_pixels_tab[idx], sizeof(s->avg_pixels_tab));
1517 s->add_obmc = s->diracdsp.add_dirac_obmc[idx];
/* anything other than weights 1,1 with a denominator exponent of at most 1
   needs explicit weighting */
1518 if (s->weight_log2denom > 1 || s->weight[0] != 1 || s->weight[1] != 1) {
1519 s->weight_func = s->diracdsp.weight_dirac_pixels_tab[idx];
1520 s->biweight_func = s->diracdsp.biweight_dirac_pixels_tab[idx];
/* block_mc() tests biweight_func against NULL to pick plain averaging */
1522 s->weight_func = NULL;
1523 s->biweight_func = NULL;
/**
 * Prepare one plane of a reference frame for motion compensation: pad the
 * full-pel plane and, when sub-pel vectors are in use, build and pad the
 * three half-pel interpolated planes hpel[plane][1..3].
 *
 * The interpolated planes are allocated on first use and cached;
 * ref->interpolated[plane] records that they are filled.
 *
 * @param ref          reference frame to prepare
 * @param plane        plane index
 * @param width,height plane dimensions
 */
1527 static void interpolate_refplane(DiracContext *s, DiracFrame *ref, int plane, int width, int height)
1529 /* chroma allocates an edge of 8 when subsampled
1530 which for 4:2:2 means an h edge of 16 and v edge of 8
1531 just use 8 for everything for the moment */
1532 int i, edge = EDGE_WIDTH/2;
/* plane 0 of the hpel set is the decoded picture itself */
1534 ref->hpel[plane][0] = ref->avframe->data[plane];
1535 s->dsp.draw_edges(ref->hpel[plane][0], ref->avframe->linesize[plane], width, height, edge, edge, EDGE_TOP | EDGE_BOTTOM); /* EDGE_TOP | EDGE_BOTTOM values just copied to make it build, this needs to be ensured */
1537 /* no need for hpel if we only have fpel vectors */
1538 if (!s->mv_precision)
1541 for (i = 1; i < 4; i++) {
/* allocate once per frame slot: plane rows plus an edge above and below,
   plus 32 bytes of slack for the +16 offset applied below.
   NOTE(review): no NULL check on the av_malloc() result is visible before
   the pointer is used -- confirm. */
1542 if (!ref->hpel_base[plane][i])
1543 ref->hpel_base[plane][i] = av_malloc((height+2*edge) * ref->avframe->linesize[plane] + 32);
1544 /* we need to be 16-byte aligned even for chroma */
1545 ref->hpel[plane][i] = ref->hpel_base[plane][i] + edge*ref->avframe->linesize[plane] + 16;
/* filter only once per frame and plane */
1548 if (!ref->interpolated[plane]) {
1549 s->diracdsp.dirac_hpel_filter(ref->hpel[plane][1], ref->hpel[plane][2],
1550 ref->hpel[plane][3], ref->hpel[plane][0],
1551 ref->avframe->linesize[plane], width, height);
1552 s->dsp.draw_edges(ref->hpel[plane][1], ref->avframe->linesize[plane], width, height, edge, edge, EDGE_TOP | EDGE_BOTTOM);
1553 s->dsp.draw_edges(ref->hpel[plane][2], ref->avframe->linesize[plane], width, height, edge, edge, EDGE_TOP | EDGE_BOTTOM);
1554 s->dsp.draw_edges(ref->hpel[plane][3], ref->avframe->linesize[plane], width, height, edge, edge, EDGE_TOP | EDGE_BOTTOM);
1556 ref->interpolated[plane] = 1;
/*
 * Reconstruct the current picture. For each of the three components the
 * wavelet coefficients are decoded and inverse transformed slice by slice;
 * intra pictures are written straight to the frame, inter pictures are
 * added to the OBMC motion-compensated prediction.
 */
1560 * Dirac Specification ->
1561 * 13.0 Transform data syntax. transform_data()
1563 static int dirac_decode_frame_internal(DiracContext *s)
1566 int y, i, comp, dsty;
/* clear the coefficient buffers of all three components */
1569 /* [DIRAC_STD] 13.5.1 low_delay_transform_data() */
1570 for (comp = 0; comp < 3; comp++) {
1571 Plane *p = &s->plane[comp];
1572 memset(p->idwt_buf, 0, p->idwt_stride * p->idwt_height * sizeof(IDWTELEM));
1578 for (comp = 0; comp < 3; comp++) {
1579 Plane *p = &s->plane[comp];
1580 uint8_t *frame = s->current_picture->avframe->data[comp];
/* carve the four edge emulation buffers out of one allocation, spaced by
   the plane width rounded up to a multiple of 16 */
1582 /* FIXME: small resolutions */
1583 for (i = 0; i < 4; i++)
1584 s->edge_emu_buffer[i] = s->edge_emu_buffer_base + i*FFALIGN(p->width, 16);
/* core syntax pictures that carry a residual decode this component's
   coefficients here */
1586 if (!s->zero_res && !s->low_delay)
1588 memset(p->idwt_buf, 0, p->idwt_stride * p->idwt_height * sizeof(IDWTELEM));
1589 decode_component(s, comp); /* [DIRAC_STD] 13.4.1 core_transform_data() */
/* set up the sliced inverse wavelet transform for this plane */
1591 if (ff_spatial_idwt_init2(&d, p->idwt_buf, p->idwt_width, p->idwt_height, p->idwt_stride,
1592 s->wavelet_idx+2, s->wavelet_depth, p->idwt_tmp))
1595 if (!s->num_refs) { /* intra */
/* transform 16 rows at a time and store them clamped into the frame */
1596 for (y = 0; y < p->height; y += 16) {
1597 ff_spatial_idwt_slice2(&d, y+16); /* decode */
1598 s->diracdsp.put_signed_rect_clamped(frame + y*p->stride, p->stride,
1599 p->idwt_buf + y*p->idwt_stride, p->idwt_stride, p->width, 16);
1601 } else { /* inter */
/* distance in mctmp between consecutive block rows */
1602 int rowheight = p->ybsep*p->stride;
1604 select_dsp_funcs(s, p->width, p->height, p->xblen, p->yblen);
1606 for (i = 0; i < s->num_refs; i++)
1607 interpolate_refplane(s, s->ref_pics[i], comp, p->width, p->height);
/* clear the start of the accumulation buffer before the first block row */
1609 memset(s->mctmp, 0, 4*p->yoffset*p->stride);
1612 for (y = 0; y < s->blheight; y++) {
/* first picture row produced by this block row */
1614 start = FFMAX(dsty, 0);
1615 uint16_t *mctmp = s->mctmp + y*rowheight;
1616 DiracBlock *blocks = s->blmotion + y*s->blwidth;
1618 init_obmc_weights(s, p, y);
/* h: picture rows to output for this block row, cut at the bottom edge */
1620 if (y == s->blheight-1 || start+p->ybsep > p->height)
1621 h = p->height - start;
1623 h = p->ybsep - (start - dsty);
/* clear the part of the accumulation buffer this row writes to first */
1627 memset(mctmp+2*p->yoffset*p->stride, 0, 2*rowheight);
1628 mc_row(s, blocks, mctmp, comp, dsty);
/* skip rows above the picture and the left overlap, then add the inverse
   transformed residual to the prediction */
1630 mctmp += (start - dsty)*p->stride + p->xoffset;
1631 ff_spatial_idwt_slice2(&d, start + h); /* decode */
1632 s->diracdsp.add_rect_clamped(frame + start*p->stride, mctmp, p->stride,
1633 p->idwt_buf + start*p->idwt_stride, p->idwt_stride, p->width, h);
/**
 * Allocate a frame buffer with extra room around the picture.
 *
 * The frame is requested EDGE_WIDTH samples larger on every side (plus two
 * extra rows); afterwards each data pointer is moved past the top edge rows
 * and 32 bytes into the row, and the frame dimensions are set back to the
 * coded size.
 *
 * @param avctx codec context supplying dimensions and pixel format
 * @param f     frame to allocate
 * @param flags forwarded to ff_get_buffer()
 */
1644 static int get_buffer_with_edge(AVCodecContext *avctx, AVFrame *f, int flags)
1647 int chroma_x_shift, chroma_y_shift;
1648 avcodec_get_chroma_sub_sample(avctx->pix_fmt, &chroma_x_shift, &chroma_y_shift);
/* request the enlarged buffer */
1650 f->width = avctx->width + 2 * EDGE_WIDTH;
1651 f->height = avctx->height + 2 * EDGE_WIDTH + 2;
1652 ret = ff_get_buffer(avctx, f, flags);
1656 for (i = 0; f->data[i]; i++) {
/* planes 1 and 2 use the edge height reduced by the vertical chroma shift */
1657 int offset = (EDGE_WIDTH >> (i && i<3 ? chroma_y_shift : 0)) *
1658 f->linesize[i] + 32;
1659 f->data[i] += offset;
/* report the real picture size again */
1661 f->width = avctx->width;
1662 f->height = avctx->height;
/*
 * Parse the picture header: read the picture number, resolve the reference
 * pictures, retire a reference if requested, register the current picture
 * as a reference when it is one, then unpack the prediction parameters,
 * block motion data and wavelet parameters.
 */
1668 * Dirac Specification ->
1669 * 11.1.1 Picture Header. picture_header()
1671 static int dirac_decode_picture_header(DiracContext *s)
1674 int i, j, refnum, refdist;
1675 GetBitContext *gb = &s->gb;
1677 /* [DIRAC_STD] 11.1.1 Picture Header. picture_header() PICTURE_NUM */
1678 picnum = s->current_picture->avframe->display_picture_number = get_bits_long(gb, 32);
1681 av_log(s->avctx,AV_LOG_DEBUG,"PICTURE_NUM: %d\n",picnum);
1683 /* if this is the first keyframe after a sequence header, start our
1684 reordering from here */
1685 if (s->frame_number < 0)
1686 s->frame_number = picnum;
1688 s->ref_pics[0] = s->ref_pics[1] = NULL;
1689 for (i = 0; i < s->num_refs; i++) {
/* references are coded as a signed offset from the current picture number */
1690 refnum = picnum + dirac_get_se_golomb(gb);
/* the search stops early once an exact match brings refdist to 0 */
1693 /* find the closest reference to the one we want */
1694 /* Jordi: this is needed if the referenced picture hasn't yet arrived */
1695 for (j = 0; j < MAX_REFERENCE_FRAMES && refdist; j++)
1696 if (s->ref_frames[j]
1697 && FFABS(s->ref_frames[j]->avframe->display_picture_number - refnum) < refdist) {
1698 s->ref_pics[i] = s->ref_frames[j];
1699 refdist = FFABS(s->ref_frames[j]->avframe->display_picture_number - refnum);
1702 if (!s->ref_pics[i] || refdist)
1703 av_log(s->avctx, AV_LOG_DEBUG, "Reference not found\n");
/* NOTE(review): the get_buffer_with_edge() result is not checked on the
   line that calls it -- confirm that an allocation failure is handled. */
1705 /* if there were no references at all, allocate one */
1706 if (!s->ref_pics[i])
1707 for (j = 0; j < MAX_FRAMES; j++)
1708 if (!s->all_frames[j].avframe->data[0]) {
1709 s->ref_pics[i] = &s->all_frames[j];
1710 get_buffer_with_edge(s->avctx, s->ref_pics[i]->avframe, AV_GET_BUFFER_FLAG_REF);
1715 /* retire the reference frames that are not used anymore */
1716 if (s->current_picture->avframe->reference) {
/* the retired picture is also coded relative to the current one; an offset
   of zero means nothing is retired */
1717 retire = picnum + dirac_get_se_golomb(gb);
1718 if (retire != picnum) {
1719 DiracFrame *retire_pic = remove_frame(s->ref_frames, retire);
/* keep only the "waiting for output" bit of the reference flags */
1722 retire_pic->avframe->reference &= DELAYED_PIC_REF;
1724 av_log(s->avctx, AV_LOG_DEBUG, "Frame to retire not found\n");
/* NOTE(review): the remove_frame() result is dereferenced directly in the
   loop body -- confirm it cannot be NULL there. */
1727 /* if reference array is full, remove the oldest as per the spec */
1728 while (add_frame(s->ref_frames, MAX_REFERENCE_FRAMES, s->current_picture)) {
1729 av_log(s->avctx, AV_LOG_ERROR, "Reference frame overflow\n");
1730 remove_frame(s->ref_frames, s->ref_frames[0]->avframe->display_picture_number)->avframe->reference &= DELAYED_PIC_REF;
/* a non-zero result from any of the three unpack steps is a failure */
1735 if (dirac_unpack_prediction_parameters(s)) /* [DIRAC_STD] 11.2 Picture Prediction Data. picture_prediction() */
1737 if (dirac_unpack_block_motion_data(s)) /* [DIRAC_STD] 12. Block motion data syntax */
1740 if (dirac_unpack_idwt_params(s)) /* [DIRAC_STD] 11.3 Wavelet transform data */
/**
 * Output one picture from the delay queue: the queued frame with the lowest
 * display picture number is removed from s->delay_frames, its
 * DELAYED_PIC_REF flag is toggled and a new reference to it is stored in
 * @p picture.
 *
 * @param s         decoder context owning the NULL-terminated delay queue
 * @param picture   frame that receives the output reference
 * @param got_frame output flag of the decode call
 */
1747 static int get_delayed_pic(DiracContext *s, AVFrame *picture, int *got_frame)
1749 DiracFrame *out = s->delay_frames[0];
1753 /* find frame with lowest picture number */
1754 for (i = 1; s->delay_frames[i]; i++)
1755 if (s->delay_frames[i]->avframe->display_picture_number < out->avframe->display_picture_number) {
1756 out = s->delay_frames[i];
/* close the gap: shift the following entries, terminator included, down by one */
1760 for (i = out_idx; s->delay_frames[i]; i++)
1761 s->delay_frames[i] = s->delay_frames[i+1];
/* the frame no longer waits for output */
1764 out->avframe->reference ^= DELAYED_PIC_REF;
1766 if((ret = av_frame_ref(picture, out->avframe)) < 0)
/*
 * Decode one data unit. buf points at the unit's parse info header and size
 * is the unit length taken from that header. The parse code in buf[4]
 * selects the handling: sequence header, end of sequence, auxiliary data or
 * picture.
 */
1774 * Dirac Specification ->
1775 * 9.6 Parse Info Header Syntax. parse_info()
1776 * 4 byte start code + byte parse code + 4 byte size + 4 byte previous size
1778 #define DATA_UNIT_HEADER_SIZE 13
1780 /* [DIRAC_STD] dirac_decode_data_unit makes reference to the while defined in 9.3
1781 inside the function parse_sequence() */
1782 static int dirac_decode_data_unit(AVCodecContext *avctx, const uint8_t *buf, int size)
1784 DiracContext *s = avctx->priv_data;
1785 DiracFrame *pic = NULL;
/* NOTE(review): buf[4] is read here, before the size check below; the
   caller shown in this file only passes units with a complete header */
1786 int ret, i, parse_code = buf[4];
1789 if (size < DATA_UNIT_HEADER_SIZE)
/* the payload starts right after the 13 byte header */
1792 init_get_bits(&s->gb, &buf[13], 8*(size - DATA_UNIT_HEADER_SIZE));
1794 if (parse_code == pc_seq_header) {
1795 if (s->seen_sequence_header)
1798 /* [DIRAC_STD] 10. Sequence header */
1799 if (avpriv_dirac_parse_sequence_header(avctx, &s->gb, &s->source))
1802 avcodec_get_chroma_sub_sample(avctx->pix_fmt, &s->chroma_x_shift, &s->chroma_y_shift);
1804 if (alloc_sequence_buffers(s))
1807 s->seen_sequence_header = 1;
1808 } else if (parse_code == pc_eos) { /* [DIRAC_STD] End of Sequence */
1809 free_sequence_buffers(s);
1810 s->seen_sequence_header = 0;
1811 } else if (parse_code == pc_aux_data) {
1812 if (buf[13] == 1) { /* encoder implementation/version */
/* NOTE(review): sscanf() needs a NUL-terminated string; nothing visible
   here guarantees a terminator inside the data unit -- confirm. */
1814 /* versions older than 1.0.8 don't store quant delta for
1815 subbands with only one codeblock */
1816 if (sscanf(buf+14, "Schroedinger %d.%d.%d", ver, ver+1, ver+2) == 3)
1817 if (ver[0] == 1 && ver[1] == 0 && ver[2] <= 7)
1818 s->old_delta_quant = 1;
1820 } else if (parse_code & 0x8) { /* picture data unit */
1821 if (!s->seen_sequence_header) {
1822 av_log(avctx, AV_LOG_DEBUG, "Dropping frame without sequence header\n");
/* a slot is free when its frame holds no data */
1826 /* find an unused frame */
1827 for (i = 0; i < MAX_FRAMES; i++)
1828 if (s->all_frames[i].avframe->data[0] == NULL)
1829 pic = &s->all_frames[i];
1831 av_log(avctx, AV_LOG_ERROR, "framelist full\n");
1835 av_frame_unref(pic->avframe);
/* the picture's properties are encoded in the bits of the parse code */
1837 /* [DIRAC_STD] Defined in 9.6.1 ... */
1838 tmp = parse_code & 0x03; /* [DIRAC_STD] num_refs() */
1840 av_log(avctx, AV_LOG_ERROR, "num_refs of 3\n");
1844 s->is_arith = (parse_code & 0x48) == 0x08; /* [DIRAC_STD] using_ac() */
1845 s->low_delay = (parse_code & 0x88) == 0x88; /* [DIRAC_STD] is_low_delay() */
1846 pic->avframe->reference = (parse_code & 0x0C) == 0x0C; /* [DIRAC_STD] is_reference() */
1847 pic->avframe->key_frame = s->num_refs == 0; /* [DIRAC_STD] is_intra() */
1848 pic->avframe->pict_type = s->num_refs + 1; /* Definition of AVPictureType in avutil.h */
/* reference pictures request a buffer flagged as reference */
1850 if ((ret = get_buffer_with_edge(avctx, pic->avframe, (parse_code & 0x0C) == 0x0C ? AV_GET_BUFFER_FLAG_REF : 0)) < 0)
/* the plane strides follow the line sizes of the buffer just allocated */
1852 s->current_picture = pic;
1853 s->plane[0].stride = pic->avframe->linesize[0];
1854 s->plane[1].stride = pic->avframe->linesize[1];
1855 s->plane[2].stride = pic->avframe->linesize[2];
1857 /* [DIRAC_STD] 11.1 Picture parse. picture_parse() */
1858 if (dirac_decode_picture_header(s))
1861 /* [DIRAC_STD] 13.0 Transform data syntax. transform_data() */
1862 if (dirac_decode_frame_internal(s))
/**
 * Decoder entry point: release frames that are no longer needed, decode
 * every data unit found in the packet, then output a picture in display
 * order when one is available.
 *
 * Pictures that arrive ahead of the expected display number are parked in
 * s->delay_frames; an empty packet flushes that queue one picture per call.
 *
 * @param avctx     codec context
 * @param data      AVFrame receiving the output picture
 * @param got_frame output flag of the decode call
 * @param pkt       input packet
 */
1868 static int dirac_decode_frame(AVCodecContext *avctx, void *data, int *got_frame, AVPacket *pkt)
1870 DiracContext *s = avctx->priv_data;
1871 AVFrame *picture = data;
1872 uint8_t *buf = pkt->data;
1873 int buf_size = pkt->size;
1874 int i, data_unit_size, buf_idx = 0;
/* a frame can be released once it holds data but no reference flag; its
   cached half-pel planes become stale at the same time */
1877 /* release unused frames */
1878 for (i = 0; i < MAX_FRAMES; i++)
1879 if (s->all_frames[i].avframe->data[0] && !s->all_frames[i].avframe->reference) {
1880 av_frame_unref(s->all_frames[i].avframe);
1881 memset(s->all_frames[i].interpolated, 0, sizeof(s->all_frames[i].interpolated));
1884 s->current_picture = NULL;
1887 /* end of stream, so flush delayed pics */
1889 return get_delayed_pic(s, (AVFrame *)data, got_frame);
1892 /*[DIRAC_STD] Here starts the code from parse_info() defined in 9.6
1893 [DIRAC_STD] PARSE_INFO_PREFIX = "BBCD" as defined in ISO/IEC 646
1894 BBCD start code search */
1895 for (; buf_idx + DATA_UNIT_HEADER_SIZE < buf_size; buf_idx++) {
1896 if (buf[buf_idx ] == 'B' && buf[buf_idx+1] == 'B' &&
1897 buf[buf_idx+2] == 'C' && buf[buf_idx+3] == 'D')
1900 /* BBCD found or end of data */
1901 if (buf_idx + DATA_UNIT_HEADER_SIZE >= buf_size)
/* NOTE(review): data_unit_size is a 32-bit field taken from the packet and
   stored in an int, so it can be negative or make buf_idx + data_unit_size
   overflow; comparing data_unit_size against buf_size - buf_idx would avoid
   both -- confirm and fix. */
1904 data_unit_size = AV_RB32(buf+buf_idx+5);
1905 if (buf_idx + data_unit_size > buf_size || !data_unit_size) {
1906 if(buf_idx + data_unit_size > buf_size)
1907 av_log(s->avctx, AV_LOG_ERROR,
1908 "Data unit with size %d is larger than input buffer, discarding\n",
1913 /* [DIRAC_STD] dirac_decode_data_unit makes reference to the while defined in 9.3 inside the function parse_sequence() */
1914 if (dirac_decode_data_unit(avctx, buf+buf_idx, data_unit_size))
1916 av_log(s->avctx, AV_LOG_ERROR,"Error in dirac_decode_data_unit\n");
/* continue behind the unit just decoded */
1919 buf_idx += data_unit_size;
/* the packet contained no picture */
1922 if (!s->current_picture)
/* the picture is ahead of display order: queue it and output the queued
   picture with the expected number, if there is one */
1925 if (s->current_picture->avframe->display_picture_number > s->frame_number) {
1926 DiracFrame *delayed_frame = remove_frame(s->delay_frames, s->frame_number);
1928 s->current_picture->avframe->reference |= DELAYED_PIC_REF;
1930 if (add_frame(s->delay_frames, MAX_DELAY, s->current_picture)) {
1931 int min_num = s->delay_frames[0]->avframe->display_picture_number;
1932 /* Too many delayed frames, so we display the frame with the lowest pts */
1933 av_log(avctx, AV_LOG_ERROR, "Delay frame overflow\n");
1934 delayed_frame = s->delay_frames[0];
1936 for (i = 1; s->delay_frames[i]; i++)
1937 if (s->delay_frames[i]->avframe->display_picture_number < min_num)
1938 min_num = s->delay_frames[i]->avframe->display_picture_number;
/* removing the lowest-numbered picture frees a slot, so the second
   add_frame() call has room for the current picture */
1940 delayed_frame = remove_frame(s->delay_frames, min_num);
1941 add_frame(s->delay_frames, MAX_DELAY, s->current_picture);
1944 if (delayed_frame) {
1945 delayed_frame->avframe->reference ^= DELAYED_PIC_REF;
1946 if((ret=av_frame_ref(data, delayed_frame->avframe)) < 0)
1950 } else if (s->current_picture->avframe->display_picture_number == s->frame_number) {
1951 /* The right frame at the right time :-) */
1952 if((ret=av_frame_ref(data, s->current_picture->avframe)) < 0)
/* the next expected picture follows the one just output */
1958 s->frame_number = picture->display_picture_number + 1;
/* Codec registration entry for the Dirac decoder. CODEC_CAP_DELAY is set
   because dirac_decode_frame() holds pictures back for reordering and hands
   out the remaining ones when it is called with an empty packet. */
1963 AVCodec ff_dirac_decoder = {
1965 .long_name = NULL_IF_CONFIG_SMALL("BBC Dirac VC-2"),
1966 .type = AVMEDIA_TYPE_VIDEO,
1967 .id = AV_CODEC_ID_DIRAC,
1968 .priv_data_size = sizeof(DiracContext),
1969 .init = dirac_decode_init,
1970 .close = dirac_decode_end,
1971 .decode = dirac_decode_frame,
1972 .capabilities = CODEC_CAP_DELAY,
1973 .flush = dirac_decode_flush,