2 * Copyright (C) 2007 Marco Gerards <marco@gnu.org>
3 * Copyright (C) 2009 David Conrad
4 * Copyright (C) 2011 Jordi Ortiz
6 * This file is part of FFmpeg.
8 * FFmpeg is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
13 * FFmpeg is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with FFmpeg; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
26 * @author Marco Gerards <marco@gnu.org>, David Conrad, Jordi Ortiz <nenjordi@gmail.com>
32 #include "bytestream.h"
35 #include "dirac_arith.h"
36 #include "mpeg12data.h"
37 #include "libavcodec/mpegvideo.h"
38 #include "mpegvideoencdsp.h"
39 #include "dirac_dwt.h"
42 #include "videodsp.h" // for ff_emulated_edge_mc_8
45 * The spec limits the number of wavelet decompositions to 4 for both
46 * level 1 (VC-2) and 128 (long-gop default).
47 * 5 decompositions is the maximum before >16-bit buffers are needed.
48 * Schroedinger allows this for DD 9,7 and 13,7 wavelets only, limiting
49 * the others to 4 decompositions (or 3 for the fidelity filter).
51 * We use this instead of MAX_DECOMPOSITIONS to save some memory.
53 #define MAX_DWT_LEVELS 5
56 * The spec limits this to 3 for frame coding, but in practice can be as high as 6
58 #define MAX_REFERENCE_FRAMES 8
59 #define MAX_DELAY 5 /* limit for main profile for frame coding (TODO: field coding) */
60 #define MAX_FRAMES (MAX_REFERENCE_FRAMES + MAX_DELAY + 1)
61 #define MAX_QUANT 68 /* max quant for VC-2 */
62 #define MAX_BLOCKSIZE 32 /* maximum xblen/yblen we support */
65 * DiracBlock->ref flags, if set then the block does MC from the given ref
67 #define DIRAC_REF_MASK_REF1 1
68 #define DIRAC_REF_MASK_REF2 2
69 #define DIRAC_REF_MASK_GLOBAL 4
72 * Value of Picture.reference when Picture is not a reference picture, but
73 * is held for delayed output.
75 #define DELAYED_PIC_REF 4
77 #define ff_emulated_edge_mc ff_emulated_edge_mc_8 /* Fix: change the calls to this function regarding bit depth */
/*
 * Round size up to the next multiple of (1 << depth).
 *
 * Both parameters are parenthesized on every use so that arbitrary
 * expressions (for instance a conditional) expand with the intended
 * precedence. depth is evaluated three times and must therefore be free of
 * side effects.
 */
#define CALC_PADDING(size, depth) \
    ((((size) + (1 << (depth)) - 1) >> (depth)) << (depth))
82 #define DIVRNDUP(a, b) (((a) + (b) - 1) / (b))
86 int interpolated[3]; /* 1 if hpel[] is valid */
88 uint8_t *hpel_base[3][4];
95 } u; /* anonymous unions aren't in C99 :( */
99 typedef struct SubBand {
107 struct SubBand *parent;
111 const uint8_t *coeff_data;
114 typedef struct Plane {
123 IDWTELEM *idwt_buf_base;
129 /* block separation (block n+1 starts after this many pixels in block n) */
132 /* amount of overspill on each edge (half of the overlap between blocks) */
136 SubBand band[MAX_DWT_LEVELS][4];
/*
 * Private decoder state; the codec callbacks in this file obtain it from
 * AVCodecContext.priv_data.
 */
139 typedef struct DiracContext {
140 AVCodecContext *avctx;
142 MpegvideoEncDSPContext mpvencdsp;
143 DiracDSPContext diracdsp;
145 dirac_source_params source;
146 int seen_sequence_header;
147 int frame_number; /* number of the next frame to display */
152 int zero_res; /* zero residue flag */
153 int is_arith; /* whether coeffs use arith or golomb coding */
154 int low_delay; /* use the low delay syntax */
155 int globalmc_flag; /* use global motion compensation */
156 int num_refs; /* number of reference pictures */
158 /* wavelet decoding */
159 unsigned wavelet_depth; /* depth of the IDWT */
160 unsigned wavelet_idx; /* wavelet index; selects the default_qmat row, the parser rejects values above 6 */
163 * schroedinger older than 1.0.8 doesn't store
164 * quant delta if only one codebook exists in a band
166 unsigned old_delta_quant;
167 unsigned codeblock_mode; /* the parser rejects values above 1 */
172 } codeblock[MAX_DWT_LEVELS+1];
175 unsigned num_x; /* number of horizontal slices */
176 unsigned num_y; /* number of vertical slices */
177 AVRational bytes; /* average bytes per slice */
178 uint8_t quant[MAX_DWT_LEVELS][4]; /* [DIRAC_STD] E.1 */
182 int pan_tilt[2]; /* pan/tilt vector */
183 int zrs[2][2]; /* zoom/rotate/shear matrix */
184 int perspective[2]; /* perspective vector */
186 unsigned perspective_exp;
189 /* motion compensation */
190 uint8_t mv_precision; /* motion vector precision ([DIRAC_STD] 11.2.5 motion_vector_precision()); the parser rejects values above 3 */
191 int16_t weight[2]; /* [DIRAC_STD] REF1_WT and REF2_WT */
192 unsigned weight_log2denom; /* [DIRAC_STD] REFS_WT_PRECISION */
194 int blwidth; /* number of blocks (horizontally) */
195 int blheight; /* number of blocks (vertically) */
196 int sbwidth; /* number of superblocks (horizontally) */
197 int sbheight; /* number of superblocks (vertically) */
200 DiracBlock *blmotion;
202 uint8_t *edge_emu_buffer[4];
203 uint8_t *edge_emu_buffer_base;
205 uint16_t *mctmp; /* buffer holding the MC data multiplied by OBMC weights */
209 DECLARE_ALIGNED(16, uint8_t, obmc_weight)[3][MAX_BLOCKSIZE*MAX_BLOCKSIZE];
211 void (*put_pixels_tab[4])(uint8_t *dst, const uint8_t *src[5], int stride, int h);
212 void (*avg_pixels_tab[4])(uint8_t *dst, const uint8_t *src[5], int stride, int h);
213 void (*add_obmc)(uint16_t *dst, const uint8_t *src, int stride, const uint8_t *obmc_weight, int yblen);
214 dirac_weight_func weight_func;
215 dirac_biweight_func biweight_func;
217 DiracFrame *current_picture;
218 DiracFrame *ref_pics[2];
220 DiracFrame *ref_frames[MAX_REFERENCE_FRAMES+1];
221 DiracFrame *delay_frames[MAX_DELAY+1];
222 DiracFrame all_frames[MAX_FRAMES]; /* one AVFrame per entry is allocated in dirac_decode_init() and freed in dirac_decode_end() */
226 * Dirac Specification ->
227 * Parse code values. 9.6.1 Table 9.1
229 enum dirac_parse_code {
230 pc_seq_header = 0x00,
243 static const uint8_t default_qmat[][4][4] = {
244 { { 5, 3, 3, 0}, { 0, 4, 4, 1}, { 0, 5, 5, 2}, { 0, 6, 6, 3} },
245 { { 4, 2, 2, 0}, { 0, 4, 4, 2}, { 0, 5, 5, 3}, { 0, 7, 7, 5} },
246 { { 5, 3, 3, 0}, { 0, 4, 4, 1}, { 0, 5, 5, 2}, { 0, 6, 6, 3} },
247 { { 8, 4, 4, 0}, { 0, 4, 4, 0}, { 0, 4, 4, 0}, { 0, 4, 4, 0} },
248 { { 8, 4, 4, 0}, { 0, 4, 4, 0}, { 0, 4, 4, 0}, { 0, 4, 4, 0} },
249 { { 0, 4, 4, 8}, { 0, 8, 8, 12}, { 0, 13, 13, 17}, { 0, 17, 17, 21} },
250 { { 3, 1, 1, 0}, { 0, 4, 4, 2}, { 0, 6, 6, 5}, { 0, 9, 9, 7} },
253 static const int qscale_tab[MAX_QUANT+1] = {
254 4, 5, 6, 7, 8, 10, 11, 13,
255 16, 19, 23, 27, 32, 38, 45, 54,
256 64, 76, 91, 108, 128, 152, 181, 215,
257 256, 304, 362, 431, 512, 609, 724, 861,
258 1024, 1218, 1448, 1722, 2048, 2435, 2896, 3444,
259 4096, 4871, 5793, 6889, 8192, 9742, 11585, 13777,
260 16384, 19484, 23170, 27554, 32768, 38968, 46341, 55109,
264 static const int qoffset_intra_tab[MAX_QUANT+1] = {
265 1, 2, 3, 4, 4, 5, 6, 7,
266 8, 10, 12, 14, 16, 19, 23, 27,
267 32, 38, 46, 54, 64, 76, 91, 108,
268 128, 152, 181, 216, 256, 305, 362, 431,
269 512, 609, 724, 861, 1024, 1218, 1448, 1722,
270 2048, 2436, 2897, 3445, 4096, 4871, 5793, 6889,
271 8192, 9742, 11585, 13777, 16384, 19484, 23171, 27555,
275 static const int qoffset_inter_tab[MAX_QUANT+1] = {
276 1, 2, 2, 3, 3, 4, 4, 5,
277 6, 7, 9, 10, 12, 14, 17, 20,
278 24, 29, 34, 41, 48, 57, 68, 81,
279 96, 114, 136, 162, 192, 228, 272, 323,
280 384, 457, 543, 646, 768, 913, 1086, 1292,
281 1536, 1827, 2172, 2583, 3072, 3653, 4344, 5166,
282 6144, 7307, 8689, 10333, 12288, 14613, 17378, 20666,
/**
 * Division by 3 through a 16.16 fixed-point reciprocal (magic numbers taken
 * from schroedinger): 21845 is floor(65536 / 3) and 10922 is the bias added
 * before the final shift.
 *
 * The multiply-add is carried out in unsigned arithmetic so that large
 * magnitudes of x wrap instead of triggering signed-overflow undefined
 * behavior; for every input the signed expression could evaluate without
 * overflowing, the result is unchanged. As before, the final right shift of a
 * negative value relies on the compiler performing an arithmetic shift.
 *
 * @param x dividend
 * @return  x / 3 as produced by the fixed-point approximation
 */
static inline int divide3(int x)
{
    return (int)((x + 1U) * 21845 + 10922) >> 16;
}
/*
 * Look up the frame whose display_picture_number equals picnum in framelist
 * and close the gap it leaves.
 *
 * framelist is walked until its first NULL entry, so it must be
 * NULL-terminated. Every entry is visited (there is no early exit in the
 * visible loop), so when several entries match, the last one ends up in
 * remove_pic.
 *
 * NOTE(review): the lines that record remove_idx, guard the compaction loop
 * and return the result are not visible here; presumably the function returns
 * remove_pic (NULL when nothing matched) - confirm against the full file.
 */
292 static DiracFrame *remove_frame(DiracFrame *framelist[], int picnum)
294 DiracFrame *remove_pic = NULL;
295 int i, remove_idx = -1;
/* scan the whole NULL-terminated list for a matching picture number */
297 for (i = 0; framelist[i]; i++)
298 if (framelist[i]->avframe->display_picture_number == picnum) {
299 remove_pic = framelist[i];
/* shift the following entries down by one; the terminating NULL is copied last and ends the loop */
304 for (i = remove_idx; framelist[i]; i++)
305 framelist[i] = framelist[i+1];
310 static int add_frame(DiracFrame *framelist[], int maxframes, DiracFrame *frame)
313 for (i = 0; i < maxframes; i++)
315 framelist[i] = frame;
321 static int alloc_sequence_buffers(DiracContext *s)
323 int sbwidth = DIVRNDUP(s->source.width, 4);
324 int sbheight = DIVRNDUP(s->source.height, 4);
325 int i, w, h, top_padding;
327 /* todo: think more about this / use or set Plane here */
328 for (i = 0; i < 3; i++) {
329 int max_xblen = MAX_BLOCKSIZE >> (i ? s->chroma_x_shift : 0);
330 int max_yblen = MAX_BLOCKSIZE >> (i ? s->chroma_y_shift : 0);
331 w = s->source.width >> (i ? s->chroma_x_shift : 0);
332 h = s->source.height >> (i ? s->chroma_y_shift : 0);
334 /* we allocate the max we support here since num decompositions can
335 * change from frame to frame. Stride is aligned to 16 for SIMD, and
336 * 1<<MAX_DWT_LEVELS top padding to avoid if(y>0) in arith decoding
337 * MAX_BLOCKSIZE padding for MC: blocks can spill up to half of that
339 top_padding = FFMAX(1<<MAX_DWT_LEVELS, max_yblen/2);
340 w = FFALIGN(CALC_PADDING(w, MAX_DWT_LEVELS), 8); /* FIXME: Should this be 16 for SSE??? */
341 h = top_padding + CALC_PADDING(h, MAX_DWT_LEVELS) + max_yblen/2;
343 s->plane[i].idwt_buf_base = av_mallocz_array((w+max_xblen), h * sizeof(IDWTELEM));
344 s->plane[i].idwt_tmp = av_malloc_array((w+16), sizeof(IDWTELEM));
345 s->plane[i].idwt_buf = s->plane[i].idwt_buf_base + top_padding*w;
346 if (!s->plane[i].idwt_buf_base || !s->plane[i].idwt_tmp)
347 return AVERROR(ENOMEM);
350 /* fixme: allocate using real stride here */
351 s->sbsplit = av_malloc_array(sbwidth, sbheight);
352 s->blmotion = av_malloc_array(sbwidth, sbheight * 16 * sizeof(*s->blmotion));
354 if (!s->sbsplit || !s->blmotion)
355 return AVERROR(ENOMEM);
359 static int alloc_buffers(DiracContext *s, int stride)
361 int w = s->source.width;
362 int h = s->source.height;
364 av_assert0(stride >= w);
367 if (s->buffer_stride >= stride)
369 s->buffer_stride = 0;
371 av_freep(&s->edge_emu_buffer_base);
372 memset(s->edge_emu_buffer, 0, sizeof(s->edge_emu_buffer));
374 av_freep(&s->mcscratch);
376 s->edge_emu_buffer_base = av_malloc_array(stride, MAX_BLOCKSIZE);
378 s->mctmp = av_malloc_array((stride+MAX_BLOCKSIZE), (h+MAX_BLOCKSIZE) * sizeof(*s->mctmp));
379 s->mcscratch = av_malloc_array(stride, MAX_BLOCKSIZE);
381 if (!s->edge_emu_buffer_base || !s->mctmp || !s->mcscratch)
382 return AVERROR(ENOMEM);
384 s->buffer_stride = stride;
388 static void free_sequence_buffers(DiracContext *s)
392 for (i = 0; i < MAX_FRAMES; i++) {
393 if (s->all_frames[i].avframe->data[0]) {
394 av_frame_unref(s->all_frames[i].avframe);
395 memset(s->all_frames[i].interpolated, 0, sizeof(s->all_frames[i].interpolated));
398 for (j = 0; j < 3; j++)
399 for (k = 1; k < 4; k++)
400 av_freep(&s->all_frames[i].hpel_base[j][k]);
403 memset(s->ref_frames, 0, sizeof(s->ref_frames));
404 memset(s->delay_frames, 0, sizeof(s->delay_frames));
406 for (i = 0; i < 3; i++) {
407 av_freep(&s->plane[i].idwt_buf_base);
408 av_freep(&s->plane[i].idwt_tmp);
411 s->buffer_stride = 0;
412 av_freep(&s->sbsplit);
413 av_freep(&s->blmotion);
414 av_freep(&s->edge_emu_buffer_base);
417 av_freep(&s->mcscratch);
420 static av_cold int dirac_decode_init(AVCodecContext *avctx)
422 DiracContext *s = avctx->priv_data;
426 s->frame_number = -1;
428 ff_dsputil_init(&s->dsp, avctx);
429 ff_diracdsp_init(&s->diracdsp);
430 ff_mpegvideoencdsp_init(&s->mpvencdsp, avctx);
432 for (i = 0; i < MAX_FRAMES; i++) {
433 s->all_frames[i].avframe = av_frame_alloc();
434 if (!s->all_frames[i].avframe) {
436 av_frame_free(&s->all_frames[--i].avframe);
437 return AVERROR(ENOMEM);
444 static void dirac_decode_flush(AVCodecContext *avctx)
446 DiracContext *s = avctx->priv_data;
447 free_sequence_buffers(s);
448 s->seen_sequence_header = 0;
449 s->frame_number = -1;
452 static av_cold int dirac_decode_end(AVCodecContext *avctx)
454 DiracContext *s = avctx->priv_data;
457 dirac_decode_flush(avctx);
458 for (i = 0; i < MAX_FRAMES; i++)
459 av_frame_free(&s->all_frames[i].avframe);
464 #define SIGN_CTX(x) (CTX_SIGN_ZERO + ((x) > 0) - ((x) < 0))
466 static inline void coeff_unpack_arith(DiracArith *c, int qfactor, int qoffset,
467 SubBand *b, IDWTELEM *buf, int x, int y)
471 int pred_ctx = CTX_ZPZN_F1;
473 /* Check if the parent subband has a 0 in the corresponding position */
475 pred_ctx += !!b->parent->ibuf[b->parent->stride * (y>>1) + (x>>1)] << 1;
477 if (b->orientation == subband_hl)
478 sign_pred = buf[-b->stride];
480 /* Determine if the pixel has only zeros in its neighbourhood */
482 pred_ctx += !(buf[-1] | buf[-b->stride] | buf[-1-b->stride]);
483 if (b->orientation == subband_lh)
486 pred_ctx += !buf[-b->stride];
489 coeff = dirac_get_arith_uint(c, pred_ctx, CTX_COEFF_DATA);
491 coeff = (coeff * qfactor + qoffset + 2) >> 2;
492 sign = dirac_get_arith_bit(c, SIGN_CTX(sign_pred));
493 coeff = (coeff ^ -sign) + sign;
498 static inline int coeff_unpack_golomb(GetBitContext *gb, int qfactor, int qoffset)
502 coeff = svq3_get_ue_golomb(gb);
504 coeff = (coeff * qfactor + qoffset + 2) >> 2;
505 sign = get_bits1(gb);
506 coeff = (coeff ^ -sign) + sign;
512 * Decode the coeffs in the rectangle defined by left, right, top, bottom
513 * [DIRAC_STD] 13.4.3.2 Codeblock unpacking loop. codeblock()
515 static inline void codeblock(DiracContext *s, SubBand *b,
516 GetBitContext *gb, DiracArith *c,
517 int left, int right, int top, int bottom,
518 int blockcnt_one, int is_arith)
520 int x, y, zero_block;
521 int qoffset, qfactor;
524 /* check for any coded coefficients in this codeblock */
527 zero_block = dirac_get_arith_bit(c, CTX_ZERO_BLOCK);
529 zero_block = get_bits1(gb);
535 if (s->codeblock_mode && !(s->old_delta_quant && blockcnt_one)) {
536 int quant = b->quant;
538 quant += dirac_get_arith_int(c, CTX_DELTA_Q_F, CTX_DELTA_Q_DATA);
540 quant += dirac_get_se_golomb(gb);
542 av_log(s->avctx, AV_LOG_ERROR, "Invalid quant\n");
548 b->quant = FFMIN(b->quant, MAX_QUANT);
550 qfactor = qscale_tab[b->quant];
551 /* TODO: context pointer? */
553 qoffset = qoffset_intra_tab[b->quant];
555 qoffset = qoffset_inter_tab[b->quant];
557 buf = b->ibuf + top * b->stride;
558 for (y = top; y < bottom; y++) {
559 for (x = left; x < right; x++) {
560 /* [DIRAC_STD] 13.4.4 Subband coefficients. coeff_unpack() */
562 coeff_unpack_arith(c, qfactor, qoffset, b, buf+x, x, y);
564 buf[x] = coeff_unpack_golomb(gb, qfactor, qoffset);
571 * Dirac Specification ->
572 * 13.3 intra_dc_prediction(band)
574 static inline void intra_dc_prediction(SubBand *b)
576 IDWTELEM *buf = b->ibuf;
579 for (x = 1; x < b->width; x++)
583 for (y = 1; y < b->height; y++) {
584 buf[0] += buf[-b->stride];
586 for (x = 1; x < b->width; x++) {
587 int pred = buf[x - 1] + buf[x - b->stride] + buf[x - b->stride-1];
588 buf[x] += divide3(pred);
595 * Dirac Specification ->
596 * 13.4.2 Non-skipped subbands. subband_coeffs()
598 static av_always_inline void decode_subband_internal(DiracContext *s, SubBand *b, int is_arith)
600 int cb_x, cb_y, left, right, top, bottom;
603 int cb_width = s->codeblock[b->level + (b->orientation != subband_ll)].width;
604 int cb_height = s->codeblock[b->level + (b->orientation != subband_ll)].height;
605 int blockcnt_one = (cb_width + cb_height) == 2;
610 init_get_bits8(&gb, b->coeff_data, b->length);
613 ff_dirac_init_arith_decoder(&c, &gb, b->length);
616 for (cb_y = 0; cb_y < cb_height; cb_y++) {
617 bottom = (b->height * (cb_y+1)) / cb_height;
619 for (cb_x = 0; cb_x < cb_width; cb_x++) {
620 right = (b->width * (cb_x+1)) / cb_width;
621 codeblock(s, b, &gb, &c, left, right, top, bottom, blockcnt_one, is_arith);
627 if (b->orientation == subband_ll && s->num_refs == 0)
628 intra_dc_prediction(b);
631 static int decode_subband_arith(AVCodecContext *avctx, void *b)
633 DiracContext *s = avctx->priv_data;
634 decode_subband_internal(s, b, 1);
638 static int decode_subband_golomb(AVCodecContext *avctx, void *arg)
640 DiracContext *s = avctx->priv_data;
642 decode_subband_internal(s, *b, 0);
647 * Dirac Specification ->
648 * [DIRAC_STD] 13.4.1 core_transform_data()
650 static void decode_component(DiracContext *s, int comp)
652 AVCodecContext *avctx = s->avctx;
653 SubBand *bands[3*MAX_DWT_LEVELS+1];
654 enum dirac_subband orientation;
655 int level, num_bands = 0;
657 /* Unpack all subbands at all levels. */
658 for (level = 0; level < s->wavelet_depth; level++) {
659 for (orientation = !!level; orientation < 4; orientation++) {
660 SubBand *b = &s->plane[comp].band[level][orientation];
661 bands[num_bands++] = b;
663 align_get_bits(&s->gb);
664 /* [DIRAC_STD] 13.4.2 subband() */
665 b->length = svq3_get_ue_golomb(&s->gb);
667 b->quant = svq3_get_ue_golomb(&s->gb);
668 align_get_bits(&s->gb);
669 b->coeff_data = s->gb.buffer + get_bits_count(&s->gb)/8;
670 b->length = FFMIN(b->length, FFMAX(get_bits_left(&s->gb)/8, 0));
671 skip_bits_long(&s->gb, b->length*8);
674 /* arithmetic coding has inter-level dependencies, so we can only execute one level at a time */
676 avctx->execute(avctx, decode_subband_arith, &s->plane[comp].band[level][!!level],
677 NULL, 4-!!level, sizeof(SubBand));
679 /* golomb coding has no inter-level dependencies, so we can execute all subbands in parallel */
681 avctx->execute(avctx, decode_subband_golomb, bands, NULL, num_bands, sizeof(SubBand*));
684 /* [DIRAC_STD] 13.5.5.2 Luma slice subband data. luma_slice_band(level,orient,sx,sy) --> if b2 == NULL */
685 /* [DIRAC_STD] 13.5.5.3 Chroma slice subband data. chroma_slice_band(level,orient,sx,sy) --> if b2 != NULL */
686 static void lowdelay_subband(DiracContext *s, GetBitContext *gb, int quant,
687 int slice_x, int slice_y, int bits_end,
688 SubBand *b1, SubBand *b2)
690 int left = b1->width * slice_x / s->lowdelay.num_x;
691 int right = b1->width *(slice_x+1) / s->lowdelay.num_x;
692 int top = b1->height * slice_y / s->lowdelay.num_y;
693 int bottom = b1->height *(slice_y+1) / s->lowdelay.num_y;
695 int qfactor = qscale_tab[FFMIN(quant, MAX_QUANT)];
696 int qoffset = qoffset_intra_tab[FFMIN(quant, MAX_QUANT)];
698 IDWTELEM *buf1 = b1->ibuf + top * b1->stride;
699 IDWTELEM *buf2 = b2 ? b2->ibuf + top * b2->stride : NULL;
701 /* we have to constantly check for overread since the spec explicitly
702 requires this, with the meaning that all remaining coeffs are set to 0 */
703 if (get_bits_count(gb) >= bits_end)
706 for (y = top; y < bottom; y++) {
707 for (x = left; x < right; x++) {
708 buf1[x] = coeff_unpack_golomb(gb, qfactor, qoffset);
709 if (get_bits_count(gb) >= bits_end)
712 buf2[x] = coeff_unpack_golomb(gb, qfactor, qoffset);
713 if (get_bits_count(gb) >= bits_end)
723 struct lowdelay_slice {
732 * Dirac Specification ->
733 * 13.5.2 Slices. slice(sx,sy)
/*
 * Decode one low-delay slice.
 *
 * arg points to a struct lowdelay_slice that carries its own bit reader, the
 * slice size in bytes and the slice coordinates. The slice starts with a
 * 7-bit quantiser index and a luma length field of av_log2(8 * bytes) + 1
 * bits. The luma coefficients of every subband (plane 0) follow, then the
 * chroma coefficients, where planes 1 and 2 are handed to lowdelay_subband()
 * together.
 *
 * NOTE(review): the closing lines of this function (including its return
 * statement) are not visible here.
 */
735 static int decode_lowdelay_slice(AVCodecContext *avctx, void *arg)
737 DiracContext *s = avctx->priv_data;
738 struct lowdelay_slice *slice = arg;
739 GetBitContext *gb = &slice->gb;
740 enum dirac_subband orientation;
741 int level, quant, chroma_bits, chroma_end;
743 int quant_base = get_bits(gb, 7); /*[DIRAC_STD] qindex */
744 int length_bits = av_log2(8 * slice->bytes)+1;
745 int luma_bits = get_bits_long(gb, length_bits);
/* bit position where luma data ends, limited to the bits actually left in the reader */
746 int luma_end = get_bits_count(gb) + FFMIN(luma_bits, get_bits_left(gb));
748 /* [DIRAC_STD] 13.5.5.2 luma_slice_band */
749 for (level = 0; level < s->wavelet_depth; level++)
/* level 0 starts at orientation 0, every higher level starts at orientation 1 */
750 for (orientation = !!level; orientation < 4; orientation++) {
/* per-band quantiser: slice index minus the matrix entry, floored at 0 */
751 quant = FFMAX(quant_base - s->lowdelay.quant[level][orientation], 0);
752 lowdelay_subband(s, gb, quant, slice->slice_x, slice->slice_y, luma_end,
753 &s->plane[0].band[level][orientation], NULL);
756 /* consume any unused bits from luma */
/* NOTE(review): the skip amount is get_bits_count(gb) - luma_end, which is
 * negative whenever fewer than luma_end bits have been consumed; moving the
 * reader to luma_end would need luma_end - get_bits_count(gb). Confirm the
 * intended sign. */
757 skip_bits_long(gb, get_bits_count(gb) - luma_end);
/* chroma gets whatever remains of the slice after the 7-bit index, the length
 * field and the luma bits.
 * NOTE(review): luma_bits comes from the bitstream, so chroma_bits can be
 * negative; chroma_end is then not beyond the current position - verify that
 * lowdelay_subband() copes with that. */
759 chroma_bits = 8*slice->bytes - 7 - length_bits - luma_bits;
760 chroma_end = get_bits_count(gb) + FFMIN(chroma_bits, get_bits_left(gb));
761 /* [DIRAC_STD] 13.5.5.3 chroma_slice_band */
762 for (level = 0; level < s->wavelet_depth; level++)
763 for (orientation = !!level; orientation < 4; orientation++) {
764 quant = FFMAX(quant_base - s->lowdelay.quant[level][orientation], 0);
765 lowdelay_subband(s, gb, quant, slice->slice_x, slice->slice_y, chroma_end,
766 &s->plane[1].band[level][orientation],
767 &s->plane[2].band[level][orientation]);
774 * Dirac Specification ->
775 * 13.5.1 low_delay_transform_data()
777 static void decode_lowdelay(DiracContext *s)
779 AVCodecContext *avctx = s->avctx;
780 int slice_x, slice_y, bytes, bufsize;
782 struct lowdelay_slice *slices;
785 slices = av_mallocz_array(s->lowdelay.num_x, s->lowdelay.num_y * sizeof(struct lowdelay_slice));
787 align_get_bits(&s->gb);
788 /*[DIRAC_STD] 13.5.2 Slices. slice(sx,sy) */
789 buf = s->gb.buffer + get_bits_count(&s->gb)/8;
790 bufsize = get_bits_left(&s->gb);
792 for (slice_y = 0; bufsize > 0 && slice_y < s->lowdelay.num_y; slice_y++)
793 for (slice_x = 0; bufsize > 0 && slice_x < s->lowdelay.num_x; slice_x++) {
794 bytes = (slice_num+1) * s->lowdelay.bytes.num / s->lowdelay.bytes.den
795 - slice_num * s->lowdelay.bytes.num / s->lowdelay.bytes.den;
797 slices[slice_num].bytes = bytes;
798 slices[slice_num].slice_x = slice_x;
799 slices[slice_num].slice_y = slice_y;
800 init_get_bits(&slices[slice_num].gb, buf, bufsize);
807 avctx->execute(avctx, decode_lowdelay_slice, slices, NULL, slice_num,
808 sizeof(struct lowdelay_slice)); /* [DIRAC_STD] 13.5.2 Slices */
809 intra_dc_prediction(&s->plane[0].band[0][0]); /* [DIRAC_STD] 13.3 intra_dc_prediction() */
810 intra_dc_prediction(&s->plane[1].band[0][0]); /* [DIRAC_STD] 13.3 intra_dc_prediction() */
811 intra_dc_prediction(&s->plane[2].band[0][0]); /* [DIRAC_STD] 13.3 intra_dc_prediction() */
815 static void init_planes(DiracContext *s)
817 int i, w, h, level, orientation;
819 for (i = 0; i < 3; i++) {
820 Plane *p = &s->plane[i];
822 p->width = s->source.width >> (i ? s->chroma_x_shift : 0);
823 p->height = s->source.height >> (i ? s->chroma_y_shift : 0);
824 p->idwt_width = w = CALC_PADDING(p->width , s->wavelet_depth);
825 p->idwt_height = h = CALC_PADDING(p->height, s->wavelet_depth);
826 p->idwt_stride = FFALIGN(p->idwt_width, 8);
828 for (level = s->wavelet_depth-1; level >= 0; level--) {
831 for (orientation = !!level; orientation < 4; orientation++) {
832 SubBand *b = &p->band[level][orientation];
834 b->ibuf = p->idwt_buf;
836 b->stride = p->idwt_stride << (s->wavelet_depth - level);
839 b->orientation = orientation;
844 b->ibuf += b->stride>>1;
847 b->parent = &p->band[level-1][orientation];
852 p->xblen = s->plane[0].xblen >> s->chroma_x_shift;
853 p->yblen = s->plane[0].yblen >> s->chroma_y_shift;
854 p->xbsep = s->plane[0].xbsep >> s->chroma_x_shift;
855 p->ybsep = s->plane[0].ybsep >> s->chroma_y_shift;
858 p->xoffset = (p->xblen - p->xbsep)/2;
859 p->yoffset = (p->yblen - p->ybsep)/2;
864 * Unpack the motion compensation parameters
865 * Dirac Specification ->
866 * 11.2 Picture prediction data. picture_prediction()
868 static int dirac_unpack_prediction_parameters(DiracContext *s)
870 static const uint8_t default_blen[] = { 4, 12, 16, 24 };
871 static const uint8_t default_bsep[] = { 4, 8, 12, 16 };
873 GetBitContext *gb = &s->gb;
877 /* [DIRAC_STD] 11.2.2 Block parameters. block_parameters() */
878 /* Luma and Chroma are equal. 11.2.3 */
879 idx = svq3_get_ue_golomb(gb); /* [DIRAC_STD] index */
882 av_log(s->avctx, AV_LOG_ERROR, "Block prediction index too high\n");
887 s->plane[0].xblen = svq3_get_ue_golomb(gb);
888 s->plane[0].yblen = svq3_get_ue_golomb(gb);
889 s->plane[0].xbsep = svq3_get_ue_golomb(gb);
890 s->plane[0].ybsep = svq3_get_ue_golomb(gb);
892 /*[DIRAC_STD] preset_block_params(index). Table 11.1 */
893 s->plane[0].xblen = default_blen[idx-1];
894 s->plane[0].yblen = default_blen[idx-1];
895 s->plane[0].xbsep = default_bsep[idx-1];
896 s->plane[0].ybsep = default_bsep[idx-1];
898 /*[DIRAC_STD] 11.2.4 motion_data_dimensions()
899 Calculated in function dirac_unpack_block_motion_data */
901 if (!s->plane[0].xbsep || !s->plane[0].ybsep || s->plane[0].xbsep < s->plane[0].xblen/2 || s->plane[0].ybsep < s->plane[0].yblen/2) {
902 av_log(s->avctx, AV_LOG_ERROR, "Block separation too small\n");
905 if (s->plane[0].xbsep > s->plane[0].xblen || s->plane[0].ybsep > s->plane[0].yblen) {
906 av_log(s->avctx, AV_LOG_ERROR, "Block separation greater than size\n");
909 if (FFMAX(s->plane[0].xblen, s->plane[0].yblen) > MAX_BLOCKSIZE) {
910 av_log(s->avctx, AV_LOG_ERROR, "Unsupported large block size\n");
914 /*[DIRAC_STD] 11.2.5 Motion vector precision. motion_vector_precision()
915 Read motion vector precision */
916 s->mv_precision = svq3_get_ue_golomb(gb);
917 if (s->mv_precision > 3) {
918 av_log(s->avctx, AV_LOG_ERROR, "MV precision finer than eighth-pel\n");
922 /*[DIRAC_STD] 11.2.6 Global motion. global_motion()
923 Read the global motion compensation parameters */
924 s->globalmc_flag = get_bits1(gb);
925 if (s->globalmc_flag) {
926 memset(s->globalmc, 0, sizeof(s->globalmc));
927 /* [DIRAC_STD] pan_tilt(gparams) */
928 for (ref = 0; ref < s->num_refs; ref++) {
930 s->globalmc[ref].pan_tilt[0] = dirac_get_se_golomb(gb);
931 s->globalmc[ref].pan_tilt[1] = dirac_get_se_golomb(gb);
933 /* [DIRAC_STD] zoom_rotate_shear(gparams)
934 zoom/rotation/shear parameters */
936 s->globalmc[ref].zrs_exp = svq3_get_ue_golomb(gb);
937 s->globalmc[ref].zrs[0][0] = dirac_get_se_golomb(gb);
938 s->globalmc[ref].zrs[0][1] = dirac_get_se_golomb(gb);
939 s->globalmc[ref].zrs[1][0] = dirac_get_se_golomb(gb);
940 s->globalmc[ref].zrs[1][1] = dirac_get_se_golomb(gb);
942 s->globalmc[ref].zrs[0][0] = 1;
943 s->globalmc[ref].zrs[1][1] = 1;
945 /* [DIRAC_STD] perspective(gparams) */
947 s->globalmc[ref].perspective_exp = svq3_get_ue_golomb(gb);
948 s->globalmc[ref].perspective[0] = dirac_get_se_golomb(gb);
949 s->globalmc[ref].perspective[1] = dirac_get_se_golomb(gb);
954 /*[DIRAC_STD] 11.2.7 Picture prediction mode. prediction_mode()
955 Picture prediction mode, not currently used. */
956 if (svq3_get_ue_golomb(gb)) {
957 av_log(s->avctx, AV_LOG_ERROR, "Unknown picture prediction mode\n");
961 /* [DIRAC_STD] 11.2.8 Reference picture weight. reference_picture_weights()
962 just data read, weight calculation will be done later on. */
963 s->weight_log2denom = 1;
968 s->weight_log2denom = svq3_get_ue_golomb(gb);
969 s->weight[0] = dirac_get_se_golomb(gb);
970 if (s->num_refs == 2)
971 s->weight[1] = dirac_get_se_golomb(gb);
977 * Dirac Specification ->
978 * 11.3 Wavelet transform data. wavelet_transform()
980 static int dirac_unpack_idwt_params(DiracContext *s)
982 GetBitContext *gb = &s->gb;
986 #define CHECKEDREAD(dst, cond, errmsg) \
987 tmp = svq3_get_ue_golomb(gb); \
989 av_log(s->avctx, AV_LOG_ERROR, errmsg); \
996 s->zero_res = s->num_refs ? get_bits1(gb) : 0;
1000 /*[DIRAC_STD] 11.3.1 Transform parameters. transform_parameters() */
1001 CHECKEDREAD(s->wavelet_idx, tmp > 6, "wavelet_idx is too big\n")
1003 CHECKEDREAD(s->wavelet_depth, tmp > MAX_DWT_LEVELS || tmp < 1, "invalid number of DWT decompositions\n")
1005 if (!s->low_delay) {
1006 /* Codeblock parameters (core syntax only) */
1007 if (get_bits1(gb)) {
1008 for (i = 0; i <= s->wavelet_depth; i++) {
1009 CHECKEDREAD(s->codeblock[i].width , tmp < 1, "codeblock width invalid\n")
1010 CHECKEDREAD(s->codeblock[i].height, tmp < 1, "codeblock height invalid\n")
1013 CHECKEDREAD(s->codeblock_mode, tmp > 1, "unknown codeblock mode\n")
1015 for (i = 0; i <= s->wavelet_depth; i++)
1016 s->codeblock[i].width = s->codeblock[i].height = 1;
1018 /* Slice parameters + quantization matrix*/
1019 /*[DIRAC_STD] 11.3.4 Slice coding Parameters (low delay syntax only). slice_parameters() */
1020 s->lowdelay.num_x = svq3_get_ue_golomb(gb);
1021 s->lowdelay.num_y = svq3_get_ue_golomb(gb);
1022 s->lowdelay.bytes.num = svq3_get_ue_golomb(gb);
1023 s->lowdelay.bytes.den = svq3_get_ue_golomb(gb);
1025 if (s->lowdelay.bytes.den <= 0) {
1026 av_log(s->avctx,AV_LOG_ERROR,"Invalid lowdelay.bytes.den\n");
1027 return AVERROR_INVALIDDATA;
1030 /* [DIRAC_STD] 11.3.5 Quantisation matrices (low-delay syntax). quant_matrix() */
1031 if (get_bits1(gb)) {
1032 av_log(s->avctx,AV_LOG_DEBUG,"Low Delay: Has Custom Quantization Matrix!\n");
1033 /* custom quantization matrix */
1034 s->lowdelay.quant[0][0] = svq3_get_ue_golomb(gb);
1035 for (level = 0; level < s->wavelet_depth; level++) {
1036 s->lowdelay.quant[level][1] = svq3_get_ue_golomb(gb);
1037 s->lowdelay.quant[level][2] = svq3_get_ue_golomb(gb);
1038 s->lowdelay.quant[level][3] = svq3_get_ue_golomb(gb);
1041 if (s->wavelet_depth > 4) {
1042 av_log(s->avctx,AV_LOG_ERROR,"Mandatory custom low delay matrix missing for depth %d\n", s->wavelet_depth);
1043 return AVERROR_INVALIDDATA;
1045 /* default quantization matrix */
1046 for (level = 0; level < s->wavelet_depth; level++)
1047 for (i = 0; i < 4; i++) {
1048 s->lowdelay.quant[level][i] = default_qmat[s->wavelet_idx][level][i];
1049 /* haar with no shift differs for different depths */
1050 if (s->wavelet_idx == 3)
1051 s->lowdelay.quant[level][i] += 4*(s->wavelet_depth-1 - level);
1058 static inline int pred_sbsplit(uint8_t *sbsplit, int stride, int x, int y)
1060 static const uint8_t avgsplit[7] = { 0, 0, 1, 1, 1, 2, 2 };
1067 return sbsplit[-stride];
1069 return avgsplit[sbsplit[-1] + sbsplit[-stride] + sbsplit[-stride-1]];
1072 static inline int pred_block_mode(DiracBlock *block, int stride, int x, int y, int refmask)
1079 return block[-1].ref & refmask;
1081 return block[-stride].ref & refmask;
1083 /* return the majority */
1084 pred = (block[-1].ref & refmask) + (block[-stride].ref & refmask) + (block[-stride-1].ref & refmask);
1085 return (pred >> 1) & refmask;
1088 static inline void pred_block_dc(DiracBlock *block, int stride, int x, int y)
1092 memset(block->u.dc, 0, sizeof(block->u.dc));
1094 if (x && !(block[-1].ref & 3)) {
1095 for (i = 0; i < 3; i++)
1096 block->u.dc[i] += block[-1].u.dc[i];
1100 if (y && !(block[-stride].ref & 3)) {
1101 for (i = 0; i < 3; i++)
1102 block->u.dc[i] += block[-stride].u.dc[i];
1106 if (x && y && !(block[-1-stride].ref & 3)) {
1107 for (i = 0; i < 3; i++)
1108 block->u.dc[i] += block[-1-stride].u.dc[i];
1113 for (i = 0; i < 3; i++)
1114 block->u.dc[i] = (block->u.dc[i]+1)>>1;
1115 } else if (n == 3) {
1116 for (i = 0; i < 3; i++)
1117 block->u.dc[i] = divide3(block->u.dc[i]);
1121 static inline void pred_mv(DiracBlock *block, int stride, int x, int y, int ref)
1124 int refmask = ref+1;
1125 int mask = refmask | DIRAC_REF_MASK_GLOBAL; /* exclude gmc blocks */
1128 if (x && (block[-1].ref & mask) == refmask)
1129 pred[n++] = block[-1].u.mv[ref];
1131 if (y && (block[-stride].ref & mask) == refmask)
1132 pred[n++] = block[-stride].u.mv[ref];
1134 if (x && y && (block[-stride-1].ref & mask) == refmask)
1135 pred[n++] = block[-stride-1].u.mv[ref];
1139 block->u.mv[ref][0] = 0;
1140 block->u.mv[ref][1] = 0;
1143 block->u.mv[ref][0] = pred[0][0];
1144 block->u.mv[ref][1] = pred[0][1];
1147 block->u.mv[ref][0] = (pred[0][0] + pred[1][0] + 1) >> 1;
1148 block->u.mv[ref][1] = (pred[0][1] + pred[1][1] + 1) >> 1;
1151 block->u.mv[ref][0] = mid_pred(pred[0][0], pred[1][0], pred[2][0]);
1152 block->u.mv[ref][1] = mid_pred(pred[0][1], pred[1][1], pred[2][1]);
1157 static void global_mv(DiracContext *s, DiracBlock *block, int x, int y, int ref)
1159 int ez = s->globalmc[ref].zrs_exp;
1160 int ep = s->globalmc[ref].perspective_exp;
1161 int (*A)[2] = s->globalmc[ref].zrs;
1162 int *b = s->globalmc[ref].pan_tilt;
1163 int *c = s->globalmc[ref].perspective;
1165 int m = (1<<ep) - (c[0]*x + c[1]*y);
1166 int mx = m * ((A[0][0] * x + A[0][1]*y) + (1<<ez) * b[0]);
1167 int my = m * ((A[1][0] * x + A[1][1]*y) + (1<<ez) * b[1]);
1169 block->u.mv[ref][0] = (mx + (1<<(ez+ep))) >> (ez+ep);
1170 block->u.mv[ref][1] = (my + (1<<(ez+ep))) >> (ez+ep);
/**
 * Decode the parameters of one prediction block: its reference mode and then
 * either DC values or one motion vector per used reference.
 *
 * @param arith  arithmetic decoders; as used here, arith[0] carries the mode
 *               bits, arith[1..3] the DC residuals, and arith[4 + 2*i] /
 *               arith[5 + 2*i] the x / y motion vector residuals of
 *               reference i
 * @param stride width of the block array, in blocks
 * @param x      horizontal block coordinate
 * @param y      vertical block coordinate
 */
1173 static void decode_block_params(DiracContext *s, DiracArith arith[8], DiracBlock *block,
1174 int stride, int x, int y)
/* reference 1 bit: prediction from neighbours, flipped by the decoded bit */
1178 block->ref = pred_block_mode(block, stride, x, y, DIRAC_REF_MASK_REF1);
1179 block->ref ^= dirac_get_arith_bit(arith, CTX_PMODE_REF1);
/* reference 2 bit, only present for two-reference pictures */
1181 if (s->num_refs == 2) {
1182 block->ref |= pred_block_mode(block, stride, x, y, DIRAC_REF_MASK_REF2);
1183 block->ref ^= dirac_get_arith_bit(arith, CTX_PMODE_REF2) << 1;
/* DC values: prediction plus one decoded residual per component.
   Presumably only reached when the block uses no reference
   (block->ref == 0) -- TODO confirm the guard. */
1187 pred_block_dc(block, stride, x, y);
1188 for (i = 0; i < 3; i++)
1189 block->u.dc[i] += dirac_get_arith_int(arith+1+i, CTX_DC_F1, CTX_DC_DATA);
/* global motion bit, only coded when the picture enables global motion */
1193 if (s->globalmc_flag) {
1194 block->ref |= pred_block_mode(block, stride, x, y, DIRAC_REF_MASK_GLOBAL);
1195 block->ref ^= dirac_get_arith_bit(arith, CTX_GLOBAL_BLOCK) << 2;
/* one motion vector per reference this block actually uses */
1198 for (i = 0; i < s->num_refs; i++)
1199 if (block->ref & (i+1)) {
1200 if (block->ref & DIRAC_REF_MASK_GLOBAL) {
/* globally compensated block: vector derived from picture parameters */
1201 global_mv(s, block, x, y, i);
/* otherwise (presumably the else branch): predicted vector plus decoded
   x and y residuals */
1203 pred_mv(block, stride, x, y, i);
1204 block->u.mv[i][0] += dirac_get_arith_int(arith + 4 + 2 * i, CTX_MV_F1, CTX_MV_DATA);
1205 block->u.mv[i][1] += dirac_get_arith_int(arith + 5 + 2 * i, CTX_MV_F1, CTX_MV_DATA);
1211 * Copies the current block to the other blocks covered by the current superblock split mode
/* Walks a size x size square of blocks whose top-left element is block:
   first the rest of the top row (x = 1 .. size-1), then every further row
   (y = 1 .. size-1) in full. stride is the block-array width used to step
   between rows -- presumably each visited entry receives a copy of *block;
   TODO confirm against the loop bodies. */
1213 static void propagate_block_data(DiracBlock *block, int stride, int size)
1216 DiracBlock *dst = block;
/* remainder of the first row */
1218 for (x = 1; x < size; x++)
/* remaining rows */
1221 for (y = 1; y < size; y++) {
1223 for (x = 0; x < size; x++)
1229 * Dirac Specification ->
1230 * 12. Block motion data syntax
/* Reads the block motion data of the current picture from s->gb: first the
   split mode of every superblock into s->sbsplit, then the parameters of
   every prediction block into s->blmotion. */
1232 static int dirac_unpack_block_motion_data(DiracContext *s)
1234 GetBitContext *gb = &s->gb;
1235 uint8_t *sbsplit = s->sbsplit;
/* eight decoders: [0] modes, [1..3] DC, [4..7] mv x/y for up to two refs */
1237 DiracArith arith[8];
1241 /* [DIRAC_STD] 11.2.4 and 12.2.1 Number of blocks and superblocks */
/* a superblock spans 4x4 blocks, so it covers 4*xbsep by 4*ybsep pixels */
1242 s->sbwidth = DIVRNDUP(s->source.width, 4*s->plane[0].xbsep);
1243 s->sbheight = DIVRNDUP(s->source.height, 4*s->plane[0].ybsep);
1244 s->blwidth = 4 * s->sbwidth;
1245 s->blheight = 4 * s->sbheight;
1247 /* [DIRAC_STD] 12.3.1 Superblock splitting modes. superblock_split_modes()
1248 decode superblock split modes */
1249 ff_dirac_init_arith_decoder(arith, gb, svq3_get_ue_golomb(gb)); /* svq3_get_ue_golomb(gb) is the length */
1250 for (y = 0; y < s->sbheight; y++) {
1251 for (x = 0; x < s->sbwidth; x++) {
1252 unsigned int split = dirac_get_arith_uint(arith, CTX_SB_F1, CTX_SB_DATA);
/* split mode = (residual + prediction) mod 3, so it is always 0, 1 or 2 */
1255 sbsplit[x] = (split + pred_sbsplit(sbsplit+x, s->sbwidth, x, y)) % 3;
/* advance to the next row of superblocks */
1257 sbsplit += s->sbwidth;
1260 /* setup arith decoding */
/* arith[0] is re-initialised for the block mode bits */
1261 ff_dirac_init_arith_decoder(arith, gb, svq3_get_ue_golomb(gb));
/* per reference: one decoder for mv x, one for mv y */
1262 for (i = 0; i < s->num_refs; i++) {
1263 ff_dirac_init_arith_decoder(arith + 4 + 2 * i, gb, svq3_get_ue_golomb(gb));
1264 ff_dirac_init_arith_decoder(arith + 5 + 2 * i, gb, svq3_get_ue_golomb(gb));
/* one decoder per DC component */
1266 for (i = 0; i < 3; i++)
1267 ff_dirac_init_arith_decoder(arith+1+i, gb, svq3_get_ue_golomb(gb));
/* decode the blocks superblock by superblock */
1269 for (y = 0; y < s->sbheight; y++)
1270 for (x = 0; x < s->sbwidth; x++) {
/* split mode 0/1/2 gives 1/2/4 coded blocks per side, each standing for
   a square of 4/2/1 block positions */
1271 int blkcnt = 1 << s->sbsplit[y * s->sbwidth + x];
1272 int step = 4 >> s->sbsplit[y * s->sbwidth + x];
1274 for (q = 0; q < blkcnt; q++)
1275 for (p = 0; p < blkcnt; p++) {
1276 int bx = 4 * x + p*step;
1277 int by = 4 * y + q*step;
1278 DiracBlock *block = &s->blmotion[by*s->blwidth + bx];
1279 decode_block_params(s, arith, block, s->blwidth, bx, by);
/* hand the decoded block and its step x step footprint to
   propagate_block_data() */
1280 propagate_block_data(block, s->blwidth, step);
/* Weight of sample i in an overlapped block of length blen whose overlap
   parameter is offset. ROLLOFF(i) is the ramp value: 3 or 5 when offset is 1,
   otherwise 1 + (6*i + offset - 1) / (2*offset - 1). */
1287 static int weight(int i, int blen, int offset)
1289 #define ROLLOFF(i) offset == 1 ? ((i) ? 5 : 3) : \
1290 (1 + (6*(i) + offset - 1) / (2*offset - 1))
/* trailing 2*offset samples: the ramp mirrored from the end of the block.
   Presumably the leading samples use ROLLOFF(i) and the middle a constant
   -- TODO confirm the other branches. */
1294 else if (i > blen-1 - 2*offset)
1295 return ROLLOFF(blen-1 - i);
/**
 * Fill one row of an OBMC weight table.
 *
 * @param obmc_weight row to fill
 * @param stride      row length of the table; entries from p->xblen up to
 *                    stride are visited by the last loop
 * @param left        non-zero: the left half of the block gets the flat
 *                    weight wy*8 instead of the ramp
 * @param right       non-zero: the right half of the block gets the flat
 *                    weight wy*8 instead of the ramp
 * @param wy          vertical weight of this row
 */
1299 static void init_obmc_weight_row(Plane *p, uint8_t *obmc_weight, int stride,
1300 int left, int right, int wy)
/* flat left half, only when left is set (otherwise x stays at 0) */
1303 for (x = 0; left && x < p->xblen >> 1; x++)
1304 obmc_weight[x] = wy*8;
/* ramped part: up to xblen, or only up to xblen/2 when right is set */
1305 for (; x < p->xblen >> right; x++)
1306 obmc_weight[x] = wy*weight(x, p->xblen, p->xoffset);
/* flat right half; runs zero times when right is 0 */
1307 for (; x < p->xblen; x++)
1308 obmc_weight[x] = wy*8;
/* remaining entries up to the table stride -- presumably cleared */
1309 for (; x < stride; x++)
/**
 * Fill a whole OBMC weight table of p->yblen rows.
 *
 * left / right are passed through to init_obmc_weight_row(). top / bottom
 * do the same job vertically: when set, the upper / lower half of the block
 * uses the flat vertical weight 8 instead of weight().
 */
1313 static void init_obmc_weight(Plane *p, uint8_t *obmc_weight, int stride,
1314 int left, int right, int top, int bottom)
/* flat upper half, only when top is set */
1317 for (y = 0; top && y < p->yblen >> 1; y++) {
1318 init_obmc_weight_row(p, obmc_weight, stride, left, right, 8);
1319 obmc_weight += stride;
/* ramped rows: up to yblen, or only up to yblen/2 when bottom is set */
1321 for (; y < p->yblen >> bottom; y++) {
1322 int wy = weight(y, p->yblen, p->yoffset);
1323 init_obmc_weight_row(p, obmc_weight, stride, left, right, wy);
1324 obmc_weight += stride;
/* flat lower half; runs zero times when bottom is 0 */
1326 for (; y < p->yblen; y++) {
1327 init_obmc_weight_row(p, obmc_weight, stride, left, right, 8);
1328 obmc_weight += stride;
/* Prepare the three OBMC weight tables for block row by:
   s->obmc_weight[0] for the leftmost block (left = 1),
   s->obmc_weight[1] for interior blocks,
   s->obmc_weight[2] for the rightmost block (right = 1).
   top is presumably set for the first block row -- TODO confirm. */
1332 static void init_obmc_weights(DiracContext *s, Plane *p, int by)
1335 int bottom = by == s->blheight-1;
1337 /* don't bother re-initing for rows 2 to blheight-2, the weights don't change */
/* by == 1 is included: it is the first row whose tables no longer carry
   the top flag */
1338 if (top || bottom || by == 1) {
1339 init_obmc_weight(p, s->obmc_weight[0], MAX_BLOCKSIZE, 1, 0, top, bottom);
1340 init_obmc_weight(p, s->obmc_weight[1], MAX_BLOCKSIZE, 0, 0, top, bottom);
1341 init_obmc_weight(p, s->obmc_weight[2], MAX_BLOCKSIZE, 0, 1, top, bottom);
/* Four-entry weight sets, looked up in mc_subpel() as
   epel_weights[my&3][mx&3] and handed to the pixel function through
   src[4]. */
1345 static const uint8_t epel_weights[4][4][4] = {
1365 * For block x,y, determine which of the hpel planes to do bilinear
1366 * interpolation from and set src[] to the location in each hpel plane
1369 * @return the index of the put_dirac_pixels_tab function to use
1370 * 0 for 1 plane (fpel,hpel), 1 for 2 planes (qpel), 2 for 4 planes (qpel), and 3 for epel
/* Resolves the motion vector of block for reference ref into source
   pointers. src[0..3] receive pointers into the reference's half-pel planes
   at block position (x, y); src[4] may receive an epel weight set. The return
   value is (nplanes >> 1) + epel. */
1372 static int mc_subpel(DiracContext *s, DiracBlock *block, const uint8_t *src[5],
1373 int x, int y, int ref, int plane)
1375 Plane *p = &s->plane[plane];
1376 uint8_t **ref_hpel = s->ref_pics[ref]->hpel[plane];
1377 int motion_x = block->u.mv[ref][0];
1378 int motion_y = block->u.mv[ref][1];
1379 int mx, my, i, epel, nplanes = 0;
/* scale the vector by the chroma subsampling shifts -- presumably only for
   chroma planes; TODO confirm the guard */
1382 motion_x >>= s->chroma_x_shift;
1383 motion_y >>= s->chroma_y_shift;
/* split into fractional part (low mv_precision bits) and integer part */
1386 mx = motion_x & ~(-1U << s->mv_precision);
1387 my = motion_y & ~(-1U << s->mv_precision);
1388 motion_x >>= s->mv_precision;
1389 motion_y >>= s->mv_precision;
1390 /* normalize subpel coordinates to epel */
1391 /* TODO: template this function? */
1392 mx <<= 3 - s->mv_precision;
1393 my <<= 3 - s->mv_precision;
/* single-plane case: the fraction selects one of the four hpel planes
   (index (my>>1) + (mx>>2)) */
1402 src[0] = ref_hpel[(my>>1)+(mx>>2)] + y*p->stride + x;
/* multi-plane case: start from all four hpel planes */
1406 for (i = 0; i < 4; i++)
1407 src[i] = ref_hpel[i] + y*p->stride + x;
1409 /* if we're interpolating in the right/bottom halves, adjust the planes as needed
1410 we increment x/y because the edge changes for half of the pixels */
1417 src[0] += p->stride;
1418 src[1] += p->stride;
1426 /* check if we really only need 2 planes since either mx or my is
1427 a hpel position. (epel weights of 0 handle this there) */
1429 /* mx == 0: average [0] and [2]
1430 mx == 4: average [1] and [3] */
1431 src[!mx] = src[2 + !!mx];
1433 } else if (!(my&3)) {
1434 src[0] = src[(my>>1) ];
1435 src[1] = src[(my>>1)+1];
1439 /* adjust the ordering if needed so the weights work */
1441 FFSWAP(const uint8_t *, src[0], src[1]);
1442 FFSWAP(const uint8_t *, src[2], src[3]);
1445 FFSWAP(const uint8_t *, src[0], src[2]);
1446 FFSWAP(const uint8_t *, src[1], src[3]);
/* the fifth slot carries the weight set, not pixel data */
1448 src[4] = epel_weights[my&3][mx&3];
/* block reaches past the padded plane: copy each source through
   ff_emulated_edge_mc() into its own edge_emu_buffer */
1452 /* fixme: v/h _edge_pos */
1453 if (x + p->xblen > p->width +EDGE_WIDTH/2 ||
1454 y + p->yblen > p->height+EDGE_WIDTH/2 ||
1456 for (i = 0; i < nplanes; i++) {
1457 ff_emulated_edge_mc(s->edge_emu_buffer[i], src[i],
1458 p->stride, p->stride,
1459 p->xblen, p->yblen, x, y,
1460 p->width+EDGE_WIDTH/2, p->height+EDGE_WIDTH/2);
1461 src[i] = s->edge_emu_buffer[i];
1464 return (nplanes>>1) + epel;
/* Accumulates a constant (DC) prediction into dst: every sample gets
   dc * obmc_weight added. Two samples are handled per inner iteration, so an
   odd xblen would touch one extra sample -- assumes xblen is even; TODO
   confirm. The weight table advances by MAX_BLOCKSIZE per row; dst presumably
   advances by stride. */
1467 static void add_dc(uint16_t *dst, int dc, int stride,
1468 uint8_t *obmc_weight, int xblen, int yblen)
1473 for (y = 0; y < yblen; y++) {
1474 for (x = 0; x < xblen; x += 2) {
1475 dst[x ] += dc * obmc_weight[x ];
1476 dst[x+1] += dc * obmc_weight[x+1];
1479 obmc_weight += MAX_BLOCKSIZE;
/* Motion-compensates one block into mctmp, weighted by obmc_weight. The low
   two bits of block->ref choose the prediction: presumably 0 = DC only,
   1 or 2 = one reference, 3 = both references -- TODO confirm the case
   labels. dstx / dsty are the block's position in the plane. */
1483 static void block_mc(DiracContext *s, DiracBlock *block,
1484 uint16_t *mctmp, uint8_t *obmc_weight,
1485 int plane, int dstx, int dsty)
1487 Plane *p = &s->plane[plane];
/* four pixel sources plus the weight slot filled by mc_subpel() */
1488 const uint8_t *src[5];
1491 switch (block->ref&3) {
/* DC prediction: added straight into mctmp */
1493 add_dc(mctmp, block->u.dc[plane], p->stride, obmc_weight, p->xblen, p->yblen);
/* single reference: (ref & 3) - 1 is the reference index */
1497 idx = mc_subpel(s, block, src, dstx, dsty, (block->ref&3)-1, plane);
1498 s->put_pixels_tab[idx](s->mcscratch, src, p->stride, p->yblen);
/* weighting with the sum of both weights -- presumably only when
   s->weight_func is set */
1500 s->weight_func(s->mcscratch, p->stride, s->weight_log2denom,
1501 s->weight[0] + s->weight[1], p->yblen);
/* two references: fetch reference 0, then combine with reference 1 */
1504 idx = mc_subpel(s, block, src, dstx, dsty, 0, plane);
1505 s->put_pixels_tab[idx](s->mcscratch, src, p->stride, p->yblen);
1506 idx = mc_subpel(s, block, src, dstx, dsty, 1, plane);
1507 if (s->biweight_func) {
1508 /* fixme: +32 is a quick hack */
/* reference 1 goes 32 bytes into the same scratch buffer, then both are
   blended with their individual weights */
1509 s->put_pixels_tab[idx](s->mcscratch + 32, src, p->stride, p->yblen);
1510 s->biweight_func(s->mcscratch, s->mcscratch+32, p->stride, s->weight_log2denom,
1511 s->weight[0], s->weight[1], p->yblen);
/* unweighted case: the avg_ function combines reference 1 with scratch */
1513 s->avg_pixels_tab[idx](s->mcscratch, src, p->stride, p->yblen);
/* accumulate the weighted prediction from scratch into mctmp */
1516 s->add_obmc(mctmp, s->mcscratch, p->stride, obmc_weight, p->yblen);
/* Motion-compensates one row of blocks into mctmp. The first block uses
   weight table 0 and starts xoffset pixels left of the plane, blocks
   1 .. blwidth-2 use table 1, and the block after the loop (index
   blwidth-1) uses table 2. The per-block advance of mctmp and dstx is
   presumably by p->xbsep -- TODO confirm. */
1519 static void mc_row(DiracContext *s, DiracBlock *block, uint16_t *mctmp, int plane, int dsty)
1521 Plane *p = &s->plane[plane];
/* position of the second block: one block separation minus the overlap */
1522 int x, dstx = p->xbsep - p->xoffset;
1524 block_mc(s, block, mctmp, s->obmc_weight[0], plane, -p->xoffset, dsty);
1527 for (x = 1; x < s->blwidth-1; x++) {
1528 block_mc(s, block+x, mctmp, s->obmc_weight[1], plane, dstx, dsty);
1532 block_mc(s, block+x, mctmp, s->obmc_weight[2], plane, dstx, dsty);
/* Copies the DSP function tables for table index idx into the context. idx
   is presumably derived from the block size -- TODO confirm. Weighted
   prediction functions are installed only when the weights differ from the
   default; otherwise both pointers are NULL. */
1535 static void select_dsp_funcs(DiracContext *s, int width, int height, int xblen, int yblen)
1543 memcpy(s->put_pixels_tab, s->diracdsp.put_dirac_pixels_tab[idx], sizeof(s->put_pixels_tab));
1544 memcpy(s->avg_pixels_tab, s->diracdsp.avg_dirac_pixels_tab[idx], sizeof(s->avg_pixels_tab));
1545 s->add_obmc = s->diracdsp.add_dirac_obmc[idx];
/* anything other than denominator <= 1 with both weights 1 needs explicit
   weighting */
1546 if (s->weight_log2denom > 1 || s->weight[0] != 1 || s->weight[1] != 1) {
1547 s->weight_func = s->diracdsp.weight_dirac_pixels_tab[idx];
1548 s->biweight_func = s->diracdsp.biweight_dirac_pixels_tab[idx];
/* default weights: block_mc() tests biweight_func for NULL */
1550 s->weight_func = NULL;
1551 s->biweight_func = NULL;
/* Prepares reference frame ref for motion compensation of one plane: pads
   the full-pel plane and, when sub-pel vectors are in use, allocates and
   fills the three half-pel planes once per frame (ref->interpolated[plane]
   records that this was done). */
1555 static void interpolate_refplane(DiracContext *s, DiracFrame *ref, int plane, int width, int height)
1557 /* chroma allocates an edge of 8 when subsampled
1558 which for 4:2:2 means an h edge of 16 and v edge of 8
1559 just use 8 for everything for the moment */
1560 int i, edge = EDGE_WIDTH/2;
/* hpel plane 0 is the decoded frame itself */
1562 ref->hpel[plane][0] = ref->avframe->data[plane];
1563 s->mpvencdsp.draw_edges(ref->hpel[plane][0], ref->avframe->linesize[plane], width, height, edge, edge, EDGE_TOP | EDGE_BOTTOM); /* EDGE_TOP | EDGE_BOTTOM values just copied to make it build, this needs to be ensured */
1565 /* no need for hpel if we only have fpel vectors */
1566 if (!s->mv_precision)
/* planes 1..3 are allocated on first use: plane height plus an edge above
   and below, one linesize per row, plus 32 spare bytes */
1569 for (i = 1; i < 4; i++) {
1570 if (!ref->hpel_base[plane][i])
/* NOTE(review): the av_malloc() result is used below without a visible
   NULL check -- confirm allocation failure is handled. */
1571 ref->hpel_base[plane][i] = av_malloc((height+2*edge) * ref->avframe->linesize[plane] + 32);
1572 /* we need to be 16-byte aligned even for chroma */
1573 ref->hpel[plane][i] = ref->hpel_base[plane][i] + edge*ref->avframe->linesize[plane] + 16;
/* filter once per frame, then pad each interpolated plane like plane 0 */
1576 if (!ref->interpolated[plane]) {
1577 s->diracdsp.dirac_hpel_filter(ref->hpel[plane][1], ref->hpel[plane][2],
1578 ref->hpel[plane][3], ref->hpel[plane][0],
1579 ref->avframe->linesize[plane], width, height);
1580 s->mpvencdsp.draw_edges(ref->hpel[plane][1], ref->avframe->linesize[plane], width, height, edge, edge, EDGE_TOP | EDGE_BOTTOM);
1581 s->mpvencdsp.draw_edges(ref->hpel[plane][2], ref->avframe->linesize[plane], width, height, edge, edge, EDGE_TOP | EDGE_BOTTOM);
1582 s->mpvencdsp.draw_edges(ref->hpel[plane][3], ref->avframe->linesize[plane], width, height, edge, edge, EDGE_TOP | EDGE_BOTTOM);
1584 ref->interpolated[plane] = 1;
1588 * Dirac Specification ->
1589 * 13.0 Transform data syntax. transform_data()
/* Reconstructs the three planes of s->current_picture: decodes the wavelet
   coefficients, runs the inverse transform slice by slice and, for inter
   pictures, adds the overlapped motion-compensated prediction. */
1591 static int dirac_decode_frame_internal(DiracContext *s)
1594 int y, i, comp, dsty;
1597 /* [DIRAC_STD] 13.5.1 low_delay_transform_data() */
/* clear every coefficient buffer first -- presumably only on the low-delay
   path; TODO confirm the guard */
1598 for (comp = 0; comp < 3; comp++) {
1599 Plane *p = &s->plane[comp];
1600 memset(p->idwt_buf, 0, p->idwt_stride * p->idwt_height * sizeof(IDWTELEM));
/* per-plane reconstruction */
1606 for (comp = 0; comp < 3; comp++) {
1607 Plane *p = &s->plane[comp];
1608 uint8_t *frame = s->current_picture->avframe->data[comp];
1610 /* FIXME: small resolutions */
/* carve four edge-emulation buffers out of one base allocation, spaced by
   the plane width rounded up to 16 */
1611 for (i = 0; i < 4; i++)
1612 s->edge_emu_buffer[i] = s->edge_emu_buffer_base + i*FFALIGN(p->width, 16);
/* core syntax with residual data: clear, then decode this component */
1614 if (!s->zero_res && !s->low_delay)
1616 memset(p->idwt_buf, 0, p->idwt_stride * p->idwt_height * sizeof(IDWTELEM));
1617 decode_component(s, comp); /* [DIRAC_STD] 13.4.1 core_transform_data() */
/* set up the sliced inverse transform; a non-zero result presumably
   aborts the frame */
1619 if (ff_spatial_idwt_init2(&d, p->idwt_buf, p->idwt_width, p->idwt_height, p->idwt_stride,
1620 s->wavelet_idx+2, s->wavelet_depth, p->idwt_tmp))
1623 if (!s->num_refs) { /* intra */
/* intra: transform and store 16 rows at a time */
1624 for (y = 0; y < p->height; y += 16) {
1625 ff_spatial_idwt_slice2(&d, y+16); /* decode */
1626 s->diracdsp.put_signed_rect_clamped(frame + y*p->stride, p->stride,
1627 p->idwt_buf + y*p->idwt_stride, p->idwt_stride, p->width, 16);
1629 } else { /* inter */
/* number of samples between the starts of two block rows */
1630 int rowheight = p->ybsep*p->stride;
1632 select_dsp_funcs(s, p->width, p->height, p->xblen, p->yblen);
1634 for (i = 0; i < s->num_refs; i++)
1635 interpolate_refplane(s, s->ref_pics[i], comp, p->width, p->height);
/* memset counts bytes while mctmp holds uint16_t: this clears the first
   2*yoffset rows of the accumulator */
1637 memset(s->mctmp, 0, 4*p->yoffset*p->stride);
/* one iteration per block row; dsty is that row's top line and is
   presumably negative for the first row because of the overlap */
1640 for (y = 0; y < s->blheight; y++) {
1642 start = FFMAX(dsty, 0);
1643 uint16_t *mctmp = s->mctmp + y*rowheight;
1644 DiracBlock *blocks = s->blmotion + y*s->blwidth;
1646 init_obmc_weights(s, p, y);
/* h = number of picture rows finished by this block row; the last row, or
   one reaching the bottom edge, takes all remaining rows */
1648 if (y == s->blheight-1 || start+p->ybsep > p->height)
1649 h = p->height - start;
1651 h = p->ybsep - (start - dsty);
/* clear the accumulator rows this block row touches for the first time
   (2*rowheight bytes = rowheight samples), then accumulate */
1655 memset(mctmp+2*p->yoffset*p->stride, 0, 2*rowheight);
1656 mc_row(s, blocks, mctmp, comp, dsty);
/* skip the rows above the picture and the xoffset columns left of it,
   then add prediction and residual into the frame */
1658 mctmp += (start - dsty)*p->stride + p->xoffset;
1659 ff_spatial_idwt_slice2(&d, start + h); /* decode */
1660 s->diracdsp.add_rect_clamped(frame + start*p->stride, mctmp, p->stride,
1661 p->idwt_buf + start*p->idwt_stride, p->idwt_stride, p->width, h);
/* Obtains a frame buffer that has room for an edge around the picture: the
   buffer is requested with enlarged dimensions, every data pointer is moved
   past the top edge, and the nominal dimensions are restored. */
1672 static int get_buffer_with_edge(AVCodecContext *avctx, AVFrame *f, int flags)
1675 int chroma_x_shift, chroma_y_shift;
1676 avcodec_get_chroma_sub_sample(avctx->pix_fmt, &chroma_x_shift, &chroma_y_shift);
/* request EDGE_WIDTH extra on every side, plus two extra rows */
1678 f->width = avctx->width + 2 * EDGE_WIDTH;
1679 f->height = avctx->height + 2 * EDGE_WIDTH + 2;
1680 ret = ff_get_buffer(avctx, f, flags);
/* skip the top edge rows (scaled by the vertical chroma shift for planes 1
   and 2) plus a fixed 32 bytes; the horizontal skip is the same for every
   plane and chroma_x_shift is not used here */
1684 for (i = 0; f->data[i]; i++) {
1685 int offset = (EDGE_WIDTH >> (i && i<3 ? chroma_y_shift : 0)) *
1686 f->linesize[i] + 32;
1687 f->data[i] += offset;
/* report the real picture size to the rest of the decoder */
1689 f->width = avctx->width;
1690 f->height = avctx->height;
1696 * Dirac Specification ->
1697 * 11.1.1 Picture Header. picture_header()
/* Parses the picture header from s->gb: picture number, the reference
   pictures (resolved to entries of s->ref_frames), the optional retired
   picture, and then the prediction, block motion and wavelet parameters. */
1699 static int dirac_decode_picture_header(DiracContext *s)
1702 int i, j, refnum, refdist;
1703 GetBitContext *gb = &s->gb;
1705 /* [DIRAC_STD] 11.1.1 Picture Header. picture_header() PICTURE_NUM */
1706 picnum = s->current_picture->avframe->display_picture_number = get_bits_long(gb, 32);
1709 av_log(s->avctx,AV_LOG_DEBUG,"PICTURE_NUM: %d\n",picnum);
1711 /* if this is the first keyframe after a sequence header, start our
1712 reordering from here */
1713 if (s->frame_number < 0)
1714 s->frame_number = picnum;
1716 s->ref_pics[0] = s->ref_pics[1] = NULL;
/* each reference is coded as a signed offset from this picture's number */
1717 for (i = 0; i < s->num_refs; i++) {
1718 refnum = picnum + dirac_get_se_golomb(gb);
1721 /* find the closest reference to the one we want */
1722 /* Jordi: this is needed if the referenced picture hasn't yet arrived */
/* the scan stops early once an exact match makes refdist 0; refdist is
   presumably initialised to a large value before this loop */
1723 for (j = 0; j < MAX_REFERENCE_FRAMES && refdist; j++)
1724 if (s->ref_frames[j]
1725 && FFABS(s->ref_frames[j]->avframe->display_picture_number - refnum) < refdist) {
1726 s->ref_pics[i] = s->ref_frames[j];
1727 refdist = FFABS(s->ref_frames[j]->avframe->display_picture_number - refnum);
1730 if (!s->ref_pics[i] || refdist)
1731 av_log(s->avctx, AV_LOG_DEBUG, "Reference not found\n");
1733 /* if there were no references at all, allocate one */
1734 if (!s->ref_pics[i])
1735 for (j = 0; j < MAX_FRAMES; j++)
1736 if (!s->all_frames[j].avframe->data[0]) {
1737 s->ref_pics[i] = &s->all_frames[j];
/* NOTE(review): the return value of get_buffer_with_edge() is ignored here;
   on failure the substitute reference has no buffer -- confirm handling. */
1738 get_buffer_with_edge(s->avctx, s->ref_pics[i]->avframe, AV_GET_BUFFER_FLAG_REF);
1743 /* retire the reference frames that are not used anymore */
/* only reference pictures carry a retire field; an offset of 0
   (retire == picnum) means nothing is retired */
1744 if (s->current_picture->avframe->reference) {
1745 retire = picnum + dirac_get_se_golomb(gb);
1746 if (retire != picnum) {
1747 DiracFrame *retire_pic = remove_frame(s->ref_frames, retire);
/* keep only the delayed-output bit so the frame survives until shown */
1750 retire_pic->avframe->reference &= DELAYED_PIC_REF;
1752 av_log(s->avctx, AV_LOG_DEBUG, "Frame to retire not found\n");
1755 /* if reference array is full, remove the oldest as per the spec */
1756 while (add_frame(s->ref_frames, MAX_REFERENCE_FRAMES, s->current_picture)) {
1757 av_log(s->avctx, AV_LOG_ERROR, "Reference frame overflow\n");
1758 remove_frame(s->ref_frames, s->ref_frames[0]->avframe->display_picture_number)->avframe->reference &= DELAYED_PIC_REF;
/* remaining header parts; a non-zero result presumably fails the header */
1763 if (dirac_unpack_prediction_parameters(s)) /* [DIRAC_STD] 11.2 Picture Prediction Data. picture_prediction() */
1765 if (dirac_unpack_block_motion_data(s)) /* [DIRAC_STD] 12. Block motion data syntax */
1768 if (dirac_unpack_idwt_params(s)) /* [DIRAC_STD] 11.3 Wavelet transform data */
/* Outputs the delayed frame with the lowest picture number: the frame is
   removed from the NULL-terminated s->delay_frames list, its delayed-output
   flag is toggled and a new reference to it is stored in picture. */
1775 static int get_delayed_pic(DiracContext *s, AVFrame *picture, int *got_frame)
1777 DiracFrame *out = s->delay_frames[0];
1781 /* find frame with lowest picture number */
1782 for (i = 1; s->delay_frames[i]; i++)
1783 if (s->delay_frames[i]->avframe->display_picture_number < out->avframe->display_picture_number) {
1784 out = s->delay_frames[i];
/* close the gap: shift the following entries, terminator included, down
   by one */
1788 for (i = out_idx; s->delay_frames[i]; i++)
1789 s->delay_frames[i] = s->delay_frames[i+1];
/* toggle the delayed-output bit (set while the frame was queued) */
1792 out->avframe->reference ^= DELAYED_PIC_REF;
1794 if((ret = av_frame_ref(picture, out->avframe)) < 0)
1802 * Dirac Specification ->
1803 * 9.6 Parse Info Header Syntax. parse_info()
1804 * 4 byte start code + byte parse code + 4 byte size + 4 byte previous size
1806 #define DATA_UNIT_HEADER_SIZE 13
1808 /* [DIRAC_STD] dirac_decode_data_unit makes reference to the while defined in 9.3
1809 inside the function parse_sequence() */
/* Decodes one data unit starting at buf (size bytes, parse info header
   included). The parse code in byte 4 selects the handling: sequence header,
   end of sequence, auxiliary data or picture. */
1810 static int dirac_decode_data_unit(AVCodecContext *avctx, const uint8_t *buf, int size)
1812 DiracContext *s = avctx->priv_data;
1813 DiracFrame *pic = NULL;
/* NOTE(review): buf[4] is read here, before the size check below -- this
   relies on the caller guaranteeing a full header at buf. */
1814 int ret, i, parse_code = buf[4];
1817 if (size < DATA_UNIT_HEADER_SIZE)
/* the payload starts right after the 13-byte parse info header */
1820 init_get_bits(&s->gb, &buf[13], 8*(size - DATA_UNIT_HEADER_SIZE));
1822 if (parse_code == pc_seq_header) {
/* once a sequence header has been seen, later ones are presumably skipped
   -- TODO confirm */
1823 if (s->seen_sequence_header)
1826 /* [DIRAC_STD] 10. Sequence header */
1827 if (avpriv_dirac_parse_sequence_header(avctx, &s->gb, &s->source))
1830 avcodec_get_chroma_sub_sample(avctx->pix_fmt, &s->chroma_x_shift, &s->chroma_y_shift);
1832 if (alloc_sequence_buffers(s))
1835 s->seen_sequence_header = 1;
1836 } else if (parse_code == pc_eos) { /* [DIRAC_STD] End of Sequence */
1837 free_sequence_buffers(s);
1838 s->seen_sequence_header = 0;
1839 } else if (parse_code == pc_aux_data) {
/* NOTE(review): buf[13] and the string at buf+14 are read without checking
   that size covers them, and sscanf() needs a terminator within readable
   memory -- presumably the packet padding provides one; confirm. */
1840 if (buf[13] == 1) { /* encoder implementation/version */
1842 /* versions older than 1.0.8 don't store quant delta for
1843 subbands with only one codeblock */
1844 if (sscanf(buf+14, "Schroedinger %d.%d.%d", ver, ver+1, ver+2) == 3)
1845 if (ver[0] == 1 && ver[1] == 0 && ver[2] <= 7)
1846 s->old_delta_quant = 1;
1848 } else if (parse_code & 0x8) { /* picture data unit */
1849 if (!s->seen_sequence_header) {
1850 av_log(avctx, AV_LOG_DEBUG, "Dropping frame without sequence header\n");
1854 /* find an unused frame */
/* a frame slot is unused when its avframe has no data[0] */
1855 for (i = 0; i < MAX_FRAMES; i++)
1856 if (s->all_frames[i].avframe->data[0] == NULL)
1857 pic = &s->all_frames[i];
1859 av_log(avctx, AV_LOG_ERROR, "framelist full\n");
1863 av_frame_unref(pic->avframe);
1865 /* [DIRAC_STD] Defined in 9.6.1 ... */
/* the low two bits give the reference count; 3 is rejected with the error
   below, so s->num_refs presumably ends up 0, 1 or 2 */
1866 tmp = parse_code & 0x03; /* [DIRAC_STD] num_refs() */
1868 av_log(avctx, AV_LOG_ERROR, "num_refs of 3\n");
1872 s->is_arith = (parse_code & 0x48) == 0x08; /* [DIRAC_STD] using_ac() */
1873 s->low_delay = (parse_code & 0x88) == 0x88; /* [DIRAC_STD] is_low_delay() */
1874 pic->avframe->reference = (parse_code & 0x0C) == 0x0C; /* [DIRAC_STD] is_reference() */
1875 pic->avframe->key_frame = s->num_refs == 0; /* [DIRAC_STD] is_intra() */
1876 pic->avframe->pict_type = s->num_refs + 1; /* Definition of AVPictureType in avutil.h */
/* reference pictures request a buffer with AV_GET_BUFFER_FLAG_REF */
1878 if ((ret = get_buffer_with_edge(avctx, pic->avframe, (parse_code & 0x0C) == 0x0C ? AV_GET_BUFFER_FLAG_REF : 0)) < 0)
1880 s->current_picture = pic;
1881 s->plane[0].stride = pic->avframe->linesize[0];
1882 s->plane[1].stride = pic->avframe->linesize[1];
1883 s->plane[2].stride = pic->avframe->linesize[2];
/* size the work buffers for the largest absolute stride of the three
   planes */
1885 if (alloc_buffers(s, FFMAX3(FFABS(s->plane[0].stride), FFABS(s->plane[1].stride), FFABS(s->plane[2].stride))) < 0)
1886 return AVERROR(ENOMEM);
1888 /* [DIRAC_STD] 11.1 Picture parse. picture_parse() */
1889 if (dirac_decode_picture_header(s))
1892 /* [DIRAC_STD] 13.0 Transform data syntax. transform_data() */
1893 if (dirac_decode_frame_internal(s))
/* Decoder entry point: releases frames no longer referenced, decodes every
   data unit found in the packet and then outputs a picture in display order,
   holding back pictures that arrive ahead of s->frame_number. */
1899 static int dirac_decode_frame(AVCodecContext *avctx, void *data, int *got_frame, AVPacket *pkt)
1901 DiracContext *s = avctx->priv_data;
1902 AVFrame *picture = data;
1903 uint8_t *buf = pkt->data;
1904 int buf_size = pkt->size;
1905 int i, data_unit_size, buf_idx = 0;
1908 /* release unused frames */
/* a frame with data but a zero reference field is neither a reference nor
   waiting for output */
1909 for (i = 0; i < MAX_FRAMES; i++)
1910 if (s->all_frames[i].avframe->data[0] && !s->all_frames[i].avframe->reference) {
1911 av_frame_unref(s->all_frames[i].avframe);
1912 memset(s->all_frames[i].interpolated, 0, sizeof(s->all_frames[i].interpolated));
1915 s->current_picture = NULL;
1918 /* end of stream, so flush delayed pics */
1920 return get_delayed_pic(s, (AVFrame *)data, got_frame);
1923 /*[DIRAC_STD] Here starts the code from parse_info() defined in 9.6
1924 [DIRAC_STD] PARSE_INFO_PREFIX = "BBCD" as defined in ISO/IEC 646
1925 BBCD start code search */
/* the bound keeps a whole parse info header readable at buf_idx */
1926 for (; buf_idx + DATA_UNIT_HEADER_SIZE < buf_size; buf_idx++) {
1927 if (buf[buf_idx ] == 'B' && buf[buf_idx+1] == 'B' &&
1928 buf[buf_idx+2] == 'C' && buf[buf_idx+3] == 'D')
1931 /* BBCD found or end of data */
1932 if (buf_idx + DATA_UNIT_HEADER_SIZE >= buf_size)
/* 32-bit big-endian size field at offset 5 of the data unit.
   NOTE(review): it is stored in a signed int and added to buf_idx; values
   above INT_MAX turn negative or overflow the sum, which could let an
   oversized unit pass the check below -- confirm. */
1935 data_unit_size = AV_RB32(buf+buf_idx+5);
1936 if (buf_idx + data_unit_size > buf_size || !data_unit_size) {
1937 if(buf_idx + data_unit_size > buf_size)
1938 av_log(s->avctx, AV_LOG_ERROR,
1939 "Data unit with size %d is larger than input buffer, discarding\n",
1944 /* [DIRAC_STD] dirac_decode_data_unit makes reference to the while defined in 9.3 inside the function parse_sequence() */
1945 if (dirac_decode_data_unit(avctx, buf+buf_idx, data_unit_size))
1947 av_log(s->avctx, AV_LOG_ERROR,"Error in dirac_decode_data_unit\n");
1950 buf_idx += data_unit_size;
/* no picture was decoded from this packet */
1953 if (!s->current_picture)
/* picture is ahead of display order: queue it and try to output the frame
   that is due now */
1956 if (s->current_picture->avframe->display_picture_number > s->frame_number) {
1957 DiracFrame *delayed_frame = remove_frame(s->delay_frames, s->frame_number);
1959 s->current_picture->avframe->reference |= DELAYED_PIC_REF;
/* add_frame() returning non-zero means the delay queue is full */
1961 if (add_frame(s->delay_frames, MAX_DELAY, s->current_picture)) {
1962 int min_num = s->delay_frames[0]->avframe->display_picture_number;
1963 /* Too many delayed frames, so we display the frame with the lowest pts */
1964 av_log(avctx, AV_LOG_ERROR, "Delay frame overflow\n");
1965 delayed_frame = s->delay_frames[0];
1967 for (i = 1; s->delay_frames[i]; i++)
1968 if (s->delay_frames[i]->avframe->display_picture_number < min_num)
1969 min_num = s->delay_frames[i]->avframe->display_picture_number;
/* evict the earliest queued frame, then retry queuing the current one */
1971 delayed_frame = remove_frame(s->delay_frames, min_num);
1972 add_frame(s->delay_frames, MAX_DELAY, s->current_picture);
1975 if (delayed_frame) {
1976 delayed_frame->avframe->reference ^= DELAYED_PIC_REF;
1977 if((ret=av_frame_ref(data, delayed_frame->avframe)) < 0)
1981 } else if (s->current_picture->avframe->display_picture_number == s->frame_number) {
1982 /* The right frame at the right time :-) */
1983 if((ret=av_frame_ref(data, s->current_picture->avframe)) < 0)
/* the next expected picture follows the one just output */
1989 s->frame_number = picture->display_picture_number + 1;
/* Codec registration entry for the Dirac decoder. CODEC_CAP_DELAY is set,
   which matches dirac_decode_frame() holding back pictures that arrive out of
   display order. */
1994 AVCodec ff_dirac_decoder = {
1996 .long_name = NULL_IF_CONFIG_SMALL("BBC Dirac VC-2"),
1997 .type = AVMEDIA_TYPE_VIDEO,
1998 .id = AV_CODEC_ID_DIRAC,
1999 .priv_data_size = sizeof(DiracContext),
2000 .init = dirac_decode_init,
2001 .close = dirac_decode_end,
2002 .decode = dirac_decode_frame,
2003 .capabilities = CODEC_CAP_DELAY,
2004 .flush = dirac_decode_flush,