2 * Copyright (C) 2007 Marco Gerards <marco@gnu.org>
3 * Copyright (C) 2009 David Conrad
4 * Copyright (C) 2011 Jordi Ortiz
6 * This file is part of FFmpeg.
8 * FFmpeg is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
13 * FFmpeg is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with FFmpeg; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
26 * @author Marco Gerards <marco@gnu.org>, David Conrad, Jordi Ortiz <nenjordi@gmail.com>
31 #include "bytestream.h"
34 #include "dirac_arith.h"
35 #include "mpeg12data.h"
36 #include "libavcodec/mpegvideo.h"
37 #include "mpegvideoencdsp.h"
38 #include "dirac_dwt.h"
44 * The spec limits the number of wavelet decompositions to 4 for both
45 * level 1 (VC-2) and 128 (long-gop default).
46 * 5 decompositions is the maximum before >16-bit buffers are needed.
47 * Schroedinger allows this for DD 9,7 and 13,7 wavelets only, limiting
48 * the others to 4 decompositions (or 3 for the fidelity filter).
50 * We use this instead of MAX_DECOMPOSITIONS to save some memory.
/* Compile-time limits and flag values shared by the whole decoder.
 * NOTE(review): the bare leading numbers and the orphaned comment text on
 * some lines look like extraction residue (comment delimiters are absent);
 * confirm against the upstream file before building. */
/* Number of wavelet levels supported; sizes Plane.band[], the codeblock[]
 * array and the low-delay quant[] matrix in DiracContext. */
52 #define MAX_DWT_LEVELS 5
55 * The spec limits this to 3 for frame coding, but in practice can be as high as 6
/* Capacity of the ref_frames[] list (declared with one extra slot). */
57 #define MAX_REFERENCE_FRAMES 8
58 #define MAX_DELAY 5 /* limit for main profile for frame coding (TODO: field coding) */
/* Size of the all_frames[] pool in DiracContext. */
59 #define MAX_FRAMES (MAX_REFERENCE_FRAMES + MAX_DELAY + 1)
60 #define MAX_QUANT 68 /* max quant for VC-2 */
61 #define MAX_BLOCKSIZE 32 /* maximum xblen/yblen we support */
64 * DiracBlock->ref flags, if set then the block does MC from the given ref
/* Bit flags stored in DiracBlock.ref. REF1 and REF2 occupy bits 0 and 1,
 * which is why pred_block_dc tests (ref & 3) to detect a block that uses
 * any reference. */
66 #define DIRAC_REF_MASK_REF1 1
67 #define DIRAC_REF_MASK_REF2 2
68 #define DIRAC_REF_MASK_GLOBAL 4
71 * Value of Picture.reference when Picture is not a reference picture, but
72 * is held for delayed output.
74 #define DELAYED_PIC_REF 4
/**
 * Round size up to the next multiple of (1 << depth).
 *
 * Both arguments are fully parenthesised so that compound expressions
 * (for example a conditional) expand correctly. Each argument is evaluated
 * more than once, so do not pass expressions with side effects.
 */
#define CALC_PADDING(size, depth)                           \
    ((((size) + (1 << (depth)) - 1) >> (depth)) << (depth))

/** Integer division of a by b with the quotient rounded up (a >= 0, b > 0). */
#define DIVRNDUP(a, b) (((a) + (b) - 1) / (b))
/* Surviving members of two records whose enclosing declarations are not
 * visible in this excerpt. interpolated and hpel_base are accessed on the
 * DiracFrame entries of all_frames[] (see free_sequence_buffers); u is
 * accessed on DiracBlock as u.dc[] and u.mv[][] by the prediction helpers. */
83 int interpolated[3]; /* 1 if hpel[] is valid */
/* First index has three entries (presumably one per plane -- confirm);
 * free_sequence_buffers releases only entries 1..3 of the second index. */
85 uint8_t *hpel_base[3][4];
92 } u; /* anonymous unions aren't in C99 :( */
/* One wavelet subband. Only a few members survive in this excerpt; other
 * code in this file also reads ibuf, stride, width, height, level,
 * orientation, quant and length from it. */
96 typedef struct SubBand {
/* Set by init_planes to band[level-1][orientation] of the same plane. */
104 struct SubBand *parent;
/* Set by decode_component to the current byte position inside the bit
 * reader buffer, i.e. the start of the coded data of this band. */
108 const uint8_t *coeff_data;
/* Per-component state: IDWT buffers, block geometry and the subband table.
 * Several members (width, height, idwt_*, xblen, ybsep, ...) are used by
 * other code here but their declarations are absent from this excerpt. */
111 typedef struct Plane {
/* Allocation base; alloc_sequence_buffers points idwt_buf top_padding rows
 * past this address and free_sequence_buffers releases it. */
120 IDWTELEM *idwt_buf_base;
126 /* block separation (block n+1 starts after this many pixels in block n) */
129 /* amount of overspill on each edge (half of the overlap between blocks) */
/* Subbands indexed as band[level][orientation]. */
133 SubBand band[MAX_DWT_LEVELS][4];
/* Decoder instance state, stored in AVCodecContext.priv_data (every entry
 * point in this file fetches it from there).
 * NOTE(review): a number of member declarations and the openers of the
 * nested structs (codeblock, lowdelay, globalmc) are absent from this
 * excerpt; the leading numbers on each line look like extraction residue. */
136 typedef struct DiracContext {
137 AVCodecContext *avctx;
138 MpegvideoEncDSPContext mpvencdsp;
139 VideoDSPContext vdsp;
140 DiracDSPContext diracdsp;
142 dirac_source_params source;
143 int seen_sequence_header;
144 int frame_number; /* number of the next frame to display */
149 int zero_res; /* zero residue flag */
150 int is_arith; /* whether coeffs use arith or golomb coding */
151 int low_delay; /* use the low delay syntax */
152 int globalmc_flag; /* use global motion compensation */
153 int num_refs; /* number of reference pictures */
155 /* wavelet decoding */
156 unsigned wavelet_depth; /* depth of the IDWT */
157 unsigned wavelet_idx;
160 * schroedinger older than 1.0.8 doesn't store
161 * quant delta if only one codebook exists in a band
163 unsigned old_delta_quant;
164 unsigned codeblock_mode;
/* Codeblock counts (width, height) per entry; decode_subband_internal
 * indexes this with level, plus one when the band is not the LL band. */
169 } codeblock[MAX_DWT_LEVELS+1];
/* The next four members are accessed as s->lowdelay.* elsewhere here. */
172 unsigned num_x; /* number of horizontal slices */
173 unsigned num_y; /* number of vertical slices */
174 AVRational bytes; /* average bytes per slice */
175 uint8_t quant[MAX_DWT_LEVELS][4]; /* [DIRAC_STD] E.1 */
/* Global motion parameters, accessed as s->globalmc[ref].* elsewhere. */
179 int pan_tilt[2]; /* pan/tilt vector */
180 int zrs[2][2]; /* zoom/rotate/shear matrix */
181 int perspective[2]; /* perspective vector */
183 unsigned perspective_exp;
186 /* motion compensation */
/* NOTE(review): the spec tag on mv_precision repeats the one on
 * weight_log2denom; dirac_unpack_prediction_parameters reads mv_precision
 * under 11.2.5 motion_vector_precision, so the tag may be a copy-paste. */
187 uint8_t mv_precision; /* [DIRAC_STD] REFS_WT_PRECISION */
188 int16_t weight[2]; /* [DIRAC_STD] REF1_WT and REF2_WT */
189 unsigned weight_log2denom; /* [DIRAC_STD] REFS_WT_PRECISION */
191 int blwidth; /* number of blocks (horizontally) */
192 int blheight; /* number of blocks (vertically) */
193 int sbwidth; /* number of superblocks (horizontally) */
194 int sbheight; /* number of superblocks (vertically) */
/* Block motion records; alloc_sequence_buffers reserves 16 per superblock. */
197 DiracBlock *blmotion;
199 uint8_t *edge_emu_buffer[4];
200 uint8_t *edge_emu_buffer_base;
202 uint16_t *mctmp; /* buffer holding the MC data multiplied by OBMC weights */
206 DECLARE_ALIGNED(16, uint8_t, obmc_weight)[3][MAX_BLOCKSIZE*MAX_BLOCKSIZE];
208 void (*put_pixels_tab[4])(uint8_t *dst, const uint8_t *src[5], int stride, int h);
209 void (*avg_pixels_tab[4])(uint8_t *dst, const uint8_t *src[5], int stride, int h);
210 void (*add_obmc)(uint16_t *dst, const uint8_t *src, int stride, const uint8_t *obmc_weight, int yblen);
211 dirac_weight_func weight_func;
212 dirac_biweight_func biweight_func;
214 DiracFrame *current_picture;
215 DiracFrame *ref_pics[2];
/* all_frames owns the frame storage (its avframe members are allocated in
 * dirac_decode_init); ref_frames and delay_frames hold pointers and are
 * cleared wholesale by free_sequence_buffers. Presumably they are managed
 * through add_frame and remove_frame -- confirm against the callers. */
217 DiracFrame *ref_frames[MAX_REFERENCE_FRAMES+1];
218 DiracFrame *delay_frames[MAX_DELAY+1];
219 DiracFrame all_frames[MAX_FRAMES];
223 * Dirac Specification ->
224 * Parse code values. 9.6.1 Table 9.1
/* Parse code values of the Dirac stream. Only the first enumerator is
 * visible in this excerpt; the remaining ones and the closing brace are
 * absent. */
226 enum dirac_parse_code {
227 pc_seq_header = 0x00,
/* Default low-delay quantisation matrices. dirac_unpack_idwt_params reads
 * them as default_qmat[wavelet_idx][level][i] with i in 0..3, and only when
 * wavelet_depth is at most 4. Seven outer rows are visible, matching the
 * wavelet_idx <= 6 check made when the index is parsed.
 * NOTE(review): the closing of the initializer is absent from this excerpt. */
240 static const uint8_t default_qmat[][4][4] = {
241 { { 5, 3, 3, 0}, { 0, 4, 4, 1}, { 0, 5, 5, 2}, { 0, 6, 6, 3} },
242 { { 4, 2, 2, 0}, { 0, 4, 4, 2}, { 0, 5, 5, 3}, { 0, 7, 7, 5} },
243 { { 5, 3, 3, 0}, { 0, 4, 4, 1}, { 0, 5, 5, 2}, { 0, 6, 6, 3} },
244 { { 8, 4, 4, 0}, { 0, 4, 4, 0}, { 0, 4, 4, 0}, { 0, 4, 4, 0} },
245 { { 8, 4, 4, 0}, { 0, 4, 4, 0}, { 0, 4, 4, 0}, { 0, 4, 4, 0} },
246 { { 0, 4, 4, 8}, { 0, 8, 8, 12}, { 0, 13, 13, 17}, { 0, 17, 17, 21} },
247 { { 3, 1, 1, 0}, { 0, 4, 4, 2}, { 0, 6, 6, 5}, { 0, 9, 9, 7} },
/* Quantiser scale factors indexed by quantiser index; codeblock and
 * lowdelay_subband clamp the index to MAX_QUANT before the lookup. The
 * value is used as qfactor in (coeff * qfactor + qoffset + 2) >> 2.
 * NOTE(review): only the first 56 of the MAX_QUANT+1 entries are visible
 * in this excerpt; the tail and the closing brace are absent. */
250 static const int qscale_tab[MAX_QUANT+1] = {
251 4, 5, 6, 7, 8, 10, 11, 13,
252 16, 19, 23, 27, 32, 38, 45, 54,
253 64, 76, 91, 108, 128, 152, 181, 215,
254 256, 304, 362, 431, 512, 609, 724, 861,
255 1024, 1218, 1448, 1722, 2048, 2435, 2896, 3444,
256 4096, 4871, 5793, 6889, 8192, 9742, 11585, 13777,
257 16384, 19484, 23170, 27554, 32768, 38968, 46341, 55109,
/* Dequantisation offsets indexed by quantiser index, used as qoffset by
 * lowdelay_subband and by one of the two branches in codeblock (presumably
 * the intra case, going by the name -- the selecting condition is not
 * visible here).
 * NOTE(review): only the first 56 of the MAX_QUANT+1 entries are visible
 * in this excerpt. */
261 static const int qoffset_intra_tab[MAX_QUANT+1] = {
262 1, 2, 3, 4, 4, 5, 6, 7,
263 8, 10, 12, 14, 16, 19, 23, 27,
264 32, 38, 46, 54, 64, 76, 91, 108,
265 128, 152, 181, 216, 256, 305, 362, 431,
266 512, 609, 724, 861, 1024, 1218, 1448, 1722,
267 2048, 2436, 2897, 3445, 4096, 4871, 5793, 6889,
268 8192, 9742, 11585, 13777, 16384, 19484, 23171, 27555,
/* Dequantisation offsets indexed by quantiser index, used as qoffset by
 * the other branch in codeblock (presumably the inter case, going by the
 * name -- the selecting condition is not visible here).
 * NOTE(review): only the first 56 of the MAX_QUANT+1 entries are visible
 * in this excerpt. */
272 static const int qoffset_inter_tab[MAX_QUANT+1] = {
273 1, 2, 2, 3, 3, 4, 4, 5,
274 6, 7, 9, 10, 12, 14, 17, 20,
275 24, 29, 34, 41, 48, 57, 68, 81,
276 96, 114, 136, 162, 192, 228, 272, 323,
277 384, 457, 543, 646, 768, 913, 1086, 1292,
278 1536, 1827, 2172, 2583, 3072, 3653, 4344, 5166,
279 6144, 7307, 8689, 10333, 12288, 14613, 17378, 20666,
/**
 * Divide by three, rounding to the nearest integer, using a multiply and
 * shift instead of an integer division (magic number division by 3 from
 * schroedinger).
 *
 * 21845 is 65536/3 rounded down and 10922 is the rounding bias, so the
 * result is the 16.16 fixed-point product truncated by the final shift.
 *
 * @param x value to divide; intended for the small sums of neighbouring
 *          coefficients produced by the prediction code
 * @return  x / 3 rounded to nearest
 */
static inline int divide3(int x)
{
    return ((x+1)*21845 + 10922) >> 16;
}
/* Look up the frame whose display_picture_number equals picnum in
 * framelist and close the gap it leaves. The list is walked until a NULL
 * entry, so it must be NULL terminated.
 * NOTE(review): some lines are absent from this excerpt -- the assignment
 * to remove_idx, whatever guards the shift loop, and the return statement
 * are not visible. */
289 static DiracFrame *remove_frame(DiracFrame *framelist[], int picnum)
291 DiracFrame *remove_pic = NULL;
292 int i, remove_idx = -1;
294 for (i = 0; framelist[i]; i++)
295 if (framelist[i]->avframe->display_picture_number == picnum) {
296 remove_pic = framelist[i];
/* Shift every later entry down by one; the terminating NULL is copied too
 * because the loop reads framelist[i+1] before testing framelist[i]. */
301 for (i = remove_idx; framelist[i]; i++)
302 framelist[i] = framelist[i+1];
/* Store frame in framelist, scanning at most maxframes slots.
 * NOTE(review): the condition that selects the slot and both return
 * statements are absent from this excerpt; presumably the first empty slot
 * is used -- confirm against the upstream file. */
307 static int add_frame(DiracFrame *framelist[], int maxframes, DiracFrame *frame)
310 for (i = 0; i < maxframes; i++)
312 framelist[i] = frame;
/* Allocate the buffers whose size depends only on the sequence dimensions:
 * the IDWT buffers of the three planes plus the superblock split and block
 * motion arrays. Returns AVERROR(ENOMEM) when an allocation fails.
 * NOTE(review): braces, the success return and the closer of the long
 * comment inside the loop are absent from this excerpt. */
318 static int alloc_sequence_buffers(DiracContext *s)
320 int sbwidth = DIVRNDUP(s->source.width, 4);
321 int sbheight = DIVRNDUP(s->source.height, 4);
322 int i, w, h, top_padding;
324 /* todo: think more about this / use or set Plane here */
325 for (i = 0; i < 3; i++) {
/* Plane 0 uses the full size; planes 1 and 2 are reduced by the chroma shifts. */
326 int max_xblen = MAX_BLOCKSIZE >> (i ? s->chroma_x_shift : 0);
327 int max_yblen = MAX_BLOCKSIZE >> (i ? s->chroma_y_shift : 0);
328 w = s->source.width >> (i ? s->chroma_x_shift : 0);
329 h = s->source.height >> (i ? s->chroma_y_shift : 0);
331 /* we allocate the max we support here since num decompositions can
332 * change from frame to frame. Stride is aligned to 16 for SIMD, and
333 * 1<<MAX_DWT_LEVELS top padding to avoid if(y>0) in arith decoding
334 * MAX_BLOCKSIZE padding for MC: blocks can spill up to half of that
336 top_padding = FFMAX(1<<MAX_DWT_LEVELS, max_yblen/2);
337 w = FFALIGN(CALC_PADDING(w, MAX_DWT_LEVELS), 8); /* FIXME: Should this be 16 for SSE??? */
/* Height gains top_padding rows above and max_yblen/2 rows below. */
338 h = top_padding + CALC_PADDING(h, MAX_DWT_LEVELS) + max_yblen/2;
/* NOTE(review): h * sizeof(IDWTELEM) is multiplied before the call, so
 * only the outer product is overflow-checked by the array allocator. */
340 s->plane[i].idwt_buf_base = av_mallocz_array((w+max_xblen), h * sizeof(IDWTELEM));
341 s->plane[i].idwt_tmp = av_malloc_array((w+16), sizeof(IDWTELEM));
/* NOTE(review): the offset is applied before the NULL check that follows. */
342 s->plane[i].idwt_buf = s->plane[i].idwt_buf_base + top_padding*w;
343 if (!s->plane[i].idwt_buf_base || !s->plane[i].idwt_tmp)
344 return AVERROR(ENOMEM);
347 /* fixme: allocate using real stride here */
/* One split byte per superblock and 16 motion records per superblock,
 * with the superblock grid computed above from a divisor of 4. */
348 s->sbsplit = av_malloc_array(sbwidth, sbheight);
349 s->blmotion = av_malloc_array(sbwidth, sbheight * 16 * sizeof(*s->blmotion));
351 if (!s->sbsplit || !s->blmotion)
352 return AVERROR(ENOMEM);
/* (Re)allocate the motion compensation scratch buffers for a given frame
 * stride. The stride must be at least the source width (asserted). On
 * allocation failure AVERROR(ENOMEM) is returned; on success the new
 * stride is remembered in buffer_stride.
 * NOTE(review): the statement under the buffer_stride comparison, at least
 * one release call, the success return and the braces are absent from this
 * excerpt; presumably the function returns early when the existing buffers
 * are already large enough -- confirm. */
356 static int alloc_buffers(DiracContext *s, int stride)
358 int w = s->source.width;
359 int h = s->source.height;
361 av_assert0(stride >= w);
364 if (s->buffer_stride >= stride)
/* Mark the buffers invalid while they are being replaced. */
366 s->buffer_stride = 0;
368 av_freep(&s->edge_emu_buffer_base);
369 memset(s->edge_emu_buffer, 0, sizeof(s->edge_emu_buffer));
371 av_freep(&s->mcscratch);
373 s->edge_emu_buffer_base = av_malloc_array(stride, MAX_BLOCKSIZE);
/* mctmp covers the picture plus MAX_BLOCKSIZE extra columns and rows. */
375 s->mctmp = av_malloc_array((stride+MAX_BLOCKSIZE), (h+MAX_BLOCKSIZE) * sizeof(*s->mctmp));
376 s->mcscratch = av_malloc_array(stride, MAX_BLOCKSIZE);
378 if (!s->edge_emu_buffer_base || !s->mctmp || !s->mcscratch)
379 return AVERROR(ENOMEM);
381 s->buffer_stride = stride;
/* Release everything tied to the current sequence: unreference all pooled
 * frames, free their interpolation planes, clear the reference and delay
 * lists, and free the per-plane and motion compensation buffers. The
 * AVFrame objects themselves are kept (they are freed in dirac_decode_end).
 * NOTE(review): local declarations, braces and possibly further release
 * calls are absent from this excerpt. */
385 static void free_sequence_buffers(DiracContext *s)
389 for (i = 0; i < MAX_FRAMES; i++) {
/* A non-NULL data[0] means the frame currently holds picture data. */
390 if (s->all_frames[i].avframe->data[0]) {
391 av_frame_unref(s->all_frames[i].avframe);
392 memset(s->all_frames[i].interpolated, 0, sizeof(s->all_frames[i].interpolated));
/* Entry 0 of hpel_base is skipped; only entries 1..3 are freed here. */
395 for (j = 0; j < 3; j++)
396 for (k = 1; k < 4; k++)
397 av_freep(&s->all_frames[i].hpel_base[j][k]);
400 memset(s->ref_frames, 0, sizeof(s->ref_frames));
401 memset(s->delay_frames, 0, sizeof(s->delay_frames));
403 for (i = 0; i < 3; i++) {
404 av_freep(&s->plane[i].idwt_buf_base);
405 av_freep(&s->plane[i].idwt_tmp);
/* A zero stride makes alloc_buffers reallocate on its next call. */
408 s->buffer_stride = 0;
409 av_freep(&s->sbsplit);
410 av_freep(&s->blmotion);
411 av_freep(&s->edge_emu_buffer_base);
414 av_freep(&s->mcscratch);
/* Decoder init callback: reset the display frame counter, initialise the
 * DSP helper contexts and allocate one AVFrame for every slot of the
 * all_frames pool. Returns AVERROR(ENOMEM) if a frame cannot be allocated.
 * NOTE(review): the loop that wraps the cleanup call, the success return
 * and the braces are absent from this excerpt. */
417 static av_cold int dirac_decode_init(AVCodecContext *avctx)
419 DiracContext *s = avctx->priv_data;
423 s->frame_number = -1;
425 ff_diracdsp_init(&s->diracdsp);
426 ff_mpegvideoencdsp_init(&s->mpvencdsp, avctx);
427 ff_videodsp_init(&s->vdsp, 8);
429 for (i = 0; i < MAX_FRAMES; i++) {
430 s->all_frames[i].avframe = av_frame_alloc();
431 if (!s->all_frames[i].avframe) {
/* Undo the allocations made so far; i is decremented before each use. */
433 av_frame_free(&s->all_frames[--i].avframe);
434 return AVERROR(ENOMEM);
/* Flush callback: drop all sequence buffers and return to the state in
 * which no sequence header has been seen, with the display frame counter
 * reset to -1 as in dirac_decode_init.
 * NOTE(review): the braces of this function are absent from this excerpt. */
441 static void dirac_decode_flush(AVCodecContext *avctx)
443 DiracContext *s = avctx->priv_data;
444 free_sequence_buffers(s);
445 s->seen_sequence_header = 0;
446 s->frame_number = -1;
/* Close callback: flush the decoder, then free the AVFrame objects that
 * dirac_decode_init allocated for the all_frames pool.
 * NOTE(review): the declaration of i, the return statement and the braces
 * are absent from this excerpt. */
449 static av_cold int dirac_decode_end(AVCodecContext *avctx)
451 DiracContext *s = avctx->priv_data;
454 dirac_decode_flush(avctx);
455 for (i = 0; i < MAX_FRAMES; i++)
456 av_frame_free(&s->all_frames[i].avframe);
/* Select the arithmetic coding context for a sign bit from the sign of the
 * predictor x: CTX_SIGN_ZERO - 1 for negative x, CTX_SIGN_ZERO for zero
 * and CTX_SIGN_ZERO + 1 for positive x. x is evaluated twice. */
461 #define SIGN_CTX(x) (CTX_SIGN_ZERO + ((x) > 0) - ((x) < 0))
/* Decode one arithmetic coded coefficient at position (x, y) of band b.
 * buf points at the destination element, so negative offsets address the
 * left, upper and upper-left neighbours. The magnitude context is chosen
 * from the parent band and those neighbours, the sign context from a
 * neighbour picked according to the band orientation.
 * NOTE(review): local declarations, the guards around the neighbour and
 * parent accesses, the zero test before dequantisation and the final store
 * are absent from this excerpt. */
463 static inline void coeff_unpack_arith(DiracArith *c, int qfactor, int qoffset,
464 SubBand *b, IDWTELEM *buf, int x, int y)
468 int pred_ctx = CTX_ZPZN_F1;
470 /* Check if the parent subband has a 0 in the corresponding position */
/* The parent band has half the resolution, hence the halved coordinates. */
472 pred_ctx += !!b->parent->ibuf[b->parent->stride * (y>>1) + (x>>1)] << 1;
474 if (b->orientation == subband_hl)
475 sign_pred = buf[-b->stride];
477 /* Determine if the pixel has only zeros in its neighbourhood */
479 pred_ctx += !(buf[-1] | buf[-b->stride] | buf[-1-b->stride]);
480 if (b->orientation == subband_lh)
483 pred_ctx += !buf[-b->stride];
486 coeff = dirac_get_arith_uint(c, pred_ctx, CTX_COEFF_DATA);
/* Dequantise: scale, add the offset, round and drop two fraction bits. */
488 coeff = (coeff * qfactor + qoffset + 2) >> 2;
489 sign = dirac_get_arith_bit(c, SIGN_CTX(sign_pred));
/* Branch-free negation: unchanged when sign is 0, negated when it is 1. */
490 coeff = (coeff ^ -sign) + sign;
/* Decode one Golomb coded coefficient from gb and dequantise it with
 * qfactor and qoffset.
 * NOTE(review): local declarations, the zero test that presumably guards
 * the dequantisation and sign read, and the return statement are absent
 * from this excerpt. */
495 static inline int coeff_unpack_golomb(GetBitContext *gb, int qfactor, int qoffset)
499 coeff = svq3_get_ue_golomb(gb);
/* Dequantise: scale, add the offset, round and drop two fraction bits. */
501 coeff = (coeff * qfactor + qoffset + 2) >> 2;
502 sign = get_bits1(gb);
/* Branch-free negation: unchanged when sign is 0, negated when it is 1. */
503 coeff = (coeff ^ -sign) + sign;
509 * Decode the coeffs in the rectangle defined by left, right, top, bottom
510 * [DIRAC_STD] 13.4.3.2 Codeblock unpacking loop. codeblock()
/* Decode the coefficients of one codeblock, the rectangle
 * [left, right) x [top, bottom) of band b, reading from the arithmetic
 * decoder c or from the bit reader gb depending on is_arith.
 * blockcnt_one is nonzero when the band consists of a single codeblock.
 * NOTE(review): the if/else lines that choose between the arithmetic and
 * Golomb variants and between the intra and inter offset tables, the range
 * check on quant, the early return for a zero block and the braces are
 * absent from this excerpt. */
512 static inline void codeblock(DiracContext *s, SubBand *b,
513 GetBitContext *gb, DiracArith *c,
514 int left, int right, int top, int bottom,
515 int blockcnt_one, int is_arith)
517 int x, y, zero_block;
518 int qoffset, qfactor;
521 /* check for any coded coefficients in this codeblock */
524 zero_block = dirac_get_arith_bit(c, CTX_ZERO_BLOCK);
526 zero_block = get_bits1(gb);
/* A quantiser delta is coded only in codeblock mode, and not for a single
 * block band when the old schroedinger behaviour flag is set. */
532 if (s->codeblock_mode && !(s->old_delta_quant && blockcnt_one)) {
533 int quant = b->quant;
535 quant += dirac_get_arith_int(c, CTX_DELTA_Q_F, CTX_DELTA_Q_DATA);
537 quant += dirac_get_se_golomb(gb);
539 av_log(s->avctx, AV_LOG_ERROR, "Invalid quant\n");
/* Clamp so the table lookups below stay inside their MAX_QUANT+1 entries. */
545 b->quant = FFMIN(b->quant, MAX_QUANT);
547 qfactor = qscale_tab[b->quant];
548 /* TODO: context pointer? */
550 qoffset = qoffset_intra_tab[b->quant];
552 qoffset = qoffset_inter_tab[b->quant];
554 buf = b->ibuf + top * b->stride;
555 for (y = top; y < bottom; y++) {
556 for (x = left; x < right; x++) {
557 /* [DIRAC_STD] 13.4.4 Subband coefficients. coeff_unpack() */
559 coeff_unpack_arith(c, qfactor, qoffset, b, buf+x, x, y);
561 buf[x] = coeff_unpack_golomb(gb, qfactor, qoffset);
568 * Dirac Specification ->
569 * 13.3 intra_dc_prediction(band)
/* Undo the DC prediction of band b in place: each coefficient has a
 * predictor built from already reconstructed neighbours added to it. For
 * the interior the predictor is the rounded mean (divide3) of the left,
 * upper and upper-left values; the first column uses the value above.
 * NOTE(review): the body of the first-row loop, the advance of buf to the
 * next row, local declarations and braces are absent from this excerpt. */
571 static inline void intra_dc_prediction(SubBand *b)
573 IDWTELEM *buf = b->ibuf;
576 for (x = 1; x < b->width; x++)
580 for (y = 1; y < b->height; y++) {
581 buf[0] += buf[-b->stride];
583 for (x = 1; x < b->width; x++) {
584 int pred = buf[x - 1] + buf[x - b->stride] + buf[x - b->stride-1];
585 buf[x] += divide3(pred);
592 * Dirac Specification ->
593 * 13.4.2 Non-skipped subbands. subband_coeffs()
/* Decode every codeblock of band b from its coded data (coeff_data,
 * length), then apply intra DC prediction when b is the LL band of a
 * picture without references. is_arith selects arithmetic or Golomb
 * coefficient coding and is forwarded to codeblock.
 * NOTE(review): declarations of gb and c, the early exit for an empty
 * band, the updates of top and left between codeblocks and the braces are
 * absent from this excerpt. */
595 static av_always_inline void decode_subband_internal(DiracContext *s, SubBand *b, int is_arith)
597 int cb_x, cb_y, left, right, top, bottom;
/* The codeblock table has MAX_DWT_LEVELS+1 entries: the LL band uses the
 * entry of its level, every other orientation the following entry. */
600 int cb_width = s->codeblock[b->level + (b->orientation != subband_ll)].width;
601 int cb_height = s->codeblock[b->level + (b->orientation != subband_ll)].height;
/* True only when there is exactly one codeblock (both counts are >= 1). */
602 int blockcnt_one = (cb_width + cb_height) == 2;
607 init_get_bits8(&gb, b->coeff_data, b->length);
610 ff_dirac_init_arith_decoder(&c, &gb, b->length);
613 for (cb_y = 0; cb_y < cb_height; cb_y++) {
/* Codeblock edges are placed proportionally so they tile the band exactly. */
614 bottom = (b->height * (cb_y+1)) / cb_height;
616 for (cb_x = 0; cb_x < cb_width; cb_x++) {
617 right = (b->width * (cb_x+1)) / cb_width;
618 codeblock(s, b, &gb, &c, left, right, top, bottom, blockcnt_one, is_arith);
624 if (b->orientation == subband_ll && s->num_refs == 0)
625 intra_dc_prediction(b);
/* Worker for avctx->execute: decode one arithmetic coded subband. The
 * job argument b is the SubBand itself (decode_component passes an array
 * of SubBand with sizeof(SubBand) as the element size).
 * NOTE(review): the return statement and braces are absent from this
 * excerpt. */
628 static int decode_subband_arith(AVCodecContext *avctx, void *b)
630 DiracContext *s = avctx->priv_data;
631 decode_subband_internal(s, b, 1);
/* Worker for avctx->execute: decode one Golomb coded subband. Here the
 * job argument is an element of an array of SubBand pointers, so it is
 * dereferenced once before use.
 * NOTE(review): the declaration of b (presumably derived from arg), the
 * return statement and braces are absent from this excerpt. */
635 static int decode_subband_golomb(AVCodecContext *avctx, void *arg)
637 DiracContext *s = avctx->priv_data;
639 decode_subband_internal(s, *b, 0);
644 * Dirac Specification ->
645 * [DIRAC_STD] 13.4.1 core_transform_data()
/* Parse the subband headers of component comp and decode all its subbands.
 * For each band the coded length and quantiser are read, the position of
 * the coefficient bytes is recorded and the bytes are skipped; the actual
 * decoding is then handed to avctx->execute.
 * NOTE(review): the conditions that select the arithmetic or Golomb path
 * and that guard the quantiser read, plus braces, are absent from this
 * excerpt. */
647 static void decode_component(DiracContext *s, int comp)
649 AVCodecContext *avctx = s->avctx;
/* One LL band plus three bands per level at most. */
650 SubBand *bands[3*MAX_DWT_LEVELS+1];
651 enum dirac_subband orientation;
652 int level, num_bands = 0;
654 /* Unpack all subbands at all levels. */
655 for (level = 0; level < s->wavelet_depth; level++) {
/* Orientation 0 (LL) exists only at level 0; later levels start at 1. */
656 for (orientation = !!level; orientation < 4; orientation++) {
657 SubBand *b = &s->plane[comp].band[level][orientation];
658 bands[num_bands++] = b;
660 align_get_bits(&s->gb);
661 /* [DIRAC_STD] 13.4.2 subband() */
662 b->length = svq3_get_ue_golomb(&s->gb);
664 b->quant = svq3_get_ue_golomb(&s->gb);
665 align_get_bits(&s->gb);
666 b->coeff_data = s->gb.buffer + get_bits_count(&s->gb)/8;
/* Clamp the coded length to the bytes actually left in the packet. */
667 b->length = FFMIN(b->length, FFMAX(get_bits_left(&s->gb)/8, 0));
668 skip_bits_long(&s->gb, b->length*8);
671 /* arithmetic coding has inter-level dependencies, so we can only execute one level at a time */
/* Jobs are the bands of this level: 4 at level 0, otherwise 3. */
673 avctx->execute(avctx, decode_subband_arith, &s->plane[comp].band[level][!!level],
674 NULL, 4-!!level, sizeof(SubBand));
676 /* golomb coding has no inter-level dependencies, so we can execute all subbands in parallel */
678 avctx->execute(avctx, decode_subband_golomb, bands, NULL, num_bands, sizeof(SubBand*));
681 /* [DIRAC_STD] 13.5.5.2 Luma slice subband data. luma_slice_band(level,orient,sx,sy) --> if b2 == NULL */
682 /* [DIRAC_STD] 13.5.5.3 Chroma slice subband data. chroma_slice_band(level,orient,sx,sy) --> if b2 != NULL */
/* Decode the part of one subband that belongs to slice (slice_x, slice_y).
 * With b2 == NULL only b1 is filled; otherwise coefficients for b1 and b2
 * are read alternately. Reading stops as soon as the bit position reaches
 * bits_end. The always-intra offset table is used for dequantisation.
 * NOTE(review): the declarations of x and y, the return statements under
 * the position checks, the test on buf2, the row advance of buf1 and buf2
 * and the braces are absent from this excerpt. */
683 static void lowdelay_subband(DiracContext *s, GetBitContext *gb, int quant,
684 int slice_x, int slice_y, int bits_end,
685 SubBand *b1, SubBand *b2)
/* Slice edges are placed proportionally so the slices tile the band. */
687 int left = b1->width * slice_x / s->lowdelay.num_x;
688 int right = b1->width *(slice_x+1) / s->lowdelay.num_x;
689 int top = b1->height * slice_y / s->lowdelay.num_y;
690 int bottom = b1->height *(slice_y+1) / s->lowdelay.num_y;
/* The index is clamped so the lookups stay inside the tables. */
692 int qfactor = qscale_tab[FFMIN(quant, MAX_QUANT)];
693 int qoffset = qoffset_intra_tab[FFMIN(quant, MAX_QUANT)];
695 IDWTELEM *buf1 = b1->ibuf + top * b1->stride;
696 IDWTELEM *buf2 = b2 ? b2->ibuf + top * b2->stride : NULL;
698 /* we have to constantly check for overread since the spec explicitly
699 requires this, with the meaning that all remaining coeffs are set to 0 */
700 if (get_bits_count(gb) >= bits_end)
703 for (y = top; y < bottom; y++) {
704 for (x = left; x < right; x++) {
705 buf1[x] = coeff_unpack_golomb(gb, qfactor, qoffset);
706 if (get_bits_count(gb) >= bits_end)
709 buf2[x] = coeff_unpack_golomb(gb, qfactor, qoffset);
710 if (get_bits_count(gb) >= bits_end)
/* Job record for one low-delay slice, filled by decode_lowdelay and
 * consumed by decode_lowdelay_slice. The members used by that code are gb,
 * slice_x, slice_y and bytes.
 * NOTE(review): the member declarations and the closing brace are absent
 * from this excerpt. */
720 struct lowdelay_slice {
729 * Dirac Specification ->
730 * 13.5.2 Slices. slice(sx,sy)
732 static int decode_lowdelay_slice(AVCodecContext *avctx, void *arg)
734 DiracContext *s = avctx->priv_data;
735 struct lowdelay_slice *slice = arg;
736 GetBitContext *gb = &slice->gb;
737 enum dirac_subband orientation;
738 int level, quant, chroma_bits, chroma_end;
740 int quant_base = get_bits(gb, 7); /*[DIRAC_STD] qindex */
741 int length_bits = av_log2(8 * slice->bytes)+1;
742 int luma_bits = get_bits_long(gb, length_bits);
743 int luma_end = get_bits_count(gb) + FFMIN(luma_bits, get_bits_left(gb));
745 /* [DIRAC_STD] 13.5.5.2 luma_slice_band */
746 for (level = 0; level < s->wavelet_depth; level++)
747 for (orientation = !!level; orientation < 4; orientation++) {
748 quant = FFMAX(quant_base - s->lowdelay.quant[level][orientation], 0);
749 lowdelay_subband(s, gb, quant, slice->slice_x, slice->slice_y, luma_end,
750 &s->plane[0].band[level][orientation], NULL);
753 /* consume any unused bits from luma */
754 skip_bits_long(gb, get_bits_count(gb) - luma_end);
756 chroma_bits = 8*slice->bytes - 7 - length_bits - luma_bits;
757 chroma_end = get_bits_count(gb) + FFMIN(chroma_bits, get_bits_left(gb));
758 /* [DIRAC_STD] 13.5.5.3 chroma_slice_band */
759 for (level = 0; level < s->wavelet_depth; level++)
760 for (orientation = !!level; orientation < 4; orientation++) {
761 quant = FFMAX(quant_base - s->lowdelay.quant[level][orientation], 0);
762 lowdelay_subband(s, gb, quant, slice->slice_x, slice->slice_y, chroma_end,
763 &s->plane[1].band[level][orientation],
764 &s->plane[2].band[level][orientation]);
771 * Dirac Specification ->
772 * 13.5.1 low_delay_transform_data()
/* Split the remaining packet data into low-delay slices, decode them via
 * avctx->execute and finally undo the DC prediction of the LL band of all
 * three planes.
 * NOTE(review): no check of the allocation result is visible before slices
 * is written to; the declarations of buf and slice_num, the per-slice
 * advance of buf, bufsize and slice_num, the release of slices and the
 * braces are absent from this excerpt -- confirm against the upstream
 * file. */
774 static void decode_lowdelay(DiracContext *s)
776 AVCodecContext *avctx = s->avctx;
777 int slice_x, slice_y, bytes, bufsize;
779 struct lowdelay_slice *slices;
/* One zero-initialised record per slice of the num_x by num_y grid. */
782 slices = av_mallocz_array(s->lowdelay.num_x, s->lowdelay.num_y * sizeof(struct lowdelay_slice));
784 align_get_bits(&s->gb);
785 /*[DIRAC_STD] 13.5.2 Slices. slice(sx,sy) */
786 buf = s->gb.buffer + get_bits_count(&s->gb)/8;
/* bufsize is a bit count: it is passed to init_get_bits below. */
787 bufsize = get_bits_left(&s->gb);
789 for (slice_y = 0; bufsize > 0 && slice_y < s->lowdelay.num_y; slice_y++)
790 for (slice_x = 0; bufsize > 0 && slice_x < s->lowdelay.num_x; slice_x++) {
/* Size of this slice: difference of two truncated multiples of the
 * rational average, so that the fractional part is spread over slices. */
791 bytes = (slice_num+1) * s->lowdelay.bytes.num / s->lowdelay.bytes.den
792 - slice_num * s->lowdelay.bytes.num / s->lowdelay.bytes.den;
794 slices[slice_num].bytes = bytes;
795 slices[slice_num].slice_x = slice_x;
796 slices[slice_num].slice_y = slice_y;
797 init_get_bits(&slices[slice_num].gb, buf, bufsize);
/* Only the slice_num records that were set up are executed. */
804 avctx->execute(avctx, decode_lowdelay_slice, slices, NULL, slice_num,
805 sizeof(struct lowdelay_slice)); /* [DIRAC_STD] 13.5.2 Slices */
806 intra_dc_prediction(&s->plane[0].band[0][0]); /* [DIRAC_STD] 13.3 intra_dc_prediction() */
807 intra_dc_prediction(&s->plane[1].band[0][0]); /* [DIRAC_STD] 13.3 intra_dc_prediction() */
808 intra_dc_prediction(&s->plane[2].band[0][0]); /* [DIRAC_STD] 13.3 intra_dc_prediction() */
/* Derive the per-plane geometry for the current picture: plane and padded
 * IDWT dimensions, the layout of every subband inside the shared IDWT
 * buffer, and the block sizes of the chroma planes.
 * NOTE(review): the halving of w and h per level, the assignments of band
 * width, height and level, the conditions around the ibuf offsets, the
 * parent link and the chroma block sizes, and the braces are absent from
 * this excerpt. */
812 static void init_planes(DiracContext *s)
814 int i, w, h, level, orientation;
816 for (i = 0; i < 3; i++) {
817 Plane *p = &s->plane[i];
/* Plane 0 uses the full size; planes 1 and 2 are reduced by the chroma shifts. */
819 p->width = s->source.width >> (i ? s->chroma_x_shift : 0);
820 p->height = s->source.height >> (i ? s->chroma_y_shift : 0);
/* Pad to a multiple of 1 << wavelet_depth. */
821 p->idwt_width = w = CALC_PADDING(p->width , s->wavelet_depth);
822 p->idwt_height = h = CALC_PADDING(p->height, s->wavelet_depth);
823 p->idwt_stride = FFALIGN(p->idwt_width, 8);
825 for (level = s->wavelet_depth-1; level >= 0; level--) {
828 for (orientation = !!level; orientation < 4; orientation++) {
829 SubBand *b = &p->band[level][orientation];
/* All bands live interleaved in the one IDWT buffer of the plane; the
 * stride grows by a factor of two for every level below the top one. */
831 b->ibuf = p->idwt_buf;
833 b->stride = p->idwt_stride << (s->wavelet_depth - level);
836 b->orientation = orientation;
841 b->ibuf += b->stride>>1;
844 b->parent = &p->band[level-1][orientation];
/* Chroma block geometry is the luma geometry scaled by the chroma shifts. */
849 p->xblen = s->plane[0].xblen >> s->chroma_x_shift;
850 p->yblen = s->plane[0].yblen >> s->chroma_y_shift;
851 p->xbsep = s->plane[0].xbsep >> s->chroma_x_shift;
852 p->ybsep = s->plane[0].ybsep >> s->chroma_y_shift;
/* Half of the overlap between neighbouring blocks. */
855 p->xoffset = (p->xblen - p->xbsep)/2;
856 p->yoffset = (p->yblen - p->ybsep)/2;
861 * Unpack the motion compensation parameters
862 * Dirac Specification ->
863 * 11.2 Picture prediction data. picture_prediction()
/* Parse the picture prediction parameters from s->gb: block size and
 * separation, motion vector precision, optional global motion parameters
 * per reference, the prediction mode and the reference weights. Invalid
 * values are reported through av_log.
 * NOTE(review): most conditions (range check on idx, custom versus preset
 * block parameters, the presence flags inside the global motion loop, the
 * weight flag), the error returns, the default weights, the success return
 * and the braces are absent from this excerpt. */
865 static int dirac_unpack_prediction_parameters(DiracContext *s)
/* Preset luma block lengths and separations; both are indexed with idx-1
 * below, so presets correspond to idx values 1..4. */
867 static const uint8_t default_blen[] = { 4, 12, 16, 24 };
868 static const uint8_t default_bsep[] = { 4, 8, 12, 16 };
870 GetBitContext *gb = &s->gb;
874 /* [DIRAC_STD] 11.2.2 Block parameters. block_parameters() */
875 /* Luma and Chroma are equal. 11.2.3 */
876 idx = svq3_get_ue_golomb(gb); /* [DIRAC_STD] index */
879 av_log(s->avctx, AV_LOG_ERROR, "Block prediction index too high\n");
/* Explicitly coded block parameters (presumably the idx == 0 case, since
 * the presets use idx-1 -- the selecting condition is not visible). */
884 s->plane[0].xblen = svq3_get_ue_golomb(gb);
885 s->plane[0].yblen = svq3_get_ue_golomb(gb);
886 s->plane[0].xbsep = svq3_get_ue_golomb(gb);
887 s->plane[0].ybsep = svq3_get_ue_golomb(gb);
889 /*[DIRAC_STD] preset_block_params(index). Table 11.1 */
890 s->plane[0].xblen = default_blen[idx-1];
891 s->plane[0].yblen = default_blen[idx-1];
892 s->plane[0].xbsep = default_bsep[idx-1];
893 s->plane[0].ybsep = default_bsep[idx-1];
895 /*[DIRAC_STD] 11.2.4 motion_data_dimensions()
896 Calculated in function dirac_unpack_block_motion_data */
/* Sanity checks: separation must be nonzero, at least half the block
 * length, not larger than the block length, and blocks must fit the
 * MAX_BLOCKSIZE limit that the buffers are sized for. */
898 if (!s->plane[0].xbsep || !s->plane[0].ybsep || s->plane[0].xbsep < s->plane[0].xblen/2 || s->plane[0].ybsep < s->plane[0].yblen/2) {
899 av_log(s->avctx, AV_LOG_ERROR, "Block separation too small\n");
902 if (s->plane[0].xbsep > s->plane[0].xblen || s->plane[0].ybsep > s->plane[0].yblen) {
903 av_log(s->avctx, AV_LOG_ERROR, "Block separation greater than size\n");
906 if (FFMAX(s->plane[0].xblen, s->plane[0].yblen) > MAX_BLOCKSIZE) {
907 av_log(s->avctx, AV_LOG_ERROR, "Unsupported large block size\n");
911 /*[DIRAC_STD] 11.2.5 Motion vector precision. motion_vector_precision()
912 Read motion vector precision */
913 s->mv_precision = svq3_get_ue_golomb(gb);
914 if (s->mv_precision > 3) {
915 av_log(s->avctx, AV_LOG_ERROR, "MV precision finer than eighth-pel\n");
919 /*[DIRAC_STD] 11.2.6 Global motion. global_motion()
920 Read the global motion compensation parameters */
921 s->globalmc_flag = get_bits1(gb);
922 if (s->globalmc_flag) {
/* Start from all-zero parameters; only coded fields are overwritten. */
923 memset(s->globalmc, 0, sizeof(s->globalmc));
924 /* [DIRAC_STD] pan_tilt(gparams) */
925 for (ref = 0; ref < s->num_refs; ref++) {
927 s->globalmc[ref].pan_tilt[0] = dirac_get_se_golomb(gb);
928 s->globalmc[ref].pan_tilt[1] = dirac_get_se_golomb(gb);
930 /* [DIRAC_STD] zoom_rotate_shear(gparams)
931 zoom/rotation/shear parameters */
933 s->globalmc[ref].zrs_exp = svq3_get_ue_golomb(gb);
934 s->globalmc[ref].zrs[0][0] = dirac_get_se_golomb(gb);
935 s->globalmc[ref].zrs[0][1] = dirac_get_se_golomb(gb);
936 s->globalmc[ref].zrs[1][0] = dirac_get_se_golomb(gb);
937 s->globalmc[ref].zrs[1][1] = dirac_get_se_golomb(gb);
/* Identity matrix, presumably the fallback when no matrix is coded. */
939 s->globalmc[ref].zrs[0][0] = 1;
940 s->globalmc[ref].zrs[1][1] = 1;
942 /* [DIRAC_STD] perspective(gparams) */
944 s->globalmc[ref].perspective_exp = svq3_get_ue_golomb(gb);
945 s->globalmc[ref].perspective[0] = dirac_get_se_golomb(gb);
946 s->globalmc[ref].perspective[1] = dirac_get_se_golomb(gb);
951 /*[DIRAC_STD] 11.2.7 Picture prediction mode. prediction_mode()
952 Picture prediction mode, not currently used. */
/* Any nonzero mode is rejected. */
953 if (svq3_get_ue_golomb(gb)) {
954 av_log(s->avctx, AV_LOG_ERROR, "Unknown picture prediction mode\n");
958 /* [DIRAC_STD] 11.2.8 Reference picture weight. reference_picture_weights()
959 just data read, weight calculation will be done later on. */
960 s->weight_log2denom = 1;
965 s->weight_log2denom = svq3_get_ue_golomb(gb);
966 s->weight[0] = dirac_get_se_golomb(gb);
/* The second weight is coded only when two references are in use. */
967 if (s->num_refs == 2)
968 s->weight[1] = dirac_get_se_golomb(gb);
974 * Dirac Specification ->
975 * 11.3 Wavelet transform data. wavelet_transform()
/* Parse the wavelet transform parameters from s->gb: zero residue flag,
 * wavelet index and depth, then either the codeblock parameters (core
 * syntax) or the slice parameters and quantisation matrix (low-delay
 * syntax). Returns AVERROR_INVALIDDATA for the invalid cases visible below.
 * NOTE(review): local declarations, the tail of the CHECKEDREAD macro, the
 * early return for zero_res, several else lines, the success return and
 * the braces are absent from this excerpt. */
977 static int dirac_unpack_idwt_params(DiracContext *s)
979 GetBitContext *gb = &s->gb;
/* Helper: read an unsigned Golomb value into tmp, log errmsg on failure
 * (the test of cond and the assignment to dst are on lines not visible
 * here -- presumably it also returns an error). */
983 #define CHECKEDREAD(dst, cond, errmsg) \
984 tmp = svq3_get_ue_golomb(gb); \
986 av_log(s->avctx, AV_LOG_ERROR, errmsg); \
/* The flag is coded only for pictures that use references. */
993 s->zero_res = s->num_refs ? get_bits1(gb) : 0;
997 /*[DIRAC_STD] 11.3.1 Transform parameters. transform_parameters() */
998 CHECKEDREAD(s->wavelet_idx, tmp > 6, "wavelet_idx is too big\n")
1000 CHECKEDREAD(s->wavelet_depth, tmp > MAX_DWT_LEVELS || tmp < 1, "invalid number of DWT decompositions\n")
1002 if (!s->low_delay) {
1003 /* Codeblock parameters (core syntax only) */
1004 if (get_bits1(gb)) {
/* wavelet_depth + 1 entries, matching the codeblock[] array size. */
1005 for (i = 0; i <= s->wavelet_depth; i++) {
1006 CHECKEDREAD(s->codeblock[i].width , tmp < 1, "codeblock width invalid\n")
1007 CHECKEDREAD(s->codeblock[i].height, tmp < 1, "codeblock height invalid\n")
1010 CHECKEDREAD(s->codeblock_mode, tmp > 1, "unknown codeblock mode\n")
/* Default: a single codeblock per band. */
1012 for (i = 0; i <= s->wavelet_depth; i++)
1013 s->codeblock[i].width = s->codeblock[i].height = 1;
1015 /* Slice parameters + quantization matrix*/
1016 /*[DIRAC_STD] 11.3.4 Slice coding Parameters (low delay syntax only). slice_parameters() */
1017 s->lowdelay.num_x = svq3_get_ue_golomb(gb);
1018 s->lowdelay.num_y = svq3_get_ue_golomb(gb);
1019 s->lowdelay.bytes.num = svq3_get_ue_golomb(gb);
1020 s->lowdelay.bytes.den = svq3_get_ue_golomb(gb);
/* The denominator is a divisor in decode_lowdelay, so zero is rejected.
 * NOTE(review): no comparable check on num_x or num_y is visible here,
 * although lowdelay_subband divides by both. */
1022 if (s->lowdelay.bytes.den <= 0) {
1023 av_log(s->avctx,AV_LOG_ERROR,"Invalid lowdelay.bytes.den\n");
1024 return AVERROR_INVALIDDATA;
1027 /* [DIRAC_STD] 11.3.5 Quantisation matrices (low-delay syntax). quant_matrix() */
1028 if (get_bits1(gb)) {
1029 av_log(s->avctx,AV_LOG_DEBUG,"Low Delay: Has Custom Quantization Matrix!\n");
1030 /* custom quantization matrix */
1031 s->lowdelay.quant[0][0] = svq3_get_ue_golomb(gb);
1032 for (level = 0; level < s->wavelet_depth; level++) {
1033 s->lowdelay.quant[level][1] = svq3_get_ue_golomb(gb);
1034 s->lowdelay.quant[level][2] = svq3_get_ue_golomb(gb);
1035 s->lowdelay.quant[level][3] = svq3_get_ue_golomb(gb);
/* default_qmat has four levels per wavelet, so deeper transforms need a
 * custom matrix. */
1038 if (s->wavelet_depth > 4) {
1039 av_log(s->avctx,AV_LOG_ERROR,"Mandatory custom low delay matrix missing for depth %d\n", s->wavelet_depth);
1040 return AVERROR_INVALIDDATA;
1042 /* default quantization matrix */
1043 for (level = 0; level < s->wavelet_depth; level++)
1044 for (i = 0; i < 4; i++) {
1045 s->lowdelay.quant[level][i] = default_qmat[s->wavelet_idx][level][i];
1046 /* haar with no shift differs for different depths */
1047 if (s->wavelet_idx == 3)
1048 s->lowdelay.quant[level][i] += 4*(s->wavelet_depth-1 - level);
/* Predict the split mode of the superblock at (x, y) from its decoded
 * neighbours. sbsplit points at the current entry, so negative offsets
 * address the left, upper and upper-left entries. With all three available
 * the prediction is the avgsplit lookup of their sum, i.e. the rounded
 * mean of three values in 0..2.
 * NOTE(review): the conditions on x and y and the branches for the first
 * row and first column other than the one shown are absent from this
 * excerpt. */
1055 static inline int pred_sbsplit(uint8_t *sbsplit, int stride, int x, int y)
1057 static const uint8_t avgsplit[7] = { 0, 0, 1, 1, 1, 2, 2 };
1064 return sbsplit[-stride];
1066 return avgsplit[sbsplit[-1] + sbsplit[-stride] + sbsplit[-stride-1]];
/* Predict one flag bit (selected by refmask) of the ref field of block
 * from the left, upper and upper-left neighbours. With all three
 * neighbours the result is the majority of the three bits.
 * NOTE(review): the declaration of pred, the conditions on x and y that
 * pick between the three returns, and any return for the top-left corner
 * are absent from this excerpt. */
1069 static inline int pred_block_mode(DiracBlock *block, int stride, int x, int y, int refmask)
1076 return block[-1].ref & refmask;
1078 return block[-stride].ref & refmask;
1080 /* return the majority */
/* The sum is 0..3 times refmask; halving it and masking leaves the bit
 * set exactly when at least two neighbours have it set. */
1081 pred = (block[-1].ref & refmask) + (block[-stride].ref & refmask) + (block[-stride-1].ref & refmask);
1082 return (pred >> 1) & refmask;
/* Predict the three DC values of an intra block as the mean of the DC
 * values of those neighbours (left, upper, upper-left) that exist and are
 * themselves intra, i.e. have neither reference bit set in ref.
 * NOTE(review): the declaration and counting of n, the test for two
 * contributing neighbours and the braces are absent from this excerpt. */
1085 static inline void pred_block_dc(DiracBlock *block, int stride, int x, int y)
1089 memset(block->u.dc, 0, sizeof(block->u.dc));
1091 if (x && !(block[-1].ref & 3)) {
1092 for (i = 0; i < 3; i++)
1093 block->u.dc[i] += block[-1].u.dc[i];
1097 if (y && !(block[-stride].ref & 3)) {
1098 for (i = 0; i < 3; i++)
1099 block->u.dc[i] += block[-stride].u.dc[i];
1103 if (x && y && !(block[-1-stride].ref & 3)) {
1104 for (i = 0; i < 3; i++)
1105 block->u.dc[i] += block[-1-stride].u.dc[i];
/* Rounded halving of the accumulated sum. */
1110 for (i = 0; i < 3; i++)
1111 block->u.dc[i] = (block->u.dc[i]+1)>>1;
1112 } else if (n == 3) {
1113 for (i = 0; i < 3; i++)
1114 block->u.dc[i] = divide3(block->u.dc[i]);
/* Predict the motion vector of block for reference ref (0 or 1) from the
 * neighbours that use the same reference without global motion. The
 * result depends on how many neighbours qualify: zero vector, the single
 * candidate, the rounded mean of two, or the component-wise median of
 * three.
 * NOTE(review): the declarations of pred and n and the switch or if lines
 * that select between the four cases are absent from this excerpt. */
1118 static inline void pred_mv(DiracBlock *block, int stride, int x, int y, int ref)
/* ref 0 maps to DIRAC_REF_MASK_REF1 and ref 1 to DIRAC_REF_MASK_REF2. */
1121 int refmask = ref+1;
1122 int mask = refmask | DIRAC_REF_MASK_GLOBAL; /* exclude gmc blocks */
1125 if (x && (block[-1].ref & mask) == refmask)
1126 pred[n++] = block[-1].u.mv[ref];
1128 if (y && (block[-stride].ref & mask) == refmask)
1129 pred[n++] = block[-stride].u.mv[ref];
1131 if (x && y && (block[-stride-1].ref & mask) == refmask)
1132 pred[n++] = block[-stride-1].u.mv[ref];
1136 block->u.mv[ref][0] = 0;
1137 block->u.mv[ref][1] = 0;
1140 block->u.mv[ref][0] = pred[0][0];
1141 block->u.mv[ref][1] = pred[0][1];
1144 block->u.mv[ref][0] = (pred[0][0] + pred[1][0] + 1) >> 1;
1145 block->u.mv[ref][1] = (pred[0][1] + pred[1][1] + 1) >> 1;
1148 block->u.mv[ref][0] = mid_pred(pred[0][0], pred[1][0], pred[2][0]);
1149 block->u.mv[ref][1] = mid_pred(pred[0][1], pred[1][1], pred[2][1]);
1154 static void global_mv(DiracContext *s, DiracBlock *block, int x, int y, int ref)
1156 int ez = s->globalmc[ref].zrs_exp;
1157 int ep = s->globalmc[ref].perspective_exp;
1158 int (*A)[2] = s->globalmc[ref].zrs;
1159 int *b = s->globalmc[ref].pan_tilt;
1160 int *c = s->globalmc[ref].perspective;
1162 int m = (1<<ep) - (c[0]*x + c[1]*y);
1163 int mx = m * ((A[0][0] * x + A[0][1]*y) + (1<<ez) * b[0]);
1164 int my = m * ((A[1][0] * x + A[1][1]*y) + (1<<ez) * b[1]);
1166 block->u.mv[ref][0] = (mx + (1<<(ez+ep))) >> (ez+ep);
1167 block->u.mv[ref][1] = (my + (1<<(ez+ep))) >> (ez+ep);
/* Decode the parameters of one block: which references it uses, then
 * either its DC values (intra block) or its motion vectors. Every value is
 * coded as a difference against a prediction from neighbouring blocks.
 * The arith array holds separate decoders: index 0 for the mode bits,
 * 1..3 for the three DC components and 4 + 2*i, 5 + 2*i for the x and y
 * vector components of reference i.
 * NOTE(review): the declaration of i, the test that separates the intra
 * path (with its early return), the else before the predicted-vector path
 * and the braces are absent from this excerpt. */
1170 static void decode_block_params(DiracContext *s, DiracArith arith[8], DiracBlock *block,
1171 int stride, int x, int y)
/* Predicted bit XOR decoded bit gives the actual flag. */
1175 block->ref = pred_block_mode(block, stride, x, y, DIRAC_REF_MASK_REF1);
1176 block->ref ^= dirac_get_arith_bit(arith, CTX_PMODE_REF1);
1178 if (s->num_refs == 2) {
1179 block->ref |= pred_block_mode(block, stride, x, y, DIRAC_REF_MASK_REF2);
1180 block->ref ^= dirac_get_arith_bit(arith, CTX_PMODE_REF2) << 1;
1184 pred_block_dc(block, stride, x, y);
1185 for (i = 0; i < 3; i++)
1186 block->u.dc[i] += dirac_get_arith_int(arith+1+i, CTX_DC_F1, CTX_DC_DATA);
1190 if (s->globalmc_flag) {
1191 block->ref |= pred_block_mode(block, stride, x, y, DIRAC_REF_MASK_GLOBAL);
1192 block->ref ^= dirac_get_arith_bit(arith, CTX_GLOBAL_BLOCK) << 2;
1195 for (i = 0; i < s->num_refs; i++)
/* i+1 is the mask bit of reference i. */
1196 if (block->ref & (i+1)) {
1197 if (block->ref & DIRAC_REF_MASK_GLOBAL) {
1198 global_mv(s, block, x, y, i);
1200 pred_mv(block, stride, x, y, i);
1201 block->u.mv[i][0] += dirac_get_arith_int(arith + 4 + 2 * i, CTX_MV_F1, CTX_MV_DATA);
1202 block->u.mv[i][1] += dirac_get_arith_int(arith + 5 + 2 * i, CTX_MV_F1, CTX_MV_DATA);
1208 * Copies the current block to the other blocks covered by the current superblock split mode
/*
 * Replicate the data of *block over a size x size group of blocks.
 * stride is the distance between block rows.
 * NOTE(review): the loop bodies are absent from this listing; the loop
 * bounds suggest the rest of the first row is filled first and then the
 * remaining size-1 rows.
 */
1210 static void propagate_block_data(DiracBlock *block, int stride, int size)
1213 DiracBlock *dst = block;
/* remaining blocks of the first row */
1215 for (x = 1; x < size; x++)
/* rows 1 .. size-1 */
1218 for (y = 1; y < size; y++) {
1220 for (x = 0; x < size; x++)
1226 * Dirac Specification ->
1227 * 12. Block motion data syntax
/*
 * Unpack the block motion data of the current picture: the superblock split
 * modes first, then the parameters of every coded block.
 * Sets s->sbwidth, s->sbheight, s->blwidth and s->blheight and fills
 * s->sbsplit and s->blmotion.
 * NOTE(review): every arithmetic decoder below is initialised with a length
 * taken directly from svq3_get_ue_golomb(); whether that length is validated
 * against the remaining input is not visible here.
 */
1229 static int dirac_unpack_block_motion_data(DiracContext *s)
1231 GetBitContext *gb = &s->gb;
1232 uint8_t *sbsplit = s->sbsplit;
/* arith[0]: modes, arith[1..3]: DC per component, arith[4..7]: MV x/y per reference */
1234 DiracArith arith[8];
1238 /* [DIRAC_STD] 11.2.4 and 12.2.1 Number of blocks and superblocks */
/* a superblock spans 4x4 blocks, hence the factor 4 on the block separation */
1239 s->sbwidth = DIVRNDUP(s->source.width, 4*s->plane[0].xbsep);
1240 s->sbheight = DIVRNDUP(s->source.height, 4*s->plane[0].ybsep);
1241 s->blwidth = 4 * s->sbwidth;
1242 s->blheight = 4 * s->sbheight;
1244 /* [DIRAC_STD] 12.3.1 Superblock splitting modes. superblock_split_modes()
1245 decode superblock split modes */
1246 ff_dirac_init_arith_decoder(arith, gb, svq3_get_ue_golomb(gb)); /* svq3_get_ue_golomb(gb) is the length */
1247 for (y = 0; y < s->sbheight; y++) {
1248 for (x = 0; x < s->sbwidth; x++) {
1249 unsigned int split = dirac_get_arith_uint(arith, CTX_SB_F1, CTX_SB_DATA);
/* the split mode is coded as a residual modulo 3 relative to its prediction */
1252 sbsplit[x] = (split + pred_sbsplit(sbsplit+x, s->sbwidth, x, y)) % 3;
/* advance to the next row of superblocks */
1254 sbsplit += s->sbwidth;
1257 /* setup arith decoding */
1258 ff_dirac_init_arith_decoder(arith, gb, svq3_get_ue_golomb(gb));
/* two motion vector decoders (x and y) per reference */
1259 for (i = 0; i < s->num_refs; i++) {
1260 ff_dirac_init_arith_decoder(arith + 4 + 2 * i, gb, svq3_get_ue_golomb(gb));
1261 ff_dirac_init_arith_decoder(arith + 5 + 2 * i, gb, svq3_get_ue_golomb(gb));
/* one DC decoder per component */
1263 for (i = 0; i < 3; i++)
1264 ff_dirac_init_arith_decoder(arith+1+i, gb, svq3_get_ue_golomb(gb));
1266 for (y = 0; y < s->sbheight; y++)
1267 for (x = 0; x < s->sbwidth; x++) {
/* split mode 0/1/2 -> 1, 2 or 4 coded blocks per side, each covering 4, 2 or 1 blocks */
1268 int blkcnt = 1 << s->sbsplit[y * s->sbwidth + x];
1269 int step = 4 >> s->sbsplit[y * s->sbwidth + x];
1271 for (q = 0; q < blkcnt; q++)
1272 for (p = 0; p < blkcnt; p++) {
1273 int bx = 4 * x + p*step;
1274 int by = 4 * y + q*step;
1275 DiracBlock *block = &s->blmotion[by*s->blwidth + bx];
/* decode one block, then copy it over the step x step blocks it stands for */
1276 decode_block_params(s, arith, block, s->blwidth, bx, by);
1277 propagate_block_data(block, s->blwidth, step);
/**
 * Weight of the overlap ramp at distance d from the block edge.
 *
 * Replaces the former ROLLOFF() macro, which expanded to an unparenthesised
 * conditional expression, picked up `offset` implicitly from the caller's
 * scope and stayed defined for the rest of the file.
 *
 * @param d      distance (in samples) from the nearest block edge
 * @param offset overlap offset of the block in this direction
 * @return the ramp weight
 */
static inline int obmc_rolloff(int d, int offset)
{
    if (offset == 1)
        return d ? 5 : 3;
    return 1 + (6 * d + offset - 1) / (2 * offset - 1);
}

/**
 * One-dimensional OBMC weight of sample i in a block of length blen.
 *
 * The first and the last 2*offset samples lie on the overlap ramps and get
 * a ramp weight mirrored around the block centre; all samples in between
 * get the full weight 8.
 *
 * @param i      sample index within the block, 0 <= i < blen
 * @param blen   block length in this direction
 * @param offset overlap offset in this direction
 * @return the weight of sample i
 */
static int weight(int i, int blen, int offset)
{
    if (i < 2 * offset)
        return obmc_rolloff(i, offset);
    else if (i > blen - 1 - 2 * offset)
        return obmc_rolloff(blen - 1 - i, offset);
    return 8;
}
/*
 * Fill one row of an OBMC weight table.
 * wy is the vertical weight of the row; each entry is wy times a horizontal
 * weight.  left/right are flags (used as a loop condition and as a shift
 * count respectively) that replace the ramp on that side by the flat
 * horizontal weight 8.
 */
1296 static void init_obmc_weight_row(Plane *p, uint8_t *obmc_weight, int stride,
1297 int left, int right, int wy)
/* left flag set: first half of the row gets the flat weight */
1300 for (x = 0; left && x < p->xblen >> 1; x++)
1301 obmc_weight[x] = wy*8;
/* ramped part; the bound is halved when the right flag is set */
1302 for (; x < p->xblen >> right; x++)
1303 obmc_weight[x] = wy*weight(x, p->xblen, p->xoffset);
/* whatever remains of the block width gets the flat weight */
1304 for (; x < p->xblen; x++)
1305 obmc_weight[x] = wy*8;
/* entries between xblen and stride (loop body absent from this listing) */
1306 for (; x < stride; x++)
/*
 * Fill a complete OBMC weight table of p->yblen rows, stride entries apart.
 * top/bottom play the same role vertically as left/right do horizontally in
 * init_obmc_weight_row(): the ramp on that side is replaced by the flat
 * vertical weight 8.
 */
1310 static void init_obmc_weight(Plane *p, uint8_t *obmc_weight, int stride,
1311 int left, int right, int top, int bottom)
/* top flag set: first half of the rows use the flat vertical weight */
1314 for (y = 0; top && y < p->yblen >> 1; y++) {
1315 init_obmc_weight_row(p, obmc_weight, stride, left, right, 8);
1316 obmc_weight += stride;
/* ramped rows; the bound is halved when the bottom flag is set */
1318 for (; y < p->yblen >> bottom; y++) {
1319 int wy = weight(y, p->yblen, p->yoffset);
1320 init_obmc_weight_row(p, obmc_weight, stride, left, right, wy);
1321 obmc_weight += stride;
/* remaining rows use the flat vertical weight */
1323 for (; y < p->yblen; y++) {
1324 init_obmc_weight_row(p, obmc_weight, stride, left, right, 8);
1325 obmc_weight += stride;
/*
 * (Re)build the three OBMC weight tables used for block row by:
 * s->obmc_weight[0] is built with the left flag set, [1] with neither side
 * flag set and [2] with the right flag set.
 * NOTE(review): the definition of `top` is absent from this listing.
 */
1329 static void init_obmc_weights(DiracContext *s, Plane *p, int by)
1332 int bottom = by == s->blheight-1;
1334 /* don't bother re-initing for rows 2 to blheight-2, the weights don't change */
/* row 1 is rebuilt as well, to replace the tables built for the top row */
1335 if (top || bottom || by == 1) {
1336 init_obmc_weight(p, s->obmc_weight[0], MAX_BLOCKSIZE, 1, 0, top, bottom);
1337 init_obmc_weight(p, s->obmc_weight[1], MAX_BLOCKSIZE, 0, 0, top, bottom);
1338 init_obmc_weight(p, s->obmc_weight[2], MAX_BLOCKSIZE, 0, 1, top, bottom);
/* Table of 4-entry weight sets; mc_subpel() selects one with [my&3][mx&3]
 * and passes it on through src[4]. (Table rows are absent from this listing.) */
1342 static const uint8_t epel_weights[4][4][4] = {
1362 * For block x,y, determine which of the hpel planes to do bilinear
1363 * interpolation from and set src[] to the location in each hpel plane
1366 * @return the index of the put_dirac_pixels_tab function to use
1367 * 0 for 1 plane (fpel,hpel), 1 for 2 planes (qpel), 2 for 4 planes (qpel), and 3 for epel
/*
 * Select the source plane pointers for motion compensating one block.
 * x, y are the block position in the plane; src[0..3] receive plane pointers
 * and src[4] may receive a row of epel_weights.
 * NOTE(review): most of the conditions that guard the statements below are
 * absent from this listing; the comments describe only what the visible
 * statements do.
 */
1369 static int mc_subpel(DiracContext *s, DiracBlock *block, const uint8_t *src[5],
1370 int x, int y, int ref, int plane)
1372 Plane *p = &s->plane[plane];
1373 uint8_t **ref_hpel = s->ref_pics[ref]->hpel[plane];
1374 int motion_x = block->u.mv[ref][0];
1375 int motion_y = block->u.mv[ref][1];
1376 int mx, my, i, epel, nplanes = 0;
/* scale the vector by the chroma subsampling shifts (guarding condition not visible) */
1379 motion_x >>= s->chroma_x_shift;
1380 motion_y >>= s->chroma_y_shift;
/* mx/my: the low mv_precision bits, i.e. the fractional part of the vector */
1383 mx = motion_x & ~(-1U << s->mv_precision);
1384 my = motion_y & ~(-1U << s->mv_precision);
/* motion_x/motion_y: the remaining integer part */
1385 motion_x >>= s->mv_precision;
1386 motion_y >>= s->mv_precision;
1387 /* normalize subpel coordinates to epel */
1388 /* TODO: template this function? */
1389 mx <<= 3 - s->mv_precision;
1390 my <<= 3 - s->mv_precision;
/* single plane case: choose the plane from the two fraction values */
1399 src[0] = ref_hpel[(my>>1)+(mx>>2)] + y*p->stride + x;
/* multi plane case: start from the same position in all four planes */
1403 for (i = 0; i < 4; i++)
1404 src[i] = ref_hpel[i] + y*p->stride + x;
1406 /* if we're interpolating in the right/bottom halves, adjust the planes as needed
1407 we increment x/y because the edge changes for half of the pixels */
1414 src[0] += p->stride;
1415 src[1] += p->stride;
1423 /* check if we really only need 2 planes since either mx or my is
1424 a hpel position. (epel weights of 0 handle this there) */
1426 /* mx == 0: average [0] and [2]
1427 mx == 4: average [1] and [3] */
1428 src[!mx] = src[2 + !!mx];
1430 } else if (!(my&3)) {
1431 src[0] = src[(my>>1) ];
1432 src[1] = src[(my>>1)+1];
1436 /* adjust the ordering if needed so the weights work */
1438 FFSWAP(const uint8_t *, src[0], src[1]);
1439 FFSWAP(const uint8_t *, src[2], src[3]);
1442 FFSWAP(const uint8_t *, src[0], src[2]);
1443 FFSWAP(const uint8_t *, src[1], src[3]);
/* the weights travel to the pixel function through the fifth src entry */
1445 src[4] = epel_weights[my&3][mx&3];
1449 /* fixme: v/h _edge_pos */
/* block reaches past the padded plane: read through the edge emulation buffers instead */
1450 if (x + p->xblen > p->width +EDGE_WIDTH/2 ||
1451 y + p->yblen > p->height+EDGE_WIDTH/2 ||
1453 for (i = 0; i < nplanes; i++) {
1454 s->vdsp.emulated_edge_mc(s->edge_emu_buffer[i], src[i],
1455 p->stride, p->stride,
1456 p->xblen, p->yblen, x, y,
1457 p->width+EDGE_WIDTH/2, p->height+EDGE_WIDTH/2);
1458 src[i] = s->edge_emu_buffer[i];
/* index into the pixel function tables, see the @return description above */
1461 return (nplanes>>1) + epel;
/*
 * Accumulate a constant (DC) block into dst: every sample receives dc times
 * its OBMC weight.  Weight rows are MAX_BLOCKSIZE entries apart.
 * The inner loop handles two samples per iteration, so an odd xblen writes
 * one sample past xblen.
 * NOTE(review): short statements (including any use of `stride`) are absent
 * from this listing.
 */
1464 static void add_dc(uint16_t *dst, int dc, int stride,
1465 uint8_t *obmc_weight, int xblen, int yblen)
1470 for (y = 0; y < yblen; y++) {
1471 for (x = 0; x < xblen; x += 2) {
1472 dst[x ] += dc * obmc_weight[x ];
1473 dst[x+1] += dc * obmc_weight[x+1];
/* next row of the weight table */
1476 obmc_weight += MAX_BLOCKSIZE;
/*
 * Motion compensate one block and accumulate it, OBMC weighted, into mctmp.
 * The low two bits of block->ref select the path: DC, one reference or two
 * references.
 * NOTE(review): the case labels and some guards are absent from this
 * listing; the grouping below follows the visible statements.
 */
1480 static void block_mc(DiracContext *s, DiracBlock *block,
1481 uint16_t *mctmp, uint8_t *obmc_weight,
1482 int plane, int dstx, int dsty)
1484 Plane *p = &s->plane[plane];
/* src[0..3]: plane pointers, src[4]: optional epel weights (see mc_subpel) */
1485 const uint8_t *src[5];
1488 switch (block->ref&3) {
/* DC block: accumulated directly into mctmp */
1490 add_dc(mctmp, block->u.dc[plane], p->stride, obmc_weight, p->xblen, p->yblen);
/* single reference: (ref&3)-1 is the index of that reference */
1494 idx = mc_subpel(s, block, src, dstx, dsty, (block->ref&3)-1, plane);
1495 s->put_pixels_tab[idx](s->mcscratch, src, p->stride, p->yblen);
/* single reference weighting uses the sum of both weights */
1497 s->weight_func(s->mcscratch, p->stride, s->weight_log2denom,
1498 s->weight[0] + s->weight[1], p->yblen);
/* two references: reference 0 is written to the scratch buffer first */
1501 idx = mc_subpel(s, block, src, dstx, dsty, 0, plane);
1502 s->put_pixels_tab[idx](s->mcscratch, src, p->stride, p->yblen);
1503 idx = mc_subpel(s, block, src, dstx, dsty, 1, plane);
/* weighted bi-prediction needs both predictions side by side in the scratch buffer */
1504 if (s->biweight_func) {
1505 /* fixme: +32 is a quick hack */
1506 s->put_pixels_tab[idx](s->mcscratch + 32, src, p->stride, p->yblen);
1507 s->biweight_func(s->mcscratch, s->mcscratch+32, p->stride, s->weight_log2denom,
1508 s->weight[0], s->weight[1], p->yblen);
/* unweighted bi-prediction: average reference 1 into the scratch buffer */
1510 s->avg_pixels_tab[idx](s->mcscratch, src, p->stride, p->yblen);
/* accumulate the weighted prediction into mctmp */
1513 s->add_obmc(mctmp, s->mcscratch, p->stride, obmc_weight, p->yblen);
/*
 * Motion compensate one row of blocks into mctmp.
 * The first block uses weight table 0, the blocks in between table 1 and
 * the last block table 2.
 * NOTE(review): the statements that advance mctmp and dstx between blocks
 * are absent from this listing.
 */
1516 static void mc_row(DiracContext *s, DiracBlock *block, uint16_t *mctmp, int plane, int dsty)
1518 Plane *p = &s->plane[plane];
1519 int x, dstx = p->xbsep - p->xoffset;
/* leftmost block starts xoffset samples left of the plane */
1521 block_mc(s, block, mctmp, s->obmc_weight[0], plane, -p->xoffset, dsty);
1524 for (x = 1; x < s->blwidth-1; x++) {
1525 block_mc(s, block+x, mctmp, s->obmc_weight[1], plane, dstx, dsty);
/* rightmost block; x equals blwidth-1 after the loop when blwidth >= 2 */
1529 block_mc(s, block+x, mctmp, s->obmc_weight[2], plane, dstx, dsty);
/*
 * Select the put/avg/obmc/weight DSP functions used for motion compensation.
 * NOTE(review): the computation of idx is absent from this listing.
 */
1532 static void select_dsp_funcs(DiracContext *s, int width, int height, int xblen, int yblen)
1540 memcpy(s->put_pixels_tab, s->diracdsp.put_dirac_pixels_tab[idx], sizeof(s->put_pixels_tab));
1541 memcpy(s->avg_pixels_tab, s->diracdsp.avg_dirac_pixels_tab[idx], sizeof(s->avg_pixels_tab));
1542 s->add_obmc = s->diracdsp.add_dirac_obmc[idx];
/* weighting functions are only installed for non-default weights ... */
1543 if (s->weight_log2denom > 1 || s->weight[0] != 1 || s->weight[1] != 1) {
1544 s->weight_func = s->diracdsp.weight_dirac_pixels_tab[idx];
1545 s->biweight_func = s->diracdsp.biweight_dirac_pixels_tab[idx];
/* ... otherwise they stay NULL; block_mc() tests biweight_func before using it */
1547 s->weight_func = NULL;
1548 s->biweight_func = NULL;
/*
 * Prepare one plane of a reference frame for motion compensation:
 * hpel[plane][0] is the frame data itself, hpel[plane][1..3] are separately
 * allocated planes filled by dirac_hpel_filter(); edges are drawn around all
 * of them.
 */
1552 static void interpolate_refplane(DiracContext *s, DiracFrame *ref, int plane, int width, int height)
1554 /* chroma allocates an edge of 8 when subsampled
1555 which for 4:2:2 means an h edge of 16 and v edge of 8
1556 just use 8 for everything for the moment */
1557 int i, edge = EDGE_WIDTH/2;
1559 ref->hpel[plane][0] = ref->avframe->data[plane];
1560 s->mpvencdsp.draw_edges(ref->hpel[plane][0], ref->avframe->linesize[plane], width, height, edge, edge, EDGE_TOP | EDGE_BOTTOM); /* EDGE_TOP | EDGE_BOTTOM values just copied to make it build, this needs to be ensured */
1562 /* no need for hpel if we only have fpel vectors */
1563 if (!s->mv_precision)
1566 for (i = 1; i < 4; i++) {
/* allocated once per plane and kept in hpel_base for reuse */
/* NOTE(review): the av_malloc() result is not checked before an offset is
   added to it two lines below; this function returns void, so a failure
   cannot be reported to the caller as written. */
1567 if (!ref->hpel_base[plane][i])
1568 ref->hpel_base[plane][i] = av_malloc((height+2*edge) * ref->avframe->linesize[plane] + 32);
1569 /* we need to be 16-byte aligned even for chroma */
1570 ref->hpel[plane][i] = ref->hpel_base[plane][i] + edge*ref->avframe->linesize[plane] + 16;
/* filter only once per frame and plane */
1573 if (!ref->interpolated[plane]) {
1574 s->diracdsp.dirac_hpel_filter(ref->hpel[plane][1], ref->hpel[plane][2],
1575 ref->hpel[plane][3], ref->hpel[plane][0],
1576 ref->avframe->linesize[plane], width, height);
1577 s->mpvencdsp.draw_edges(ref->hpel[plane][1], ref->avframe->linesize[plane], width, height, edge, edge, EDGE_TOP | EDGE_BOTTOM);
1578 s->mpvencdsp.draw_edges(ref->hpel[plane][2], ref->avframe->linesize[plane], width, height, edge, edge, EDGE_TOP | EDGE_BOTTOM);
1579 s->mpvencdsp.draw_edges(ref->hpel[plane][3], ref->avframe->linesize[plane], width, height, edge, edge, EDGE_TOP | EDGE_BOTTOM);
1581 ref->interpolated[plane] = 1;
1585 * Dirac Specification ->
1586 * 13.0 Transform data syntax. transform_data()
/*
 * Decode the transform data of the current picture and reconstruct its three
 * components: inverse wavelet transform and, for inter pictures, block
 * motion compensation added to the residual.
 * NOTE(review): several guards, declarations and return statements are
 * absent from this listing.
 */
1588 static int dirac_decode_frame_internal(DiracContext *s)
1591 int y, i, comp, dsty;
1594 /* [DIRAC_STD] 13.5.1 low_delay_transform_data() */
/* clear the coefficient buffers of all three components */
1595 for (comp = 0; comp < 3; comp++) {
1596 Plane *p = &s->plane[comp];
1597 memset(p->idwt_buf, 0, p->idwt_stride * p->idwt_height * sizeof(IDWTELEM));
1603 for (comp = 0; comp < 3; comp++) {
1604 Plane *p = &s->plane[comp];
1605 uint8_t *frame = s->current_picture->avframe->data[comp];
1607 /* FIXME: small resolutions */
/* carve four edge emulation buffers out of one base allocation */
1608 for (i = 0; i < 4; i++)
1609 s->edge_emu_buffer[i] = s->edge_emu_buffer_base + i*FFALIGN(p->width, 16);
1611 if (!s->zero_res && !s->low_delay)
1613 memset(p->idwt_buf, 0, p->idwt_stride * p->idwt_height * sizeof(IDWTELEM));
1614 decode_component(s, comp); /* [DIRAC_STD] 13.4.1 core_transform_data() */
1616 if (ff_spatial_idwt_init2(&d, p->idwt_buf, p->idwt_width, p->idwt_height, p->idwt_stride,
1617 s->wavelet_idx+2, s->wavelet_depth, p->idwt_tmp))
1620 if (!s->num_refs) { /* intra */
/* inverse transform and output 16 rows at a time */
1621 for (y = 0; y < p->height; y += 16) {
1622 ff_spatial_idwt_slice2(&d, y+16); /* decode */
1623 s->diracdsp.put_signed_rect_clamped(frame + y*p->stride, p->stride,
1624 p->idwt_buf + y*p->idwt_stride, p->idwt_stride, p->width, 16);
1626 } else { /* inter */
/* distance in mctmp between two block rows */
1627 int rowheight = p->ybsep*p->stride;
1629 select_dsp_funcs(s, p->width, p->height, p->xblen, p->yblen);
1631 for (i = 0; i < s->num_refs; i++)
1632 interpolate_refplane(s, s->ref_pics[i], comp, p->width, p->height);
1634 memset(s->mctmp, 0, 4*p->yoffset*p->stride);
/* one iteration per row of blocks */
1637 for (y = 0; y < s->blheight; y++) {
/* dsty may be negative for the first row, so output starts at row 0 */
1639 start = FFMAX(dsty, 0);
1640 uint16_t *mctmp = s->mctmp + y*rowheight;
1641 DiracBlock *blocks = s->blmotion + y*s->blwidth;
1643 init_obmc_weights(s, p, y);
/* h: number of picture rows completed by this block row */
1645 if (y == s->blheight-1 || start+p->ybsep > p->height)
1646 h = p->height - start;
1648 h = p->ybsep - (start - dsty);
/* clear the accumulator area that the overlap of this block row reaches into */
1652 memset(mctmp+2*p->yoffset*p->stride, 0, 2*rowheight);
1653 mc_row(s, blocks, mctmp, comp, dsty);
/* skip rows above the picture and the left overlap margin */
1655 mctmp += (start - dsty)*p->stride + p->xoffset;
1656 ff_spatial_idwt_slice2(&d, start + h); /* decode */
/* add the prediction to the residual and write the clamped result to the frame */
1657 s->diracdsp.add_rect_clamped(frame + start*p->stride, mctmp, p->stride,
1658 p->idwt_buf + start*p->idwt_stride, p->idwt_stride, p->width, h);
/*
 * Allocate a frame buffer that is larger than the picture and move the data
 * pointers inside it, so that there is room around the picture.
 * The frame is requested with enlarged dimensions, the plane pointers are
 * advanced, and the nominal dimensions are restored afterwards.
 */
1669 static int get_buffer_with_edge(AVCodecContext *avctx, AVFrame *f, int flags)
1672 int chroma_x_shift, chroma_y_shift;
1673 avcodec_get_chroma_sub_sample(avctx->pix_fmt, &chroma_x_shift, &chroma_y_shift);
/* request room for EDGE_WIDTH on every side (plus two extra rows) */
1675 f->width = avctx->width + 2 * EDGE_WIDTH;
1676 f->height = avctx->height + 2 * EDGE_WIDTH + 2;
1677 ret = ff_get_buffer(avctx, f, flags);
/* skip the top edge rows (scaled by the vertical chroma shift for planes 1
   and 2) plus a fixed 32 bytes horizontally */
1681 for (i = 0; f->data[i]; i++) {
1682 int offset = (EDGE_WIDTH >> (i && i<3 ? chroma_y_shift : 0)) *
1683 f->linesize[i] + 32;
1684 f->data[i] += offset;
/* report the real picture size again */
1686 f->width = avctx->width;
1687 f->height = avctx->height;
1693 * Dirac Specification ->
1694 * 11.1.1 Picture Header. picture_header()
/*
 * Parse the picture header: picture number, reference picture numbers and
 * the retired picture number, then the prediction parameters, block motion
 * data and wavelet parameters.  Also maintains the reference frame list.
 * NOTE(review): several guards and return statements are absent from this
 * listing.
 */
1696 static int dirac_decode_picture_header(DiracContext *s)
1699 int i, j, refnum, refdist;
1700 GetBitContext *gb = &s->gb;
1702 /* [DIRAC_STD] 11.1.1 Picture Header. picture_header() PICTURE_NUM */
1703 picnum = s->current_picture->avframe->display_picture_number = get_bits_long(gb, 32);
1706 av_log(s->avctx,AV_LOG_DEBUG,"PICTURE_NUM: %d\n",picnum);
1708 /* if this is the first keyframe after a sequence header, start our
1709 reordering from here */
1710 if (s->frame_number < 0)
1711 s->frame_number = picnum;
1713 s->ref_pics[0] = s->ref_pics[1] = NULL;
1714 for (i = 0; i < s->num_refs; i++) {
/* reference numbers are coded relative to the current picture number */
1715 refnum = picnum + dirac_get_se_golomb(gb);
1718 /* find the closest reference to the one we want */
1719 /* Jordi: this is needed if the referenced picture hasn't yet arrived */
/* the search stops early once an exact match (refdist == 0) was found */
1720 for (j = 0; j < MAX_REFERENCE_FRAMES && refdist; j++)
1721 if (s->ref_frames[j]
1722 && FFABS(s->ref_frames[j]->avframe->display_picture_number - refnum) < refdist) {
1723 s->ref_pics[i] = s->ref_frames[j];
1724 refdist = FFABS(s->ref_frames[j]->avframe->display_picture_number - refnum);
1727 if (!s->ref_pics[i] || refdist)
1728 av_log(s->avctx, AV_LOG_DEBUG, "Reference not found\n");
1730 /* if there were no references at all, allocate one */
/* NOTE(review): the return value of get_buffer_with_edge() is ignored here,
   so a failed allocation leaves a reference frame without data. */
1731 if (!s->ref_pics[i])
1732 for (j = 0; j < MAX_FRAMES; j++)
1733 if (!s->all_frames[j].avframe->data[0]) {
1734 s->ref_pics[i] = &s->all_frames[j];
1735 get_buffer_with_edge(s->avctx, s->ref_pics[i]->avframe, AV_GET_BUFFER_FLAG_REF);
1740 /* retire the reference frames that are not used anymore */
1741 if (s->current_picture->avframe->reference) {
/* the retired picture number is coded relative to the current one */
1742 retire = picnum + dirac_get_se_golomb(gb);
1743 if (retire != picnum) {
1744 DiracFrame *retire_pic = remove_frame(s->ref_frames, retire);
/* keep only the delayed-output flag */
1747 retire_pic->avframe->reference &= DELAYED_PIC_REF;
1749 av_log(s->avctx, AV_LOG_DEBUG, "Frame to retire not found\n");
1752 /* if reference array is full, remove the oldest as per the spec */
/* NOTE(review): the result of remove_frame() is dereferenced without a
   check below; confirm that it cannot be NULL for a frame taken from
   ref_frames[0]. */
1753 while (add_frame(s->ref_frames, MAX_REFERENCE_FRAMES, s->current_picture)) {
1754 av_log(s->avctx, AV_LOG_ERROR, "Reference frame overflow\n");
1755 remove_frame(s->ref_frames, s->ref_frames[0]->avframe->display_picture_number)->avframe->reference &= DELAYED_PIC_REF;
1760 if (dirac_unpack_prediction_parameters(s)) /* [DIRAC_STD] 11.2 Picture Prediction Data. picture_prediction() */
1762 if (dirac_unpack_block_motion_data(s)) /* [DIRAC_STD] 12. Block motion data syntax */
1765 if (dirac_unpack_idwt_params(s)) /* [DIRAC_STD] 11.3 Wavelet transform data */
/*
 * Output the delayed frame with the lowest display picture number: it is
 * removed from s->delay_frames, its DELAYED_PIC_REF flag is toggled and a
 * reference to it is stored in picture.
 * NOTE(review): the handling of an empty delay list and the declaration and
 * update of out_idx are absent from this listing.
 */
1772 static int get_delayed_pic(DiracContext *s, AVFrame *picture, int *got_frame)
1774 DiracFrame *out = s->delay_frames[0];
1778 /* find frame with lowest picture number */
1779 for (i = 1; s->delay_frames[i]; i++)
1780 if (s->delay_frames[i]->avframe->display_picture_number < out->avframe->display_picture_number) {
1781 out = s->delay_frames[i];
/* close the gap: shift the following entries, including the terminating NULL */
1785 for (i = out_idx; s->delay_frames[i]; i++)
1786 s->delay_frames[i] = s->delay_frames[i+1];
1789 out->avframe->reference ^= DELAYED_PIC_REF;
1791 if((ret = av_frame_ref(picture, out->avframe)) < 0)
1799 * Dirac Specification ->
1800 * 9.6 Parse Info Header Syntax. parse_info()
1801 * 4 byte start code + byte parse code + 4 byte size + 4 byte previous size
/* 4 (start code) + 1 (parse code) + 4 (size) + 4 (previous size) bytes */
1803 #define DATA_UNIT_HEADER_SIZE 13
1805 /* [DIRAC_STD] dirac_decode_data_unit refers to the while loop defined in 9.3
1806 inside the function parse_sequence() */
/*
 * Decode one data unit.  buf points at the start of the parse info header
 * and size is the size of the unit; the parse code (buf[4]) selects between
 * sequence header, end of sequence, auxiliary data and picture data.
 * NOTE(review): the return statements and some guards are absent from this
 * listing.
 */
1807 static int dirac_decode_data_unit(AVCodecContext *avctx, const uint8_t *buf, int size)
1809 DiracContext *s = avctx->priv_data;
1810 DiracFrame *pic = NULL;
/* NOTE(review): buf[4] is read here, before the size check below; this is
   only safe if the caller guarantees at least 5 readable bytes. */
1811 int ret, i, parse_code = buf[4];
1814 if (size < DATA_UNIT_HEADER_SIZE)
/* the payload starts right after the 13 byte header */
1817 init_get_bits(&s->gb, &buf[13], 8*(size - DATA_UNIT_HEADER_SIZE));
1819 if (parse_code == pc_seq_header) {
1820 if (s->seen_sequence_header)
1823 /* [DIRAC_STD] 10. Sequence header */
1824 if (avpriv_dirac_parse_sequence_header(avctx, &s->gb, &s->source))
1827 avcodec_get_chroma_sub_sample(avctx->pix_fmt, &s->chroma_x_shift, &s->chroma_y_shift);
1829 if (alloc_sequence_buffers(s))
1832 s->seen_sequence_header = 1;
1833 } else if (parse_code == pc_eos) { /* [DIRAC_STD] End of Sequence */
1834 free_sequence_buffers(s);
1835 s->seen_sequence_header = 0;
1836 } else if (parse_code == pc_aux_data) {
1837 if (buf[13] == 1) { /* encoder implementation/version */
1839 /* versions older than 1.0.8 don't store quant delta for
1840 subbands with only one codeblock */
/* NOTE(review): sscanf() parses packet bytes as a C string; this relies on a
   terminating NUL (or a non-matching byte) being present within the
   readable input — confirm. */
1841 if (sscanf(buf+14, "Schroedinger %d.%d.%d", ver, ver+1, ver+2) == 3)
1842 if (ver[0] == 1 && ver[1] == 0 && ver[2] <= 7)
1843 s->old_delta_quant = 1;
1845 } else if (parse_code & 0x8) { /* picture data unit */
1846 if (!s->seen_sequence_header) {
1847 av_log(avctx, AV_LOG_DEBUG, "Dropping frame without sequence header\n");
1851 /* find an unused frame */
/* no early exit is visible here, so the last unused entry is the one kept */
1852 for (i = 0; i < MAX_FRAMES; i++)
1853 if (s->all_frames[i].avframe->data[0] == NULL)
1854 pic = &s->all_frames[i];
1856 av_log(avctx, AV_LOG_ERROR, "framelist full\n");
1860 av_frame_unref(pic->avframe);
1862 /* [DIRAC_STD] Defined in 9.6.1 ... */
1863 tmp = parse_code & 0x03; /* [DIRAC_STD] num_refs() */
1865 av_log(avctx, AV_LOG_ERROR, "num_refs of 3\n");
1869 s->is_arith = (parse_code & 0x48) == 0x08; /* [DIRAC_STD] using_ac() */
1870 s->low_delay = (parse_code & 0x88) == 0x88; /* [DIRAC_STD] is_low_delay() */
1871 pic->avframe->reference = (parse_code & 0x0C) == 0x0C; /* [DIRAC_STD] is_reference() */
1872 pic->avframe->key_frame = s->num_refs == 0; /* [DIRAC_STD] is_intra() */
1873 pic->avframe->pict_type = s->num_refs + 1; /* Definition of AVPictureType in avutil.h */
/* reference pictures request a buffer with AV_GET_BUFFER_FLAG_REF */
1875 if ((ret = get_buffer_with_edge(avctx, pic->avframe, (parse_code & 0x0C) == 0x0C ? AV_GET_BUFFER_FLAG_REF : 0)) < 0)
1877 s->current_picture = pic;
1878 s->plane[0].stride = pic->avframe->linesize[0];
1879 s->plane[1].stride = pic->avframe->linesize[1];
1880 s->plane[2].stride = pic->avframe->linesize[2];
/* size the scratch buffers for the largest stride among the three planes */
1882 if (alloc_buffers(s, FFMAX3(FFABS(s->plane[0].stride), FFABS(s->plane[1].stride), FFABS(s->plane[2].stride))) < 0)
1883 return AVERROR(ENOMEM);
1885 /* [DIRAC_STD] 11.1 Picture parse. picture_parse() */
1886 if (dirac_decode_picture_header(s))
1889 /* [DIRAC_STD] 13.0 Transform data syntax. transform_data() */
1890 if (dirac_decode_frame_internal(s))
/*
 * Decoder entry point: release unused frames, split the packet into data
 * units (located by their "BBCD" start code), decode each of them and then
 * output either the current picture or a delayed one, in display order.
 * NOTE(review): loop headers, break/continue/return statements and some
 * declarations are absent from this listing.
 */
1896 static int dirac_decode_frame(AVCodecContext *avctx, void *data, int *got_frame, AVPacket *pkt)
1898 DiracContext *s = avctx->priv_data;
1899 AVFrame *picture = data;
1900 uint8_t *buf = pkt->data;
1901 int buf_size = pkt->size;
1902 int i, data_unit_size, buf_idx = 0;
1905 /* release unused frames */
/* a frame is unused once it holds data but no reference flag is set any more */
1906 for (i = 0; i < MAX_FRAMES; i++)
1907 if (s->all_frames[i].avframe->data[0] && !s->all_frames[i].avframe->reference) {
1908 av_frame_unref(s->all_frames[i].avframe);
1909 memset(s->all_frames[i].interpolated, 0, sizeof(s->all_frames[i].interpolated));
1912 s->current_picture = NULL;
1915 /* end of stream, so flush delayed pics */
1917 return get_delayed_pic(s, (AVFrame *)data, got_frame);
1920 /*[DIRAC_STD] Here starts the code from parse_info() defined in 9.6
1921 [DIRAC_STD] PARSE_INFO_PREFIX = "BBCD" as defined in ISO/IEC 646
1922 BBCD start code search */
1923 for (; buf_idx + DATA_UNIT_HEADER_SIZE < buf_size; buf_idx++) {
1924 if (buf[buf_idx ] == 'B' && buf[buf_idx+1] == 'B' &&
1925 buf[buf_idx+2] == 'C' && buf[buf_idx+3] == 'D')
1928 /* BBCD found or end of data */
1929 if (buf_idx + DATA_UNIT_HEADER_SIZE >= buf_size)
/* the size field follows the 4 byte start code and the 1 byte parse code */
/* NOTE(review): data_unit_size is a signed int loaded from a 32-bit field;
   presumably a value of 2^31 or more becomes negative, passes the
   "> buf_size" test below and can overflow buf_idx + data_unit_size —
   confirm and consider an unsigned comparison against buf_size - buf_idx. */
1932 data_unit_size = AV_RB32(buf+buf_idx+5);
1933 if (buf_idx + data_unit_size > buf_size || !data_unit_size) {
1934 if(buf_idx + data_unit_size > buf_size)
1935 av_log(s->avctx, AV_LOG_ERROR,
1936 "Data unit with size %d is larger than input buffer, discarding\n",
1941 /* [DIRAC_STD] dirac_decode_data_unit makes reference to the while defined in 9.3 inside the function parse_sequence() */
1942 if (dirac_decode_data_unit(avctx, buf+buf_idx, data_unit_size))
1944 av_log(s->avctx, AV_LOG_ERROR,"Error in dirac_decode_data_unit\n");
1947 buf_idx += data_unit_size;
/* no picture data unit was decoded from this packet */
1950 if (!s->current_picture)
/* the decoded picture is ahead of the next one to display: queue it and
   output the frame numbered s->frame_number if it is already waiting */
1953 if (s->current_picture->avframe->display_picture_number > s->frame_number) {
1954 DiracFrame *delayed_frame = remove_frame(s->delay_frames, s->frame_number);
1956 s->current_picture->avframe->reference |= DELAYED_PIC_REF;
1958 if (add_frame(s->delay_frames, MAX_DELAY, s->current_picture)) {
1959 int min_num = s->delay_frames[0]->avframe->display_picture_number;
1960 /* Too many delayed frames, so we display the frame with the lowest pts */
1961 av_log(avctx, AV_LOG_ERROR, "Delay frame overflow\n");
1962 delayed_frame = s->delay_frames[0];
1964 for (i = 1; s->delay_frames[i]; i++)
1965 if (s->delay_frames[i]->avframe->display_picture_number < min_num)
1966 min_num = s->delay_frames[i]->avframe->display_picture_number;
/* make room by taking out the lowest numbered frame, then retry the insert */
1968 delayed_frame = remove_frame(s->delay_frames, min_num);
1969 add_frame(s->delay_frames, MAX_DELAY, s->current_picture);
1972 if (delayed_frame) {
1973 delayed_frame->avframe->reference ^= DELAYED_PIC_REF;
1974 if((ret=av_frame_ref(data, delayed_frame->avframe)) < 0)
1978 } else if (s->current_picture->avframe->display_picture_number == s->frame_number) {
1979 /* The right frame at the right time :-) */
1980 if((ret=av_frame_ref(data, s->current_picture->avframe)) < 0)
/* the next frame to display follows the one just output */
1986 s->frame_number = picture->display_picture_number + 1;
/* Codec descriptor that registers the init/close/decode/flush callbacks of
 * this decoder. */
1991 AVCodec ff_dirac_decoder = {
1993 .long_name = NULL_IF_CONFIG_SMALL("BBC Dirac VC-2"),
1994 .type = AVMEDIA_TYPE_VIDEO,
1995 .id = AV_CODEC_ID_DIRAC,
1996 .priv_data_size = sizeof(DiracContext),
1997 .init = dirac_decode_init,
1998 .close = dirac_decode_end,
1999 .decode = dirac_decode_frame,
/* CODEC_CAP_DELAY: dirac_decode_frame() flushes delayed pictures at end of stream */
2000 .capabilities = CODEC_CAP_DELAY,
2001 .flush = dirac_decode_flush,