2 * Copyright (C) 2007 Marco Gerards <marco@gnu.org>
3 * Copyright (C) 2009 David Conrad
4 * Copyright (C) 2011 Jordi Ortiz
6 * This file is part of FFmpeg.
8 * FFmpeg is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
13 * FFmpeg is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with FFmpeg; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
26 * @author Marco Gerards <marco@gnu.org>, David Conrad, Jordi Ortiz <nenjordi@gmail.com>
32 #include "bytestream.h"
35 #include "dirac_arith.h"
36 #include "mpeg12data.h"
37 #include "dirac_dwt.h"
40 #include "videodsp.h" // for ff_emulated_edge_mc_8
43 * The spec limits the number of wavelet decompositions to 4 for both
44 * level 1 (VC-2) and 128 (long-gop default).
45 * 5 decompositions is the maximum before >16-bit buffers are needed.
46 * Schroedinger allows this for DD 9,7 and 13,7 wavelets only, limiting
47 * the others to 4 decompositions (or 3 for the fidelity filter).
49 * We use this instead of MAX_DECOMPOSITIONS to save some memory.
51 #define MAX_DWT_LEVELS 5
54 * The spec limits this to 3 for frame coding, but in practice can be as high as 6
56 #define MAX_REFERENCE_FRAMES 8
57 #define MAX_DELAY 5 /* limit for main profile for frame coding (TODO: field coding) */
58 #define MAX_FRAMES (MAX_REFERENCE_FRAMES + MAX_DELAY + 1)
59 #define MAX_QUANT 68 /* max quant for VC-2 */
60 #define MAX_BLOCKSIZE 32 /* maximum xblen/yblen we support */
63 * DiracBlock->ref flags, if set then the block does MC from the given ref
65 #define DIRAC_REF_MASK_REF1 1
66 #define DIRAC_REF_MASK_REF2 2
67 #define DIRAC_REF_MASK_GLOBAL 4
70 * Value of Picture.reference when Picture is not a reference picture, but
71 * is held for delayed output.
73 #define DELAYED_PIC_REF 4
75 #define ff_emulated_edge_mc ff_emulated_edge_mc_8 /* Fix: change the calls to this function regarding bit depth */
/*
 * Round size up to the next multiple of (1 << depth).
 * Both arguments are fully parenthesized so that expression arguments
 * (e.g. a conditional expression) expand correctly.
 * Note: depth is evaluated more than once; pass side-effect-free expressions.
 */
#define CALC_PADDING(size, depth)                                   \
    ((((size) + (1 << (depth)) - 1) >> (depth)) << (depth))

/*
 * Integer division of a by b, rounding the quotient up
 * (as written this is the ceiling for non-negative a and positive b).
 */
#define DIVRNDUP(a, b) (((a) + (b) - 1) / (b))
84 int interpolated[3]; /* 1 if hpel[] is valid */
86 uint8_t *hpel_base[3][4];
93 } u; /* anonymous unions aren't in C99 :( */
97 typedef struct SubBand {
105 struct SubBand *parent;
109 const uint8_t *coeff_data;
112 typedef struct Plane {
121 IDWTELEM *idwt_buf_base;
127 /* block separation (block n+1 starts after this many pixels in block n) */
130 /* amount of overspill on each edge (half of the overlap between blocks) */
134 SubBand band[MAX_DWT_LEVELS][4];
137 typedef struct DiracContext {
138 AVCodecContext *avctx;
140 DiracDSPContext diracdsp;
142 dirac_source_params source;
143 int seen_sequence_header;
144 int frame_number; /* number of the next frame to display */
149 int zero_res; /* zero residue flag */
150 int is_arith; /* whether coeffs use arith or golomb coding */
151 int low_delay; /* use the low delay syntax */
152 int globalmc_flag; /* use global motion compensation */
153 int num_refs; /* number of reference pictures */
155 /* wavelet decoding */
156 unsigned wavelet_depth; /* depth of the IDWT */
157 unsigned wavelet_idx;
160 * schroedinger older than 1.0.8 doesn't store
161 * quant delta if only one codebook exists in a band
163 unsigned old_delta_quant;
164 unsigned codeblock_mode;
169 } codeblock[MAX_DWT_LEVELS+1];
172 unsigned num_x; /* number of horizontal slices */
173 unsigned num_y; /* number of vertical slices */
174 AVRational bytes; /* average bytes per slice */
175 uint8_t quant[MAX_DWT_LEVELS][4]; /* [DIRAC_STD] E.1 */
179 int pan_tilt[2]; /* pan/tilt vector */
180 int zrs[2][2]; /* zoom/rotate/shear matrix */
181 int perspective[2]; /* perspective vector */
183 unsigned perspective_exp;
186 /* motion compensation */
187 uint8_t mv_precision; /* [DIRAC_STD] REFS_WT_PRECISION */
188 int16_t weight[2]; /* [DIRAC_STD] REF1_WT and REF2_WT */
189 unsigned weight_log2denom; /* [DIRAC_STD] REFS_WT_PRECISION */
191 int blwidth; /* number of blocks (horizontally) */
192 int blheight; /* number of blocks (vertically) */
193 int sbwidth; /* number of superblocks (horizontally) */
194 int sbheight; /* number of superblocks (vertically) */
197 DiracBlock *blmotion;
199 uint8_t *edge_emu_buffer[4];
200 uint8_t *edge_emu_buffer_base;
202 uint16_t *mctmp; /* buffer holding the MC data multipled by OBMC weights */
206 DECLARE_ALIGNED(16, uint8_t, obmc_weight)[3][MAX_BLOCKSIZE*MAX_BLOCKSIZE];
208 void (*put_pixels_tab[4])(uint8_t *dst, const uint8_t *src[5], int stride, int h);
209 void (*avg_pixels_tab[4])(uint8_t *dst, const uint8_t *src[5], int stride, int h);
210 void (*add_obmc)(uint16_t *dst, const uint8_t *src, int stride, const uint8_t *obmc_weight, int yblen);
211 dirac_weight_func weight_func;
212 dirac_biweight_func biweight_func;
214 DiracFrame *current_picture;
215 DiracFrame *ref_pics[2];
217 DiracFrame *ref_frames[MAX_REFERENCE_FRAMES+1];
218 DiracFrame *delay_frames[MAX_DELAY+1];
219 DiracFrame all_frames[MAX_FRAMES];
223 * Dirac Specification ->
224 * Parse code values. 9.6.1 Table 9.1
226 enum dirac_parse_code {
227 pc_seq_header = 0x00,
/*
 * Default quantisation matrices for the low-delay syntax.
 * Indexed as default_qmat[wavelet_idx][level][orientation]; there is one
 * entry per supported wavelet index (0..6).
 */
static const uint8_t default_qmat[7][4][4] = {
    /* wavelet_idx 0 */
    {
        { 5,  3,  3,  0 },
        { 0,  4,  4,  1 },
        { 0,  5,  5,  2 },
        { 0,  6,  6,  3 },
    },
    /* wavelet_idx 1 */
    {
        { 4,  2,  2,  0 },
        { 0,  4,  4,  2 },
        { 0,  5,  5,  3 },
        { 0,  7,  7,  5 },
    },
    /* wavelet_idx 2 */
    {
        { 5,  3,  3,  0 },
        { 0,  4,  4,  1 },
        { 0,  5,  5,  2 },
        { 0,  6,  6,  3 },
    },
    /* wavelet_idx 3 */
    {
        { 8,  4,  4,  0 },
        { 0,  4,  4,  0 },
        { 0,  4,  4,  0 },
        { 0,  4,  4,  0 },
    },
    /* wavelet_idx 4 */
    {
        { 8,  4,  4,  0 },
        { 0,  4,  4,  0 },
        { 0,  4,  4,  0 },
        { 0,  4,  4,  0 },
    },
    /* wavelet_idx 5 */
    {
        { 0,  4,  4,  8 },
        { 0,  8,  8, 12 },
        { 0, 13, 13, 17 },
        { 0, 17, 17, 21 },
    },
    /* wavelet_idx 6 */
    {
        { 3,  1,  1,  0 },
        { 0,  4,  4,  2 },
        { 0,  6,  6,  5 },
        { 0,  9,  9,  7 },
    },
};
250 static const int qscale_tab[MAX_QUANT+1] = {
251 4, 5, 6, 7, 8, 10, 11, 13,
252 16, 19, 23, 27, 32, 38, 45, 54,
253 64, 76, 91, 108, 128, 152, 181, 215,
254 256, 304, 362, 431, 512, 609, 724, 861,
255 1024, 1218, 1448, 1722, 2048, 2435, 2896, 3444,
256 4096, 4871, 5793, 6889, 8192, 9742, 11585, 13777,
257 16384, 19484, 23170, 27554, 32768, 38968, 46341, 55109,
261 static const int qoffset_intra_tab[MAX_QUANT+1] = {
262 1, 2, 3, 4, 4, 5, 6, 7,
263 8, 10, 12, 14, 16, 19, 23, 27,
264 32, 38, 46, 54, 64, 76, 91, 108,
265 128, 152, 181, 216, 256, 305, 362, 431,
266 512, 609, 724, 861, 1024, 1218, 1448, 1722,
267 2048, 2436, 2897, 3445, 4096, 4871, 5793, 6889,
268 8192, 9742, 11585, 13777, 16384, 19484, 23171, 27555,
272 static const int qoffset_inter_tab[MAX_QUANT+1] = {
273 1, 2, 2, 3, 3, 4, 4, 5,
274 6, 7, 9, 10, 12, 14, 17, 20,
275 24, 29, 34, 41, 48, 57, 68, 81,
276 96, 114, 136, 162, 192, 228, 272, 323,
277 384, 457, 543, 646, 768, 913, 1086, 1292,
278 1536, 1827, 2172, 2583, 3072, 3653, 4344, 5166,
279 6144, 7307, 8689, 10333, 12288, 14613, 17378, 20666,
/*
 * Division by 3 without a divide instruction (magic numbers taken from
 * schroedinger): multiply by 21845 (65536/3, truncated), add a bias and
 * shift right by 16. For the small non-negative inputs checked this gives
 * x/3 rounded to the nearest integer.
 */
static inline int divide3(int x)
{
    const int recip = 21845;  /* 16-bit fixed-point approximation of 1/3 */
    const int bias  = 10922;  /* added before the shift */
    int scaled = (x + 1) * recip + bias;

    return scaled >> 16;
}
289 static DiracFrame *remove_frame(DiracFrame *framelist[], int picnum)
291 DiracFrame *remove_pic = NULL;
292 int i, remove_idx = -1;
294 for (i = 0; framelist[i]; i++)
295 if (framelist[i]->avframe->display_picture_number == picnum) {
296 remove_pic = framelist[i];
301 for (i = remove_idx; framelist[i]; i++)
302 framelist[i] = framelist[i+1];
307 static int add_frame(DiracFrame *framelist[], int maxframes, DiracFrame *frame)
310 for (i = 0; i < maxframes; i++)
312 framelist[i] = frame;
318 static int alloc_sequence_buffers(DiracContext *s)
320 int sbwidth = DIVRNDUP(s->source.width, 4);
321 int sbheight = DIVRNDUP(s->source.height, 4);
322 int i, w, h, top_padding;
324 /* todo: think more about this / use or set Plane here */
325 for (i = 0; i < 3; i++) {
326 int max_xblen = MAX_BLOCKSIZE >> (i ? s->chroma_x_shift : 0);
327 int max_yblen = MAX_BLOCKSIZE >> (i ? s->chroma_y_shift : 0);
328 w = s->source.width >> (i ? s->chroma_x_shift : 0);
329 h = s->source.height >> (i ? s->chroma_y_shift : 0);
331 /* we allocate the max we support here since num decompositions can
332 * change from frame to frame. Stride is aligned to 16 for SIMD, and
333 * 1<<MAX_DWT_LEVELS top padding to avoid if(y>0) in arith decoding
334 * MAX_BLOCKSIZE padding for MC: blocks can spill up to half of that
336 top_padding = FFMAX(1<<MAX_DWT_LEVELS, max_yblen/2);
337 w = FFALIGN(CALC_PADDING(w, MAX_DWT_LEVELS), 8); /* FIXME: Should this be 16 for SSE??? */
338 h = top_padding + CALC_PADDING(h, MAX_DWT_LEVELS) + max_yblen/2;
340 s->plane[i].idwt_buf_base = av_mallocz_array((w+max_xblen), h * sizeof(IDWTELEM));
341 s->plane[i].idwt_tmp = av_malloc_array((w+16), sizeof(IDWTELEM));
342 s->plane[i].idwt_buf = s->plane[i].idwt_buf_base + top_padding*w;
343 if (!s->plane[i].idwt_buf_base || !s->plane[i].idwt_tmp)
344 return AVERROR(ENOMEM);
347 /* fixme: allocate using real stride here */
348 s->sbsplit = av_malloc_array(sbwidth, sbheight);
349 s->blmotion = av_malloc_array(sbwidth, sbheight * 16 * sizeof(*s->blmotion));
351 if (!s->sbsplit || !s->blmotion)
352 return AVERROR(ENOMEM);
356 static int alloc_buffers(DiracContext *s, int stride)
358 int w = s->source.width;
359 int h = s->source.height;
361 av_assert0(stride >= w);
364 if (s->buffer_stride >= stride)
366 s->buffer_stride = 0;
368 av_freep(&s->edge_emu_buffer_base);
369 memset(s->edge_emu_buffer, 0, sizeof(s->edge_emu_buffer));
371 av_freep(&s->mcscratch);
373 s->edge_emu_buffer_base = av_malloc_array(stride, MAX_BLOCKSIZE);
375 s->mctmp = av_malloc_array((stride+MAX_BLOCKSIZE), (h+MAX_BLOCKSIZE) * sizeof(*s->mctmp));
376 s->mcscratch = av_malloc_array(stride, MAX_BLOCKSIZE);
378 if (!s->edge_emu_buffer_base || !s->mctmp || !s->mcscratch)
379 return AVERROR(ENOMEM);
381 s->buffer_stride = stride;
385 static void free_sequence_buffers(DiracContext *s)
389 for (i = 0; i < MAX_FRAMES; i++) {
390 if (s->all_frames[i].avframe->data[0]) {
391 av_frame_unref(s->all_frames[i].avframe);
392 memset(s->all_frames[i].interpolated, 0, sizeof(s->all_frames[i].interpolated));
395 for (j = 0; j < 3; j++)
396 for (k = 1; k < 4; k++)
397 av_freep(&s->all_frames[i].hpel_base[j][k]);
400 memset(s->ref_frames, 0, sizeof(s->ref_frames));
401 memset(s->delay_frames, 0, sizeof(s->delay_frames));
403 for (i = 0; i < 3; i++) {
404 av_freep(&s->plane[i].idwt_buf_base);
405 av_freep(&s->plane[i].idwt_tmp);
408 s->buffer_stride = 0;
409 av_freep(&s->sbsplit);
410 av_freep(&s->blmotion);
411 av_freep(&s->edge_emu_buffer_base);
414 av_freep(&s->mcscratch);
417 static av_cold int dirac_decode_init(AVCodecContext *avctx)
419 DiracContext *s = avctx->priv_data;
423 s->frame_number = -1;
425 ff_dsputil_init(&s->dsp, avctx);
426 ff_diracdsp_init(&s->diracdsp);
428 for (i = 0; i < MAX_FRAMES; i++) {
429 s->all_frames[i].avframe = av_frame_alloc();
430 if (!s->all_frames[i].avframe) {
432 av_frame_free(&s->all_frames[--i].avframe);
433 return AVERROR(ENOMEM);
440 static void dirac_decode_flush(AVCodecContext *avctx)
442 DiracContext *s = avctx->priv_data;
443 free_sequence_buffers(s);
444 s->seen_sequence_header = 0;
445 s->frame_number = -1;
448 static av_cold int dirac_decode_end(AVCodecContext *avctx)
450 DiracContext *s = avctx->priv_data;
453 dirac_decode_flush(avctx);
454 for (i = 0; i < MAX_FRAMES; i++)
455 av_frame_free(&s->all_frames[i].avframe);
/*
 * Pick the sign context relative to CTX_SIGN_ZERO: one below it for a
 * negative x, CTX_SIGN_ZERO itself for zero, one above it for a positive x.
 * Note: x is evaluated twice.
 */
#define SIGN_CTX(x) (CTX_SIGN_ZERO + (((x) > 0) - ((x) < 0)))
462 static inline void coeff_unpack_arith(DiracArith *c, int qfactor, int qoffset,
463 SubBand *b, IDWTELEM *buf, int x, int y)
467 int pred_ctx = CTX_ZPZN_F1;
469 /* Check if the parent subband has a 0 in the corresponding position */
471 pred_ctx += !!b->parent->ibuf[b->parent->stride * (y>>1) + (x>>1)] << 1;
473 if (b->orientation == subband_hl)
474 sign_pred = buf[-b->stride];
476 /* Determine if the pixel has only zeros in its neighbourhood */
478 pred_ctx += !(buf[-1] | buf[-b->stride] | buf[-1-b->stride]);
479 if (b->orientation == subband_lh)
482 pred_ctx += !buf[-b->stride];
485 coeff = dirac_get_arith_uint(c, pred_ctx, CTX_COEFF_DATA);
487 coeff = (coeff * qfactor + qoffset + 2) >> 2;
488 sign = dirac_get_arith_bit(c, SIGN_CTX(sign_pred));
489 coeff = (coeff ^ -sign) + sign;
494 static inline int coeff_unpack_golomb(GetBitContext *gb, int qfactor, int qoffset)
498 coeff = svq3_get_ue_golomb(gb);
500 coeff = (coeff * qfactor + qoffset + 2) >> 2;
501 sign = get_bits1(gb);
502 coeff = (coeff ^ -sign) + sign;
508 * Decode the coeffs in the rectangle defined by left, right, top, bottom
509 * [DIRAC_STD] 13.4.3.2 Codeblock unpacking loop. codeblock()
511 static inline void codeblock(DiracContext *s, SubBand *b,
512 GetBitContext *gb, DiracArith *c,
513 int left, int right, int top, int bottom,
514 int blockcnt_one, int is_arith)
516 int x, y, zero_block;
517 int qoffset, qfactor;
520 /* check for any coded coefficients in this codeblock */
523 zero_block = dirac_get_arith_bit(c, CTX_ZERO_BLOCK);
525 zero_block = get_bits1(gb);
531 if (s->codeblock_mode && !(s->old_delta_quant && blockcnt_one)) {
532 int quant = b->quant;
534 quant += dirac_get_arith_int(c, CTX_DELTA_Q_F, CTX_DELTA_Q_DATA);
536 quant += dirac_get_se_golomb(gb);
538 av_log(s->avctx, AV_LOG_ERROR, "Invalid quant\n");
544 b->quant = FFMIN(b->quant, MAX_QUANT);
546 qfactor = qscale_tab[b->quant];
547 /* TODO: context pointer? */
549 qoffset = qoffset_intra_tab[b->quant];
551 qoffset = qoffset_inter_tab[b->quant];
553 buf = b->ibuf + top * b->stride;
554 for (y = top; y < bottom; y++) {
555 for (x = left; x < right; x++) {
556 /* [DIRAC_STD] 13.4.4 Subband coefficients. coeff_unpack() */
558 coeff_unpack_arith(c, qfactor, qoffset, b, buf+x, x, y);
560 buf[x] = coeff_unpack_golomb(gb, qfactor, qoffset);
567 * Dirac Specification ->
568 * 13.3 intra_dc_prediction(band)
570 static inline void intra_dc_prediction(SubBand *b)
572 IDWTELEM *buf = b->ibuf;
575 for (x = 1; x < b->width; x++)
579 for (y = 1; y < b->height; y++) {
580 buf[0] += buf[-b->stride];
582 for (x = 1; x < b->width; x++) {
583 int pred = buf[x - 1] + buf[x - b->stride] + buf[x - b->stride-1];
584 buf[x] += divide3(pred);
591 * Dirac Specification ->
592 * 13.4.2 Non-skipped subbands. subband_coeffs()
594 static av_always_inline void decode_subband_internal(DiracContext *s, SubBand *b, int is_arith)
596 int cb_x, cb_y, left, right, top, bottom;
599 int cb_width = s->codeblock[b->level + (b->orientation != subband_ll)].width;
600 int cb_height = s->codeblock[b->level + (b->orientation != subband_ll)].height;
601 int blockcnt_one = (cb_width + cb_height) == 2;
606 init_get_bits8(&gb, b->coeff_data, b->length);
609 ff_dirac_init_arith_decoder(&c, &gb, b->length);
612 for (cb_y = 0; cb_y < cb_height; cb_y++) {
613 bottom = (b->height * (cb_y+1)) / cb_height;
615 for (cb_x = 0; cb_x < cb_width; cb_x++) {
616 right = (b->width * (cb_x+1)) / cb_width;
617 codeblock(s, b, &gb, &c, left, right, top, bottom, blockcnt_one, is_arith);
623 if (b->orientation == subband_ll && s->num_refs == 0)
624 intra_dc_prediction(b);
627 static int decode_subband_arith(AVCodecContext *avctx, void *b)
629 DiracContext *s = avctx->priv_data;
630 decode_subband_internal(s, b, 1);
634 static int decode_subband_golomb(AVCodecContext *avctx, void *arg)
636 DiracContext *s = avctx->priv_data;
638 decode_subband_internal(s, *b, 0);
643 * Dirac Specification ->
644 * [DIRAC_STD] 13.4.1 core_transform_data()
646 static void decode_component(DiracContext *s, int comp)
648 AVCodecContext *avctx = s->avctx;
649 SubBand *bands[3*MAX_DWT_LEVELS+1];
650 enum dirac_subband orientation;
651 int level, num_bands = 0;
653 /* Unpack all subbands at all levels. */
654 for (level = 0; level < s->wavelet_depth; level++) {
655 for (orientation = !!level; orientation < 4; orientation++) {
656 SubBand *b = &s->plane[comp].band[level][orientation];
657 bands[num_bands++] = b;
659 align_get_bits(&s->gb);
660 /* [DIRAC_STD] 13.4.2 subband() */
661 b->length = svq3_get_ue_golomb(&s->gb);
663 b->quant = svq3_get_ue_golomb(&s->gb);
664 align_get_bits(&s->gb);
665 b->coeff_data = s->gb.buffer + get_bits_count(&s->gb)/8;
666 b->length = FFMIN(b->length, FFMAX(get_bits_left(&s->gb)/8, 0));
667 skip_bits_long(&s->gb, b->length*8);
670 /* arithmetic coding has inter-level dependencies, so we can only execute one level at a time */
672 avctx->execute(avctx, decode_subband_arith, &s->plane[comp].band[level][!!level],
673 NULL, 4-!!level, sizeof(SubBand));
675 /* golomb coding has no inter-level dependencies, so we can execute all subbands in parallel */
677 avctx->execute(avctx, decode_subband_golomb, bands, NULL, num_bands, sizeof(SubBand*));
680 /* [DIRAC_STD] 13.5.5.2 Luma slice subband data. luma_slice_band(level,orient,sx,sy) --> if b2 == NULL */
681 /* [DIRAC_STD] 13.5.5.3 Chroma slice subband data. chroma_slice_band(level,orient,sx,sy) --> if b2 != NULL */
682 static void lowdelay_subband(DiracContext *s, GetBitContext *gb, int quant,
683 int slice_x, int slice_y, int bits_end,
684 SubBand *b1, SubBand *b2)
686 int left = b1->width * slice_x / s->lowdelay.num_x;
687 int right = b1->width *(slice_x+1) / s->lowdelay.num_x;
688 int top = b1->height * slice_y / s->lowdelay.num_y;
689 int bottom = b1->height *(slice_y+1) / s->lowdelay.num_y;
691 int qfactor = qscale_tab[FFMIN(quant, MAX_QUANT)];
692 int qoffset = qoffset_intra_tab[FFMIN(quant, MAX_QUANT)];
694 IDWTELEM *buf1 = b1->ibuf + top * b1->stride;
695 IDWTELEM *buf2 = b2 ? b2->ibuf + top * b2->stride : NULL;
697 /* we have to constantly check for overread since the spec explicitly
698 requires this, with the meaning that all remaining coeffs are set to 0 */
699 if (get_bits_count(gb) >= bits_end)
702 for (y = top; y < bottom; y++) {
703 for (x = left; x < right; x++) {
704 buf1[x] = coeff_unpack_golomb(gb, qfactor, qoffset);
705 if (get_bits_count(gb) >= bits_end)
708 buf2[x] = coeff_unpack_golomb(gb, qfactor, qoffset);
709 if (get_bits_count(gb) >= bits_end)
719 struct lowdelay_slice {
728 * Dirac Specification ->
729 * 13.5.2 Slices. slice(sx,sy)
731 static int decode_lowdelay_slice(AVCodecContext *avctx, void *arg)
733 DiracContext *s = avctx->priv_data;
734 struct lowdelay_slice *slice = arg;
735 GetBitContext *gb = &slice->gb;
736 enum dirac_subband orientation;
737 int level, quant, chroma_bits, chroma_end;
739 int quant_base = get_bits(gb, 7); /*[DIRAC_STD] qindex */
740 int length_bits = av_log2(8 * slice->bytes)+1;
741 int luma_bits = get_bits_long(gb, length_bits);
742 int luma_end = get_bits_count(gb) + FFMIN(luma_bits, get_bits_left(gb));
744 /* [DIRAC_STD] 13.5.5.2 luma_slice_band */
745 for (level = 0; level < s->wavelet_depth; level++)
746 for (orientation = !!level; orientation < 4; orientation++) {
747 quant = FFMAX(quant_base - s->lowdelay.quant[level][orientation], 0);
748 lowdelay_subband(s, gb, quant, slice->slice_x, slice->slice_y, luma_end,
749 &s->plane[0].band[level][orientation], NULL);
752 /* consume any unused bits from luma */
753 skip_bits_long(gb, get_bits_count(gb) - luma_end);
755 chroma_bits = 8*slice->bytes - 7 - length_bits - luma_bits;
756 chroma_end = get_bits_count(gb) + FFMIN(chroma_bits, get_bits_left(gb));
757 /* [DIRAC_STD] 13.5.5.3 chroma_slice_band */
758 for (level = 0; level < s->wavelet_depth; level++)
759 for (orientation = !!level; orientation < 4; orientation++) {
760 quant = FFMAX(quant_base - s->lowdelay.quant[level][orientation], 0);
761 lowdelay_subband(s, gb, quant, slice->slice_x, slice->slice_y, chroma_end,
762 &s->plane[1].band[level][orientation],
763 &s->plane[2].band[level][orientation]);
770 * Dirac Specification ->
771 * 13.5.1 low_delay_transform_data()
773 static void decode_lowdelay(DiracContext *s)
775 AVCodecContext *avctx = s->avctx;
776 int slice_x, slice_y, bytes, bufsize;
778 struct lowdelay_slice *slices;
781 slices = av_mallocz_array(s->lowdelay.num_x, s->lowdelay.num_y * sizeof(struct lowdelay_slice));
783 align_get_bits(&s->gb);
784 /*[DIRAC_STD] 13.5.2 Slices. slice(sx,sy) */
785 buf = s->gb.buffer + get_bits_count(&s->gb)/8;
786 bufsize = get_bits_left(&s->gb);
788 for (slice_y = 0; bufsize > 0 && slice_y < s->lowdelay.num_y; slice_y++)
789 for (slice_x = 0; bufsize > 0 && slice_x < s->lowdelay.num_x; slice_x++) {
790 bytes = (slice_num+1) * s->lowdelay.bytes.num / s->lowdelay.bytes.den
791 - slice_num * s->lowdelay.bytes.num / s->lowdelay.bytes.den;
793 slices[slice_num].bytes = bytes;
794 slices[slice_num].slice_x = slice_x;
795 slices[slice_num].slice_y = slice_y;
796 init_get_bits(&slices[slice_num].gb, buf, bufsize);
803 avctx->execute(avctx, decode_lowdelay_slice, slices, NULL, slice_num,
804 sizeof(struct lowdelay_slice)); /* [DIRAC_STD] 13.5.2 Slices */
805 intra_dc_prediction(&s->plane[0].band[0][0]); /* [DIRAC_STD] 13.3 intra_dc_prediction() */
806 intra_dc_prediction(&s->plane[1].band[0][0]); /* [DIRAC_STD] 13.3 intra_dc_prediction() */
807 intra_dc_prediction(&s->plane[2].band[0][0]); /* [DIRAC_STD] 13.3 intra_dc_prediction() */
811 static void init_planes(DiracContext *s)
813 int i, w, h, level, orientation;
815 for (i = 0; i < 3; i++) {
816 Plane *p = &s->plane[i];
818 p->width = s->source.width >> (i ? s->chroma_x_shift : 0);
819 p->height = s->source.height >> (i ? s->chroma_y_shift : 0);
820 p->idwt_width = w = CALC_PADDING(p->width , s->wavelet_depth);
821 p->idwt_height = h = CALC_PADDING(p->height, s->wavelet_depth);
822 p->idwt_stride = FFALIGN(p->idwt_width, 8);
824 for (level = s->wavelet_depth-1; level >= 0; level--) {
827 for (orientation = !!level; orientation < 4; orientation++) {
828 SubBand *b = &p->band[level][orientation];
830 b->ibuf = p->idwt_buf;
832 b->stride = p->idwt_stride << (s->wavelet_depth - level);
835 b->orientation = orientation;
840 b->ibuf += b->stride>>1;
843 b->parent = &p->band[level-1][orientation];
848 p->xblen = s->plane[0].xblen >> s->chroma_x_shift;
849 p->yblen = s->plane[0].yblen >> s->chroma_y_shift;
850 p->xbsep = s->plane[0].xbsep >> s->chroma_x_shift;
851 p->ybsep = s->plane[0].ybsep >> s->chroma_y_shift;
854 p->xoffset = (p->xblen - p->xbsep)/2;
855 p->yoffset = (p->yblen - p->ybsep)/2;
860 * Unpack the motion compensation parameters
861 * Dirac Specification ->
862 * 11.2 Picture prediction data. picture_prediction()
864 static int dirac_unpack_prediction_parameters(DiracContext *s)
866 static const uint8_t default_blen[] = { 4, 12, 16, 24 };
867 static const uint8_t default_bsep[] = { 4, 8, 12, 16 };
869 GetBitContext *gb = &s->gb;
873 /* [DIRAC_STD] 11.2.2 Block parameters. block_parameters() */
874 /* Luma and Chroma are equal. 11.2.3 */
875 idx = svq3_get_ue_golomb(gb); /* [DIRAC_STD] index */
878 av_log(s->avctx, AV_LOG_ERROR, "Block prediction index too high\n");
883 s->plane[0].xblen = svq3_get_ue_golomb(gb);
884 s->plane[0].yblen = svq3_get_ue_golomb(gb);
885 s->plane[0].xbsep = svq3_get_ue_golomb(gb);
886 s->plane[0].ybsep = svq3_get_ue_golomb(gb);
888 /*[DIRAC_STD] preset_block_params(index). Table 11.1 */
889 s->plane[0].xblen = default_blen[idx-1];
890 s->plane[0].yblen = default_blen[idx-1];
891 s->plane[0].xbsep = default_bsep[idx-1];
892 s->plane[0].ybsep = default_bsep[idx-1];
894 /*[DIRAC_STD] 11.2.4 motion_data_dimensions()
895 Calculated in function dirac_unpack_block_motion_data */
897 if (!s->plane[0].xbsep || !s->plane[0].ybsep || s->plane[0].xbsep < s->plane[0].xblen/2 || s->plane[0].ybsep < s->plane[0].yblen/2) {
898 av_log(s->avctx, AV_LOG_ERROR, "Block separation too small\n");
901 if (s->plane[0].xbsep > s->plane[0].xblen || s->plane[0].ybsep > s->plane[0].yblen) {
902 av_log(s->avctx, AV_LOG_ERROR, "Block separation greater than size\n");
905 if (FFMAX(s->plane[0].xblen, s->plane[0].yblen) > MAX_BLOCKSIZE) {
906 av_log(s->avctx, AV_LOG_ERROR, "Unsupported large block size\n");
910 /*[DIRAC_STD] 11.2.5 Motion vector precision. motion_vector_precision()
911 Read motion vector precision */
912 s->mv_precision = svq3_get_ue_golomb(gb);
913 if (s->mv_precision > 3) {
914 av_log(s->avctx, AV_LOG_ERROR, "MV precision finer than eighth-pel\n");
918 /*[DIRAC_STD] 11.2.6 Global motion. global_motion()
919 Read the global motion compensation parameters */
920 s->globalmc_flag = get_bits1(gb);
921 if (s->globalmc_flag) {
922 memset(s->globalmc, 0, sizeof(s->globalmc));
923 /* [DIRAC_STD] pan_tilt(gparams) */
924 for (ref = 0; ref < s->num_refs; ref++) {
926 s->globalmc[ref].pan_tilt[0] = dirac_get_se_golomb(gb);
927 s->globalmc[ref].pan_tilt[1] = dirac_get_se_golomb(gb);
929 /* [DIRAC_STD] zoom_rotate_shear(gparams)
930 zoom/rotation/shear parameters */
932 s->globalmc[ref].zrs_exp = svq3_get_ue_golomb(gb);
933 s->globalmc[ref].zrs[0][0] = dirac_get_se_golomb(gb);
934 s->globalmc[ref].zrs[0][1] = dirac_get_se_golomb(gb);
935 s->globalmc[ref].zrs[1][0] = dirac_get_se_golomb(gb);
936 s->globalmc[ref].zrs[1][1] = dirac_get_se_golomb(gb);
938 s->globalmc[ref].zrs[0][0] = 1;
939 s->globalmc[ref].zrs[1][1] = 1;
941 /* [DIRAC_STD] perspective(gparams) */
943 s->globalmc[ref].perspective_exp = svq3_get_ue_golomb(gb);
944 s->globalmc[ref].perspective[0] = dirac_get_se_golomb(gb);
945 s->globalmc[ref].perspective[1] = dirac_get_se_golomb(gb);
950 /*[DIRAC_STD] 11.2.7 Picture prediction mode. prediction_mode()
951 Picture prediction mode, not currently used. */
952 if (svq3_get_ue_golomb(gb)) {
953 av_log(s->avctx, AV_LOG_ERROR, "Unknown picture prediction mode\n");
957 /* [DIRAC_STD] 11.2.8 Reference picture weight. reference_picture_weights()
958 just data read, weight calculation will be done later on. */
959 s->weight_log2denom = 1;
964 s->weight_log2denom = svq3_get_ue_golomb(gb);
965 s->weight[0] = dirac_get_se_golomb(gb);
966 if (s->num_refs == 2)
967 s->weight[1] = dirac_get_se_golomb(gb);
973 * Dirac Specification ->
974 * 11.3 Wavelet transform data. wavelet_transform()
976 static int dirac_unpack_idwt_params(DiracContext *s)
978 GetBitContext *gb = &s->gb;
982 #define CHECKEDREAD(dst, cond, errmsg) \
983 tmp = svq3_get_ue_golomb(gb); \
985 av_log(s->avctx, AV_LOG_ERROR, errmsg); \
992 s->zero_res = s->num_refs ? get_bits1(gb) : 0;
996 /*[DIRAC_STD] 11.3.1 Transform parameters. transform_parameters() */
997 CHECKEDREAD(s->wavelet_idx, tmp > 6, "wavelet_idx is too big\n")
999 CHECKEDREAD(s->wavelet_depth, tmp > MAX_DWT_LEVELS || tmp < 1, "invalid number of DWT decompositions\n")
1001 if (!s->low_delay) {
1002 /* Codeblock parameters (core syntax only) */
1003 if (get_bits1(gb)) {
1004 for (i = 0; i <= s->wavelet_depth; i++) {
1005 CHECKEDREAD(s->codeblock[i].width , tmp < 1, "codeblock width invalid\n")
1006 CHECKEDREAD(s->codeblock[i].height, tmp < 1, "codeblock height invalid\n")
1009 CHECKEDREAD(s->codeblock_mode, tmp > 1, "unknown codeblock mode\n")
1011 for (i = 0; i <= s->wavelet_depth; i++)
1012 s->codeblock[i].width = s->codeblock[i].height = 1;
1014 /* Slice parameters + quantization matrix*/
1015 /*[DIRAC_STD] 11.3.4 Slice coding Parameters (low delay syntax only). slice_parameters() */
1016 s->lowdelay.num_x = svq3_get_ue_golomb(gb);
1017 s->lowdelay.num_y = svq3_get_ue_golomb(gb);
1018 s->lowdelay.bytes.num = svq3_get_ue_golomb(gb);
1019 s->lowdelay.bytes.den = svq3_get_ue_golomb(gb);
1021 if (s->lowdelay.bytes.den <= 0) {
1022 av_log(s->avctx,AV_LOG_ERROR,"Invalid lowdelay.bytes.den\n");
1023 return AVERROR_INVALIDDATA;
1026 /* [DIRAC_STD] 11.3.5 Quantisation matrices (low-delay syntax). quant_matrix() */
1027 if (get_bits1(gb)) {
1028 av_log(s->avctx,AV_LOG_DEBUG,"Low Delay: Has Custom Quantization Matrix!\n");
1029 /* custom quantization matrix */
1030 s->lowdelay.quant[0][0] = svq3_get_ue_golomb(gb);
1031 for (level = 0; level < s->wavelet_depth; level++) {
1032 s->lowdelay.quant[level][1] = svq3_get_ue_golomb(gb);
1033 s->lowdelay.quant[level][2] = svq3_get_ue_golomb(gb);
1034 s->lowdelay.quant[level][3] = svq3_get_ue_golomb(gb);
1037 if (s->wavelet_depth > 4) {
1038 av_log(s->avctx,AV_LOG_ERROR,"Mandatory custom low delay matrix missing for depth %d\n", s->wavelet_depth);
1039 return AVERROR_INVALIDDATA;
1041 /* default quantization matrix */
1042 for (level = 0; level < s->wavelet_depth; level++)
1043 for (i = 0; i < 4; i++) {
1044 s->lowdelay.quant[level][i] = default_qmat[s->wavelet_idx][level][i];
1045 /* haar with no shift differs for different depths */
1046 if (s->wavelet_idx == 3)
1047 s->lowdelay.quant[level][i] += 4*(s->wavelet_depth-1 - level);
1054 static inline int pred_sbsplit(uint8_t *sbsplit, int stride, int x, int y)
1056 static const uint8_t avgsplit[7] = { 0, 0, 1, 1, 1, 2, 2 };
1063 return sbsplit[-stride];
1065 return avgsplit[sbsplit[-1] + sbsplit[-stride] + sbsplit[-stride-1]];
1068 static inline int pred_block_mode(DiracBlock *block, int stride, int x, int y, int refmask)
1075 return block[-1].ref & refmask;
1077 return block[-stride].ref & refmask;
1079 /* return the majority */
1080 pred = (block[-1].ref & refmask) + (block[-stride].ref & refmask) + (block[-stride-1].ref & refmask);
1081 return (pred >> 1) & refmask;
1084 static inline void pred_block_dc(DiracBlock *block, int stride, int x, int y)
1088 memset(block->u.dc, 0, sizeof(block->u.dc));
1090 if (x && !(block[-1].ref & 3)) {
1091 for (i = 0; i < 3; i++)
1092 block->u.dc[i] += block[-1].u.dc[i];
1096 if (y && !(block[-stride].ref & 3)) {
1097 for (i = 0; i < 3; i++)
1098 block->u.dc[i] += block[-stride].u.dc[i];
1102 if (x && y && !(block[-1-stride].ref & 3)) {
1103 for (i = 0; i < 3; i++)
1104 block->u.dc[i] += block[-1-stride].u.dc[i];
1109 for (i = 0; i < 3; i++)
1110 block->u.dc[i] = (block->u.dc[i]+1)>>1;
1111 } else if (n == 3) {
1112 for (i = 0; i < 3; i++)
1113 block->u.dc[i] = divide3(block->u.dc[i]);
1117 static inline void pred_mv(DiracBlock *block, int stride, int x, int y, int ref)
1120 int refmask = ref+1;
1121 int mask = refmask | DIRAC_REF_MASK_GLOBAL; /* exclude gmc blocks */
1124 if (x && (block[-1].ref & mask) == refmask)
1125 pred[n++] = block[-1].u.mv[ref];
1127 if (y && (block[-stride].ref & mask) == refmask)
1128 pred[n++] = block[-stride].u.mv[ref];
1130 if (x && y && (block[-stride-1].ref & mask) == refmask)
1131 pred[n++] = block[-stride-1].u.mv[ref];
1135 block->u.mv[ref][0] = 0;
1136 block->u.mv[ref][1] = 0;
1139 block->u.mv[ref][0] = pred[0][0];
1140 block->u.mv[ref][1] = pred[0][1];
1143 block->u.mv[ref][0] = (pred[0][0] + pred[1][0] + 1) >> 1;
1144 block->u.mv[ref][1] = (pred[0][1] + pred[1][1] + 1) >> 1;
1147 block->u.mv[ref][0] = mid_pred(pred[0][0], pred[1][0], pred[2][0]);
1148 block->u.mv[ref][1] = mid_pred(pred[0][1], pred[1][1], pred[2][1]);
1153 static void global_mv(DiracContext *s, DiracBlock *block, int x, int y, int ref)
1155 int ez = s->globalmc[ref].zrs_exp;
1156 int ep = s->globalmc[ref].perspective_exp;
1157 int (*A)[2] = s->globalmc[ref].zrs;
1158 int *b = s->globalmc[ref].pan_tilt;
1159 int *c = s->globalmc[ref].perspective;
1161 int m = (1<<ep) - (c[0]*x + c[1]*y);
1162 int mx = m * ((A[0][0] * x + A[0][1]*y) + (1<<ez) * b[0]);
1163 int my = m * ((A[1][0] * x + A[1][1]*y) + (1<<ez) * b[1]);
1165 block->u.mv[ref][0] = (mx + (1<<(ez+ep))) >> (ez+ep);
1166 block->u.mv[ref][1] = (my + (1<<(ez+ep))) >> (ez+ep);
/*
 * Decode the parameters of the block at block coordinates (x, y): first the
 * reference-usage flags in block->ref, then either DC values or motion
 * vectors. Each flag is predicted with pred_block_mode() and XORed with one
 * arithmetic-decoded bit. arith[0] supplies the mode bits, arith[1..3] the
 * three DC residuals, and arith[4 + 2*i] / arith[5 + 2*i] the residuals for
 * components 0 / 1 of the motion vector of reference i.
 */
1169 static void decode_block_params(DiracContext *s, DiracArith arith[8], DiracBlock *block,
1170 int stride, int x, int y)
/* "uses reference 1" flag: prediction XOR decoded bit */
1174 block->ref = pred_block_mode(block, stride, x, y, DIRAC_REF_MASK_REF1);
1175 block->ref ^= dirac_get_arith_bit(arith, CTX_PMODE_REF1);
/* with two references, the flag for reference 2 is handled the same way;
   its decoded bit is moved to bit 1 */
1177 if (s->num_refs == 2) {
1178 block->ref |= pred_block_mode(block, stride, x, y, DIRAC_REF_MASK_REF2);
1179 block->ref ^= dirac_get_arith_bit(arith, CTX_PMODE_REF2) << 1;
/* DC path: predicted DC plus one decoded residual per component.
   NOTE(review): the condition guarding this path is not visible here;
   presumably it is taken only when the block uses no reference -- confirm. */
1183 pred_block_dc(block, stride, x, y);
1184 for (i = 0; i < 3; i++)
1185 block->u.dc[i] += dirac_get_arith_int(arith+1+i, CTX_DC_F1, CTX_DC_DATA);
/* global-motion flag (bit 2), only coded when the picture enables it */
1189 if (s->globalmc_flag) {
1190 block->ref |= pred_block_mode(block, stride, x, y, DIRAC_REF_MASK_GLOBAL);
1191 block->ref ^= dirac_get_arith_bit(arith, CTX_GLOBAL_BLOCK) << 2;
/* one motion vector per reference whose bit (i+1) is set in block->ref */
1194 for (i = 0; i < s->num_refs; i++)
1195 if (block->ref & (i+1)) {
1196 if (block->ref & DIRAC_REF_MASK_GLOBAL) {
/* global block: the vector comes from the global motion parameters */
1197 global_mv(s, block, x, y, i);
/* presumably the non-global branch: predicted vector plus decoded residuals */
1199 pred_mv(block, stride, x, y, i);
1200 block->u.mv[i][0] += dirac_get_arith_int(arith + 4 + 2 * i, CTX_MV_F1, CTX_MV_DATA);
1201 block->u.mv[i][1] += dirac_get_arith_int(arith + 5 + 2 * i, CTX_MV_F1, CTX_MV_DATA);
1207 * Copies the current block to the other blocks covered by the current superblock split mode
/*
 * Visit the size x size square of blocks whose top-left element is `block`.
 * `stride` is presumably the number of blocks per row of the block array
 * (the visible caller passes s->blwidth).
 * NOTE(review): the loop bodies are not visible here; presumably every
 * visited cell receives a copy of *block and `dst` advances by `stride`
 * per row -- confirm.
 */
1209 static void propagate_block_data(DiracBlock *block, int stride, int size)
1212 DiracBlock *dst = block;
/* remainder of the first row: columns 1 .. size-1 */
1214 for (x = 1; x < size; x++)
/* rows 1 .. size-1, every column */
1217 for (y = 1; y < size; y++) {
1219 for (x = 0; x < size; x++)
1225 * Dirac Specification ->
1226 * 12. Block motion data syntax
/*
 * Read the block motion data of the current picture from s->gb: first the
 * split mode of every superblock, then the parameters of every coded block,
 * which are propagated over the area that block covers.
 * NOTE(review): the return statements are not visible here; the visible
 * caller treats a non-zero result as failure.
 */
1228 static int dirac_unpack_block_motion_data(DiracContext *s)
1230 GetBitContext *gb = &s->gb;
1231 uint8_t *sbsplit = s->sbsplit;
/* arith[0]: split modes, then block modes; arith[1..3]: DC;
   arith[4..7]: motion vector components (two decoders per reference) */
1233 DiracArith arith[8];
1237 /* [DIRAC_STD] 11.2.4 and 12.2.1 Number of blocks and superblocks */
/* a superblock spans 4 x 4 blocks, so the picture is covered by
   ceil(width / (4*xbsep)) x ceil(height / (4*ybsep)) superblocks */
1238 s->sbwidth = DIVRNDUP(s->source.width, 4*s->plane[0].xbsep);
1239 s->sbheight = DIVRNDUP(s->source.height, 4*s->plane[0].ybsep);
1240 s->blwidth = 4 * s->sbwidth;
1241 s->blheight = 4 * s->sbheight;
1243 /* [DIRAC_STD] 12.3.1 Superblock splitting modes. superblock_split_modes()
1244 decode superblock split modes */
1245 ff_dirac_init_arith_decoder(arith, gb, svq3_get_ue_golomb(gb)); /* svq3_get_ue_golomb(gb) is the length */
1246 for (y = 0; y < s->sbheight; y++) {
1247 for (x = 0; x < s->sbwidth; x++) {
1248 unsigned int split = dirac_get_arith_uint(arith, CTX_SB_F1, CTX_SB_DATA);
/* split mode = (decoded residual + spatial prediction) mod 3.
   NOTE(review): any range check on `split` is not visible here -- confirm. */
1251 sbsplit[x] = (split + pred_sbsplit(sbsplit+x, s->sbwidth, x, y)) % 3;
/* advance to the next row of superblocks */
1253 sbsplit += s->sbwidth;
1256 /* setup arith decoding */
/* every decoder is preceded in the bitstream by its own Golomb-coded length */
1257 ff_dirac_init_arith_decoder(arith, gb, svq3_get_ue_golomb(gb));
1258 for (i = 0; i < s->num_refs; i++) {
1259 ff_dirac_init_arith_decoder(arith + 4 + 2 * i, gb, svq3_get_ue_golomb(gb));
1260 ff_dirac_init_arith_decoder(arith + 5 + 2 * i, gb, svq3_get_ue_golomb(gb));
1262 for (i = 0; i < 3; i++)
1263 ff_dirac_init_arith_decoder(arith+1+i, gb, svq3_get_ue_golomb(gb));
1265 for (y = 0; y < s->sbheight; y++)
1266 for (x = 0; x < s->sbwidth; x++) {
/* split mode 0/1/2 -> 1/2/4 coded blocks per side, each covering
   4/2/1 block positions per side */
1267 int blkcnt = 1 << s->sbsplit[y * s->sbwidth + x];
1268 int step = 4 >> s->sbsplit[y * s->sbwidth + x];
1270 for (q = 0; q < blkcnt; q++)
1271 for (p = 0; p < blkcnt; p++) {
/* block coordinates of the top-left position covered by this coded block */
1272 int bx = 4 * x + p*step;
1273 int by = 4 * y + q*step;
1274 DiracBlock *block = &s->blmotion[by*s->blwidth + bx];
1275 decode_block_params(s, arith, block, s->blwidth, bx, by);
1276 propagate_block_data(block, s->blwidth, step);
/**
 * One-dimensional OBMC weight of sample `i` in a block of length `blen`.
 *
 * The first and the last 2*offset samples of the block form a ramp; every
 * sample in between has the full weight 8. On the ramp the weight depends
 * only on the distance d to the nearest block end:
 *   - offset == 1: 3 for d == 0, 5 otherwise
 *   - otherwise:   1 + (6*d + offset - 1) / (2*offset - 1)
 *
 * @param i      sample position inside the block, 0 .. blen-1
 * @param blen   block length
 * @param offset overlap offset of the block
 * @return the weight, at most 8
 *
 * The ramp used to be a function-local macro that was never #undef'd and
 * whose expansion was an unparenthesized conditional expression; it is
 * written out as ordinary code so that nothing leaks into the rest of the
 * file.
 */
static int weight(int i, int blen, int offset)
{
    int edge_dist;

    if (i < 2*offset)
        edge_dist = i;                 /* leading ramp */
    else if (i > blen-1 - 2*offset)
        edge_dist = blen-1 - i;        /* trailing ramp, mirrored */
    else
        return 8;                      /* flat part */

    if (offset == 1)
        return edge_dist ? 5 : 3;
    return 1 + (6*edge_dist + offset - 1) / (2*offset - 1);
}
/*
 * Fill one row of an OBMC weight table. Each entry is the vertical weight
 * `wy` multiplied by a horizontal weight. `left` / `right` mark a block
 * that lies on the left / right picture edge: the outer half of such a
 * block gets the flat horizontal weight 8 instead of the ramp from weight().
 * `stride` is the row length of the table.
 */
1295 static void init_obmc_weight_row(Plane *p, uint8_t *obmc_weight, int stride,
1296 int left, int right, int wy)
/* left-edge block: flat weight over the first half (skipped when !left) */
1299 for (x = 0; left && x < p->xblen >> 1; x++)
1300 obmc_weight[x] = wy*8;
/* regular ramp; stops at the half-way point when `right` is set */
1301 for (; x < p->xblen >> right; x++)
1302 obmc_weight[x] = wy*weight(x, p->xblen, p->xoffset);
/* right-edge block: flat weight over what is left of the block */
1303 for (; x < p->xblen; x++)
1304 obmc_weight[x] = wy*8;
/* entries between the block length and the row stride.
   NOTE(review): the loop body is not visible here; presumably they are
   cleared to 0 -- confirm. */
1305 for (; x < stride; x++)
1309 static void init_obmc_weight(Plane *p, uint8_t *obmc_weight, int stride,
1310 int left, int right, int top, int bottom)
1313 for (y = 0; top && y < p->yblen >> 1; y++) {
1314 init_obmc_weight_row(p, obmc_weight, stride, left, right, 8);
1315 obmc_weight += stride;
1317 for (; y < p->yblen >> bottom; y++) {
1318 int wy = weight(y, p->yblen, p->yoffset);
1319 init_obmc_weight_row(p, obmc_weight, stride, left, right, wy);
1320 obmc_weight += stride;
1322 for (; y < p->yblen; y++) {
1323 init_obmc_weight_row(p, obmc_weight, stride, left, right, 8);
1324 obmc_weight += stride;
/*
 * (Re)build the three OBMC weight tables used for block row `by`:
 * s->obmc_weight[0] for the block on the left picture edge (left = 1),
 * s->obmc_weight[1] for interior blocks and s->obmc_weight[2] for the block
 * on the right picture edge (right = 1). All tables use a row stride of
 * MAX_BLOCKSIZE.
 * NOTE(review): the declaration of `top` is not visible here; presumably it
 * is non-zero for the first block row -- confirm.
 */
1328 static void init_obmc_weights(DiracContext *s, Plane *p, int by)
1331 int bottom = by == s->blheight-1;
1333 /* don't bother re-initing for rows 2 to blheight-2, the weights don't change */
/* by == 1 is the row that replaces the top-edge tables with the regular ones */
1334 if (top || bottom || by == 1) {
1335 init_obmc_weight(p, s->obmc_weight[0], MAX_BLOCKSIZE, 1, 0, top, bottom);
1336 init_obmc_weight(p, s->obmc_weight[1], MAX_BLOCKSIZE, 0, 0, top, bottom);
1337 init_obmc_weight(p, s->obmc_weight[2], MAX_BLOCKSIZE, 0, 1, top, bottom);
/*
 * Weight table indexed as epel_weights[my & 3][mx & 3]. mc_subpel() stores
 * a pointer to the selected group of four weights in src[4].
 * NOTE(review): the initializer rows are not visible here.
 */
1341 static const uint8_t epel_weights[4][4][4] = {
1361 * For block x,y, determine which of the hpel planes to do bilinear
1362 * interpolation from and set src[] to the location in each hpel plane
1365 * @return the index of the put_dirac_pixels_tab function to use
1366 * 0 for 1 plane (fpel,hpel), 1 for 2 planes (qpel), 2 for 4 planes (qpel), and 3 for epel
/*
 * Prepare the source pointers for motion compensating one block of `plane`
 * from reference `ref`. (x, y) is the position of the block in the plane.
 * On return src[0..] point into the half-pel planes of the reference (or
 * into s->edge_emu_buffer[] when the block reaches beyond the padded
 * picture), and src[4] may point at a set of epel weights. The return value
 * is (nplanes >> 1) + epel.
 * NOTE(review): several conditions and the assignments of `nplanes` and
 * `epel` are not visible here; the comments below cover only what the
 * visible statements do.
 */
1368 static int mc_subpel(DiracContext *s, DiracBlock *block, const uint8_t *src[5],
1369 int x, int y, int ref, int plane)
1371 Plane *p = &s->plane[plane];
1372 uint8_t **ref_hpel = s->ref_pics[ref]->hpel[plane];
1373 int motion_x = block->u.mv[ref][0];
1374 int motion_y = block->u.mv[ref][1];
1375 int mx, my, i, epel, nplanes = 0;
/* scale the vector by the chroma subsampling shifts; presumably done for
   chroma planes only (the guard is not visible) -- confirm */
1378 motion_x >>= s->chroma_x_shift;
1379 motion_y >>= s->chroma_y_shift;
/* split the vector: the low mv_precision bits are the sub-pel fraction,
   the remaining bits the whole-pel part */
1382 mx = motion_x & ~(-1U << s->mv_precision);
1383 my = motion_y & ~(-1U << s->mv_precision);
1384 motion_x >>= s->mv_precision;
1385 motion_y >>= s->mv_precision;
1386 /* normalize subpel coordinates to epel */
1387 /* TODO: template this function? */
1388 mx <<= 3 - s->mv_precision;
1389 my <<= 3 - s->mv_precision;
/* single-plane case: the plane index is formed from bit 2 of each fraction */
1398 src[0] = ref_hpel[(my>>1)+(mx>>2)] + y*p->stride + x;
/* otherwise start from the same position in all four planes */
1402 for (i = 0; i < 4; i++)
1403 src[i] = ref_hpel[i] + y*p->stride + x;
1405 /* if we're interpolating in the right/bottom halves, adjust the planes as needed
1406 we increment x/y because the edge changes for half of the pixels */
1413 src[0] += p->stride;
1414 src[1] += p->stride;
1422 /* check if we really only need 2 planes since either mx or my is
1423 a hpel position. (epel weights of 0 handle this there) */
1425 /* mx == 0: average [0] and [2]
1426 mx == 4: average [1] and [3] */
1427 src[!mx] = src[2 + !!mx];
1429 } else if (!(my&3)) {
1430 src[0] = src[(my>>1) ];
1431 src[1] = src[(my>>1)+1];
1435 /* adjust the ordering if needed so the weights work */
1437 FFSWAP(const uint8_t *, src[0], src[1]);
1438 FFSWAP(const uint8_t *, src[2], src[3]);
1441 FFSWAP(const uint8_t *, src[0], src[2]);
1442 FFSWAP(const uint8_t *, src[1], src[3]);
1444 src[4] = epel_weights[my&3][mx&3];
1448 /* fixme: v/h _edge_pos */
/* when the block reaches beyond the padded picture, copy every plane in
   use into its edge emulation buffer and read from there instead */
1449 if (x + p->xblen > p->width +EDGE_WIDTH/2 ||
1450 y + p->yblen > p->height+EDGE_WIDTH/2 ||
1452 for (i = 0; i < nplanes; i++) {
1453 ff_emulated_edge_mc(s->edge_emu_buffer[i], src[i],
1454 p->stride, p->stride,
1455 p->xblen, p->yblen, x, y,
1456 p->width+EDGE_WIDTH/2, p->height+EDGE_WIDTH/2);
1457 src[i] = s->edge_emu_buffer[i];
1460 return (nplanes>>1) + epel;
/*
 * Accumulate a constant (DC) prediction into the 16-bit accumulation
 * buffer `dst`: every sample of the xblen x yblen block is increased by
 * dc * obmc_weight. The weight table has a row stride of MAX_BLOCKSIZE.
 * The inner loop handles two samples per iteration, so xblen is presumably
 * always even.
 * NOTE(review): the per-row advance of `dst` (presumably by `stride`) and
 * any adjustment of `dc` before the loop are not visible here -- confirm.
 */
1463 static void add_dc(uint16_t *dst, int dc, int stride,
1464 uint8_t *obmc_weight, int xblen, int yblen)
1469 for (y = 0; y < yblen; y++) {
1470 for (x = 0; x < xblen; x += 2) {
1471 dst[x ] += dc * obmc_weight[x ];
1472 dst[x+1] += dc * obmc_weight[x+1];
/* next row of the weight table */
1475 obmc_weight += MAX_BLOCKSIZE;
/*
 * Motion compensate one block of `plane` and add the weighted result to
 * the accumulation buffer `mctmp`. (dstx, dsty) is the position of the
 * block in the plane and `obmc_weight` the weight table to apply. The low
 * two bits of block->ref select the prediction.
 * NOTE(review): the case labels are not visible here; the grouping below is
 * inferred from the reference indices passed to mc_subpel() -- confirm.
 */
1479 static void block_mc(DiracContext *s, DiracBlock *block,
1480 uint16_t *mctmp, uint8_t *obmc_weight,
1481 int plane, int dstx, int dsty)
1483 Plane *p = &s->plane[plane];
1484 const uint8_t *src[5];
1487 switch (block->ref&3) {
/* DC prediction: weighted straight into mctmp */
1489 add_dc(mctmp, block->u.dc[plane], p->stride, obmc_weight, p->xblen, p->yblen);
/* a single reference, index (block->ref & 3) - 1: predict into s->mcscratch */
1493 idx = mc_subpel(s, block, src, dstx, dsty, (block->ref&3)-1, plane);
1494 s->put_pixels_tab[idx](s->mcscratch, src, p->stride, p->yblen);
/* explicit weighting with the sum of both weights; presumably guarded by a
   check that s->weight_func is set (not visible) */
1496 s->weight_func(s->mcscratch, p->stride, s->weight_log2denom,
1497 s->weight[0] + s->weight[1], p->yblen);
/* both references: reference 0 into s->mcscratch, then reference 1 is
   either bi-weighted or averaged into it */
1500 idx = mc_subpel(s, block, src, dstx, dsty, 0, plane);
1501 s->put_pixels_tab[idx](s->mcscratch, src, p->stride, p->yblen);
1502 idx = mc_subpel(s, block, src, dstx, dsty, 1, plane);
1503 if (s->biweight_func) {
1504 /* fixme: +32 is a quick hack */
1505 s->put_pixels_tab[idx](s->mcscratch + 32, src, p->stride, p->yblen);
1506 s->biweight_func(s->mcscratch, s->mcscratch+32, p->stride, s->weight_log2denom,
1507 s->weight[0], s->weight[1], p->yblen);
1509 s->avg_pixels_tab[idx](s->mcscratch, src, p->stride, p->yblen);
/* add the prediction held in s->mcscratch to mctmp with the OBMC weights */
1512 s->add_obmc(mctmp, s->mcscratch, p->stride, obmc_weight, p->yblen);
/*
 * Motion compensate one row of blocks of `plane` into `mctmp`. `block`
 * points at the first block of the row and `dsty` is the vertical position
 * of the row. The first block uses weight table 0 and starts at
 * x = -xoffset, interior blocks use table 1 and the last block table 2.
 * NOTE(review): the statements that advance `dstx` and `mctmp` from block
 * to block are not visible here -- confirm.
 */
1515 static void mc_row(DiracContext *s, DiracBlock *block, uint16_t *mctmp, int plane, int dsty)
1517 Plane *p = &s->plane[plane];
/* horizontal position of the second block */
1518 int x, dstx = p->xbsep - p->xoffset;
1520 block_mc(s, block, mctmp, s->obmc_weight[0], plane, -p->xoffset, dsty);
1523 for (x = 1; x < s->blwidth-1; x++) {
1524 block_mc(s, block+x, mctmp, s->obmc_weight[1], plane, dstx, dsty);
/* x == s->blwidth-1 here: the block on the right picture edge */
1528 block_mc(s, block+x, mctmp, s->obmc_weight[2], plane, dstx, dsty);
/*
 * Copy the DSP function pointers for table index `idx` into the context:
 * the put/avg pixel tables, the OBMC add function and the weighting
 * functions.
 * NOTE(review): the computation of `idx` is not visible here; presumably it
 * is derived from the block size arguments -- confirm.
 */
1531 static void select_dsp_funcs(DiracContext *s, int width, int height, int xblen, int yblen)
1539 memcpy(s->put_pixels_tab, s->diracdsp.put_dirac_pixels_tab[idx], sizeof(s->put_pixels_tab));
1540 memcpy(s->avg_pixels_tab, s->diracdsp.avg_dirac_pixels_tab[idx], sizeof(s->avg_pixels_tab));
1541 s->add_obmc = s->diracdsp.add_dirac_obmc[idx];
/* explicit weighting is needed unless the denominator is at most 1 and
   both weights are 1 */
1542 if (s->weight_log2denom > 1 || s->weight[0] != 1 || s->weight[1] != 1) {
1543 s->weight_func = s->diracdsp.weight_dirac_pixels_tab[idx];
1544 s->biweight_func = s->diracdsp.biweight_dirac_pixels_tab[idx];
/* no weighting: block_mc() tests s->biweight_func against NULL */
1546 s->weight_func = NULL;
1547 s->biweight_func = NULL;
/*
 * Prepare plane `plane` of reference frame `ref` for motion compensation:
 * pad the full-pel plane and, when sub-pel vectors are in use, allocate and
 * compute the three half-pel planes. ref->interpolated[plane] records that
 * the half-pel planes have been computed, so the filter runs once per
 * reference plane.
 */
1551 static void interpolate_refplane(DiracContext *s, DiracFrame *ref, int plane, int width, int height)
1553 /* chroma allocates an edge of 8 when subsampled
1554 which for 4:2:2 means an h edge of 16 and v edge of 8
1555 just use 8 for everything for the moment */
1556 int i, edge = EDGE_WIDTH/2;
/* half-pel plane 0 is the decoded frame data itself */
1558 ref->hpel[plane][0] = ref->avframe->data[plane];
1559 s->dsp.draw_edges(ref->hpel[plane][0], ref->avframe->linesize[plane], width, height, edge, edge, EDGE_TOP | EDGE_BOTTOM); /* EDGE_TOP | EDGE_BOTTOM values just copied to make it build, this needs to be ensured */
1561 /* no need for hpel if we only have fpel vectors */
1562 if (!s->mv_precision)
/* allocate planes 1..3 on first use: (height + 2*edge) rows of linesize
   bytes plus 32 spare bytes.
   NOTE(review): the av_malloc() result is not checked in the visible code;
   a failed allocation would be offset and used below -- confirm. */
1565 for (i = 1; i < 4; i++) {
1566 if (!ref->hpel_base[plane][i])
1567 ref->hpel_base[plane][i] = av_malloc((height+2*edge) * ref->avframe->linesize[plane] + 32);
1568 /* we need to be 16-byte aligned even for chroma */
/* skip `edge` rows of top padding plus 16 bytes */
1569 ref->hpel[plane][i] = ref->hpel_base[plane][i] + edge*ref->avframe->linesize[plane] + 16;
1572 if (!ref->interpolated[plane]) {
/* compute planes 1..3 from plane 0, then pad each of them */
1573 s->diracdsp.dirac_hpel_filter(ref->hpel[plane][1], ref->hpel[plane][2],
1574 ref->hpel[plane][3], ref->hpel[plane][0],
1575 ref->avframe->linesize[plane], width, height);
1576 s->dsp.draw_edges(ref->hpel[plane][1], ref->avframe->linesize[plane], width, height, edge, edge, EDGE_TOP | EDGE_BOTTOM);
1577 s->dsp.draw_edges(ref->hpel[plane][2], ref->avframe->linesize[plane], width, height, edge, edge, EDGE_TOP | EDGE_BOTTOM);
1578 s->dsp.draw_edges(ref->hpel[plane][3], ref->avframe->linesize[plane], width, height, edge, edge, EDGE_TOP | EDGE_BOTTOM);
1580 ref->interpolated[plane] = 1;
1584 * Dirac Specification ->
1585 * 13.0 Transform data syntax. transform_data()
/*
 * Decode the transform data of the current picture and reconstruct its
 * three planes. Intra pictures are written straight from the inverse
 * wavelet transform; inter pictures add the transform output to the
 * motion compensated prediction, one row of blocks at a time.
 * NOTE(review): the return statements and a few guards are not visible
 * here; the visible caller treats a non-zero result as failure.
 */
1587 static int dirac_decode_frame_internal(DiracContext *s)
1590 int y, i, comp, dsty;
1593 /* [DIRAC_STD] 13.5.1 low_delay_transform_data() */
/* clear the coefficient buffers of all three planes; presumably part of
   the low-delay path (the guard is not visible) */
1594 for (comp = 0; comp < 3; comp++) {
1595 Plane *p = &s->plane[comp];
1596 memset(p->idwt_buf, 0, p->idwt_stride * p->idwt_height * sizeof(IDWTELEM));
1602 for (comp = 0; comp < 3; comp++) {
1603 Plane *p = &s->plane[comp];
1604 uint8_t *frame = s->current_picture->avframe->data[comp];
1606 /* FIXME: small resolutions */
/* carve the four edge emulation buffers out of one base allocation,
   spaced by the plane width rounded up to 16 */
1607 for (i = 0; i < 4; i++)
1608 s->edge_emu_buffer[i] = s->edge_emu_buffer_base + i*FFALIGN(p->width, 16);
/* core syntax with a residual: clear the buffer and decode the component */
1610 if (!s->zero_res && !s->low_delay)
1612 memset(p->idwt_buf, 0, p->idwt_stride * p->idwt_height * sizeof(IDWTELEM));
1613 decode_component(s, comp); /* [DIRAC_STD] 13.4.1 core_transform_data() */
1615 if (ff_spatial_idwt_init2(&d, p->idwt_buf, p->idwt_width, p->idwt_height, p->idwt_stride,
1616 s->wavelet_idx+2, s->wavelet_depth, p->idwt_tmp))
1619 if (!s->num_refs) { /* intra */
/* run the inverse transform 16 rows at a time and store the clamped
   result in the frame */
1620 for (y = 0; y < p->height; y += 16) {
1621 ff_spatial_idwt_slice2(&d, y+16); /* decode */
1622 s->diracdsp.put_signed_rect_clamped(frame + y*p->stride, p->stride,
1623 p->idwt_buf + y*p->idwt_stride, p->idwt_stride, p->width, 16);
1625 } else { /* inter */
/* number of mctmp elements covered by one row of blocks */
1626 int rowheight = p->ybsep*p->stride;
1628 select_dsp_funcs(s, p->width, p->height, p->xblen, p->yblen);
1630 for (i = 0; i < s->num_refs; i++)
1631 interpolate_refplane(s, s->ref_pics[i], comp, p->width, p->height);
/* memset counts bytes: 4*yoffset*stride bytes are the first
   2*yoffset*stride 16-bit entries of the accumulation buffer */
1633 memset(s->mctmp, 0, 4*p->yoffset*p->stride);
1636 for (y = 0; y < s->blheight; y++) {
/* `start` is the first picture row of this block row, clipped at 0;
   NOTE(review): the assignment of dsty is not visible here */
1638 start = FFMAX(dsty, 0);
1639 uint16_t *mctmp = s->mctmp + y*rowheight;
1640 DiracBlock *blocks = s->blmotion + y*s->blwidth;
1642 init_obmc_weights(s, p, y);
/* h = number of picture rows that this block row completes */
1644 if (y == s->blheight-1 || start+p->ybsep > p->height)
1645 h = p->height - start;
1647 h = p->ybsep - (start - dsty);
/* clear the rowheight entries that follow the overlap region, then
   accumulate the predictions of this block row */
1651 memset(mctmp+2*p->yoffset*p->stride, 0, 2*rowheight);
1652 mc_row(s, blocks, mctmp, comp, dsty);
/* move to the first visible sample of the row */
1654 mctmp += (start - dsty)*p->stride + p->xoffset;
1655 ff_spatial_idwt_slice2(&d, start + h); /* decode */
/* prediction plus residual, clamped, into the output frame */
1656 s->diracdsp.add_rect_clamped(frame + start*p->stride, mctmp, p->stride,
1657 p->idwt_buf + start*p->idwt_stride, p->idwt_stride, p->width, h);
/*
 * Allocate the buffers of frame `f` with room for a border around the
 * picture. The frame is requested larger than the picture, the data
 * pointers are then moved past the top border, and finally the nominal
 * picture size is restored.
 * NOTE(review): the handling of `ret` after ff_get_buffer() and the final
 * return are not visible here; the visible callers treat a negative result
 * as failure.
 */
1668 static int get_buffer_with_edge(AVCodecContext *avctx, AVFrame *f, int flags)
1671 int chroma_x_shift, chroma_y_shift;
1672 avcodec_get_chroma_sub_sample(avctx->pix_fmt, &chroma_x_shift, &chroma_y_shift);
/* request EDGE_WIDTH extra samples on every side, plus two extra rows */
1674 f->width = avctx->width + 2 * EDGE_WIDTH;
1675 f->height = avctx->height + 2 * EDGE_WIDTH + 2;
1676 ret = ff_get_buffer(avctx, f, flags);
/* skip the top border of every allocated plane: EDGE_WIDTH rows, reduced
   by chroma_y_shift for planes 1 and 2, plus a fixed 32 bytes.
   NOTE(review): chroma_x_shift is fetched above but not used in the visible
   lines -- confirm that the fixed 32-byte offset is intended for chroma. */
1680 for (i = 0; f->data[i]; i++) {
1681 int offset = (EDGE_WIDTH >> (i && i<3 ? chroma_y_shift : 0)) *
1682 f->linesize[i] + 32;
1683 f->data[i] += offset;
/* restore the picture dimensions */
1685 f->width = avctx->width;
1686 f->height = avctx->height;
1692 * Dirac Specification ->
1693 * 11.1.1 Picture Header. picture_header()
/*
 * Parse the picture header of the current picture from s->gb: the picture
 * number, the reference pictures, the retired picture and then the
 * prediction parameters, block motion data and wavelet parameters.
 * NOTE(review): the return statements and some guards are not visible
 * here; the visible caller treats a non-zero result as failure.
 */
1695 static int dirac_decode_picture_header(DiracContext *s)
1698 int i, j, refnum, refdist;
1699 GetBitContext *gb = &s->gb;
1701 /* [DIRAC_STD] 11.1.1 Picture Header. picture_header() PICTURE_NUM */
/* the 32-bit picture number is also stored as the display number */
1702 picnum = s->current_picture->avframe->display_picture_number = get_bits_long(gb, 32);
1705 av_log(s->avctx,AV_LOG_DEBUG,"PICTURE_NUM: %d\n",picnum);
1707 /* if this is the first keyframe after a sequence header, start our
1708 reordering from here */
1709 if (s->frame_number < 0)
1710 s->frame_number = picnum;
1712 s->ref_pics[0] = s->ref_pics[1] = NULL;
1713 for (i = 0; i < s->num_refs; i++) {
/* reference numbers are coded as signed offsets from the picture number */
1714 refnum = picnum + dirac_get_se_golomb(gb);
1717 /* find the closest reference to the one we want */
1718 /* Jordi: this is needed if the referenced picture hasn't yet arrived */
/* the search stops early once an exact match (refdist == 0) is found;
   NOTE(review): the initial value of refdist is not visible here */
1719 for (j = 0; j < MAX_REFERENCE_FRAMES && refdist; j++)
1720 if (s->ref_frames[j]
1721 && FFABS(s->ref_frames[j]->avframe->display_picture_number - refnum) < refdist) {
1722 s->ref_pics[i] = s->ref_frames[j];
1723 refdist = FFABS(s->ref_frames[j]->avframe->display_picture_number - refnum);
1726 if (!s->ref_pics[i] || refdist)
1727 av_log(s->avctx, AV_LOG_DEBUG, "Reference not found\n");
1729 /* if there were no references at all, allocate one */
/* NOTE(review): the result of get_buffer_with_edge() is ignored here, so a
   failed allocation goes unnoticed -- confirm. */
1730 if (!s->ref_pics[i])
1731 for (j = 0; j < MAX_FRAMES; j++)
1732 if (!s->all_frames[j].avframe->data[0]) {
1733 s->ref_pics[i] = &s->all_frames[j];
1734 get_buffer_with_edge(s->avctx, s->ref_pics[i]->avframe, AV_GET_BUFFER_FLAG_REF);
1739 /* retire the reference frames that are not used anymore */
1740 if (s->current_picture->avframe->reference) {
/* the retired picture number is also coded as an offset; an offset of 0
   means that nothing is retired */
1741 retire = picnum + dirac_get_se_golomb(gb);
1742 if (retire != picnum) {
1743 DiracFrame *retire_pic = remove_frame(s->ref_frames, retire);
/* keep only the DELAYED_PIC_REF bit of the retired picture */
1746 retire_pic->avframe->reference &= DELAYED_PIC_REF;
1748 av_log(s->avctx, AV_LOG_DEBUG, "Frame to retire not found\n");
1751 /* if reference array is full, remove the oldest as per the spec */
/* NOTE(review): the result of remove_frame() is dereferenced without a
   check in the loop below -- confirm that it cannot be NULL there. */
1752 while (add_frame(s->ref_frames, MAX_REFERENCE_FRAMES, s->current_picture)) {
1753 av_log(s->avctx, AV_LOG_ERROR, "Reference frame overflow\n");
1754 remove_frame(s->ref_frames, s->ref_frames[0]->avframe->display_picture_number)->avframe->reference &= DELAYED_PIC_REF;
1759 if (dirac_unpack_prediction_parameters(s)) /* [DIRAC_STD] 11.2 Picture Prediction Data. picture_prediction() */
1761 if (dirac_unpack_block_motion_data(s)) /* [DIRAC_STD] 12. Block motion data syntax */
1764 if (dirac_unpack_idwt_params(s)) /* [DIRAC_STD] 11.3 Wavelet transform data */
/*
 * Output the delayed picture with the lowest display picture number:
 * remove it from the NULL-terminated s->delay_frames list, toggle its
 * DELAYED_PIC_REF flag and make `picture` a new reference to it.
 * NOTE(review): the handling of an empty list, the update of *got_frame
 * and the return statements are not visible here.
 */
1771 static int get_delayed_pic(DiracContext *s, AVFrame *picture, int *got_frame)
1773 DiracFrame *out = s->delay_frames[0];
1777 /* find frame with lowest picture number */
1778 for (i = 1; s->delay_frames[i]; i++)
1779 if (s->delay_frames[i]->avframe->display_picture_number < out->avframe->display_picture_number) {
1780 out = s->delay_frames[i];
/* close the gap: move every later entry, and the terminator, down one slot */
1784 for (i = out_idx; s->delay_frames[i]; i++)
1785 s->delay_frames[i] = s->delay_frames[i+1];
1788 out->avframe->reference ^= DELAYED_PIC_REF;
1790 if((ret = av_frame_ref(picture, out->avframe)) < 0)
1798 * Dirac Specification ->
1799 * 9.6 Parse Info Header Syntax. parse_info()
1800 * 4 byte start code + byte parse code + 4 byte size + 4 byte previous size
/* Size in bytes of the header at the start of every data unit.
   dirac_decode_data_unit() compares the unit size against it and starts
   reading the payload right after it. */
1802 #define DATA_UNIT_HEADER_SIZE 13
1804 /* [DIRAC_STD] dirac_decode_data_unit makes reference to the while defined in 9.3
1805 inside the function parse_sequence() */
/*
 * Decode one data unit stored in buf[0 .. size-1] and dispatch on the parse
 * code found in buf[4]: sequence header, end of sequence, auxiliary data or
 * picture.
 * NOTE(review): parse_code is initialised from buf[4] before `size` is
 * compared against DATA_UNIT_HEADER_SIZE. The visible caller only passes
 * units whose header lies inside the packet; confirm that no other caller
 * exists. The return statements are not visible here; the visible caller
 * treats a non-zero result as failure.
 */
1806 static int dirac_decode_data_unit(AVCodecContext *avctx, const uint8_t *buf, int size)
1808 DiracContext *s = avctx->priv_data;
1809 DiracFrame *pic = NULL;
1810 int ret, i, parse_code = buf[4];
1813 if (size < DATA_UNIT_HEADER_SIZE)
/* the bit reader covers the payload that follows the 13-byte header */
1816 init_get_bits(&s->gb, &buf[13], 8*(size - DATA_UNIT_HEADER_SIZE));
1818 if (parse_code == pc_seq_header) {
/* a sequence header that was already seen is presumably skipped (the
   statement under this condition is not visible) */
1819 if (s->seen_sequence_header)
1822 /* [DIRAC_STD] 10. Sequence header */
1823 if (avpriv_dirac_parse_sequence_header(avctx, &s->gb, &s->source))
/* cache the chroma subsampling shifts of the negotiated pixel format */
1826 avcodec_get_chroma_sub_sample(avctx->pix_fmt, &s->chroma_x_shift, &s->chroma_y_shift);
1828 if (alloc_sequence_buffers(s))
1831 s->seen_sequence_header = 1;
1832 } else if (parse_code == pc_eos) { /* [DIRAC_STD] End of Sequence */
1833 free_sequence_buffers(s);
1834 s->seen_sequence_header = 0;
1835 } else if (parse_code == pc_aux_data) {
1836 if (buf[13] == 1) { /* encoder implementation/version */
1838 /* versions older than 1.0.8 don't store quant delta for
1839 subbands with only one codeblock */
/* NOTE(review): sscanf() treats buf+14 as a C string, but nothing visible
   guarantees a NUL terminator inside the data unit -- confirm. */
1840 if (sscanf(buf+14, "Schroedinger %d.%d.%d", ver, ver+1, ver+2) == 3)
1841 if (ver[0] == 1 && ver[1] == 0 && ver[2] <= 7)
1842 s->old_delta_quant = 1;
1844 } else if (parse_code & 0x8) { /* picture data unit */
1845 if (!s->seen_sequence_header) {
1846 av_log(avctx, AV_LOG_DEBUG, "Dropping frame without sequence header\n");
1850 /* find an unused frame */
1851 for (i = 0; i < MAX_FRAMES; i++)
1852 if (s->all_frames[i].avframe->data[0] == NULL)
1853 pic = &s->all_frames[i];
1855 av_log(avctx, AV_LOG_ERROR, "framelist full\n");
1859 av_frame_unref(pic->avframe);
1861 /* [DIRAC_STD] Defined in 9.6.1 ... */
/* the low two bits of the parse code give the number of references; the
   value 3 is reported as an error */
1862 tmp = parse_code & 0x03; /* [DIRAC_STD] num_refs() */
1864 av_log(avctx, AV_LOG_ERROR, "num_refs of 3\n");
1868 s->is_arith = (parse_code & 0x48) == 0x08; /* [DIRAC_STD] using_ac() */
1869 s->low_delay = (parse_code & 0x88) == 0x88; /* [DIRAC_STD] is_low_delay() */
1870 pic->avframe->reference = (parse_code & 0x0C) == 0x0C; /* [DIRAC_STD] is_reference() */
1871 pic->avframe->key_frame = s->num_refs == 0; /* [DIRAC_STD] is_intra() */
1872 pic->avframe->pict_type = s->num_refs + 1; /* Definition of AVPictureType in avutil.h */
/* request a padded buffer, flagged as a reference buffer for reference pictures */
1874 if ((ret = get_buffer_with_edge(avctx, pic->avframe, (parse_code & 0x0C) == 0x0C ? AV_GET_BUFFER_FLAG_REF : 0)) < 0)
1876 s->current_picture = pic;
1877 s->plane[0].stride = pic->avframe->linesize[0];
1878 s->plane[1].stride = pic->avframe->linesize[1];
1879 s->plane[2].stride = pic->avframe->linesize[2];
/* the scratch buffers are sized for the largest absolute stride */
1881 if (alloc_buffers(s, FFMAX3(FFABS(s->plane[0].stride), FFABS(s->plane[1].stride), FFABS(s->plane[2].stride))) < 0)
1882 return AVERROR(ENOMEM);
1884 /* [DIRAC_STD] 11.1 Picture parse. picture_parse() */
1885 if (dirac_decode_picture_header(s))
1888 /* [DIRAC_STD] 13.0 Transform data syntax. transform_data() */
1889 if (dirac_decode_frame_internal(s))
/*
 * Decoder entry point for one packet. Frames that are no longer referenced
 * are released, every data unit found in the packet is decoded, and the
 * decoded picture is then either returned or queued until its display
 * picture number is due.
 * NOTE(review): several guards, the updates of *got_frame and the return
 * statements are not visible here.
 */
1895 static int dirac_decode_frame(AVCodecContext *avctx, void *data, int *got_frame, AVPacket *pkt)
1897 DiracContext *s = avctx->priv_data;
1898 AVFrame *picture = data;
1899 uint8_t *buf = pkt->data;
1900 int buf_size = pkt->size;
1901 int i, data_unit_size, buf_idx = 0;
1904 /* release unused frames */
/* a frame is unused when it holds data but none of its reference bits is
   set; its half-pel state is reset as well */
1905 for (i = 0; i < MAX_FRAMES; i++)
1906 if (s->all_frames[i].avframe->data[0] && !s->all_frames[i].avframe->reference) {
1907 av_frame_unref(s->all_frames[i].avframe);
1908 memset(s->all_frames[i].interpolated, 0, sizeof(s->all_frames[i].interpolated));
1911 s->current_picture = NULL;
1914 /* end of stream, so flush delayed pics */
1916 return get_delayed_pic(s, (AVFrame *)data, got_frame);
1919 /*[DIRAC_STD] Here starts the code from parse_info() defined in 9.6
1920 [DIRAC_STD] PARSE_INFO_PREFIX = "BBCD" as defined in ISO/IEC 646
1921 BBCD start code search */
1922 for (; buf_idx + DATA_UNIT_HEADER_SIZE < buf_size; buf_idx++) {
1923 if (buf[buf_idx ] == 'B' && buf[buf_idx+1] == 'B' &&
1924 buf[buf_idx+2] == 'C' && buf[buf_idx+3] == 'D')
1927 /* BBCD found or end of data */
1928 if (buf_idx + DATA_UNIT_HEADER_SIZE >= buf_size)
/* the unit size is read from the four bytes that follow the start code
   and the parse code.
   NOTE(review): data_unit_size is a signed int; presumably AV_RB32 yields
   an unsigned 32-bit value, so large sizes become negative or overflow the
   sum below and slip past the bounds check -- confirm. */
1931 data_unit_size = AV_RB32(buf+buf_idx+5);
1932 if (buf_idx + data_unit_size > buf_size || !data_unit_size) {
1933 if(buf_idx + data_unit_size > buf_size)
1934 av_log(s->avctx, AV_LOG_ERROR,
1935 "Data unit with size %d is larger than input buffer, discarding\n",
1940 /* [DIRAC_STD] dirac_decode_data_unit makes reference to the while defined in 9.3 inside the function parse_sequence() */
1941 if (dirac_decode_data_unit(avctx, buf+buf_idx, data_unit_size))
1943 av_log(s->avctx, AV_LOG_ERROR,"Error in dirac_decode_data_unit\n");
/* continue with the next data unit */
1946 buf_idx += data_unit_size;
/* no picture was decoded from this packet */
1949 if (!s->current_picture)
/* the decoded picture is ahead of the display order: queue it and output
   the picture that is due, if it is already waiting */
1952 if (s->current_picture->avframe->display_picture_number > s->frame_number) {
1953 DiracFrame *delayed_frame = remove_frame(s->delay_frames, s->frame_number);
1955 s->current_picture->avframe->reference |= DELAYED_PIC_REF;
1957 if (add_frame(s->delay_frames, MAX_DELAY, s->current_picture)) {
1958 int min_num = s->delay_frames[0]->avframe->display_picture_number;
1959 /* Too many delayed frames, so we display the frame with the lowest pts */
1960 av_log(avctx, AV_LOG_ERROR, "Delay frame overflow\n");
1961 delayed_frame = s->delay_frames[0];
1963 for (i = 1; s->delay_frames[i]; i++)
1964 if (s->delay_frames[i]->avframe->display_picture_number < min_num)
1965 min_num = s->delay_frames[i]->avframe->display_picture_number;
/* output the queued picture with the lowest number and retry the insertion */
1967 delayed_frame = remove_frame(s->delay_frames, min_num);
1968 add_frame(s->delay_frames, MAX_DELAY, s->current_picture);
1971 if (delayed_frame) {
1972 delayed_frame->avframe->reference ^= DELAYED_PIC_REF;
1973 if((ret=av_frame_ref(data, delayed_frame->avframe)) < 0)
1977 } else if (s->current_picture->avframe->display_picture_number == s->frame_number) {
1978 /* The right frame at the right time :-) */
1979 if((ret=av_frame_ref(data, s->current_picture->avframe)) < 0)
/* the next picture to output is the one after the picture just returned */
1985 s->frame_number = picture->display_picture_number + 1;
/*
 * Codec descriptor of the Dirac decoder: video codec AV_CODEC_ID_DIRAC with
 * DiracContext as private data. CODEC_CAP_DELAY is set; dirac_decode_frame()
 * contains an end-of-stream path that flushes delayed pictures.
 */
1990 AVCodec ff_dirac_decoder = {
1992 .long_name = NULL_IF_CONFIG_SMALL("BBC Dirac VC-2"),
1993 .type = AVMEDIA_TYPE_VIDEO,
1994 .id = AV_CODEC_ID_DIRAC,
1995 .priv_data_size = sizeof(DiracContext),
1996 .init = dirac_decode_init,
1997 .close = dirac_decode_end,
1998 .decode = dirac_decode_frame,
1999 .capabilities = CODEC_CAP_DELAY,
2000 .flush = dirac_decode_flush,