2 * Copyright (C) 2007 Marco Gerards <marco@gnu.org>
3 * Copyright (C) 2009 David Conrad
4 * Copyright (C) 2011 Jordi Ortiz
6 * This file is part of FFmpeg.
8 * FFmpeg is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
13 * FFmpeg is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with FFmpeg; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
26 * @author Marco Gerards <marco@gnu.org>, David Conrad, Jordi Ortiz <nenjordi@gmail.com>
32 #include "bytestream.h"
35 #include "dirac_arith.h"
36 #include "mpeg12data.h"
37 #include "dirac_dwt.h"
40 #include "videodsp.h" // for ff_emulated_edge_mc_8
43 * The spec limits the number of wavelet decompositions to 4 for both
44 * level 1 (VC-2) and 128 (long-gop default).
45 * 5 decompositions is the maximum before >16-bit buffers are needed.
46 * Schroedinger allows this for DD 9,7 and 13,7 wavelets only, limiting
47 * the others to 4 decompositions (or 3 for the fidelity filter).
49 * We use this instead of MAX_DECOMPOSITIONS to save some memory.
51 #define MAX_DWT_LEVELS 5
54 * The spec limits this to 3 for frame coding, but in practice can be as high as 6
56 #define MAX_REFERENCE_FRAMES 8
57 #define MAX_DELAY 5 /* limit for main profile for frame coding (TODO: field coding) */
58 #define MAX_FRAMES (MAX_REFERENCE_FRAMES + MAX_DELAY + 1)
59 #define MAX_QUANT 68 /* max quant for VC-2 */
60 #define MAX_BLOCKSIZE 32 /* maximum xblen/yblen we support */
63 * DiracBlock->ref flags, if set then the block does MC from the given ref
65 #define DIRAC_REF_MASK_REF1 1
66 #define DIRAC_REF_MASK_REF2 2
67 #define DIRAC_REF_MASK_GLOBAL 4
70 * Value of Picture.reference when Picture is not a reference picture, but
71 * is held for delayed output.
73 #define DELAYED_PIC_REF 4
75 #define ff_emulated_edge_mc ff_emulated_edge_mc_8 /* Fix: change the calls to this function regarding bit depth */
/**
 * Round size up to the next multiple of (1 << depth).
 * Both arguments are fully parenthesized so that expressions with
 * low-precedence operators can be passed safely.
 */
#define CALC_PADDING(size, depth)                       \
    ((((size) + (1 << (depth)) - 1) >> (depth)) << (depth))
80 #define DIVRNDUP(a, b) (((a) + (b) - 1) / (b))
84 int interpolated[3]; /* 1 if hpel[] is valid */
86 uint8_t *hpel_base[3][4];
93 } u; /* anonymous unions aren't in C99 :( */
97 typedef struct SubBand {
105 struct SubBand *parent;
109 const uint8_t *coeff_data;
112 typedef struct Plane {
121 IDWTELEM *idwt_buf_base;
127 /* block separation (block n+1 starts after this many pixels in block n) */
130 /* amount of overspill on each edge (half of the overlap between blocks) */
134 SubBand band[MAX_DWT_LEVELS][4];
137 typedef struct DiracContext {
138 AVCodecContext *avctx;
140 DiracDSPContext diracdsp;
142 dirac_source_params source;
143 int seen_sequence_header;
144 int frame_number; /* number of the next frame to display */
149 int zero_res; /* zero residue flag */
150 int is_arith; /* whether coeffs use arith or golomb coding */
151 int low_delay; /* use the low delay syntax */
152 int globalmc_flag; /* use global motion compensation */
153 int num_refs; /* number of reference pictures */
155 /* wavelet decoding */
156 unsigned wavelet_depth; /* depth of the IDWT */
157 unsigned wavelet_idx;
160 * schroedinger older than 1.0.8 doesn't store
161 * quant delta if only one codebook exists in a band
163 unsigned old_delta_quant;
164 unsigned codeblock_mode;
169 } codeblock[MAX_DWT_LEVELS+1];
172 unsigned num_x; /* number of horizontal slices */
173 unsigned num_y; /* number of vertical slices */
174 AVRational bytes; /* average bytes per slice */
175 uint8_t quant[MAX_DWT_LEVELS][4]; /* [DIRAC_STD] E.1 */
179 int pan_tilt[2]; /* pan/tilt vector */
180 int zrs[2][2]; /* zoom/rotate/shear matrix */
181 int perspective[2]; /* perspective vector */
183 unsigned perspective_exp;
186 /* motion compensation */
187 uint8_t mv_precision; /* [DIRAC_STD] REFS_WT_PRECISION */
188 int16_t weight[2]; /* [DIRAC_STD] REF1_WT and REF2_WT */
189 unsigned weight_log2denom; /* [DIRAC_STD] REFS_WT_PRECISION */
191 int blwidth; /* number of blocks (horizontally) */
192 int blheight; /* number of blocks (vertically) */
193 int sbwidth; /* number of superblocks (horizontally) */
194 int sbheight; /* number of superblocks (vertically) */
197 DiracBlock *blmotion;
199 uint8_t *edge_emu_buffer[4];
200 uint8_t *edge_emu_buffer_base;
202 uint16_t *mctmp; /* buffer holding the MC data multipled by OBMC weights */
205 DECLARE_ALIGNED(16, uint8_t, obmc_weight)[3][MAX_BLOCKSIZE*MAX_BLOCKSIZE];
207 void (*put_pixels_tab[4])(uint8_t *dst, const uint8_t *src[5], int stride, int h);
208 void (*avg_pixels_tab[4])(uint8_t *dst, const uint8_t *src[5], int stride, int h);
209 void (*add_obmc)(uint16_t *dst, const uint8_t *src, int stride, const uint8_t *obmc_weight, int yblen);
210 dirac_weight_func weight_func;
211 dirac_biweight_func biweight_func;
213 DiracFrame *current_picture;
214 DiracFrame *ref_pics[2];
216 DiracFrame *ref_frames[MAX_REFERENCE_FRAMES+1];
217 DiracFrame *delay_frames[MAX_DELAY+1];
218 DiracFrame all_frames[MAX_FRAMES];
222 * Dirac Specification ->
223 * Parse code values. 9.6.1 Table 9.1
225 enum dirac_parse_code {
226 pc_seq_header = 0x00,
239 static const uint8_t default_qmat[][4][4] = {
240 { { 5, 3, 3, 0}, { 0, 4, 4, 1}, { 0, 5, 5, 2}, { 0, 6, 6, 3} },
241 { { 4, 2, 2, 0}, { 0, 4, 4, 2}, { 0, 5, 5, 3}, { 0, 7, 7, 5} },
242 { { 5, 3, 3, 0}, { 0, 4, 4, 1}, { 0, 5, 5, 2}, { 0, 6, 6, 3} },
243 { { 8, 4, 4, 0}, { 0, 4, 4, 0}, { 0, 4, 4, 0}, { 0, 4, 4, 0} },
244 { { 8, 4, 4, 0}, { 0, 4, 4, 0}, { 0, 4, 4, 0}, { 0, 4, 4, 0} },
245 { { 0, 4, 4, 8}, { 0, 8, 8, 12}, { 0, 13, 13, 17}, { 0, 17, 17, 21} },
246 { { 3, 1, 1, 0}, { 0, 4, 4, 2}, { 0, 6, 6, 5}, { 0, 9, 9, 7} },
249 static const int qscale_tab[MAX_QUANT+1] = {
250 4, 5, 6, 7, 8, 10, 11, 13,
251 16, 19, 23, 27, 32, 38, 45, 54,
252 64, 76, 91, 108, 128, 152, 181, 215,
253 256, 304, 362, 431, 512, 609, 724, 861,
254 1024, 1218, 1448, 1722, 2048, 2435, 2896, 3444,
255 4096, 4871, 5793, 6889, 8192, 9742, 11585, 13777,
256 16384, 19484, 23170, 27554, 32768, 38968, 46341, 55109,
260 static const int qoffset_intra_tab[MAX_QUANT+1] = {
261 1, 2, 3, 4, 4, 5, 6, 7,
262 8, 10, 12, 14, 16, 19, 23, 27,
263 32, 38, 46, 54, 64, 76, 91, 108,
264 128, 152, 181, 216, 256, 305, 362, 431,
265 512, 609, 724, 861, 1024, 1218, 1448, 1722,
266 2048, 2436, 2897, 3445, 4096, 4871, 5793, 6889,
267 8192, 9742, 11585, 13777, 16384, 19484, 23171, 27555,
271 static const int qoffset_inter_tab[MAX_QUANT+1] = {
272 1, 2, 2, 3, 3, 4, 4, 5,
273 6, 7, 9, 10, 12, 14, 17, 20,
274 24, 29, 34, 41, 48, 57, 68, 81,
275 96, 114, 136, 162, 192, 228, 272, 323,
276 384, 457, 543, 646, 768, 913, 1086, 1292,
277 1536, 1827, 2172, 2583, 3072, 3653, 4344, 5166,
278 6144, 7307, 8689, 10333, 12288, 14613, 17378, 20666,
/**
 * Division by 3 with rounding, done with a multiply and a shift
 * (magic number taken from schroedinger).
 *
 * @param x value to divide
 * @return ((x+1)*21845 + 10922) >> 16
 */
static inline int divide3(int x)
{
    return ((x+1)*21845 + 10922) >> 16;
}
288 static DiracFrame *remove_frame(DiracFrame *framelist[], int picnum)
290 DiracFrame *remove_pic = NULL;
291 int i, remove_idx = -1;
293 for (i = 0; framelist[i]; i++)
294 if (framelist[i]->avframe.display_picture_number == picnum) {
295 remove_pic = framelist[i];
300 for (i = remove_idx; framelist[i]; i++)
301 framelist[i] = framelist[i+1];
306 static int add_frame(DiracFrame *framelist[], int maxframes, DiracFrame *frame)
309 for (i = 0; i < maxframes; i++)
311 framelist[i] = frame;
317 static int alloc_sequence_buffers(DiracContext *s)
319 int sbwidth = DIVRNDUP(s->source.width, 4);
320 int sbheight = DIVRNDUP(s->source.height, 4);
321 int i, w, h, top_padding;
323 /* todo: think more about this / use or set Plane here */
324 for (i = 0; i < 3; i++) {
325 int max_xblen = MAX_BLOCKSIZE >> (i ? s->chroma_x_shift : 0);
326 int max_yblen = MAX_BLOCKSIZE >> (i ? s->chroma_y_shift : 0);
327 w = s->source.width >> (i ? s->chroma_x_shift : 0);
328 h = s->source.height >> (i ? s->chroma_y_shift : 0);
330 /* we allocate the max we support here since num decompositions can
331 * change from frame to frame. Stride is aligned to 16 for SIMD, and
332 * 1<<MAX_DWT_LEVELS top padding to avoid if(y>0) in arith decoding
333 * MAX_BLOCKSIZE padding for MC: blocks can spill up to half of that
335 top_padding = FFMAX(1<<MAX_DWT_LEVELS, max_yblen/2);
336 w = FFALIGN(CALC_PADDING(w, MAX_DWT_LEVELS), 8); /* FIXME: Should this be 16 for SSE??? */
337 h = top_padding + CALC_PADDING(h, MAX_DWT_LEVELS) + max_yblen/2;
339 s->plane[i].idwt_buf_base = av_mallocz((w+max_xblen)*h * sizeof(IDWTELEM));
340 s->plane[i].idwt_tmp = av_malloc((w+16) * sizeof(IDWTELEM));
341 s->plane[i].idwt_buf = s->plane[i].idwt_buf_base + top_padding*w;
342 if (!s->plane[i].idwt_buf_base || !s->plane[i].idwt_tmp)
343 return AVERROR(ENOMEM);
347 h = s->source.height;
349 /* fixme: allocate using real stride here */
350 s->sbsplit = av_malloc(sbwidth * sbheight);
351 s->blmotion = av_malloc(sbwidth * sbheight * 16 * sizeof(*s->blmotion));
352 s->edge_emu_buffer_base = av_malloc((w+64)*MAX_BLOCKSIZE);
354 s->mctmp = av_malloc((w+64+MAX_BLOCKSIZE) * (h+MAX_BLOCKSIZE) * sizeof(*s->mctmp));
355 s->mcscratch = av_malloc((w+64)*MAX_BLOCKSIZE);
357 if (!s->sbsplit || !s->blmotion || !s->mctmp || !s->mcscratch)
358 return AVERROR(ENOMEM);
362 static void free_sequence_buffers(DiracContext *s)
366 for (i = 0; i < MAX_FRAMES; i++) {
367 if (s->all_frames[i].avframe.data[0]) {
368 s->avctx->release_buffer(s->avctx, &s->all_frames[i].avframe);
369 memset(s->all_frames[i].interpolated, 0, sizeof(s->all_frames[i].interpolated));
372 for (j = 0; j < 3; j++)
373 for (k = 1; k < 4; k++)
374 av_freep(&s->all_frames[i].hpel_base[j][k]);
377 memset(s->ref_frames, 0, sizeof(s->ref_frames));
378 memset(s->delay_frames, 0, sizeof(s->delay_frames));
380 for (i = 0; i < 3; i++) {
381 av_freep(&s->plane[i].idwt_buf_base);
382 av_freep(&s->plane[i].idwt_tmp);
385 av_freep(&s->sbsplit);
386 av_freep(&s->blmotion);
387 av_freep(&s->edge_emu_buffer_base);
390 av_freep(&s->mcscratch);
393 static av_cold int dirac_decode_init(AVCodecContext *avctx)
395 DiracContext *s = avctx->priv_data;
397 s->frame_number = -1;
399 if (avctx->flags&CODEC_FLAG_EMU_EDGE) {
400 av_log(avctx, AV_LOG_ERROR, "Edge emulation not supported!\n");
401 return AVERROR_PATCHWELCOME;
404 ff_dsputil_init(&s->dsp, avctx);
405 ff_diracdsp_init(&s->diracdsp);
410 static void dirac_decode_flush(AVCodecContext *avctx)
412 DiracContext *s = avctx->priv_data;
413 free_sequence_buffers(s);
414 s->seen_sequence_header = 0;
415 s->frame_number = -1;
418 static av_cold int dirac_decode_end(AVCodecContext *avctx)
420 dirac_decode_flush(avctx);
424 #define SIGN_CTX(x) (CTX_SIGN_ZERO + ((x) > 0) - ((x) < 0))
426 static inline void coeff_unpack_arith(DiracArith *c, int qfactor, int qoffset,
427 SubBand *b, IDWTELEM *buf, int x, int y)
431 int pred_ctx = CTX_ZPZN_F1;
433 /* Check if the parent subband has a 0 in the corresponding position */
435 pred_ctx += !!b->parent->ibuf[b->parent->stride * (y>>1) + (x>>1)] << 1;
437 if (b->orientation == subband_hl)
438 sign_pred = buf[-b->stride];
440 /* Determine if the pixel has only zeros in its neighbourhood */
442 pred_ctx += !(buf[-1] | buf[-b->stride] | buf[-1-b->stride]);
443 if (b->orientation == subband_lh)
446 pred_ctx += !buf[-b->stride];
449 coeff = dirac_get_arith_uint(c, pred_ctx, CTX_COEFF_DATA);
451 coeff = (coeff * qfactor + qoffset + 2) >> 2;
452 sign = dirac_get_arith_bit(c, SIGN_CTX(sign_pred));
453 coeff = (coeff ^ -sign) + sign;
458 static inline int coeff_unpack_golomb(GetBitContext *gb, int qfactor, int qoffset)
462 coeff = svq3_get_ue_golomb(gb);
464 coeff = (coeff * qfactor + qoffset + 2) >> 2;
465 sign = get_bits1(gb);
466 coeff = (coeff ^ -sign) + sign;
472 * Decode the coeffs in the rectangle defined by left, right, top, bottom
473 * [DIRAC_STD] 13.4.3.2 Codeblock unpacking loop. codeblock()
475 static inline void codeblock(DiracContext *s, SubBand *b,
476 GetBitContext *gb, DiracArith *c,
477 int left, int right, int top, int bottom,
478 int blockcnt_one, int is_arith)
480 int x, y, zero_block;
481 int qoffset, qfactor;
484 /* check for any coded coefficients in this codeblock */
487 zero_block = dirac_get_arith_bit(c, CTX_ZERO_BLOCK);
489 zero_block = get_bits1(gb);
495 if (s->codeblock_mode && !(s->old_delta_quant && blockcnt_one)) {
496 int quant = b->quant;
498 quant += dirac_get_arith_int(c, CTX_DELTA_Q_F, CTX_DELTA_Q_DATA);
500 quant += dirac_get_se_golomb(gb);
502 av_log(s->avctx, AV_LOG_ERROR, "Invalid quant\n");
508 b->quant = FFMIN(b->quant, MAX_QUANT);
510 qfactor = qscale_tab[b->quant];
511 /* TODO: context pointer? */
513 qoffset = qoffset_intra_tab[b->quant];
515 qoffset = qoffset_inter_tab[b->quant];
517 buf = b->ibuf + top * b->stride;
518 for (y = top; y < bottom; y++) {
519 for (x = left; x < right; x++) {
520 /* [DIRAC_STD] 13.4.4 Subband coefficients. coeff_unpack() */
522 coeff_unpack_arith(c, qfactor, qoffset, b, buf+x, x, y);
524 buf[x] = coeff_unpack_golomb(gb, qfactor, qoffset);
531 * Dirac Specification ->
532 * 13.3 intra_dc_prediction(band)
534 static inline void intra_dc_prediction(SubBand *b)
536 IDWTELEM *buf = b->ibuf;
539 for (x = 1; x < b->width; x++)
543 for (y = 1; y < b->height; y++) {
544 buf[0] += buf[-b->stride];
546 for (x = 1; x < b->width; x++) {
547 int pred = buf[x - 1] + buf[x - b->stride] + buf[x - b->stride-1];
548 buf[x] += divide3(pred);
555 * Dirac Specification ->
556 * 13.4.2 Non-skipped subbands. subband_coeffs()
558 static av_always_inline void decode_subband_internal(DiracContext *s, SubBand *b, int is_arith)
560 int cb_x, cb_y, left, right, top, bottom;
563 int cb_width = s->codeblock[b->level + (b->orientation != subband_ll)].width;
564 int cb_height = s->codeblock[b->level + (b->orientation != subband_ll)].height;
565 int blockcnt_one = (cb_width + cb_height) == 2;
570 init_get_bits(&gb, b->coeff_data, b->length*8);
573 ff_dirac_init_arith_decoder(&c, &gb, b->length);
576 for (cb_y = 0; cb_y < cb_height; cb_y++) {
577 bottom = (b->height * (cb_y+1)) / cb_height;
579 for (cb_x = 0; cb_x < cb_width; cb_x++) {
580 right = (b->width * (cb_x+1)) / cb_width;
581 codeblock(s, b, &gb, &c, left, right, top, bottom, blockcnt_one, is_arith);
587 if (b->orientation == subband_ll && s->num_refs == 0)
588 intra_dc_prediction(b);
591 static int decode_subband_arith(AVCodecContext *avctx, void *b)
593 DiracContext *s = avctx->priv_data;
594 decode_subband_internal(s, b, 1);
598 static int decode_subband_golomb(AVCodecContext *avctx, void *arg)
600 DiracContext *s = avctx->priv_data;
602 decode_subband_internal(s, *b, 0);
607 * Dirac Specification ->
608 * [DIRAC_STD] 13.4.1 core_transform_data()
610 static void decode_component(DiracContext *s, int comp)
612 AVCodecContext *avctx = s->avctx;
613 SubBand *bands[3*MAX_DWT_LEVELS+1];
614 enum dirac_subband orientation;
615 int level, num_bands = 0;
617 /* Unpack all subbands at all levels. */
618 for (level = 0; level < s->wavelet_depth; level++) {
619 for (orientation = !!level; orientation < 4; orientation++) {
620 SubBand *b = &s->plane[comp].band[level][orientation];
621 bands[num_bands++] = b;
623 align_get_bits(&s->gb);
624 /* [DIRAC_STD] 13.4.2 subband() */
625 b->length = svq3_get_ue_golomb(&s->gb);
627 b->quant = svq3_get_ue_golomb(&s->gb);
628 align_get_bits(&s->gb);
629 b->coeff_data = s->gb.buffer + get_bits_count(&s->gb)/8;
630 b->length = FFMIN(b->length, FFMAX(get_bits_left(&s->gb)/8, 0));
631 skip_bits_long(&s->gb, b->length*8);
634 /* arithmetic coding has inter-level dependencies, so we can only execute one level at a time */
636 avctx->execute(avctx, decode_subband_arith, &s->plane[comp].band[level][!!level],
637 NULL, 4-!!level, sizeof(SubBand));
639 /* golomb coding has no inter-level dependencies, so we can execute all subbands in parallel */
641 avctx->execute(avctx, decode_subband_golomb, bands, NULL, num_bands, sizeof(SubBand*));
644 /* [DIRAC_STD] 13.5.5.2 Luma slice subband data. luma_slice_band(level,orient,sx,sy) --> if b2 == NULL */
645 /* [DIRAC_STD] 13.5.5.3 Chroma slice subband data. chroma_slice_band(level,orient,sx,sy) --> if b2 != NULL */
646 static void lowdelay_subband(DiracContext *s, GetBitContext *gb, int quant,
647 int slice_x, int slice_y, int bits_end,
648 SubBand *b1, SubBand *b2)
650 int left = b1->width * slice_x / s->lowdelay.num_x;
651 int right = b1->width *(slice_x+1) / s->lowdelay.num_x;
652 int top = b1->height * slice_y / s->lowdelay.num_y;
653 int bottom = b1->height *(slice_y+1) / s->lowdelay.num_y;
655 int qfactor = qscale_tab[FFMIN(quant, MAX_QUANT)];
656 int qoffset = qoffset_intra_tab[FFMIN(quant, MAX_QUANT)];
658 IDWTELEM *buf1 = b1->ibuf + top * b1->stride;
659 IDWTELEM *buf2 = b2 ? b2->ibuf + top * b2->stride : NULL;
661 /* we have to constantly check for overread since the spec explictly
662 requires this, with the meaning that all remaining coeffs are set to 0 */
663 if (get_bits_count(gb) >= bits_end)
666 for (y = top; y < bottom; y++) {
667 for (x = left; x < right; x++) {
668 buf1[x] = coeff_unpack_golomb(gb, qfactor, qoffset);
669 if (get_bits_count(gb) >= bits_end)
672 buf2[x] = coeff_unpack_golomb(gb, qfactor, qoffset);
673 if (get_bits_count(gb) >= bits_end)
683 struct lowdelay_slice {
692 * Dirac Specification ->
693 * 13.5.2 Slices. slice(sx,sy)
695 static int decode_lowdelay_slice(AVCodecContext *avctx, void *arg)
697 DiracContext *s = avctx->priv_data;
698 struct lowdelay_slice *slice = arg;
699 GetBitContext *gb = &slice->gb;
700 enum dirac_subband orientation;
701 int level, quant, chroma_bits, chroma_end;
703 int quant_base = get_bits(gb, 7); /*[DIRAC_STD] qindex */
704 int length_bits = av_log2(8 * slice->bytes)+1;
705 int luma_bits = get_bits_long(gb, length_bits);
706 int luma_end = get_bits_count(gb) + FFMIN(luma_bits, get_bits_left(gb));
708 /* [DIRAC_STD] 13.5.5.2 luma_slice_band */
709 for (level = 0; level < s->wavelet_depth; level++)
710 for (orientation = !!level; orientation < 4; orientation++) {
711 quant = FFMAX(quant_base - s->lowdelay.quant[level][orientation], 0);
712 lowdelay_subband(s, gb, quant, slice->slice_x, slice->slice_y, luma_end,
713 &s->plane[0].band[level][orientation], NULL);
716 /* consume any unused bits from luma */
717 skip_bits_long(gb, get_bits_count(gb) - luma_end);
719 chroma_bits = 8*slice->bytes - 7 - length_bits - luma_bits;
720 chroma_end = get_bits_count(gb) + FFMIN(chroma_bits, get_bits_left(gb));
721 /* [DIRAC_STD] 13.5.5.3 chroma_slice_band */
722 for (level = 0; level < s->wavelet_depth; level++)
723 for (orientation = !!level; orientation < 4; orientation++) {
724 quant = FFMAX(quant_base - s->lowdelay.quant[level][orientation], 0);
725 lowdelay_subband(s, gb, quant, slice->slice_x, slice->slice_y, chroma_end,
726 &s->plane[1].band[level][orientation],
727 &s->plane[2].band[level][orientation]);
734 * Dirac Specification ->
735 * 13.5.1 low_delay_transform_data()
737 static void decode_lowdelay(DiracContext *s)
739 AVCodecContext *avctx = s->avctx;
740 int slice_x, slice_y, bytes, bufsize;
742 struct lowdelay_slice *slices;
745 slices = av_mallocz(s->lowdelay.num_x * s->lowdelay.num_y * sizeof(struct lowdelay_slice));
747 align_get_bits(&s->gb);
748 /*[DIRAC_STD] 13.5.2 Slices. slice(sx,sy) */
749 buf = s->gb.buffer + get_bits_count(&s->gb)/8;
750 bufsize = get_bits_left(&s->gb);
752 for (slice_y = 0; bufsize > 0 && slice_y < s->lowdelay.num_y; slice_y++)
753 for (slice_x = 0; bufsize > 0 && slice_x < s->lowdelay.num_x; slice_x++) {
754 bytes = (slice_num+1) * s->lowdelay.bytes.num / s->lowdelay.bytes.den
755 - slice_num * s->lowdelay.bytes.num / s->lowdelay.bytes.den;
757 slices[slice_num].bytes = bytes;
758 slices[slice_num].slice_x = slice_x;
759 slices[slice_num].slice_y = slice_y;
760 init_get_bits(&slices[slice_num].gb, buf, bufsize);
767 avctx->execute(avctx, decode_lowdelay_slice, slices, NULL, slice_num,
768 sizeof(struct lowdelay_slice)); /* [DIRAC_STD] 13.5.2 Slices */
769 intra_dc_prediction(&s->plane[0].band[0][0]); /* [DIRAC_STD] 13.3 intra_dc_prediction() */
770 intra_dc_prediction(&s->plane[1].band[0][0]); /* [DIRAC_STD] 13.3 intra_dc_prediction() */
771 intra_dc_prediction(&s->plane[2].band[0][0]); /* [DIRAC_STD] 13.3 intra_dc_prediction() */
775 static void init_planes(DiracContext *s)
777 int i, w, h, level, orientation;
779 for (i = 0; i < 3; i++) {
780 Plane *p = &s->plane[i];
782 p->width = s->source.width >> (i ? s->chroma_x_shift : 0);
783 p->height = s->source.height >> (i ? s->chroma_y_shift : 0);
784 p->idwt_width = w = CALC_PADDING(p->width , s->wavelet_depth);
785 p->idwt_height = h = CALC_PADDING(p->height, s->wavelet_depth);
786 p->idwt_stride = FFALIGN(p->idwt_width, 8);
788 for (level = s->wavelet_depth-1; level >= 0; level--) {
791 for (orientation = !!level; orientation < 4; orientation++) {
792 SubBand *b = &p->band[level][orientation];
794 b->ibuf = p->idwt_buf;
796 b->stride = p->idwt_stride << (s->wavelet_depth - level);
799 b->orientation = orientation;
804 b->ibuf += b->stride>>1;
807 b->parent = &p->band[level-1][orientation];
812 p->xblen = s->plane[0].xblen >> s->chroma_x_shift;
813 p->yblen = s->plane[0].yblen >> s->chroma_y_shift;
814 p->xbsep = s->plane[0].xbsep >> s->chroma_x_shift;
815 p->ybsep = s->plane[0].ybsep >> s->chroma_y_shift;
818 p->xoffset = (p->xblen - p->xbsep)/2;
819 p->yoffset = (p->yblen - p->ybsep)/2;
824 * Unpack the motion compensation parameters
825 * Dirac Specification ->
826 * 11.2 Picture prediction data. picture_prediction()
828 static int dirac_unpack_prediction_parameters(DiracContext *s)
830 static const uint8_t default_blen[] = { 4, 12, 16, 24 };
831 static const uint8_t default_bsep[] = { 4, 8, 12, 16 };
833 GetBitContext *gb = &s->gb;
837 /* [DIRAC_STD] 11.2.2 Block parameters. block_parameters() */
838 /* Luma and Chroma are equal. 11.2.3 */
839 idx = svq3_get_ue_golomb(gb); /* [DIRAC_STD] index */
842 av_log(s->avctx, AV_LOG_ERROR, "Block prediction index too high\n");
847 s->plane[0].xblen = svq3_get_ue_golomb(gb);
848 s->plane[0].yblen = svq3_get_ue_golomb(gb);
849 s->plane[0].xbsep = svq3_get_ue_golomb(gb);
850 s->plane[0].ybsep = svq3_get_ue_golomb(gb);
852 /*[DIRAC_STD] preset_block_params(index). Table 11.1 */
853 s->plane[0].xblen = default_blen[idx-1];
854 s->plane[0].yblen = default_blen[idx-1];
855 s->plane[0].xbsep = default_bsep[idx-1];
856 s->plane[0].ybsep = default_bsep[idx-1];
858 /*[DIRAC_STD] 11.2.4 motion_data_dimensions()
859 Calculated in function dirac_unpack_block_motion_data */
861 if (!s->plane[0].xbsep || !s->plane[0].ybsep || s->plane[0].xbsep < s->plane[0].xblen/2 || s->plane[0].ybsep < s->plane[0].yblen/2) {
862 av_log(s->avctx, AV_LOG_ERROR, "Block separation too small\n");
865 if (s->plane[0].xbsep > s->plane[0].xblen || s->plane[0].ybsep > s->plane[0].yblen) {
866 av_log(s->avctx, AV_LOG_ERROR, "Block separation greater than size\n");
869 if (FFMAX(s->plane[0].xblen, s->plane[0].yblen) > MAX_BLOCKSIZE) {
870 av_log(s->avctx, AV_LOG_ERROR, "Unsupported large block size\n");
874 /*[DIRAC_STD] 11.2.5 Motion vector precision. motion_vector_precision()
875 Read motion vector precision */
876 s->mv_precision = svq3_get_ue_golomb(gb);
877 if (s->mv_precision > 3) {
878 av_log(s->avctx, AV_LOG_ERROR, "MV precision finer than eighth-pel\n");
882 /*[DIRAC_STD] 11.2.6 Global motion. global_motion()
883 Read the global motion compensation parameters */
884 s->globalmc_flag = get_bits1(gb);
885 if (s->globalmc_flag) {
886 memset(s->globalmc, 0, sizeof(s->globalmc));
887 /* [DIRAC_STD] pan_tilt(gparams) */
888 for (ref = 0; ref < s->num_refs; ref++) {
890 s->globalmc[ref].pan_tilt[0] = dirac_get_se_golomb(gb);
891 s->globalmc[ref].pan_tilt[1] = dirac_get_se_golomb(gb);
893 /* [DIRAC_STD] zoom_rotate_shear(gparams)
894 zoom/rotation/shear parameters */
896 s->globalmc[ref].zrs_exp = svq3_get_ue_golomb(gb);
897 s->globalmc[ref].zrs[0][0] = dirac_get_se_golomb(gb);
898 s->globalmc[ref].zrs[0][1] = dirac_get_se_golomb(gb);
899 s->globalmc[ref].zrs[1][0] = dirac_get_se_golomb(gb);
900 s->globalmc[ref].zrs[1][1] = dirac_get_se_golomb(gb);
902 s->globalmc[ref].zrs[0][0] = 1;
903 s->globalmc[ref].zrs[1][1] = 1;
905 /* [DIRAC_STD] perspective(gparams) */
907 s->globalmc[ref].perspective_exp = svq3_get_ue_golomb(gb);
908 s->globalmc[ref].perspective[0] = dirac_get_se_golomb(gb);
909 s->globalmc[ref].perspective[1] = dirac_get_se_golomb(gb);
914 /*[DIRAC_STD] 11.2.7 Picture prediction mode. prediction_mode()
915 Picture prediction mode, not currently used. */
916 if (svq3_get_ue_golomb(gb)) {
917 av_log(s->avctx, AV_LOG_ERROR, "Unknown picture prediction mode\n");
921 /* [DIRAC_STD] 11.2.8 Reference picture weight. reference_picture_weights()
922 just data read, weight calculation will be done later on. */
923 s->weight_log2denom = 1;
928 s->weight_log2denom = svq3_get_ue_golomb(gb);
929 s->weight[0] = dirac_get_se_golomb(gb);
930 if (s->num_refs == 2)
931 s->weight[1] = dirac_get_se_golomb(gb);
937 * Dirac Specification ->
938 * 11.3 Wavelet transform data. wavelet_transform()
940 static int dirac_unpack_idwt_params(DiracContext *s)
942 GetBitContext *gb = &s->gb;
946 #define CHECKEDREAD(dst, cond, errmsg) \
947 tmp = svq3_get_ue_golomb(gb); \
949 av_log(s->avctx, AV_LOG_ERROR, errmsg); \
956 s->zero_res = s->num_refs ? get_bits1(gb) : 0;
960 /*[DIRAC_STD] 11.3.1 Transform parameters. transform_parameters() */
961 CHECKEDREAD(s->wavelet_idx, tmp > 6, "wavelet_idx is too big\n")
963 CHECKEDREAD(s->wavelet_depth, tmp > MAX_DWT_LEVELS || tmp < 1, "invalid number of DWT decompositions\n")
966 /* Codeblock parameters (core syntax only) */
968 for (i = 0; i <= s->wavelet_depth; i++) {
969 CHECKEDREAD(s->codeblock[i].width , tmp < 1, "codeblock width invalid\n")
970 CHECKEDREAD(s->codeblock[i].height, tmp < 1, "codeblock height invalid\n")
973 CHECKEDREAD(s->codeblock_mode, tmp > 1, "unknown codeblock mode\n")
975 for (i = 0; i <= s->wavelet_depth; i++)
976 s->codeblock[i].width = s->codeblock[i].height = 1;
978 /* Slice parameters + quantization matrix*/
979 /*[DIRAC_STD] 11.3.4 Slice coding Parameters (low delay syntax only). slice_parameters() */
980 s->lowdelay.num_x = svq3_get_ue_golomb(gb);
981 s->lowdelay.num_y = svq3_get_ue_golomb(gb);
982 s->lowdelay.bytes.num = svq3_get_ue_golomb(gb);
983 s->lowdelay.bytes.den = svq3_get_ue_golomb(gb);
985 if (s->lowdelay.bytes.den <= 0) {
986 av_log(s->avctx,AV_LOG_ERROR,"Invalid lowdelay.bytes.den\n");
987 return AVERROR_INVALIDDATA;
990 /* [DIRAC_STD] 11.3.5 Quantisation matrices (low-delay syntax). quant_matrix() */
992 av_log(s->avctx,AV_LOG_DEBUG,"Low Delay: Has Custom Quantization Matrix!\n");
993 /* custom quantization matrix */
994 s->lowdelay.quant[0][0] = svq3_get_ue_golomb(gb);
995 for (level = 0; level < s->wavelet_depth; level++) {
996 s->lowdelay.quant[level][1] = svq3_get_ue_golomb(gb);
997 s->lowdelay.quant[level][2] = svq3_get_ue_golomb(gb);
998 s->lowdelay.quant[level][3] = svq3_get_ue_golomb(gb);
1001 if (s->wavelet_depth > 4) {
1002 av_log(s->avctx,AV_LOG_ERROR,"Mandatory custom low delay matrix missing for depth %d\n", s->wavelet_depth);
1003 return AVERROR_INVALIDDATA;
1005 /* default quantization matrix */
1006 for (level = 0; level < s->wavelet_depth; level++)
1007 for (i = 0; i < 4; i++) {
1008 s->lowdelay.quant[level][i] = default_qmat[s->wavelet_idx][level][i];
1009 /* haar with no shift differs for different depths */
1010 if (s->wavelet_idx == 3)
1011 s->lowdelay.quant[level][i] += 4*(s->wavelet_depth-1 - level);
/**
 * Predict the split mode of the superblock at (x, y) from its already
 * decoded neighbours.
 *
 * @param sbsplit pointer to the entry of the current superblock
 * @param stride  number of entries per row
 * @param x       column of the superblock
 * @param y       row of the superblock
 * @return 0 for the top-left superblock, the left neighbour in the first
 *         row, the top neighbour in the first column, otherwise the
 *         rounded mean of the left, top and top-left neighbours
 */
static inline int pred_sbsplit(const uint8_t *sbsplit, int stride, int x, int y)
{
    /* rounded sum/3 for every possible sum of three values in 0..2 */
    static const uint8_t avgsplit[7] = { 0, 0, 1, 1, 1, 2, 2 };

    if (!(x|y))
        return 0;
    else if (!y)
        return sbsplit[-1];
    else if (!x)
        return sbsplit[-stride];

    return avgsplit[sbsplit[-1] + sbsplit[-stride] + sbsplit[-stride-1]];
}
1032 static inline int pred_block_mode(DiracBlock *block, int stride, int x, int y, int refmask)
1039 return block[-1].ref & refmask;
1041 return block[-stride].ref & refmask;
1043 /* return the majority */
1044 pred = (block[-1].ref & refmask) + (block[-stride].ref & refmask) + (block[-stride-1].ref & refmask);
1045 return (pred >> 1) & refmask;
1048 static inline void pred_block_dc(DiracBlock *block, int stride, int x, int y)
1052 memset(block->u.dc, 0, sizeof(block->u.dc));
1054 if (x && !(block[-1].ref & 3)) {
1055 for (i = 0; i < 3; i++)
1056 block->u.dc[i] += block[-1].u.dc[i];
1060 if (y && !(block[-stride].ref & 3)) {
1061 for (i = 0; i < 3; i++)
1062 block->u.dc[i] += block[-stride].u.dc[i];
1066 if (x && y && !(block[-1-stride].ref & 3)) {
1067 for (i = 0; i < 3; i++)
1068 block->u.dc[i] += block[-1-stride].u.dc[i];
1073 for (i = 0; i < 3; i++)
1074 block->u.dc[i] = (block->u.dc[i]+1)>>1;
1075 } else if (n == 3) {
1076 for (i = 0; i < 3; i++)
1077 block->u.dc[i] = divide3(block->u.dc[i]);
1081 static inline void pred_mv(DiracBlock *block, int stride, int x, int y, int ref)
1084 int refmask = ref+1;
1085 int mask = refmask | DIRAC_REF_MASK_GLOBAL; /* exclude gmc blocks */
1088 if (x && (block[-1].ref & mask) == refmask)
1089 pred[n++] = block[-1].u.mv[ref];
1091 if (y && (block[-stride].ref & mask) == refmask)
1092 pred[n++] = block[-stride].u.mv[ref];
1094 if (x && y && (block[-stride-1].ref & mask) == refmask)
1095 pred[n++] = block[-stride-1].u.mv[ref];
1099 block->u.mv[ref][0] = 0;
1100 block->u.mv[ref][1] = 0;
1103 block->u.mv[ref][0] = pred[0][0];
1104 block->u.mv[ref][1] = pred[0][1];
1107 block->u.mv[ref][0] = (pred[0][0] + pred[1][0] + 1) >> 1;
1108 block->u.mv[ref][1] = (pred[0][1] + pred[1][1] + 1) >> 1;
1111 block->u.mv[ref][0] = mid_pred(pred[0][0], pred[1][0], pred[2][0]);
1112 block->u.mv[ref][1] = mid_pred(pred[0][1], pred[1][1], pred[2][1]);
1117 static void global_mv(DiracContext *s, DiracBlock *block, int x, int y, int ref)
1119 int ez = s->globalmc[ref].zrs_exp;
1120 int ep = s->globalmc[ref].perspective_exp;
1121 int (*A)[2] = s->globalmc[ref].zrs;
1122 int *b = s->globalmc[ref].pan_tilt;
1123 int *c = s->globalmc[ref].perspective;
1125 int m = (1<<ep) - (c[0]*x + c[1]*y);
1126 int mx = m * ((A[0][0] * x + A[0][1]*y) + (1<<ez) * b[0]);
1127 int my = m * ((A[1][0] * x + A[1][1]*y) + (1<<ez) * b[1]);
1129 block->u.mv[ref][0] = (mx + (1<<(ez+ep))) >> (ez+ep);
1130 block->u.mv[ref][1] = (my + (1<<(ez+ep))) >> (ez+ep);
1133 static void decode_block_params(DiracContext *s, DiracArith arith[8], DiracBlock *block,
1134 int stride, int x, int y)
1138 block->ref = pred_block_mode(block, stride, x, y, DIRAC_REF_MASK_REF1);
1139 block->ref ^= dirac_get_arith_bit(arith, CTX_PMODE_REF1);
1141 if (s->num_refs == 2) {
1142 block->ref |= pred_block_mode(block, stride, x, y, DIRAC_REF_MASK_REF2);
1143 block->ref ^= dirac_get_arith_bit(arith, CTX_PMODE_REF2) << 1;
1147 pred_block_dc(block, stride, x, y);
1148 for (i = 0; i < 3; i++)
1149 block->u.dc[i] += dirac_get_arith_int(arith+1+i, CTX_DC_F1, CTX_DC_DATA);
1153 if (s->globalmc_flag) {
1154 block->ref |= pred_block_mode(block, stride, x, y, DIRAC_REF_MASK_GLOBAL);
1155 block->ref ^= dirac_get_arith_bit(arith, CTX_GLOBAL_BLOCK) << 2;
1158 for (i = 0; i < s->num_refs; i++)
1159 if (block->ref & (i+1)) {
1160 if (block->ref & DIRAC_REF_MASK_GLOBAL) {
1161 global_mv(s, block, x, y, i);
1163 pred_mv(block, stride, x, y, i);
1164 block->u.mv[i][0] += dirac_get_arith_int(arith + 4 + 2 * i, CTX_MV_F1, CTX_MV_DATA);
1165 block->u.mv[i][1] += dirac_get_arith_int(arith + 5 + 2 * i, CTX_MV_F1, CTX_MV_DATA);
1171 * Copies the current block to the other blocks covered by the current superblock split mode
1173 static void propagate_block_data(DiracBlock *block, int stride, int size)
1176 DiracBlock *dst = block;
1178 for (x = 1; x < size; x++)
1181 for (y = 1; y < size; y++) {
1183 for (x = 0; x < size; x++)
1189 * Dirac Specification ->
1190 * 12. Block motion data syntax
1192 static int dirac_unpack_block_motion_data(DiracContext *s)
1194 GetBitContext *gb = &s->gb;
1195 uint8_t *sbsplit = s->sbsplit;
1197 DiracArith arith[8];
1201 /* [DIRAC_STD] 11.2.4 and 12.2.1 Number of blocks and superblocks */
1202 s->sbwidth = DIVRNDUP(s->source.width, 4*s->plane[0].xbsep);
1203 s->sbheight = DIVRNDUP(s->source.height, 4*s->plane[0].ybsep);
1204 s->blwidth = 4 * s->sbwidth;
1205 s->blheight = 4 * s->sbheight;
1207 /* [DIRAC_STD] 12.3.1 Superblock splitting modes. superblock_split_modes()
1208 decode superblock split modes */
1209 ff_dirac_init_arith_decoder(arith, gb, svq3_get_ue_golomb(gb)); /* svq3_get_ue_golomb(gb) is the length */
1210 for (y = 0; y < s->sbheight; y++) {
1211 for (x = 0; x < s->sbwidth; x++) {
1212 unsigned int split = dirac_get_arith_uint(arith, CTX_SB_F1, CTX_SB_DATA);
1215 sbsplit[x] = (split + pred_sbsplit(sbsplit+x, s->sbwidth, x, y)) % 3;
1217 sbsplit += s->sbwidth;
1220 /* setup arith decoding */
1221 ff_dirac_init_arith_decoder(arith, gb, svq3_get_ue_golomb(gb));
1222 for (i = 0; i < s->num_refs; i++) {
1223 ff_dirac_init_arith_decoder(arith + 4 + 2 * i, gb, svq3_get_ue_golomb(gb));
1224 ff_dirac_init_arith_decoder(arith + 5 + 2 * i, gb, svq3_get_ue_golomb(gb));
1226 for (i = 0; i < 3; i++)
1227 ff_dirac_init_arith_decoder(arith+1+i, gb, svq3_get_ue_golomb(gb));
1229 for (y = 0; y < s->sbheight; y++)
1230 for (x = 0; x < s->sbwidth; x++) {
1231 int blkcnt = 1 << s->sbsplit[y * s->sbwidth + x];
1232 int step = 4 >> s->sbsplit[y * s->sbwidth + x];
1234 for (q = 0; q < blkcnt; q++)
1235 for (p = 0; p < blkcnt; p++) {
1236 int bx = 4 * x + p*step;
1237 int by = 4 * y + q*step;
1238 DiracBlock *block = &s->blmotion[by*s->blwidth + bx];
1239 decode_block_params(s, arith, block, s->blwidth, bx, by);
1240 propagate_block_data(block, s->blwidth, step);
/**
 * One-dimensional overlapped-block weight for sample i of a block.
 *
 * Samples within 2*offset of either block edge get a ramp value that depends
 * on their distance to that edge; all other samples get the full weight 8.
 *
 * @param i      sample index inside the block, 0 .. blen-1
 * @param blen   block length
 * @param offset overlap offset of the block in this dimension
 * @return the weight of sample i
 */
static int weight(int i, int blen, int offset)
{
    int edge; /* distance of the sample from the nearest block edge */

    if (i < 2*offset)
        edge = i;
    else if (i > blen-1 - 2*offset)
        edge = blen-1 - i;
    else
        return 8;

    /* an offset of 1 has only two ramp samples with fixed weights */
    if (offset == 1)
        return edge ? 5 : 3;

    return 1 + (6*edge + offset - 1) / (2*offset - 1);
}
1259 static void init_obmc_weight_row(Plane *p, uint8_t *obmc_weight, int stride,
1260 int left, int right, int wy)
1263 for (x = 0; left && x < p->xblen >> 1; x++)
1264 obmc_weight[x] = wy*8;
1265 for (; x < p->xblen >> right; x++)
1266 obmc_weight[x] = wy*weight(x, p->xblen, p->xoffset);
1267 for (; x < p->xblen; x++)
1268 obmc_weight[x] = wy*8;
1269 for (; x < stride; x++)
1273 static void init_obmc_weight(Plane *p, uint8_t *obmc_weight, int stride,
1274 int left, int right, int top, int bottom)
1277 for (y = 0; top && y < p->yblen >> 1; y++) {
1278 init_obmc_weight_row(p, obmc_weight, stride, left, right, 8);
1279 obmc_weight += stride;
1281 for (; y < p->yblen >> bottom; y++) {
1282 int wy = weight(y, p->yblen, p->yoffset);
1283 init_obmc_weight_row(p, obmc_weight, stride, left, right, wy);
1284 obmc_weight += stride;
1286 for (; y < p->yblen; y++) {
1287 init_obmc_weight_row(p, obmc_weight, stride, left, right, 8);
1288 obmc_weight += stride;
1292 static void init_obmc_weights(DiracContext *s, Plane *p, int by)
1295 int bottom = by == s->blheight-1;
1297 /* don't bother re-initing for rows 2 to blheight-2, the weights don't change */
1298 if (top || bottom || by == 1) {
1299 init_obmc_weight(p, s->obmc_weight[0], MAX_BLOCKSIZE, 1, 0, top, bottom);
1300 init_obmc_weight(p, s->obmc_weight[1], MAX_BLOCKSIZE, 0, 0, top, bottom);
1301 init_obmc_weight(p, s->obmc_weight[2], MAX_BLOCKSIZE, 0, 1, top, bottom);
/* Table of 4-entry weight sets; mc_subpel() selects one set with
   epel_weights[my&3][mx&3] and passes it on through src[4].
   NOTE(review): the initializer rows are not present in this listing. */
1305 static const uint8_t epel_weights[4][4][4] = {
1325 * For block x,y, determine which of the hpel planes to do bilinear
1326 * interpolation from and set src[] to the location in each hpel plane
1329 * @return the index of the put_dirac_pixels_tab function to use
1330 * 0 for 1 plane (fpel,hpel), 1 for 2 planes (qpel), 2 for 4 planes (qpel), and 3 for epel
1332 static int mc_subpel(DiracContext *s, DiracBlock *block, const uint8_t *src[5],
1333 int x, int y, int ref, int plane)
1335 Plane *p = &s->plane[plane];
1336 uint8_t **ref_hpel = s->ref_pics[ref]->hpel[plane];
1337 int motion_x = block->u.mv[ref][0];
1338 int motion_y = block->u.mv[ref][1];
1339 int mx, my, i, epel, nplanes = 0;
1342 motion_x >>= s->chroma_x_shift;
1343 motion_y >>= s->chroma_y_shift;
1346 mx = motion_x & ~(-1 << s->mv_precision);
1347 my = motion_y & ~(-1 << s->mv_precision);
1348 motion_x >>= s->mv_precision;
1349 motion_y >>= s->mv_precision;
1350 /* normalize subpel coordinates to epel */
1351 /* TODO: template this function? */
1352 mx <<= 3 - s->mv_precision;
1353 my <<= 3 - s->mv_precision;
1362 src[0] = ref_hpel[(my>>1)+(mx>>2)] + y*p->stride + x;
1366 for (i = 0; i < 4; i++)
1367 src[i] = ref_hpel[i] + y*p->stride + x;
1369 /* if we're interpolating in the right/bottom halves, adjust the planes as needed
1370 we increment x/y because the edge changes for half of the pixels */
1377 src[0] += p->stride;
1378 src[1] += p->stride;
1386 /* check if we really only need 2 planes since either mx or my is
1387 a hpel position. (epel weights of 0 handle this there) */
1389 /* mx == 0: average [0] and [2]
1390 mx == 4: average [1] and [3] */
1391 src[!mx] = src[2 + !!mx];
1393 } else if (!(my&3)) {
1394 src[0] = src[(my>>1) ];
1395 src[1] = src[(my>>1)+1];
1399 /* adjust the ordering if needed so the weights work */
1401 FFSWAP(const uint8_t *, src[0], src[1]);
1402 FFSWAP(const uint8_t *, src[2], src[3]);
1405 FFSWAP(const uint8_t *, src[0], src[2]);
1406 FFSWAP(const uint8_t *, src[1], src[3]);
1408 src[4] = epel_weights[my&3][mx&3];
1412 /* fixme: v/h _edge_pos */
1413 if (x + p->xblen > p->width +EDGE_WIDTH/2 ||
1414 y + p->yblen > p->height+EDGE_WIDTH/2 ||
1416 for (i = 0; i < nplanes; i++) {
1417 ff_emulated_edge_mc(s->edge_emu_buffer[i], src[i], p->stride,
1418 p->xblen, p->yblen, x, y,
1419 p->width+EDGE_WIDTH/2, p->height+EDGE_WIDTH/2);
1420 src[i] = s->edge_emu_buffer[i];
1423 return (nplanes>>1) + epel;
1426 static void add_dc(uint16_t *dst, int dc, int stride,
1427 uint8_t *obmc_weight, int xblen, int yblen)
1432 for (y = 0; y < yblen; y++) {
1433 for (x = 0; x < xblen; x += 2) {
1434 dst[x ] += dc * obmc_weight[x ];
1435 dst[x+1] += dc * obmc_weight[x+1];
1438 obmc_weight += MAX_BLOCKSIZE;
1442 static void block_mc(DiracContext *s, DiracBlock *block,
1443 uint16_t *mctmp, uint8_t *obmc_weight,
1444 int plane, int dstx, int dsty)
1446 Plane *p = &s->plane[plane];
1447 const uint8_t *src[5];
1450 switch (block->ref&3) {
1452 add_dc(mctmp, block->u.dc[plane], p->stride, obmc_weight, p->xblen, p->yblen);
1456 idx = mc_subpel(s, block, src, dstx, dsty, (block->ref&3)-1, plane);
1457 s->put_pixels_tab[idx](s->mcscratch, src, p->stride, p->yblen);
1459 s->weight_func(s->mcscratch, p->stride, s->weight_log2denom,
1460 s->weight[0] + s->weight[1], p->yblen);
1463 idx = mc_subpel(s, block, src, dstx, dsty, 0, plane);
1464 s->put_pixels_tab[idx](s->mcscratch, src, p->stride, p->yblen);
1465 idx = mc_subpel(s, block, src, dstx, dsty, 1, plane);
1466 if (s->biweight_func) {
1467 /* fixme: +32 is a quick hack */
1468 s->put_pixels_tab[idx](s->mcscratch + 32, src, p->stride, p->yblen);
1469 s->biweight_func(s->mcscratch, s->mcscratch+32, p->stride, s->weight_log2denom,
1470 s->weight[0], s->weight[1], p->yblen);
1472 s->avg_pixels_tab[idx](s->mcscratch, src, p->stride, p->yblen);
1475 s->add_obmc(mctmp, s->mcscratch, p->stride, obmc_weight, p->yblen);
1478 static void mc_row(DiracContext *s, DiracBlock *block, uint16_t *mctmp, int plane, int dsty)
1480 Plane *p = &s->plane[plane];
1481 int x, dstx = p->xbsep - p->xoffset;
1483 block_mc(s, block, mctmp, s->obmc_weight[0], plane, -p->xoffset, dsty);
1486 for (x = 1; x < s->blwidth-1; x++) {
1487 block_mc(s, block+x, mctmp, s->obmc_weight[1], plane, dstx, dsty);
1491 block_mc(s, block+x, mctmp, s->obmc_weight[2], plane, dstx, dsty);
1494 static void select_dsp_funcs(DiracContext *s, int width, int height, int xblen, int yblen)
1502 memcpy(s->put_pixels_tab, s->diracdsp.put_dirac_pixels_tab[idx], sizeof(s->put_pixels_tab));
1503 memcpy(s->avg_pixels_tab, s->diracdsp.avg_dirac_pixels_tab[idx], sizeof(s->avg_pixels_tab));
1504 s->add_obmc = s->diracdsp.add_dirac_obmc[idx];
1505 if (s->weight_log2denom > 1 || s->weight[0] != 1 || s->weight[1] != 1) {
1506 s->weight_func = s->diracdsp.weight_dirac_pixels_tab[idx];
1507 s->biweight_func = s->diracdsp.biweight_dirac_pixels_tab[idx];
1509 s->weight_func = NULL;
1510 s->biweight_func = NULL;
1514 static void interpolate_refplane(DiracContext *s, DiracFrame *ref, int plane, int width, int height)
1516 /* chroma allocates an edge of 8 when subsampled
1517 which for 4:2:2 means an h edge of 16 and v edge of 8
1518 just use 8 for everything for the moment */
1519 int i, edge = EDGE_WIDTH/2;
1521 ref->hpel[plane][0] = ref->avframe.data[plane];
1522 s->dsp.draw_edges(ref->hpel[plane][0], ref->avframe.linesize[plane], width, height, edge, edge, EDGE_TOP | EDGE_BOTTOM); /* EDGE_TOP | EDGE_BOTTOM values just copied to make it build, this needs to be ensured */
1524 /* no need for hpel if we only have fpel vectors */
1525 if (!s->mv_precision)
1528 for (i = 1; i < 4; i++) {
1529 if (!ref->hpel_base[plane][i])
1530 ref->hpel_base[plane][i] = av_malloc((height+2*edge) * ref->avframe.linesize[plane] + 32);
1531 /* we need to be 16-byte aligned even for chroma */
1532 ref->hpel[plane][i] = ref->hpel_base[plane][i] + edge*ref->avframe.linesize[plane] + 16;
1535 if (!ref->interpolated[plane]) {
1536 s->diracdsp.dirac_hpel_filter(ref->hpel[plane][1], ref->hpel[plane][2],
1537 ref->hpel[plane][3], ref->hpel[plane][0],
1538 ref->avframe.linesize[plane], width, height);
1539 s->dsp.draw_edges(ref->hpel[plane][1], ref->avframe.linesize[plane], width, height, edge, edge, EDGE_TOP | EDGE_BOTTOM);
1540 s->dsp.draw_edges(ref->hpel[plane][2], ref->avframe.linesize[plane], width, height, edge, edge, EDGE_TOP | EDGE_BOTTOM);
1541 s->dsp.draw_edges(ref->hpel[plane][3], ref->avframe.linesize[plane], width, height, edge, edge, EDGE_TOP | EDGE_BOTTOM);
1543 ref->interpolated[plane] = 1;
/* Reconstructs the three planes of s->current_picture: each plane's
   coefficient buffer is run through the inverse wavelet transform and the
   clamped result is written to the frame, with the motion-compensated
   prediction added when s->num_refs is non-zero.
   NOTE(review): the embedded line numbers are not contiguous, so several
   lines (conditions, declarations, return statements, closing braces) are
   absent from this listing. */
1547 * Dirac Specification ->
1548 * 13.0 Transform data syntax. transform_data()
1550 static int dirac_decode_frame_internal(DiracContext *s)
1553 int y, i, comp, dsty;
1556 /* [DIRAC_STD] 13.5.1 low_delay_transform_data() */
/* clear the IDWT coefficient buffer of every plane
   NOTE(review): the condition guarding this loop is not visible here */
1557 for (comp = 0; comp < 3; comp++) {
1558 Plane *p = &s->plane[comp];
1559 memset(p->idwt_buf, 0, p->idwt_stride * p->idwt_height * sizeof(IDWTELEM));
/* per-plane reconstruction */
1565 for (comp = 0; comp < 3; comp++) {
1566 Plane *p = &s->plane[comp];
1567 uint8_t *frame = s->current_picture->avframe.data[comp];
1569 /* FIXME: small resolutions */
/* split edge_emu_buffer_base into four scratch areas, each
   FFALIGN(p->width, 16) bytes apart */
1570 for (i = 0; i < 4; i++)
1571 s->edge_emu_buffer[i] = s->edge_emu_buffer_base + i*FFALIGN(p->width, 16);
/* core syntax with coded residual: clear the buffer, then decode this
   component's coefficients into it */
1573 if (!s->zero_res && !s->low_delay)
1575 memset(p->idwt_buf, 0, p->idwt_stride * p->idwt_height * sizeof(IDWTELEM));
1576 decode_component(s, comp); /* [DIRAC_STD] 13.4.1 core_transform_data() */
/* set up the inverse transform context d for this plane; a non-zero result
   is treated as failure */
1578 if (ff_spatial_idwt_init2(&d, p->idwt_buf, p->idwt_width, p->idwt_height, p->idwt_stride,
1579 s->wavelet_idx+2, s->wavelet_depth, p->idwt_tmp))
1582 if (!s->num_refs) { /* intra */
/* transform 16 more lines at a time and store them clamped into the frame */
1583 for (y = 0; y < p->height; y += 16) {
1584 ff_spatial_idwt_slice2(&d, y+16); /* decode */
1585 s->diracdsp.put_signed_rect_clamped(frame + y*p->stride, p->stride,
1586 p->idwt_buf + y*p->idwt_stride, p->idwt_stride, p->width, 16);
1588 } else { /* inter */
/* number of accumulator elements covered by one block row (ybsep lines) */
1589 int rowheight = p->ybsep*p->stride;
1591 select_dsp_funcs(s, p->width, p->height, p->xblen, p->yblen);
/* prepare the hpel planes of every reference used by this picture */
1593 for (i = 0; i < s->num_refs; i++)
1594 interpolate_refplane(s, s->ref_pics[i], comp, p->width, p->height);
1596 memset(s->mctmp, 0, 4*p->yoffset*p->stride);
/* one iteration per block row; dsty is the plane line at which the row's
   blocks start (its initialisation and update are among the absent lines) */
1599 for (y = 0; y < s->blheight; y++) {
1601 start = FFMAX(dsty, 0);
1602 uint16_t *mctmp = s->mctmp + y*rowheight;
1603 DiracBlock *blocks = s->blmotion + y*s->blwidth;
1605 init_obmc_weights(s, p, y);
/* h = number of picture lines output for this block row: whatever is left
   for the last row or a row crossing the bottom of the plane, otherwise one
   block separation minus the lines clipped above line 0 */
1607 if (y == s->blheight-1 || start+p->ybsep > p->height)
1608 h = p->height - start;
1610 h = p->ybsep - (start - dsty);
/* zero the accumulator area 2*yoffset lines below this row's start before
   the row's blocks are accumulated */
1614 memset(mctmp+2*p->yoffset*p->stride, 0, 2*rowheight);
1615 mc_row(s, blocks, mctmp, comp, dsty);
/* skip the clipped lines and the left overlap, transform up to line
   start + h, then add residual and prediction into the frame, clamped */
1617 mctmp += (start - dsty)*p->stride + p->xoffset;
1618 ff_spatial_idwt_slice2(&d, start + h); /* decode */
1619 s->diracdsp.add_rect_clamped(frame + start*p->stride, mctmp, p->stride,
1620 p->idwt_buf + start*p->idwt_stride, p->idwt_stride, p->width, h);
/* Parses the picture header from s->gb: picture number, reference picture
   selection, reference retirement, then the prediction, block motion and
   wavelet parameter sections.
   NOTE(review): the embedded line numbers are not contiguous, so several
   lines (declarations, return statements, closing braces, the condition
   around the prediction/motion parsing) are absent from this listing. */
1632 * Dirac Specification ->
1633 * 11.1.1 Picture Header. picture_header()
1635 static int dirac_decode_picture_header(DiracContext *s)
1638 int i, j, refnum, refdist;
1639 GetBitContext *gb = &s->gb;
1641 /* [DIRAC_STD] 11.1.1 Picture Header. picture_header() PICTURE_NUM */
1642 picnum = s->current_picture->avframe.display_picture_number = get_bits_long(gb, 32);
1645 av_log(s->avctx,AV_LOG_DEBUG,"PICTURE_NUM: %d\n",picnum);
1647 /* if this is the first keyframe after a sequence header, start our
1648 reordering from here */
1649 if (s->frame_number < 0)
1650 s->frame_number = picnum;
1652 s->ref_pics[0] = s->ref_pics[1] = NULL;
/* each reference is coded as a signed offset from the picture number */
1653 for (i = 0; i < s->num_refs; i++) {
1654 refnum = picnum + dirac_get_se_golomb(gb);
1657 /* find the closest reference to the one we want */
1658 /* Jordi: this is needed if the referenced picture hasn't yet arrived */
/* the search stops early once refdist reaches 0, i.e. on an exact match */
1659 for (j = 0; j < MAX_REFERENCE_FRAMES && refdist; j++)
1660 if (s->ref_frames[j]
1661 && FFABS(s->ref_frames[j]->avframe.display_picture_number - refnum) < refdist) {
1662 s->ref_pics[i] = s->ref_frames[j];
1663 refdist = FFABS(s->ref_frames[j]->avframe.display_picture_number - refnum);
1666 if (!s->ref_pics[i] || refdist)
1667 av_log(s->avctx, AV_LOG_DEBUG, "Reference not found\n");
1669 /* if there were no references at all, allocate one */
/* NOTE(review): the return value of ff_get_buffer() is not checked here */
1670 if (!s->ref_pics[i])
1671 for (j = 0; j < MAX_FRAMES; j++)
1672 if (!s->all_frames[j].avframe.data[0]) {
1673 s->ref_pics[i] = &s->all_frames[j];
1674 ff_get_buffer(s->avctx, &s->ref_pics[i]->avframe);
1679 /* retire the reference frames that are not used anymore */
/* only reference pictures carry a retire offset; an offset of 0 (retire ==
   picnum) means nothing is retired */
1680 if (s->current_picture->avframe.reference) {
1681 retire = picnum + dirac_get_se_golomb(gb);
1682 if (retire != picnum) {
1683 DiracFrame *retire_pic = remove_frame(s->ref_frames, retire);
/* keep only the DELAYED_PIC_REF bit of the retired picture's reference flags */
1686 retire_pic->avframe.reference &= DELAYED_PIC_REF;
1688 av_log(s->avctx, AV_LOG_DEBUG, "Frame to retire not found\n");
1691 /* if reference array is full, remove the oldest as per the spec */
/* a non-zero add_frame() result is treated as "list full": drop
   ref_frames[0] and try again */
1692 while (add_frame(s->ref_frames, MAX_REFERENCE_FRAMES, s->current_picture)) {
1693 av_log(s->avctx, AV_LOG_ERROR, "Reference frame overflow\n");
1694 remove_frame(s->ref_frames, s->ref_frames[0]->avframe.display_picture_number)->avframe.reference &= DELAYED_PIC_REF;
/* remaining header sections; each parser signals failure with a non-zero result */
1699 if (dirac_unpack_prediction_parameters(s)) /* [DIRAC_STD] 11.2 Picture Prediction Data. picture_prediction() */
1701 if (dirac_unpack_block_motion_data(s)) /* [DIRAC_STD] 12. Block motion data syntax */
1704 if (dirac_unpack_idwt_params(s)) /* [DIRAC_STD] 11.3 Wavelet transform data */
1711 static int get_delayed_pic(DiracContext *s, AVFrame *picture, int *got_frame)
1713 DiracFrame *out = s->delay_frames[0];
1716 /* find frame with lowest picture number */
1717 for (i = 1; s->delay_frames[i]; i++)
1718 if (s->delay_frames[i]->avframe.display_picture_number < out->avframe.display_picture_number) {
1719 out = s->delay_frames[i];
1723 for (i = out_idx; s->delay_frames[i]; i++)
1724 s->delay_frames[i] = s->delay_frames[i+1];
1727 out->avframe.reference ^= DELAYED_PIC_REF;
1729 *(AVFrame *)picture = out->avframe;
/**
 * Dirac Specification ->
 * 9.6 Parse Info Header Syntax. parse_info()
 * 4 byte start code + byte parse code + 4 byte size + 4 byte previous size
 */
#define DATA_UNIT_HEADER_SIZE 13
1742 /* [DIRAC_STD] dirac_decode_data_unit makes reference to the while defined in 9.3
1743 inside the function parse_sequence() */
1744 static int dirac_decode_data_unit(AVCodecContext *avctx, const uint8_t *buf, int size)
1746 DiracContext *s = avctx->priv_data;
1747 DiracFrame *pic = NULL;
1748 int i, parse_code = buf[4];
1751 if (size < DATA_UNIT_HEADER_SIZE)
1754 init_get_bits(&s->gb, &buf[13], 8*(size - DATA_UNIT_HEADER_SIZE));
1756 if (parse_code == pc_seq_header) {
1757 if (s->seen_sequence_header)
1760 /* [DIRAC_STD] 10. Sequence header */
1761 if (avpriv_dirac_parse_sequence_header(avctx, &s->gb, &s->source))
1764 avcodec_get_chroma_sub_sample(avctx->pix_fmt, &s->chroma_x_shift, &s->chroma_y_shift);
1766 if (alloc_sequence_buffers(s))
1769 s->seen_sequence_header = 1;
1770 } else if (parse_code == pc_eos) { /* [DIRAC_STD] End of Sequence */
1771 free_sequence_buffers(s);
1772 s->seen_sequence_header = 0;
1773 } else if (parse_code == pc_aux_data) {
1774 if (buf[13] == 1) { /* encoder implementation/version */
1776 /* versions older than 1.0.8 don't store quant delta for
1777 subbands with only one codeblock */
1778 if (sscanf(buf+14, "Schroedinger %d.%d.%d", ver, ver+1, ver+2) == 3)
1779 if (ver[0] == 1 && ver[1] == 0 && ver[2] <= 7)
1780 s->old_delta_quant = 1;
1782 } else if (parse_code & 0x8) { /* picture data unit */
1783 if (!s->seen_sequence_header) {
1784 av_log(avctx, AV_LOG_DEBUG, "Dropping frame without sequence header\n");
1788 /* find an unused frame */
1789 for (i = 0; i < MAX_FRAMES; i++)
1790 if (s->all_frames[i].avframe.data[0] == NULL)
1791 pic = &s->all_frames[i];
1793 av_log(avctx, AV_LOG_ERROR, "framelist full\n");
1797 avcodec_get_frame_defaults(&pic->avframe);
1799 /* [DIRAC_STD] Defined in 9.6.1 ... */
1800 tmp = parse_code & 0x03; /* [DIRAC_STD] num_refs() */
1802 av_log(avctx, AV_LOG_ERROR, "num_refs of 3\n");
1806 s->is_arith = (parse_code & 0x48) == 0x08; /* [DIRAC_STD] using_ac() */
1807 s->low_delay = (parse_code & 0x88) == 0x88; /* [DIRAC_STD] is_low_delay() */
1808 pic->avframe.reference = (parse_code & 0x0C) == 0x0C; /* [DIRAC_STD] is_reference() */
1809 pic->avframe.key_frame = s->num_refs == 0; /* [DIRAC_STD] is_intra() */
1810 pic->avframe.pict_type = s->num_refs + 1; /* Definition of AVPictureType in avutil.h */
1812 if (ff_get_buffer(avctx, &pic->avframe) < 0) {
1813 av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
1816 s->current_picture = pic;
1817 s->plane[0].stride = pic->avframe.linesize[0];
1818 s->plane[1].stride = pic->avframe.linesize[1];
1819 s->plane[2].stride = pic->avframe.linesize[2];
1821 /* [DIRAC_STD] 11.1 Picture parse. picture_parse() */
1822 if (dirac_decode_picture_header(s))
1825 /* [DIRAC_STD] 13.0 Transform data syntax. transform_data() */
1826 if (dirac_decode_frame_internal(s))
1832 static int dirac_decode_frame(AVCodecContext *avctx, void *data, int *got_frame, AVPacket *pkt)
1834 DiracContext *s = avctx->priv_data;
1835 DiracFrame *picture = data;
1836 uint8_t *buf = pkt->data;
1837 int buf_size = pkt->size;
1838 int i, data_unit_size, buf_idx = 0;
1840 /* release unused frames */
1841 for (i = 0; i < MAX_FRAMES; i++)
1842 if (s->all_frames[i].avframe.data[0] && !s->all_frames[i].avframe.reference) {
1843 avctx->release_buffer(avctx, &s->all_frames[i].avframe);
1844 memset(s->all_frames[i].interpolated, 0, sizeof(s->all_frames[i].interpolated));
1847 s->current_picture = NULL;
1850 /* end of stream, so flush delayed pics */
1852 return get_delayed_pic(s, (AVFrame *)data, got_frame);
1855 /*[DIRAC_STD] Here starts the code from parse_info() defined in 9.6
1856 [DIRAC_STD] PARSE_INFO_PREFIX = "BBCD" as defined in ISO/IEC 646
1857 BBCD start code search */
1858 for (; buf_idx + DATA_UNIT_HEADER_SIZE < buf_size; buf_idx++) {
1859 if (buf[buf_idx ] == 'B' && buf[buf_idx+1] == 'B' &&
1860 buf[buf_idx+2] == 'C' && buf[buf_idx+3] == 'D')
1863 /* BBCD found or end of data */
1864 if (buf_idx + DATA_UNIT_HEADER_SIZE >= buf_size)
1867 data_unit_size = AV_RB32(buf+buf_idx+5);
1868 if (buf_idx + data_unit_size > buf_size || !data_unit_size) {
1869 if(buf_idx + data_unit_size > buf_size)
1870 av_log(s->avctx, AV_LOG_ERROR,
1871 "Data unit with size %d is larger than input buffer, discarding\n",
1876 /* [DIRAC_STD] dirac_decode_data_unit makes reference to the while defined in 9.3 inside the function parse_sequence() */
1877 if (dirac_decode_data_unit(avctx, buf+buf_idx, data_unit_size))
1879 av_log(s->avctx, AV_LOG_ERROR,"Error in dirac_decode_data_unit\n");
1882 buf_idx += data_unit_size;
1885 if (!s->current_picture)
1888 if (s->current_picture->avframe.display_picture_number > s->frame_number) {
1889 DiracFrame *delayed_frame = remove_frame(s->delay_frames, s->frame_number);
1891 s->current_picture->avframe.reference |= DELAYED_PIC_REF;
1893 if (add_frame(s->delay_frames, MAX_DELAY, s->current_picture)) {
1894 int min_num = s->delay_frames[0]->avframe.display_picture_number;
1895 /* Too many delayed frames, so we display the frame with the lowest pts */
1896 av_log(avctx, AV_LOG_ERROR, "Delay frame overflow\n");
1897 delayed_frame = s->delay_frames[0];
1899 for (i = 1; s->delay_frames[i]; i++)
1900 if (s->delay_frames[i]->avframe.display_picture_number < min_num)
1901 min_num = s->delay_frames[i]->avframe.display_picture_number;
1903 delayed_frame = remove_frame(s->delay_frames, min_num);
1904 add_frame(s->delay_frames, MAX_DELAY, s->current_picture);
1907 if (delayed_frame) {
1908 delayed_frame->avframe.reference ^= DELAYED_PIC_REF;
1909 *(AVFrame*)data = delayed_frame->avframe;
1912 } else if (s->current_picture->avframe.display_picture_number == s->frame_number) {
1913 /* The right frame at the right time :-) */
1914 *(AVFrame*)data = s->current_picture->avframe;
1919 s->frame_number = picture->avframe.display_picture_number + 1;
/* Codec descriptor of the Dirac decoder: video codec AV_CODEC_ID_DIRAC with
   a DiracContext as private data, wired to the init/close/decode/flush
   functions of this file. CODEC_CAP_DELAY is set; dirac_decode_frame()
   flushes its delayed frames when it is given an empty packet.
   NOTE(review): the embedded line numbers skip 1925 and the end of the
   initializer is not visible in this listing. */
1924 AVCodec ff_dirac_decoder = {
1926 .type = AVMEDIA_TYPE_VIDEO,
1927 .id = AV_CODEC_ID_DIRAC,
1928 .priv_data_size = sizeof(DiracContext),
1929 .init = dirac_decode_init,
1930 .close = dirac_decode_end,
1931 .decode = dirac_decode_frame,
1932 .capabilities = CODEC_CAP_DELAY,
1933 .flush = dirac_decode_flush,
1934 .long_name = NULL_IF_CONFIG_SMALL("BBC Dirac VC-2"),