3 * Copyright (c) 2002 Fabrice Bellard
4 * Copyright (c) 2004 Roman Shaposhnik
7 * Copyright (c) 2003 Roman Shaposhnik
9 * 50 Mbps (DVCPRO50) support
10 * Copyright (c) 2006 Daniel Maas <dmaas@maasdigital.com>
12 * 100 Mbps (DVCPRO HD) support
13 * Initial code by Daniel Maas <dmaas@maasdigital.com> (funded by BBC R&D)
14 * Final code by Roman Shaposhnik
16 * Many thanks to Dan Dennedy <dan@dennedy.org> for providing a wealth
17 * of DV technical info.
19 * This file is part of FFmpeg.
21 * FFmpeg is free software; you can redistribute it and/or
22 * modify it under the terms of the GNU Lesser General Public
23 * License as published by the Free Software Foundation; either
24 * version 2.1 of the License, or (at your option) any later version.
26 * FFmpeg is distributed in the hope that it will be useful,
27 * but WITHOUT ANY WARRANTY; without even the implied warranty of
28 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
29 * Lesser General Public License for more details.
31 * You should have received a copy of the GNU Lesser General Public
32 * License along with FFmpeg; if not, write to the Free Software
33 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
40 #define ALT_BITSTREAM_READER
45 #include "simple_idct.h"
47 #include "dv_tablegen.h"
/* Private codec state used by both the DV encoder and the DV decoder.
 * Only part of the structure is visible in this excerpt. */
52 typedef struct DVVideoContext {
/* codec context; its flags and lowres settings are read by the per-block helpers */
55 AVCodecContext *avctx;
/* scan tables filled in dvvideo_init(): [0] is derived from ff_zigzag_direct,
 * [1] from ff_zigzag248_direct; the decoder picks one by the block's dct_mode */
58 uint8_t dv_zigzag[2][64];
/* copied from the DSP context in dvvideo_init(); the encoder uses it to load
 * source pixels into a DCTELEM block before the forward DCT */
60 void (*get_pixels)(DCTELEM *block, const uint8_t *pixels, int line_size);
/* forward DCTs: [0] = dsp.fdct, [1] = dsp.fdct248; indexed by dct_mode */
61 void (*fdct[2])(DCTELEM *block);
/* inverse DCT + store: [0] = dsp.idct_put, [1] = ff_simple_idct248_put */
62 void (*idct_put[2])(uint8_t *dest, int line_size, DCTELEM *block);
/* comparison function (dsp.ildct_cmp[5]) used by dv_guess_dct_mode() */
63 me_cmp_func ildct_cmp;
66 #define TEX_VLC_BITS 9
68 /* XXX: also include quantization */
69 static RL_VLC_ELEM dv_rl_vlc[1184];
/* Computes how many work chunks (video segments) the profile d uses.
 * The base count is 27 chunks per (DIF channel, DIF segment) pair, matching
 * the j < 27 loop in dv_init_dynamic_tables(). The result is used as the job
 * count handed to avctx->execute() and as the size of d->work_chunks. */
71 static inline int dv_work_pool_size(const DVprofile *d)
73 int size = d->n_difchan*d->difseg_size*27;
/* The 1080i50 and 720p50 profiles adjust the base count; the adjustment
 * statements and the final return are not part of this excerpt. */
74 if (DV_PROFILE_IS_1080i50(d))
76 if (DV_PROFILE_IS_720p50(d))
81 static inline void dv_calc_mb_coordinates(const DVprofile *d, int chan, int seq, int slot,
84 static const uint8_t off[] = { 2, 6, 8, 0, 4 };
85 static const uint8_t shuf1[] = { 36, 18, 54, 0, 72 };
86 static const uint8_t shuf2[] = { 24, 12, 36, 0, 48 };
87 static const uint8_t shuf3[] = { 18, 9, 27, 0, 36 };
89 static const uint8_t l_start[] = {0, 4, 9, 13, 18, 22, 27, 31, 36, 40};
90 static const uint8_t l_start_shuffled[] = { 9, 4, 13, 0, 18 };
92 static const uint8_t serpent1[] = {0, 1, 2, 2, 1, 0,
97 static const uint8_t serpent2[] = {0, 1, 2, 3, 4, 5, 5, 4, 3, 2, 1, 0,
98 0, 1, 2, 3, 4, 5, 5, 4, 3, 2, 1, 0,
101 static const uint8_t remap[][2] = {{ 0, 0}, { 0, 0}, { 0, 0}, { 0, 0}, /* dummy */
102 { 0, 0}, { 0, 1}, { 0, 2}, { 0, 3}, {10, 0},
103 {10, 1}, {10, 2}, {10, 3}, {20, 0}, {20, 1},
104 {20, 2}, {20, 3}, {30, 0}, {30, 1}, {30, 2},
105 {30, 3}, {40, 0}, {40, 1}, {40, 2}, {40, 3},
106 {50, 0}, {50, 1}, {50, 2}, {50, 3}, {60, 0},
107 {60, 1}, {60, 2}, {60, 3}, {70, 0}, {70, 1},
108 {70, 2}, {70, 3}, { 0,64}, { 0,65}, { 0,66},
109 {10,64}, {10,65}, {10,66}, {20,64}, {20,65},
110 {20,66}, {30,64}, {30,65}, {30,66}, {40,64},
111 {40,65}, {40,66}, {50,64}, {50,65}, {50,66},
112 {60,64}, {60,65}, {60,66}, {70,64}, {70,65},
113 {70,66}, { 0,67}, {20,67}, {40,67}, {60,67}};
118 for (m=0; m<5; m++) {
121 blk = (chan*11+seq)*27+slot;
123 if (chan == 0 && seq == 11) {
132 i = (4*chan + blk + off[m])%11;
135 x = shuf1[m] + (chan&1)*9 + k%9;
136 y = (i*3+k/9)*2 + (chan>>1) + 1;
138 tbl[m] = (x<<1)|(y<<9);
141 blk = (chan*10+seq)*27+slot;
143 i = (4*chan + (seq/5) + 2*blk + off[m])%10;
146 x = shuf1[m]+(chan&1)*9 + k%9;
147 y = (i*3+k/9)*2 + (chan>>1) + 4;
150 x = remap[y][0]+((x-80)<<(y>59));
153 tbl[m] = (x<<1)|(y<<9);
156 blk = (chan*10+seq)*27+slot;
158 i = (4*chan + (seq/5) + 2*blk + off[m])%10;
159 k = (blk/5)%27 + (i&1)*3;
161 x = shuf2[m] + k%6 + 6*(chan&1);
162 y = l_start[i] + k/6 + 45*(chan>>1);
163 tbl[m] = (x<<1)|(y<<9);
166 switch (d->pix_fmt) {
167 case PIX_FMT_YUV422P:
168 x = shuf3[m] + slot/3;
170 ((((seq + off[m]) % d->difseg_size)<<1) + chan)*3;
171 tbl[m] = (x<<1)|(y<<8);
173 case PIX_FMT_YUV420P:
174 x = shuf3[m] + slot/3;
176 ((seq + off[m]) % d->difseg_size)*3;
177 tbl[m] = (x<<1)|(y<<9);
179 case PIX_FMT_YUV411P:
180 i = (seq + off[m]) % d->difseg_size;
181 k = slot + ((m==1||m==2)?3:0);
183 x = l_start_shuffled[m] + k/6;
184 y = serpent2[k] + i*6;
187 tbl[m] = (x<<2)|(y<<8);
196 static int dv_init_dynamic_tables(const DVprofile *d)
199 uint32_t *factor1, *factor2;
200 const int *iweight1, *iweight2;
202 if (!d->work_chunks[dv_work_pool_size(d)-1].buf_offset) {
204 for (c=0; c<d->n_difchan; c++) {
205 for (s=0; s<d->difseg_size; s++) {
207 for (j=0; j<27; j++) {
209 if (!(DV_PROFILE_IS_1080i50(d) && c != 0 && s == 11) &&
210 !(DV_PROFILE_IS_720p50(d) && s > 9)) {
211 dv_calc_mb_coordinates(d, c, s, j, &d->work_chunks[i].mb_coordinates[0]);
212 d->work_chunks[i++].buf_offset = p;
220 if (!d->idct_factor[DV_PROFILE_IS_HD(d)?8191:5631]) {
221 factor1 = &d->idct_factor[0];
222 factor2 = &d->idct_factor[DV_PROFILE_IS_HD(d)?4096:2816];
223 if (d->height == 720) {
224 iweight1 = &dv_iweight_720_y[0];
225 iweight2 = &dv_iweight_720_c[0];
227 iweight1 = &dv_iweight_1080_y[0];
228 iweight2 = &dv_iweight_1080_c[0];
230 if (DV_PROFILE_IS_HD(d)) {
231 for (c = 0; c < 4; c++) {
232 for (s = 0; s < 16; s++) {
233 for (i = 0; i < 64; i++) {
234 *factor1++ = (dv100_qstep[s] << (c + 9)) * iweight1[i];
235 *factor2++ = (dv100_qstep[s] << (c + 9)) * iweight2[i];
240 iweight1 = &dv_iweight_88[0];
241 for (j = 0; j < 2; j++, iweight1 = &dv_iweight_248[0]) {
242 for (s = 0; s < 22; s++) {
243 for (i = c = 0; c < 4; c++) {
244 for (; i < dv_quant_areas[c]; i++) {
245 *factor1 = iweight1[i] << (dv_quant_shifts[s][c] + 1);
246 *factor2++ = (*factor1++) << 1;
257 static av_cold int dvvideo_init(AVCodecContext *avctx)
259 DVVideoContext *s = avctx->priv_data;
266 uint16_t new_dv_vlc_bits[NB_DV_VLC*2];
267 uint8_t new_dv_vlc_len[NB_DV_VLC*2];
268 uint8_t new_dv_vlc_run[NB_DV_VLC*2];
269 int16_t new_dv_vlc_level[NB_DV_VLC*2];
273 /* it's faster to include sign bit in a generic VLC parsing scheme */
274 for (i = 0, j = 0; i < NB_DV_VLC; i++, j++) {
275 new_dv_vlc_bits[j] = dv_vlc_bits[i];
276 new_dv_vlc_len[j] = dv_vlc_len[i];
277 new_dv_vlc_run[j] = dv_vlc_run[i];
278 new_dv_vlc_level[j] = dv_vlc_level[i];
280 if (dv_vlc_level[i]) {
281 new_dv_vlc_bits[j] <<= 1;
285 new_dv_vlc_bits[j] = (dv_vlc_bits[i] << 1) | 1;
286 new_dv_vlc_len[j] = dv_vlc_len[i] + 1;
287 new_dv_vlc_run[j] = dv_vlc_run[i];
288 new_dv_vlc_level[j] = -dv_vlc_level[i];
292 /* NOTE: as a trick, we use the fact that no codes are unused
293 to accelerate the parsing of partial codes */
294 init_vlc(&dv_vlc, TEX_VLC_BITS, j,
295 new_dv_vlc_len, 1, 1, new_dv_vlc_bits, 2, 2, 0);
296 assert(dv_vlc.table_size == 1184);
298 for (i = 0; i < dv_vlc.table_size; i++){
299 int code = dv_vlc.table[i][0];
300 int len = dv_vlc.table[i][1];
303 if (len < 0){ //more bits needed
307 run = new_dv_vlc_run [code] + 1;
308 level = new_dv_vlc_level[code];
310 dv_rl_vlc[i].len = len;
311 dv_rl_vlc[i].level = level;
312 dv_rl_vlc[i].run = run;
316 dv_vlc_map_tableinit();
319 /* Generic DSP setup */
320 dsputil_init(&dsp, avctx);
321 ff_set_cmp(&dsp, dsp.ildct_cmp, avctx->ildct_cmp);
322 s->get_pixels = dsp.get_pixels;
323 s->ildct_cmp = dsp.ildct_cmp[5];
326 s->fdct[0] = dsp.fdct;
327 s->idct_put[0] = dsp.idct_put;
328 for (i = 0; i < 64; i++)
329 s->dv_zigzag[0][i] = dsp.idct_permutation[ff_zigzag_direct[i]];
332 s->fdct[1] = dsp.fdct248;
333 s->idct_put[1] = ff_simple_idct248_put; // FIXME: need to add it to DSP
335 for (i = 0; i < 64; i++){
336 int j = ff_zigzag248_direct[i];
337 s->dv_zigzag[1][i] = dsp.idct_permutation[(j & 7) + (j & 8) * 4 + (j & 48) / 2];
340 memcpy(s->dv_zigzag[1], ff_zigzag248_direct, 64);
342 avctx->coded_frame = &s->picture;
344 avctx->chroma_sample_location = AVCHROMA_LOC_TOPLEFT;
/* Encoder-side init: checks that the requested frame geometry and pixel
 * format map to a known DV profile, then performs the shared setup in
 * dvvideo_init(). */
349 static av_cold int dvvideo_init_encoder(AVCodecContext *avctx)
/* no matching profile: report width, height and pixel format name
 * (the error return that follows is not part of this excerpt) */
351 if (!ff_dv_codec_profile(avctx)) {
352 av_log(avctx, AV_LOG_ERROR, "Found no DV profile for %ix%i %s video\n",
353 avctx->width, avctx->height, avcodec_get_pix_fmt_name(avctx->pix_fmt));
/* profile exists: return whatever the common init returns */
357 return dvvideo_init(avctx);
361 // #define printf(...) av_log(NULL, AV_LOG_ERROR, __VA_ARGS__)
/* Per-block decoder state carried across the three AC decoding passes of
 * dv_decode_video_segment(). Only part of the structure is visible here. */
363 typedef struct BlockInfo {
/* per-position multipliers applied to each decoded level in dv_decode_ac() */
364 const uint32_t *factor_table;
/* maps a coefficient position to its index in the DCTELEM block */
365 const uint8_t *scan_table;
366 uint8_t pos; /* position in block */
/* inverse DCT + store routine selected for this block */
367 void (*idct_put)(uint8_t *dest, int line_size, DCTELEM *block);
/* number of bits of a VLC that was cut off at the end of the bit buffer;
 * dv_decode_ac() re-injects them at the start of the next pass */
368 uint8_t partial_bit_count;
/* the cut-off bits themselves */
369 uint16_t partial_bit_buffer;
373 /* bit budget for AC only in 5 MBs */
374 static const int vs_total_ac_bits = (100 * 4 + 68*2) * 5;
375 /* see dv_88_areas and dv_248_areas for details */
376 static const int mb_area_start[5] = { 1, 6, 21, 43, 64 };
/* Returns the number of bits that can still be written to s: the buffer
 * capacity in bits, (buf_end - buf) * 8, minus put_bits_count(s). */
378 static inline int put_bits_left(PutBitContext* s)
380 return (s->buf_end - s->buf) * 8 - put_bits_count(s);
383 /* decode ac coefficients */
384 static void dv_decode_ac(GetBitContext *gb, BlockInfo *mb, DCTELEM *block)
386 int last_index = gb->size_in_bits;
387 const uint8_t *scan_table = mb->scan_table;
388 const uint32_t *factor_table = mb->factor_table;
390 int partial_bit_count = mb->partial_bit_count;
391 int level, run, vlc_len, index;
394 UPDATE_CACHE(re, gb);
396 /* if we must parse a partial vlc, we do it here */
397 if (partial_bit_count > 0) {
398 re_cache = ((unsigned)re_cache >> partial_bit_count) |
399 (mb->partial_bit_buffer << (sizeof(re_cache) * 8 - partial_bit_count));
400 re_index -= partial_bit_count;
401 mb->partial_bit_count = 0;
404 /* get the AC coefficients until last_index is reached */
407 printf("%2d: bits=%04x index=%d\n", pos, SHOW_UBITS(re, gb, 16), re_index);
409 /* our own optimized GET_RL_VLC */
410 index = NEG_USR32(re_cache, TEX_VLC_BITS);
411 vlc_len = dv_rl_vlc[index].len;
413 index = NEG_USR32((unsigned)re_cache << TEX_VLC_BITS, -vlc_len) + dv_rl_vlc[index].level;
414 vlc_len = TEX_VLC_BITS - vlc_len;
416 level = dv_rl_vlc[index].level;
417 run = dv_rl_vlc[index].run;
419 /* gotta check if we're still within gb boundaries */
420 if (re_index + vlc_len > last_index) {
421 /* should be < 16 bits otherwise a codeword could have been parsed */
422 mb->partial_bit_count = last_index - re_index;
423 mb->partial_bit_buffer = NEG_USR32(re_cache, mb->partial_bit_count);
424 re_index = last_index;
430 printf("run=%d level=%d\n", run, level);
436 level = (level * factor_table[pos] + (1 << (dv_iweight_bits - 1))) >> dv_iweight_bits;
437 block[scan_table[pos]] = level;
439 UPDATE_CACHE(re, gb);
441 CLOSE_READER(re, gb);
/* Appends every bit still unread in gb to pb. */
445 static inline void bit_copy(PutBitContext *pb, GetBitContext *gb)
447 int bits_left = get_bits_left(gb);
/* bulk part: move MIN_CACHE_BITS bits per iteration */
448 while (bits_left >= MIN_CACHE_BITS) {
449 put_bits(pb, MIN_CACHE_BITS, get_bits(gb, MIN_CACHE_BITS));
450 bits_left -= MIN_CACHE_BITS;
/* tail: the remaining bits_left (< MIN_CACHE_BITS) bits */
453 put_bits(pb, bits_left, get_bits(gb, bits_left));
/* Unpacks the coordinates of macroblock m of a work chunk.
 * work_chunk->mb_coordinates[m] stores x in its low 8 bits and y in the bits
 * above them; the results are written to *mb_x and *mb_y. */
457 static inline void dv_calculate_mb_xy(DVVideoContext *s, DVwork_chunk *work_chunk, int m, int *mb_x, int *mb_y)
459 *mb_x = work_chunk->mb_coordinates[m] & 0xff;
460 *mb_y = work_chunk->mb_coordinates[m] >> 8;
462 /* We work with 720p frames split in half. The odd half-frame (chan==2,3) is displaced :-( */
/* applies only to 720-line profiles when bits 2-3 of s->buf[1] are both clear:
 * rows above 17 move up by 18, all other rows move down by 72 */
463 if (s->sys->height == 720 && !(s->buf[1]&0x0C)) {
464 *mb_y -= (*mb_y>17)?18:-72; /* shifting the Y coordinate down by 72/2 macro blocks */
468 /* mb_x and mb_y are in units of 8 pixels */
469 static int dv_decode_video_segment(AVCodecContext *avctx, void *arg)
471 DVVideoContext *s = avctx->priv_data;
472 DVwork_chunk *work_chunk = arg;
473 int quant, dc, dct_mode, class1, j;
474 int mb_index, mb_x, mb_y, last_index;
475 int y_stride, linesize;
476 DCTELEM *block, *block1;
479 const uint8_t *buf_ptr;
480 PutBitContext pb, vs_pb;
482 BlockInfo mb_data[5 * DV_MAX_BPM], *mb, *mb1;
483 LOCAL_ALIGNED_16(DCTELEM, sblock, [5*DV_MAX_BPM], [64]);
484 LOCAL_ALIGNED_16(uint8_t, mb_bit_buffer, [80 + 4]); /* allow some slack */
485 LOCAL_ALIGNED_16(uint8_t, vs_bit_buffer, [5 * 80 + 4]); /* allow some slack */
486 const int log2_blocksize = 3-s->avctx->lowres;
487 int is_field_mode[5];
489 assert((((int)mb_bit_buffer) & 7) == 0);
490 assert((((int)vs_bit_buffer) & 7) == 0);
492 memset(sblock, 0, 5*DV_MAX_BPM*sizeof(*sblock));
494 /* pass 1 : read DC and AC coefficients in blocks */
495 buf_ptr = &s->buf[work_chunk->buf_offset*80];
496 block1 = &sblock[0][0];
498 init_put_bits(&vs_pb, vs_bit_buffer, 5 * 80);
499 for (mb_index = 0; mb_index < 5; mb_index++, mb1 += s->sys->bpm, block1 += s->sys->bpm * 64) {
501 quant = buf_ptr[3] & 0x0f;
503 init_put_bits(&pb, mb_bit_buffer, 80);
506 is_field_mode[mb_index] = 0;
507 for (j = 0; j < s->sys->bpm; j++) {
508 last_index = s->sys->block_sizes[j];
509 init_get_bits(&gb, buf_ptr, last_index);
512 dc = get_sbits(&gb, 9);
513 dct_mode = get_bits1(&gb);
514 class1 = get_bits(&gb, 2);
515 if (DV_PROFILE_IS_HD(s->sys)) {
516 mb->idct_put = s->idct_put[0];
517 mb->scan_table = s->dv_zigzag[0];
518 mb->factor_table = &s->sys->idct_factor[(j >= 4)*4*16*64 + class1*16*64 + quant*64];
519 is_field_mode[mb_index] |= !j && dct_mode;
521 mb->idct_put = s->idct_put[dct_mode && log2_blocksize == 3];
522 mb->scan_table = s->dv_zigzag[dct_mode];
523 mb->factor_table = &s->sys->idct_factor[(class1 == 3)*2*22*64 + dct_mode*22*64 +
524 (quant + dv_quant_offset[class1])*64];
527 /* convert to unsigned because 128 is not added in the
531 buf_ptr += last_index >> 3;
533 mb->partial_bit_count = 0;
536 printf("MB block: %d, %d ", mb_index, j);
538 dv_decode_ac(&gb, mb, block);
540 /* write the remaining bits in a new buffer only if the
549 /* pass 2 : we can do it just after */
551 printf("***pass 2 size=%d MB#=%d\n", put_bits_count(&pb), mb_index);
555 init_get_bits(&gb, mb_bit_buffer, put_bits_count(&pb));
557 for (j = 0; j < s->sys->bpm; j++, block += 64, mb++) {
558 if (mb->pos < 64 && get_bits_left(&gb) > 0) {
559 dv_decode_ac(&gb, mb, block);
560 /* if still not finished, no need to parse other blocks */
565 /* all blocks are finished, so the extra bytes can be used at
566 the video segment level */
567 if (j >= s->sys->bpm)
568 bit_copy(&vs_pb, &gb);
571 /* we need a pass over the whole video segment */
573 printf("***pass 3 size=%d\n", put_bits_count(&vs_pb));
575 block = &sblock[0][0];
577 init_get_bits(&gb, vs_bit_buffer, put_bits_count(&vs_pb));
578 flush_put_bits(&vs_pb);
579 for (mb_index = 0; mb_index < 5; mb_index++) {
580 for (j = 0; j < s->sys->bpm; j++) {
583 printf("start %d:%d\n", mb_index, j);
585 dv_decode_ac(&gb, mb, block);
587 if (mb->pos >= 64 && mb->pos < 127)
588 av_log(avctx, AV_LOG_ERROR, "AC EOB marker is absent pos=%d\n", mb->pos);
594 /* compute idct and place blocks */
595 block = &sblock[0][0];
597 for (mb_index = 0; mb_index < 5; mb_index++) {
598 dv_calculate_mb_xy(s, work_chunk, mb_index, &mb_x, &mb_y);
600 /* idct_put'ting luminance */
601 if ((s->sys->pix_fmt == PIX_FMT_YUV420P) ||
602 (s->sys->pix_fmt == PIX_FMT_YUV411P && mb_x >= (704 / 8)) ||
603 (s->sys->height >= 720 && mb_y != 134)) {
604 y_stride = (s->picture.linesize[0] << ((!is_field_mode[mb_index]) * log2_blocksize));
606 y_stride = (2 << log2_blocksize);
608 y_ptr = s->picture.data[0] + ((mb_y * s->picture.linesize[0] + mb_x) << log2_blocksize);
609 linesize = s->picture.linesize[0] << is_field_mode[mb_index];
610 mb[0] .idct_put(y_ptr , linesize, block + 0*64);
611 if (s->sys->video_stype == 4) { /* SD 422 */
612 mb[2].idct_put(y_ptr + (1 << log2_blocksize) , linesize, block + 2*64);
614 mb[1].idct_put(y_ptr + (1 << log2_blocksize) , linesize, block + 1*64);
615 mb[2].idct_put(y_ptr + y_stride, linesize, block + 2*64);
616 mb[3].idct_put(y_ptr + (1 << log2_blocksize) + y_stride, linesize, block + 3*64);
621 /* idct_put'ting chrominance */
622 c_offset = (((mb_y >> (s->sys->pix_fmt == PIX_FMT_YUV420P)) * s->picture.linesize[1] +
623 (mb_x >> ((s->sys->pix_fmt == PIX_FMT_YUV411P) ? 2 : 1))) << log2_blocksize);
624 for (j = 2; j; j--) {
625 uint8_t *c_ptr = s->picture.data[j] + c_offset;
626 if (s->sys->pix_fmt == PIX_FMT_YUV411P && mb_x >= (704 / 8)) {
627 uint64_t aligned_pixels[64/8];
628 uint8_t *pixels = (uint8_t*)aligned_pixels;
629 uint8_t *c_ptr1, *ptr1;
631 mb->idct_put(pixels, 8, block);
632 for (y = 0; y < (1 << log2_blocksize); y++, c_ptr += s->picture.linesize[j], pixels += 8) {
633 ptr1 = pixels + (1 << (log2_blocksize - 1));
634 c_ptr1 = c_ptr + (s->picture.linesize[j] << log2_blocksize);
635 for (x = 0; x < (1 << (log2_blocksize - 1)); x++) {
636 c_ptr[x] = pixels[x];
642 y_stride = (mb_y == 134) ? (1 << log2_blocksize) :
643 s->picture.linesize[j] << ((!is_field_mode[mb_index]) * log2_blocksize);
644 linesize = s->picture.linesize[j] << is_field_mode[mb_index];
645 (mb++)-> idct_put(c_ptr , linesize, block); block += 64;
646 if (s->sys->bpm == 8) {
647 (mb++)->idct_put(c_ptr + y_stride, linesize, block); block += 64;
656 /* Converts run and level (where level != 0) pair into vlc, returning bit size */
/* Range-checked variant. The code word is stored in *vlc with the sign bit
 * OR-ed in. Declarations, else branches and the return are not part of this
 * excerpt. */
657 static av_always_inline int dv_rl2vlc(int run, int level, int sign, uint32_t* vlc)
/* both run and level fit the table: a single lookup gives code and size */
660 if (run < DV_VLC_MAP_RUN_SIZE && level < DV_VLC_MAP_LEV_SIZE) {
661 *vlc = dv_vlc_map[run][level].vlc | sign;
662 size = dv_vlc_map[run][level].size;
/* otherwise encode the level on its own (as run 0) ... */
665 if (level < DV_VLC_MAP_LEV_SIZE) {
666 *vlc = dv_vlc_map[0][level].vlc | sign;
667 size = dv_vlc_map[0][level].size;
/* ... using the 0xfe00 escape pattern when the level exceeds the table */
669 *vlc = 0xfe00 | (level << 1) | sign;
/* then place the run-only code (table entry for level 0, or the 13-bit
 * 0x1f80 escape for runs of 16 and more) above the level code by shifting
 * it left by the level code's size */
673 *vlc |= ((run < 16) ? dv_vlc_map[run-1][0].vlc :
674 (0x1f80 | (run - 1))) << size;
675 size += (run < 16) ? dv_vlc_map[run-1][0].size : 13;
/* Size-only counterpart of the range-checked dv_rl2vlc(): computes how many
 * bits the (run, level) pair costs without building the code word.
 * Declarations, else branches and the return are not part of this excerpt. */
682 static av_always_inline int dv_rl2vlc_size(int run, int level)
/* direct table hit */
686 if (run < DV_VLC_MAP_RUN_SIZE && level < DV_VLC_MAP_LEV_SIZE) {
687 size = dv_vlc_map[run][level].size;
/* level coded alone: table size, or 16 bits when the level is out of range */
690 size = (level < DV_VLC_MAP_LEV_SIZE) ? dv_vlc_map[0][level].size : 16;
/* plus the separate run code: table size for runs below 16, else 13 bits */
692 size += (run < 16) ? dv_vlc_map[run-1][0].size : 13;
/* Direct-lookup variant of dv_rl2vlc(): stores the table code with the sign
 * bit OR-ed in into *vlc and returns its size in bits. No range checks are
 * made on run or l here.
 * NOTE(review): presumably the alternate branch of a preprocessor conditional
 * whose directives are not part of this excerpt - confirm. */
698 static av_always_inline int dv_rl2vlc(int run, int l, int sign, uint32_t* vlc)
700 *vlc = dv_vlc_map[run][l].vlc | sign;
701 return dv_vlc_map[run][l].size;
/* Direct-lookup variant of dv_rl2vlc_size(): returns the table's bit size for
 * the (run, l) pair without any range check. */
704 static av_always_inline int dv_rl2vlc_size(int run, int l)
706 return dv_vlc_map[run][l].size;
710 typedef struct EncBlockInfo {
720 uint8_t partial_bit_count;
721 uint32_t partial_bit_buffer; /* we can't use uint16_t here */
724 static av_always_inline PutBitContext* dv_encode_ac(EncBlockInfo* bi,
725 PutBitContext* pb_pool,
726 PutBitContext* pb_end)
729 PutBitContext* pb = pb_pool;
730 int size = bi->partial_bit_count;
731 uint32_t vlc = bi->partial_bit_buffer;
733 bi->partial_bit_count = bi->partial_bit_buffer = 0;
735 /* Find suitable storage space */
736 for (; size > (bits_left = put_bits_left(pb)); pb++) {
739 put_bits(pb, bits_left, vlc >> size);
740 vlc = vlc & ((1 << size) - 1);
742 if (pb + 1 >= pb_end) {
743 bi->partial_bit_count = size;
744 bi->partial_bit_buffer = vlc;
750 put_bits(pb, size, vlc);
752 if (bi->cur_ac >= 64)
755 /* Construct the next VLC */
757 bi->cur_ac = bi->next[prev];
758 if (bi->cur_ac < 64){
759 size = dv_rl2vlc(bi->cur_ac - prev - 1, bi->mb[bi->cur_ac], bi->sign[bi->cur_ac], &vlc);
761 size = 4; vlc = 6; /* End Of Block stamp */
/* Chooses the DCT mode for the 8x8 block at data; the caller stores the
 * result in EncBlockInfo.dct_mode. The score comparison and the return
 * statements are not part of this excerpt. */
767 static av_always_inline int dv_guess_dct_mode(DVVideoContext *s, uint8_t *data, int linesize) {
/* scores are computed only when interlaced DCT has been requested */
768 if (s->avctx->flags & CODEC_FLAG_INTERLACED_DCT) {
/* score over all 8 rows at the normal stride, reduced by a fixed 400 */
769 int ps = s->ildct_cmp(NULL, data, NULL, linesize, 8) - 400;
/* score of the two interleaved half-blocks: 4 rows each at double stride,
 * one starting at row 0 and one starting at row 1 */
771 int is = s->ildct_cmp(NULL, data , NULL, linesize<<1, 4) +
772 s->ildct_cmp(NULL, data + linesize, NULL, linesize<<1, 4);
780 static av_always_inline int dv_init_enc_block(EncBlockInfo* bi, uint8_t *data, int linesize, DVVideoContext *s, int bias)
783 const uint8_t* zigzag_scan;
784 LOCAL_ALIGNED_16(DCTELEM, blk, [64]);
786 /* We offer two different methods for class number assignment: the
787 method suggested in SMPTE 314M Table 22, and an improved
788 method. The SMPTE method is very conservative; it assigns class
789 3 (i.e. severe quantization) to any block where the largest AC
790 component is greater than 36. FFmpeg's DV encoder tracks AC bit
791 consumption precisely, so there is no need to bias most blocks
792 towards strongly lossy compression. Instead, we assign class 2
793 to most blocks, and use class 3 only when strictly necessary
794 (for blocks whose largest AC component exceeds 255). */
796 #if 0 /* SMPTE spec method */
797 static const int classes[] = {12, 24, 36, 0xffff};
798 #else /* improved FFmpeg method */
799 static const int classes[] = {-1, -1, 255, 0xffff};
801 int max = classes[0];
804 assert((((int)blk) & 15) == 0);
806 bi->area_q[0] = bi->area_q[1] = bi->area_q[2] = bi->area_q[3] = 0;
807 bi->partial_bit_count = 0;
808 bi->partial_bit_buffer = 0;
811 bi->dct_mode = dv_guess_dct_mode(s, data, linesize);
812 s->get_pixels(blk, data, linesize);
813 s->fdct[bi->dct_mode](blk);
815 /* We rely on the fact that encoding all zeros leads to an immediate EOB,
816 which is precisely what the spec calls for in the "dummy" blocks. */
817 memset(blk, 0, 64*sizeof(*blk));
822 zigzag_scan = bi->dct_mode ? ff_zigzag248_direct : ff_zigzag_direct;
823 weight = bi->dct_mode ? dv_weight_248 : dv_weight_88;
825 for (area = 0; area < 4; area++) {
826 bi->prev[area] = prev;
827 bi->bit_size[area] = 1; // 4 areas 4 bits for EOB :)
828 for (i = mb_area_start[area]; i < mb_area_start[area+1]; i++) {
829 int level = blk[zigzag_scan[i]];
831 if (level + 15 > 30U) {
832 bi->sign[i] = (level >> 31) & 1;
833 /* weigh it and shift down into range, adding for rounding */
834 /* the extra division by a factor of 2^4 reverses the 8x expansion of the DCT
835 AND the 2x doubling of the weights */
836 level = (FFABS(level) * weight[i] + (1 << (dv_weight_bits+3))) >> (dv_weight_bits+4);
840 bi->bit_size[area] += dv_rl2vlc_size(i - prev - 1, level);
847 for (bi->cno = 0; max > classes[bi->cno]; bi->cno++);
855 for (area = 0; area < 4; area++) {
856 bi->prev[area] = prev;
857 bi->bit_size[area] = 1; // 4 areas 4 bits for EOB :)
858 for (; i < mb_area_start[area+1]; i = bi->next[i]) {
862 bi->bit_size[area] += dv_rl2vlc_size(i - prev - 1, bi->mb[i]);
871 return bi->bit_size[0] + bi->bit_size[1] + bi->bit_size[2] + bi->bit_size[3];
874 static inline void dv_guess_qnos(EncBlockInfo* blks, int* qnos)
877 int i, j, k, a, prev, a2;
880 size[0] = size[1] = size[2] = size[3] = size[4] = 1 << 24;
883 for (i = 0; i < 5; i++) {
889 for (j = 0; j < 6; j++, b++) {
890 for (a = 0; a < 4; a++) {
891 if (b->area_q[a] != dv_quant_shifts[qnos[i] + dv_quant_offset[b->cno]][a]) {
892 b->bit_size[a] = 1; // 4 areas 4 bits for EOB :)
895 assert(b->next[prev] >= mb_area_start[a+1] || b->mb[prev]);
896 for (k = b->next[prev] ; k < mb_area_start[a+1]; k = b->next[k]) {
899 b->bit_size[a] += dv_rl2vlc_size(k - prev - 1, b->mb[k]);
902 if (b->next[k] >= mb_area_start[a+1] && b->next[k]<64){
903 for (a2 = a + 1; b->next[k] >= mb_area_start[a2+1]; a2++)
906 assert(b->mb[b->next[k]]);
907 b->bit_size[a2] += dv_rl2vlc_size(b->next[k] - prev - 1, b->mb[b->next[k]])
908 -dv_rl2vlc_size(b->next[k] - k - 1, b->mb[b->next[k]]);
909 assert(b->prev[a2] == k && (a2 + 1 >= 4 || b->prev[a2+1] != k));
912 b->next[prev] = b->next[k];
917 size[i] += b->bit_size[a];
920 if (vs_total_ac_bits >= size[0] + size[1] + size[2] + size[3] + size[4])
923 } while (qnos[0]|qnos[1]|qnos[2]|qnos[3]|qnos[4]);
926 for (a = 2; a == 2 || vs_total_ac_bits < size[0]; a += a){
928 size[0] = 5 * 6 * 4; //EOB
929 for (j = 0; j < 6 *5; j++, b++) {
931 for (k = b->next[prev]; k < 64; k = b->next[k]) {
932 if (b->mb[k] < a && b->mb[k] > -a){
933 b->next[prev] = b->next[k];
935 size[0] += dv_rl2vlc_size(k - prev - 1, b->mb[k]);
943 static int dv_encode_video_segment(AVCodecContext *avctx, void *arg)
945 DVVideoContext *s = avctx->priv_data;
946 DVwork_chunk *work_chunk = arg;
948 int mb_x, mb_y, c_offset, linesize, y_stride;
951 LOCAL_ALIGNED_8(uint8_t, scratch, [64]);
952 EncBlockInfo enc_blks[5*DV_MAX_BPM];
953 PutBitContext pbs[5*DV_MAX_BPM];
955 EncBlockInfo* enc_blk;
957 int qnos[5] = {15, 15, 15, 15, 15}; /* No quantization */
958 int* qnosp = &qnos[0];
960 dif = &s->buf[work_chunk->buf_offset*80];
961 enc_blk = &enc_blks[0];
962 for (mb_index = 0; mb_index < 5; mb_index++) {
963 dv_calculate_mb_xy(s, work_chunk, mb_index, &mb_x, &mb_y);
965 /* initializing luminance blocks */
966 if ((s->sys->pix_fmt == PIX_FMT_YUV420P) ||
967 (s->sys->pix_fmt == PIX_FMT_YUV411P && mb_x >= (704 / 8)) ||
968 (s->sys->height >= 720 && mb_y != 134)) {
969 y_stride = s->picture.linesize[0] << 3;
973 y_ptr = s->picture.data[0] + ((mb_y * s->picture.linesize[0] + mb_x) << 3);
974 linesize = s->picture.linesize[0];
976 if (s->sys->video_stype == 4) { /* SD 422 */
978 dv_init_enc_block(enc_blk+0, y_ptr , linesize, s, 0) +
979 dv_init_enc_block(enc_blk+1, NULL , linesize, s, 0) +
980 dv_init_enc_block(enc_blk+2, y_ptr + 8 , linesize, s, 0) +
981 dv_init_enc_block(enc_blk+3, NULL , linesize, s, 0);
984 dv_init_enc_block(enc_blk+0, y_ptr , linesize, s, 0) +
985 dv_init_enc_block(enc_blk+1, y_ptr + 8 , linesize, s, 0) +
986 dv_init_enc_block(enc_blk+2, y_ptr + y_stride, linesize, s, 0) +
987 dv_init_enc_block(enc_blk+3, y_ptr + 8 + y_stride, linesize, s, 0);
991 /* initializing chrominance blocks */
992 c_offset = (((mb_y >> (s->sys->pix_fmt == PIX_FMT_YUV420P)) * s->picture.linesize[1] +
993 (mb_x >> ((s->sys->pix_fmt == PIX_FMT_YUV411P) ? 2 : 1))) << 3);
994 for (j = 2; j; j--) {
995 uint8_t *c_ptr = s->picture.data[j] + c_offset;
996 linesize = s->picture.linesize[j];
997 y_stride = (mb_y == 134) ? 8 : (s->picture.linesize[j] << 3);
998 if (s->sys->pix_fmt == PIX_FMT_YUV411P && mb_x >= (704 / 8)) {
1000 uint8_t* b = scratch;
1001 for (i = 0; i < 8; i++) {
1002 d = c_ptr + (linesize << 3);
1003 b[0] = c_ptr[0]; b[1] = c_ptr[1]; b[2] = c_ptr[2]; b[3] = c_ptr[3];
1004 b[4] = d[0]; b[5] = d[1]; b[6] = d[2]; b[7] = d[3];
1012 vs_bit_size += dv_init_enc_block( enc_blk++, c_ptr , linesize, s, 1);
1013 if (s->sys->bpm == 8) {
1014 vs_bit_size += dv_init_enc_block(enc_blk++, c_ptr + y_stride, linesize, s, 1);
1019 if (vs_total_ac_bits < vs_bit_size)
1020 dv_guess_qnos(&enc_blks[0], qnosp);
1022 /* DIF encoding process */
1023 for (j=0; j<5*s->sys->bpm;) {
1029 /* First pass over individual cells only */
1030 for (i=0; i<s->sys->bpm; i++, j++) {
1031 int sz = s->sys->block_sizes[i]>>3;
1033 init_put_bits(&pbs[j], dif, sz);
1034 put_sbits(&pbs[j], 9, ((enc_blks[j].mb[0] >> 3) - 1024 + 2) >> 2);
1035 put_bits(&pbs[j], 1, enc_blks[j].dct_mode);
1036 put_bits(&pbs[j], 2, enc_blks[j].cno);
1038 dv_encode_ac(&enc_blks[j], &pbs[j], &pbs[j+1]);
1042 /* Second pass over each MB space */
1043 pb = &pbs[start_mb];
1044 for (i=0; i<s->sys->bpm; i++) {
1045 if (enc_blks[start_mb+i].partial_bit_count)
1046 pb = dv_encode_ac(&enc_blks[start_mb+i], pb, &pbs[start_mb+s->sys->bpm]);
1050 /* Third and final pass over the whole video segment space */
1052 for (j=0; j<5*s->sys->bpm; j++) {
1053 if (enc_blks[j].partial_bit_count)
1054 pb = dv_encode_ac(&enc_blks[j], pb, &pbs[s->sys->bpm*5]);
1055 if (enc_blks[j].partial_bit_count)
1056 av_log(avctx, AV_LOG_ERROR, "ac bitstream overflow\n");
1059 for (j=0; j<5*s->sys->bpm; j++) {
1061 int size = pbs[j].size_in_bits >> 3;
1062 flush_put_bits(&pbs[j]);
1063 pos = put_bits_count(&pbs[j]) >> 3;
1065 av_log(avctx, AV_LOG_ERROR, "bitstream written beyond buffer size\n");
1068 memset(pbs[j].buf + pos, 0xff, size - pos);
1074 #if CONFIG_DVVIDEO_DECODER
1075 /* NOTE: exactly one frame must be given (120000 bytes for NTSC,
1076 144000 bytes for PAL - or twice those for 50Mbps) */
1077 static int dvvideo_decode_frame(AVCodecContext *avctx,
1078 void *data, int *data_size,
1081 const uint8_t *buf = avpkt->data;
1082 int buf_size = avpkt->size;
1083 DVVideoContext *s = avctx->priv_data;
1084 const uint8_t* vsc_pack;
1087 s->sys = ff_dv_frame_profile(s->sys, buf, buf_size);
1088 if (!s->sys || buf_size < s->sys->frame_size || dv_init_dynamic_tables(s->sys)) {
1089 av_log(avctx, AV_LOG_ERROR, "could not find dv frame profile\n");
1090 return -1; /* NOTE: we only accept several full frames */
1093 if (s->picture.data[0])
1094 avctx->release_buffer(avctx, &s->picture);
1096 s->picture.reference = 0;
1097 s->picture.key_frame = 1;
1098 s->picture.pict_type = FF_I_TYPE;
1099 avctx->pix_fmt = s->sys->pix_fmt;
1100 avctx->time_base = s->sys->time_base;
1101 avcodec_set_dimensions(avctx, s->sys->width, s->sys->height);
1102 if (avctx->get_buffer(avctx, &s->picture) < 0) {
1103 av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
1106 s->picture.interlaced_frame = 1;
1107 s->picture.top_field_first = 0;
1110 avctx->execute(avctx, dv_decode_video_segment, s->sys->work_chunks, NULL,
1111 dv_work_pool_size(s->sys), sizeof(DVwork_chunk));
1116 *data_size = sizeof(AVFrame);
1117 *(AVFrame*)data = s->picture;
1119 /* Determine the codec's sample_aspect ratio from the packet */
1120 vsc_pack = buf + 80*5 + 48 + 5;
1121 if ( *vsc_pack == dv_video_control ) {
1122 apt = buf[4] & 0x07;
1123 is16_9 = (vsc_pack && ((vsc_pack[2] & 0x07) == 0x02 || (!apt && (vsc_pack[2] & 0x07) == 0x07)));
1124 avctx->sample_aspect_ratio = s->sys->sar[is16_9];
1127 return s->sys->frame_size;
1129 #endif /* CONFIG_DVVIDEO_DECODER */
1132 static inline int dv_write_pack(enum dv_pack_type pack_id, DVVideoContext *c,
1136 * Here's what SMPTE314M says about these two:
1137 * (page 6) APTn, AP1n, AP2n, AP3n: These data shall be identical
1138 * as track application IDs (APTn = 001, AP1n =
1139 * 001, AP2n = 001, AP3n = 001), if the source signal
1140 * comes from a digital VCR. If the signal source is
1141 * unknown, all bits for these data shall be set to 1.
1142 * (page 12) STYPE: STYPE defines a signal type of video signal
1143 * 00000b = 4:1:1 compression
1144 * 00100b = 4:2:2 compression
1146 * Now, I've got two problems with these statements:
1147 * 1. it looks like APT == 111b should be a safe bet, but it isn't.
1148 * It seems that for PAL as defined in IEC 61834 we have to set
1149 * APT to 000 and for SMPTE314M to 001.
1150 * 2. It is not at all clear what STYPE is used for 4:2:0 PAL
1151 * compression scheme (if any).
1153 int apt = (c->sys->pix_fmt == PIX_FMT_YUV420P ? 0 : 1);
1156 if ((int)(av_q2d(c->avctx->sample_aspect_ratio) * c->avctx->width / c->avctx->height * 10) >= 17) /* 16:9 */
1159 buf[0] = (uint8_t)pack_id;
1161 case dv_header525: /* I can't imagine why these two weren't defined as real */
1162 case dv_header625: /* packs in SMPTE314M -- they definitely look like ones */
1163 buf[1] = 0xf8 | /* reserved -- always 1 */
1164 (apt & 0x07); /* APT: Track application ID */
1165 buf[2] = (0 << 7) | /* TF1: audio data is 0 - valid; 1 - invalid */
1166 (0x0f << 3) | /* reserved -- always 1 */
1167 (apt & 0x07); /* AP1: Audio application ID */
1168 buf[3] = (0 << 7) | /* TF2: video data is 0 - valid; 1 - invalid */
1169 (0x0f << 3) | /* reserved -- always 1 */
1170 (apt & 0x07); /* AP2: Video application ID */
1171 buf[4] = (0 << 7) | /* TF3: subcode(SSYB) is 0 - valid; 1 - invalid */
1172 (0x0f << 3) | /* reserved -- always 1 */
1173 (apt & 0x07); /* AP3: Subcode application ID */
1175 case dv_video_source:
1176 buf[1] = 0xff; /* reserved -- always 1 */
1177 buf[2] = (1 << 7) | /* B/W: 0 - b/w, 1 - color */
1178 (1 << 6) | /* following CLF is valid - 0, invalid - 1 */
1179 (3 << 4) | /* CLF: color frames ID (see ITU-R BT.470-4) */
1180 0xf; /* reserved -- always 1 */
1181 buf[3] = (3 << 6) | /* reserved -- always 1 */
1182 (c->sys->dsf << 5) | /* system: 60fields/50fields */
1183 c->sys->video_stype; /* signal type video compression */
1184 buf[4] = 0xff; /* VISC: 0xff -- no information */
1186 case dv_video_control:
1187 buf[1] = (0 << 6) | /* Copy generation management (CGMS) 0 -- free */
1188 0x3f; /* reserved -- always 1 */
1189 buf[2] = 0xc8 | /* reserved -- always b11001xxx */
1191 buf[3] = (1 << 7) | /* frame/field flag 1 -- frame, 0 -- field */
1192 (1 << 6) | /* first/second field flag 0 -- field 2, 1 -- field 1 */
1193 (1 << 5) | /* frame change flag 0 -- same picture as before, 1 -- different */
1194 (1 << 4) | /* 1 - interlaced, 0 - noninterlaced */
1195 0xc; /* reserved -- always b1100 */
1196 buf[4] = 0xff; /* reserved -- always 1 */
1199 buf[1] = buf[2] = buf[3] = buf[4] = 0xff;
1204 #if CONFIG_DVVIDEO_ENCODER
1205 static void dv_format_frame(DVVideoContext* c, uint8_t* buf)
1209 for (chan = 0; chan < c->sys->n_difchan; chan++) {
1210 for (i = 0; i < c->sys->difseg_size; i++) {
1211 memset(buf, 0xff, 80 * 6); /* first 6 DIF blocks are for control data */
1213 /* DV header: 1DIF */
1214 buf += dv_write_dif_id(dv_sect_header, chan, i, 0, buf);
1215 buf += dv_write_pack((c->sys->dsf ? dv_header625 : dv_header525), c, buf);
1216 buf += 72; /* unused bytes */
1218 /* DV subcode: 2DIFs */
1219 for (j = 0; j < 2; j++) {
1220 buf += dv_write_dif_id(dv_sect_subcode, chan, i, j, buf);
1221 for (k = 0; k < 6; k++)
1222 buf += dv_write_ssyb_id(k, (i < c->sys->difseg_size/2), buf) + 5;
1223 buf += 29; /* unused bytes */
1226 /* DV VAUX: 3DIFS */
1227 for (j = 0; j < 3; j++) {
1228 buf += dv_write_dif_id(dv_sect_vaux, chan, i, j, buf);
1229 buf += dv_write_pack(dv_video_source, c, buf);
1230 buf += dv_write_pack(dv_video_control, c, buf);
1232 buf += dv_write_pack(dv_video_source, c, buf);
1233 buf += dv_write_pack(dv_video_control, c, buf);
1234 buf += 4*5 + 2; /* unused bytes */
1237 /* DV Audio/Video: 135 Video DIFs + 9 Audio DIFs */
1238 for (j = 0; j < 135; j++) {
1240 memset(buf, 0xff, 80);
1241 buf += dv_write_dif_id(dv_sect_audio, chan, i, j/15, buf);
1242 buf += 77; /* audio control & shuffled PCM audio */
1244 buf += dv_write_dif_id(dv_sect_video, chan, i, j, buf);
1245 buf += 77; /* 1 video macroblock: 1 bytes control
1246 4 * 14 bytes Y 8x8 data
1247 10 bytes Cr 8x8 data
1248 10 bytes Cb 8x8 data */
/* Encodes one picture into buf as a complete DV frame and returns the frame
 * size of the selected profile. The rest of the parameter list, the error
 * return and a few statements are not part of this excerpt. */
1255 static int dvvideo_encode_frame(AVCodecContext *c, uint8_t *buf, int buf_size,
1258 DVVideoContext *s = c->priv_data;
/* pick the profile from the codec context; the guarded (not visible) branch
 * is taken when there is no profile, the output buffer is smaller than one
 * frame, or the dynamic tables cannot be set up */
1260 s->sys = ff_dv_codec_profile(c);
1261 if (!s->sys || buf_size < s->sys->frame_size || dv_init_dynamic_tables(s->sys))
1264 c->pix_fmt = s->sys->pix_fmt;
/* work on a copy of the caller's frame and mark it as an intra key frame */
1265 s->picture = *((AVFrame *)data);
1266 s->picture.key_frame = 1;
1267 s->picture.pict_type = FF_I_TYPE;
/* one dv_encode_video_segment job per work chunk of the profile */
1270 c->execute(c, dv_encode_video_segment, s->sys->work_chunks, NULL,
1271 dv_work_pool_size(s->sys), sizeof(DVwork_chunk));
/* after the video segments: let dv_format_frame() fill in the DIF block
 * headers and packs of the frame */
1275 dv_format_frame(s, buf);
1277 return s->sys->frame_size;
/* Codec close callback: hands the picture buffer back through
 * c->release_buffer() if one is currently held (data[0] non-NULL).
 * The return statement is not part of this excerpt. */
1281 static int dvvideo_close(AVCodecContext *c)
1283 DVVideoContext *s = c->priv_data;
1285 if (s->picture.data[0])
1286 c->release_buffer(c, &s->picture);
1292 #if CONFIG_DVVIDEO_ENCODER
1293 AVCodec ff_dvvideo_encoder = {
1297 sizeof(DVVideoContext),
1298 dvvideo_init_encoder,
1299 dvvideo_encode_frame,
1300 .pix_fmts = (const enum PixelFormat[]) {PIX_FMT_YUV411P, PIX_FMT_YUV422P, PIX_FMT_YUV420P, PIX_FMT_NONE},
1301 .long_name = NULL_IF_CONFIG_SMALL("DV (Digital Video)"),
1303 #endif // CONFIG_DVVIDEO_ENCODER
1305 #if CONFIG_DVVIDEO_DECODER
1306 AVCodec ff_dvvideo_decoder = {
1310 sizeof(DVVideoContext),
1314 dvvideo_decode_frame,
1318 .long_name = NULL_IF_CONFIG_SMALL("DV (Digital Video)"),