3 * Copyright (c) 2002 Fabrice Bellard
4 * Copyright (c) 2004 Roman Shaposhnik
7 * Copyright (c) 2003 Roman Shaposhnik
9 * 50 Mbps (DVCPRO50) support
10 * Copyright (c) 2006 Daniel Maas <dmaas@maasdigital.com>
12 * 100 Mbps (DVCPRO HD) support
13 * Initial code by Daniel Maas <dmaas@maasdigital.com> (funded by BBC R&D)
14 * Final code by Roman Shaposhnik
16 * Many thanks to Dan Dennedy <dan@dennedy.org> for providing wealth
17 * of DV technical info.
19 * This file is part of FFmpeg.
21 * FFmpeg is free software; you can redistribute it and/or
22 * modify it under the terms of the GNU Lesser General Public
23 * License as published by the Free Software Foundation; either
24 * version 2.1 of the License, or (at your option) any later version.
26 * FFmpeg is distributed in the hope that it will be useful,
27 * but WITHOUT ANY WARRANTY; without even the implied warranty of
28 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
29 * Lesser General Public License for more details.
31 * You should have received a copy of the GNU Lesser General Public
32 * License along with FFmpeg; if not, write to the Free Software
33 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
40 #define ALT_BITSTREAM_READER
45 #include "simple_idct.h"
47 #include "dv_tablegen.h"
/*
 * Private codec context shared by the DV decoder and encoder; every entry
 * point in this file obtains it from avctx->priv_data.
 */
52 typedef struct DVVideoContext {
/* owning codec context; consulted for flags and lowres */
55 AVCodecContext *avctx;
/* coefficient scan orders set up in dvvideo_init():
   [0] from ff_zigzag_direct, [1] from ff_zigzag248_direct */
58 uint8_t dv_zigzag[2][64];
/* encoder: loads source pixels into a DCTELEM block before the forward DCT */
60 void (*get_pixels)(DCTELEM *block, const uint8_t *pixels, int line_size);
/* forward DCTs: [0] = dsp.fdct, [1] = dsp.fdct248; indexed by dct_mode */
61 void (*fdct[2])(DCTELEM *block);
/* inverse DCT + store: [0] = dsp.idct_put, [1] = ff_simple_idct248_put */
62 void (*idct_put[2])(uint8_t *dest, int line_size, DCTELEM *block);
/* comparison function used by dv_guess_dct_mode() (dsp.ildct_cmp[5]) */
63 me_cmp_func ildct_cmp;
/* Width in bits of the first-level AC VLC lookup: passed to init_vlc() and
   used by dv_decode_ac() to index dv_rl_vlc. */
66 #define TEX_VLC_BITS 9
/* Run/level/length table filled in dvvideo_init() from the generated VLC
   table, whose size is asserted there to be exactly 1184 entries. */
68 /* XXX: also include quantization */
69 static RL_VLC_ELEM dv_rl_vlc[1184];
/*
 * Number of work chunks of profile d. The base value is
 * n_difchan * difseg_size * 27; the 1080i50 and 720p50 profiles are
 * special-cased afterwards. The result is used to index the last entry of
 * d->work_chunks and as the job count handed to avctx->execute().
 */
71 static inline int dv_work_pool_size(const DVprofile *d)
73 int size = d->n_difchan*d->difseg_size*27;
74 if (DV_PROFILE_IS_1080i50(d))
76 if (DV_PROFILE_IS_720p50(d))
/*
 * Compute the packed picture coordinates of the five macroblocks (m = 0..4)
 * belonging to one video segment and store them in tbl[0..4].
 *
 * d    - DV profile; d->pix_fmt and d->difseg_size drive the last group of
 *        layouts below
 * chan - DIF channel index
 * seq  - DIF sequence index within the channel
 * slot - video segment index within the sequence (callers pass 0..26)
 *
 * Each entry stores x in the low bits and y above them.
 * dv_calculate_mb_xy() unpacks an entry as (value & 0xff, value >> 8), so
 * for example (x<<1)|(y<<9) yields mb_x = 2*x and mb_y = 2*y, while
 * (x<<2)|(y<<8) yields mb_x = 4*x and mb_y = y.
 */
81 static inline void dv_calc_mb_coordinates(const DVprofile *d, int chan, int seq, int slot,
/* per-macroblock (m) sequence offsets and horizontal start positions */
84 static const uint8_t off[] = { 2, 6, 8, 0, 4 };
85 static const uint8_t shuf1[] = { 36, 18, 54, 0, 72 };
86 static const uint8_t shuf2[] = { 24, 12, 36, 0, 48 };
87 static const uint8_t shuf3[] = { 18, 9, 27, 0, 36 };
89 static const uint8_t l_start[] = {0, 4, 9, 13, 18, 22, 27, 31, 36, 40};
90 static const uint8_t l_start_shuffled[] = { 9, 4, 13, 0, 18 };
/* row offsets indexed by slot (serpent1) or by k (serpent2) */
92 static const uint8_t serpent1[] = {0, 1, 2, 2, 1, 0,
97 static const uint8_t serpent2[] = {0, 1, 2, 3, 4, 5, 5, 4, 3, 2, 1, 0,
98 0, 1, 2, 3, 4, 5, 5, 4, 3, 2, 1, 0,
/* {x base, y} pairs indexed by y; applied below to remap out-of-range x */
101 static const uint8_t remap[][2] = {{ 0, 0}, { 0, 0}, { 0, 0}, { 0, 0}, /* dummy */
102 { 0, 0}, { 0, 1}, { 0, 2}, { 0, 3}, {10, 0},
103 {10, 1}, {10, 2}, {10, 3}, {20, 0}, {20, 1},
104 {20, 2}, {20, 3}, {30, 0}, {30, 1}, {30, 2},
105 {30, 3}, {40, 0}, {40, 1}, {40, 2}, {40, 3},
106 {50, 0}, {50, 1}, {50, 2}, {50, 3}, {60, 0},
107 {60, 1}, {60, 2}, {60, 3}, {70, 0}, {70, 1},
108 {70, 2}, {70, 3}, { 0,64}, { 0,65}, { 0,66},
109 {10,64}, {10,65}, {10,66}, {20,64}, {20,65},
110 {20,66}, {30,64}, {30,65}, {30,66}, {40,64},
111 {40,65}, {40,66}, {50,64}, {50,65}, {50,66},
112 {60,64}, {60,65}, {60,66}, {70,64}, {70,65},
113 {70,66}, { 0,67}, {20,67}, {40,67}, {60,67}};
/* one table entry per macroblock of the segment */
118 for (m=0; m<5; m++) {
/* layout with 11 sequences per channel */
121 blk = (chan*11+seq)*27+slot;
123 if (chan == 0 && seq == 11) {
132 i = (4*chan + blk + off[m])%11;
135 x = shuf1[m] + (chan&1)*9 + k%9;
136 y = (i*3+k/9)*2 + (chan>>1) + 1;
138 tbl[m] = (x<<1)|(y<<9);
/* layout with 10 sequences per channel; x values of 80 and above are
   relocated through remap[] */
141 blk = (chan*10+seq)*27+slot;
143 i = (4*chan + (seq/5) + 2*blk + off[m])%10;
146 x = shuf1[m]+(chan&1)*9 + k%9;
147 y = (i*3+k/9)*2 + (chan>>1) + 4;
150 x = remap[y][0]+((x-80)<<(y>59));
153 tbl[m] = (x<<1)|(y<<9);
/* second 10-sequence layout, using shuf2[] and l_start[] */
156 blk = (chan*10+seq)*27+slot;
158 i = (4*chan + (seq/5) + 2*blk + off[m])%10;
159 k = (blk/5)%27 + (i&1)*3;
161 x = shuf2[m] + k%6 + 6*(chan&1);
162 y = l_start[i] + k/6 + 45*(chan>>1);
163 tbl[m] = (x<<1)|(y<<9);
/* remaining profiles: the layout depends on the chroma subsampling */
166 switch (d->pix_fmt) {
167 case PIX_FMT_YUV422P:
168 x = shuf3[m] + slot/3;
170 ((((seq + off[m]) % d->difseg_size)<<1) + chan)*3;
171 tbl[m] = (x<<1)|(y<<8);
173 case PIX_FMT_YUV420P:
174 x = shuf3[m] + slot/3;
176 ((seq + off[m]) % d->difseg_size)*3;
177 tbl[m] = (x<<1)|(y<<9);
179 case PIX_FMT_YUV411P:
180 i = (seq + off[m]) % d->difseg_size;
181 k = slot + ((m==1||m==2)?3:0);
183 x = l_start_shuffled[m] + k/6;
184 y = serpent2[k] + i*6;
187 tbl[m] = (x<<2)|(y<<8);
/*
 * Lazily build the per-profile tables stored in d:
 *  - work_chunks: macroblock coordinates and DIF block offset of every
 *    video segment;
 *  - idct_factor: the dequantisation factors used by dv_decode_ac().
 * Each table is only (re)built while its last entry is still zero.
 *
 * Callers treat a non-zero return value as failure.
 * NOTE(review): both tables are written through a const DVprofile pointer,
 * so work_chunks and idct_factor are presumably pointer members -- confirm.
 */
196 static int dv_init_dynamic_tables(const DVprofile *d)
199 uint32_t *factor1, *factor2;
200 const int *iweight1, *iweight2;
/* work chunk table: still empty if the last entry has no buffer offset */
202 if (!d->work_chunks[dv_work_pool_size(d)-1].buf_offset) {
204 for (c=0; c<d->n_difchan; c++) {
205 for (s=0; s<d->difseg_size; s++) {
207 for (j=0; j<27; j++) {
/* some channel/sequence combinations carry no video segments in the
   1080i50 and 720p50 profiles and get no work chunk */
209 if (!(DV_PROFILE_IS_1080i50(d) && c != 0 && s == 11) &&
210 !(DV_PROFILE_IS_720p50(d) && s > 9)) {
211 dv_calc_mb_coordinates(d, c, s, j, &d->work_chunks[i].mb_coordinates[0]);
212 d->work_chunks[i++].buf_offset = p;
/* dequantisation table: 8192 entries for HD profiles, 5632 otherwise;
   factor2 addresses the second half */
220 if (!d->idct_factor[DV_PROFILE_IS_HD(d)?8191:5631]) {
221 factor1 = &d->idct_factor[0];
222 factor2 = &d->idct_factor[DV_PROFILE_IS_HD(d)?4096:2816];
223 if (d->height == 720) {
224 iweight1 = &dv_iweight_720_y[0];
225 iweight2 = &dv_iweight_720_c[0];
227 iweight1 = &dv_iweight_1080_y[0];
228 iweight2 = &dv_iweight_1080_c[0];
/* HD: 4 classes x 16 quantiser steps x 64 coefficients; factor1 uses the
   _y weights, factor2 the _c weights */
230 if (DV_PROFILE_IS_HD(d)) {
231 for (c = 0; c < 4; c++) {
232 for (s = 0; s < 16; s++) {
233 for (i = 0; i < 64; i++) {
234 *factor1++ = (dv100_qstep[s] << (c + 9)) * iweight1[i];
235 *factor2++ = (dv100_qstep[s] << (c + 9)) * iweight2[i];
/* otherwise: one pass with the 8x8 weights, one with the 2-4-8 weights,
   22 quantiser rows each; factor2 stores the same values doubled */
240 iweight1 = &dv_iweight_88[0];
241 for (j = 0; j < 2; j++, iweight1 = &dv_iweight_248[0]) {
242 for (s = 0; s < 22; s++) {
243 for (i = c = 0; c < 4; c++) {
244 for (; i < dv_quant_areas[c]; i++) {
245 *factor1 = iweight1[i] << (dv_quant_shifts[s][c] + 1);
246 *factor2++ = (*factor1++) << 1;
/*
 * Codec initialisation, also reached through dvvideo_init_encoder().
 * Builds the VLC tables (a copy of the DV VLC set extended with explicit
 * sign bits, from which dv_rl_vlc is derived), initialises the encoder VLC
 * map, and stores the DSP function pointers and scan tables in the context.
 */
257 static av_cold int dvvideo_init(AVCodecContext *avctx)
259 DVVideoContext *s = avctx->priv_data;
/* temporary tables: up to two entries (positive/negative) per original code */
266 uint16_t new_dv_vlc_bits[NB_DV_VLC*2];
267 uint8_t new_dv_vlc_len[NB_DV_VLC*2];
268 uint8_t new_dv_vlc_run[NB_DV_VLC*2];
269 int16_t new_dv_vlc_level[NB_DV_VLC*2];
273 /* it's faster to include sign bit in a generic VLC parsing scheme */
274 for (i = 0, j = 0; i < NB_DV_VLC; i++, j++) {
275 new_dv_vlc_bits[j] = dv_vlc_bits[i];
276 new_dv_vlc_len[j] = dv_vlc_len[i];
277 new_dv_vlc_run[j] = dv_vlc_run[i];
278 new_dv_vlc_level[j] = dv_vlc_level[i];
/* codes with a non-zero level get the sign appended as an extra low bit:
   0 for the positive level, 1 for the negated one */
280 if (dv_vlc_level[i]) {
281 new_dv_vlc_bits[j] <<= 1;
285 new_dv_vlc_bits[j] = (dv_vlc_bits[i] << 1) | 1;
286 new_dv_vlc_len[j] = dv_vlc_len[i] + 1;
287 new_dv_vlc_run[j] = dv_vlc_run[i];
288 new_dv_vlc_level[j] = -dv_vlc_level[i];
292 /* NOTE: as a trick, we use the fact that no codes are unused
293 to accelerate the parsing of partial codes */
294 init_vlc(&dv_vlc, TEX_VLC_BITS, j,
295 new_dv_vlc_len, 1, 1, new_dv_vlc_bits, 2, 2, 0);
296 assert(dv_vlc.table_size == 1184);
/* flatten the generic VLC table into run/level/len triples */
298 for (i = 0; i < dv_vlc.table_size; i++){
299 int code = dv_vlc.table[i][0];
300 int len = dv_vlc.table[i][1];
303 if (len < 0){ //more bits needed
307 run = new_dv_vlc_run [code] + 1;
308 level = new_dv_vlc_level[code];
310 dv_rl_vlc[i].len = len;
311 dv_rl_vlc[i].level = level;
312 dv_rl_vlc[i].run = run;
316 dv_vlc_map_tableinit();
319 /* Generic DSP setup */
320 dsputil_init(&dsp, avctx);
321 ff_set_cmp(&dsp, dsp.ildct_cmp, avctx->ildct_cmp);
322 s->get_pixels = dsp.get_pixels;
323 s->ildct_cmp = dsp.ildct_cmp[5];
/* index 0: 8x8 DCT functions and scan order, permuted for the IDCT in use */
326 s->fdct[0] = dsp.fdct;
327 s->idct_put[0] = dsp.idct_put;
328 for (i = 0; i < 64; i++)
329 s->dv_zigzag[0][i] = dsp.idct_permutation[ff_zigzag_direct[i]];
/* index 1: 2-4-8 DCT functions and scan order */
332 s->fdct[1] = dsp.fdct248;
333 s->idct_put[1] = ff_simple_idct248_put; // FIXME: need to add it to DSP
335 for (i = 0; i < 64; i++){
336 int j = ff_zigzag248_direct[i];
337 s->dv_zigzag[1][i] = dsp.idct_permutation[(j & 7) + (j & 8) * 4 + (j & 48) / 2];
340 memcpy(s->dv_zigzag[1], ff_zigzag248_direct, 64);
342 avctx->coded_frame = &s->picture;
344 avctx->chroma_sample_location = AVCHROMA_LOC_TOPLEFT;
/*
 * Encoder init callback: verifies that a DV profile exists for the requested
 * width, height and pixel format (logging an error otherwise), then performs
 * the common initialisation in dvvideo_init().
 */
349 static av_cold int dvvideo_init_encoder(AVCodecContext *avctx)
351 if (!ff_dv_codec_profile(avctx)) {
352 av_log(avctx, AV_LOG_ERROR, "Found no DV profile for %ix%i %s video\n",
353 avctx->width, avctx->height, avcodec_get_pix_fmt_name(avctx->pix_fmt));
357 return dvvideo_init(avctx);
361 // #define printf(...) av_log(NULL, AV_LOG_ERROR, __VA_ARGS__)
/*
 * Decoder state of one DCT block, kept across the three decoding passes of
 * dv_decode_video_segment().
 */
363 typedef struct BlockInfo {
/* dequantisation factors, indexed by coefficient position */
364 const uint32_t *factor_table;
/* maps a coefficient position to its index in the output block */
365 const uint8_t *scan_table;
366 uint8_t pos; /* position in block */
/* inverse DCT + store routine selected for this block */
367 void (*idct_put)(uint8_t *dest, int line_size, DCTELEM *block);
/* bits of a VLC that was cut off at the end of the previous bit area;
   dv_decode_ac() prepends them on its next call */
368 uint8_t partial_bit_count;
369 uint16_t partial_bit_buffer;
373 /* bit budget for AC only in 5 MBs */
/* (100 * 4 + 68 * 2) * 5 = 2680 bits; the encoder requantises a segment
   whose estimated AC size exceeds this value */
374 static const int vs_total_ac_bits = (100 * 4 + 68*2) * 5;
375 /* see dv_88_areas and dv_248_areas for details */
/* area a covers coefficient positions mb_area_start[a] up to, but not
   including, mb_area_start[a+1] */
376 static const int mb_area_start[5] = { 1, 6, 21, 43, 64 };
/* Number of bits that can still be written to s: the buffer capacity in
   bits minus the bits already written. */
378 static inline int put_bits_left(PutBitContext* s)
380 return (s->buf_end - s->buf) * 8 - put_bits_count(s);
/*
 * Decode AC coefficients of one block from gb into block.
 *
 * gb    - bit reader limited to the bit area currently being consumed
 * mb    - per-block decoder state (scan/factor tables, position, and any
 *         partial codeword saved by a previous call)
 * block - destination coefficients, written through mb->scan_table
 *
 * A codeword that would extend past the end of gb is not consumed: its
 * available bits are saved in mb->partial_bit_buffer/partial_bit_count so
 * that a later call with another bit area can complete it.
 */
383 /* decode ac coefficients */
384 static void dv_decode_ac(GetBitContext *gb, BlockInfo *mb, DCTELEM *block)
386 int last_index = gb->size_in_bits;
387 const uint8_t *scan_table = mb->scan_table;
388 const uint32_t *factor_table = mb->factor_table;
390 int partial_bit_count = mb->partial_bit_count;
391 int level, run, vlc_len, index;
394 UPDATE_CACHE(re, gb);
396 /* if we must parse a partial vlc, we do it here */
397 if (partial_bit_count > 0) {
/* put the saved bits in front of the freshly read cache and rewind the
   read index accordingly */
398 re_cache = ((unsigned)re_cache >> partial_bit_count) |
399 (mb->partial_bit_buffer << (sizeof(re_cache) * 8 - partial_bit_count));
400 re_index -= partial_bit_count;
401 mb->partial_bit_count = 0;
404 /* get the AC coefficients until last_index is reached */
407 printf("%2d: bits=%04x index=%d\n", pos, SHOW_UBITS(re, gb, 16), re_index);
409 /* our own optimized GET_RL_VLC */
410 index = NEG_USR32(re_cache, TEX_VLC_BITS);
411 vlc_len = dv_rl_vlc[index].len;
/* second-level lookup for codes longer than TEX_VLC_BITS */
413 index = NEG_USR32((unsigned)re_cache << TEX_VLC_BITS, -vlc_len) + dv_rl_vlc[index].level;
414 vlc_len = TEX_VLC_BITS - vlc_len;
416 level = dv_rl_vlc[index].level;
417 run = dv_rl_vlc[index].run;
419 /* gotta check if we're still within gb boundaries */
420 if (re_index + vlc_len > last_index) {
421 /* should be < 16 bits otherwise a codeword could have been parsed */
422 mb->partial_bit_count = last_index - re_index;
423 mb->partial_bit_buffer = NEG_USR32(re_cache, mb->partial_bit_count);
424 re_index = last_index;
430 printf("run=%d level=%d\n", run, level);
/* dequantise: scale by the per-position factor and round away the
   dv_iweight_bits fractional bits */
436 level = (level * factor_table[pos] + (1 << (dv_iweight_bits - 1))) >> dv_iweight_bits;
437 block[scan_table[pos]] = level;
439 UPDATE_CACHE(re, gb);
441 CLOSE_READER(re, gb);
/* Append every bit still unread in gb to pb: first in chunks of
   MIN_CACHE_BITS, then the remaining tail. */
445 static inline void bit_copy(PutBitContext *pb, GetBitContext *gb)
447 int bits_left = get_bits_left(gb);
448 while (bits_left >= MIN_CACHE_BITS) {
449 put_bits(pb, MIN_CACHE_BITS, get_bits(gb, MIN_CACHE_BITS));
450 bits_left -= MIN_CACHE_BITS;
453 put_bits(pb, bits_left, get_bits(gb, bits_left));
/*
 * Unpack the coordinates of macroblock m of work_chunk: the low 8 bits of
 * the table entry go to *mb_x, the remaining bits to *mb_y. For 720-line
 * profiles whose frame has (s->buf[1] & 0x0C) == 0, *mb_y is additionally
 * remapped: 18 is subtracted when it is above 17, otherwise 72 is added.
 */
457 static inline void dv_calculate_mb_xy(DVVideoContext *s, DVwork_chunk *work_chunk, int m, int *mb_x, int *mb_y)
459 *mb_x = work_chunk->mb_coordinates[m] & 0xff;
460 *mb_y = work_chunk->mb_coordinates[m] >> 8;
462 /* We work with 720p frames split in half. The odd half-frame (chan==2,3) is displaced :-( */
463 if (s->sys->height == 720 && !(s->buf[1]&0x0C)) {
464 *mb_y -= (*mb_y>17)?18:-72; /* shifting the Y coordinate down by 72/2 macro blocks */
/*
 * Decode one video segment (five macroblocks). Run as a job of
 * avctx->execute(); arg points to the DVwork_chunk to process.
 *
 * Pass 1 reads the DC and AC coefficients from each block's own bit area and
 * gathers the bits a block left unused; pass 2 lets unfinished blocks of a
 * macroblock continue from that macroblock's leftover bits; pass 3 does the
 * same with the bits left over in the whole segment. Finally every block is
 * inverse transformed and stored into s->picture.
 */
468 /* mb_x and mb_y are in units of 8 pixels */
469 static int dv_decode_video_segment(AVCodecContext *avctx, void *arg)
471 DVVideoContext *s = avctx->priv_data;
472 DVwork_chunk *work_chunk = arg;
473 int quant, dc, dct_mode, class1, j;
474 int mb_index, mb_x, mb_y, last_index;
475 int y_stride, linesize;
476 DCTELEM *block, *block1;
479 const uint8_t *buf_ptr;
480 PutBitContext pb, vs_pb;
482 BlockInfo mb_data[5 * DV_MAX_BPM], *mb, *mb1;
483 LOCAL_ALIGNED_16(DCTELEM, sblock, [5*DV_MAX_BPM], [64]);
484 LOCAL_ALIGNED_16(uint8_t, mb_bit_buffer, [80 + 4]); /* allow some slack */
485 LOCAL_ALIGNED_16(uint8_t, vs_bit_buffer, [5 * 80 + 4]); /* allow some slack */
486 const int log2_blocksize = 3-s->avctx->lowres;
487 int is_field_mode[5];
/* NOTE(review): the (int) casts truncate pointers on 64-bit targets; only
   the low three bits are tested, but uintptr_t would express this cleanly */
489 assert((((int)mb_bit_buffer) & 7) == 0);
490 assert((((int)vs_bit_buffer) & 7) == 0);
492 memset(sblock, 0, 5*DV_MAX_BPM*sizeof(*sblock));
494 /* pass 1 : read DC and AC coefficients in blocks */
495 buf_ptr = &s->buf[work_chunk->buf_offset*80];
496 block1 = &sblock[0][0];
498 init_put_bits(&vs_pb, vs_bit_buffer, 5 * 80);
499 for (mb_index = 0; mb_index < 5; mb_index++, mb1 += s->sys->bpm, block1 += s->sys->bpm * 64) {
501 quant = buf_ptr[3] & 0x0f;
503 init_put_bits(&pb, mb_bit_buffer, 80);
506 is_field_mode[mb_index] = 0;
507 for (j = 0; j < s->sys->bpm; j++) {
508 last_index = s->sys->block_sizes[j];
509 init_get_bits(&gb, buf_ptr, last_index);
512 dc = get_sbits(&gb, 9);
513 dct_mode = get_bits1(&gb);
514 class1 = get_bits(&gb, 2);
515 if (DV_PROFILE_IS_HD(s->sys)) {
516 mb->idct_put = s->idct_put[0];
517 mb->scan_table = s->dv_zigzag[0];
518 mb->factor_table = &s->sys->idct_factor[(j >= 4)*4*16*64 + class1*16*64 + quant*64];
519 is_field_mode[mb_index] |= !j && dct_mode;
521 mb->idct_put = s->idct_put[dct_mode && log2_blocksize == 3];
522 mb->scan_table = s->dv_zigzag[dct_mode];
523 mb->factor_table = &s->sys->idct_factor[(class1 == 3)*2*22*64 + dct_mode*22*64 +
524 (quant + dv_quant_offset[class1])*64];
527 /* convert to unsigned because 128 is not added in the
531 buf_ptr += last_index >> 3;
533 mb->partial_bit_count = 0;
536 printf("MB block: %d, %d ", mb_index, j);
538 dv_decode_ac(&gb, mb, block);
540 /* write the remaining bits in a new buffer only if the
549 /* pass 2 : we can do it just after */
551 printf("***pass 2 size=%d MB#=%d\n", put_bits_count(&pb), mb_index);
555 init_get_bits(&gb, mb_bit_buffer, put_bits_count(&pb));
557 for (j = 0; j < s->sys->bpm; j++, block += 64, mb++) {
558 if (mb->pos < 64 && get_bits_left(&gb) > 0) {
559 dv_decode_ac(&gb, mb, block);
560 /* if still not finished, no need to parse other blocks */
565 /* all blocks are finished, so the extra bytes can be used at
566 the video segment level */
567 if (j >= s->sys->bpm)
568 bit_copy(&vs_pb, &gb);
571 /* we need a pass over the whole video segment */
573 printf("***pass 3 size=%d\n", put_bits_count(&vs_pb));
575 block = &sblock[0][0];
577 init_get_bits(&gb, vs_bit_buffer, put_bits_count(&vs_pb));
578 flush_put_bits(&vs_pb);
579 for (mb_index = 0; mb_index < 5; mb_index++) {
580 for (j = 0; j < s->sys->bpm; j++) {
583 printf("start %d:%d\n", mb_index, j);
585 dv_decode_ac(&gb, mb, block);
587 if (mb->pos >= 64 && mb->pos < 127)
588 av_log(avctx, AV_LOG_ERROR, "AC EOB marker is absent pos=%d\n", mb->pos);
594 /* compute idct and place blocks */
595 block = &sblock[0][0];
597 for (mb_index = 0; mb_index < 5; mb_index++) {
598 dv_calculate_mb_xy(s, work_chunk, mb_index, &mb_x, &mb_y);
600 /* idct_put'ting luminance */
601 if ((s->sys->pix_fmt == PIX_FMT_YUV420P) ||
602 (s->sys->pix_fmt == PIX_FMT_YUV411P && mb_x >= (704 / 8)) ||
603 (s->sys->height >= 720 && mb_y != 134)) {
604 y_stride = (s->picture.linesize[0] << ((!is_field_mode[mb_index]) * log2_blocksize));
606 y_stride = (2 << log2_blocksize);
608 y_ptr = s->picture.data[0] + ((mb_y * s->picture.linesize[0] + mb_x) << log2_blocksize);
609 linesize = s->picture.linesize[0] << is_field_mode[mb_index];
610 mb[0] .idct_put(y_ptr , linesize, block + 0*64);
611 if (s->sys->video_stype == 4) { /* SD 422 */
612 mb[2].idct_put(y_ptr + (1 << log2_blocksize) , linesize, block + 2*64);
614 mb[1].idct_put(y_ptr + (1 << log2_blocksize) , linesize, block + 1*64);
615 mb[2].idct_put(y_ptr + y_stride, linesize, block + 2*64);
616 mb[3].idct_put(y_ptr + (1 << log2_blocksize) + y_stride, linesize, block + 3*64);
621 /* idct_put'ting chrominance */
622 c_offset = (((mb_y >> (s->sys->pix_fmt == PIX_FMT_YUV420P)) * s->picture.linesize[1] +
623 (mb_x >> ((s->sys->pix_fmt == PIX_FMT_YUV411P) ? 2 : 1))) << log2_blocksize);
624 for (j = 2; j; j--) {
625 uint8_t *c_ptr = s->picture.data[j] + c_offset;
626 if (s->sys->pix_fmt == PIX_FMT_YUV411P && mb_x >= (704 / 8)) {
627 uint64_t aligned_pixels[64/8];
628 uint8_t *pixels = (uint8_t*)aligned_pixels;
629 uint8_t *c_ptr1, *ptr1;
631 mb->idct_put(pixels, 8, block);
632 for (y = 0; y < (1 << log2_blocksize); y++, c_ptr += s->picture.linesize[j], pixels += 8) {
633 ptr1 = pixels + (1 << (log2_blocksize - 1));
634 c_ptr1 = c_ptr + (s->picture.linesize[j] << log2_blocksize);
635 for (x = 0; x < (1 << (log2_blocksize - 1)); x++) {
636 c_ptr[x] = pixels[x];
642 y_stride = (mb_y == 134) ? (1 << log2_blocksize) :
643 s->picture.linesize[j] << ((!is_field_mode[mb_index]) * log2_blocksize);
644 linesize = s->picture.linesize[j] << is_field_mode[mb_index];
645 (mb++)-> idct_put(c_ptr , linesize, block); block += 64;
646 if (s->sys->bpm == 8) {
647 (mb++)->idct_put(c_ptr + y_stride, linesize, block); block += 64;
/*
 * Variant that also handles run/level pairs outside the dv_vlc_map ranges.
 * Pairs inside the map are looked up directly. Otherwise the level is coded
 * on its own (map entry for run 0, or the escape 0xfe00 | level << 1) and
 * the run is coded by a separate code placed above it (map entry
 * [run-1][0] for runs below 16, else the 13-bit escape 0x1f80 | (run - 1)).
 * The sign bit is OR-ed into the lowest bit of the level code.
 * NOTE(review): a second definition with the same name follows below;
 * presumably a preprocessor conditional selects one of them -- confirm.
 */
656 /* Converts run and level (where level != 0) pair into vlc, returning bit size */
657 static av_always_inline int dv_rl2vlc(int run, int level, int sign, uint32_t* vlc)
660 if (run < DV_VLC_MAP_RUN_SIZE && level < DV_VLC_MAP_LEV_SIZE) {
661 *vlc = dv_vlc_map[run][level].vlc | sign;
662 size = dv_vlc_map[run][level].size;
665 if (level < DV_VLC_MAP_LEV_SIZE) {
666 *vlc = dv_vlc_map[0][level].vlc | sign;
667 size = dv_vlc_map[0][level].size;
669 *vlc = 0xfe00 | (level << 1) | sign;
673 *vlc |= ((run < 16) ? dv_vlc_map[run-1][0].vlc :
674 (0x1f80 | (run - 1))) << size;
675 size += (run < 16) ? dv_vlc_map[run-1][0].size : 13;
/*
 * Bit size of the code dv_rl2vlc() would produce for (run, level), without
 * building the code: a direct map lookup when both values are in range,
 * otherwise the size of the level code (16 bits for the escape) plus the
 * size of the separate run code (13 bits for the escape).
 */
682 static av_always_inline int dv_rl2vlc_size(int run, int level)
686 if (run < DV_VLC_MAP_RUN_SIZE && level < DV_VLC_MAP_LEV_SIZE) {
687 size = dv_vlc_map[run][level].size;
690 size = (level < DV_VLC_MAP_LEV_SIZE) ? dv_vlc_map[0][level].size : 16;
692 size += (run < 16) ? dv_vlc_map[run-1][0].size : 13;
/*
 * Direct-lookup variant: stores the code for (run, l) with the sign bit
 * OR-ed in and returns its bit size. It performs no range check, so
 * dv_vlc_map must cover every (run, l) pair passed in.
 */
698 static av_always_inline int dv_rl2vlc(int run, int l, int sign, uint32_t* vlc)
700 *vlc = dv_vlc_map[run][l].vlc | sign;
701 return dv_vlc_map[run][l].size;
/* Direct-lookup variant: bit size of the code for (run, l), read straight
   from dv_vlc_map without any range check. */
704 static av_always_inline int dv_rl2vlc_size(int run, int l)
706 return dv_vlc_map[run][l].size;
/* Encoder state of one DCT block; filled by dv_init_enc_block() and consumed
   by dv_guess_qnos() and dv_encode_ac(). */
710 typedef struct EncBlockInfo {
/* bits of a VLC that did not fit into the available bit areas yet;
   dv_encode_ac() writes them first on its next call */
720 uint8_t partial_bit_count;
721 uint32_t partial_bit_buffer; /* we can't use uint16_t here */
/*
 * Write the AC coefficients of block bi into a pool of bit writers.
 *
 * bi      - block to encode; any pending partial code stored in it is
 *           written first
 * pb_pool - first bit writer to use
 * pb_end  - one past the last bit writer of the pool
 *
 * A code that does not fit into the current writer is split: the bits that
 * fit are written and the rest continues in the next writer. When the pool
 * is exhausted, the unwritten remainder is stored back into
 * bi->partial_bit_count/partial_bit_buffer. Callers use the returned
 * pointer as the writer to continue with.
 */
724 static av_always_inline PutBitContext* dv_encode_ac(EncBlockInfo* bi,
725 PutBitContext* pb_pool,
726 PutBitContext* pb_end)
729 PutBitContext* pb = pb_pool;
730 int size = bi->partial_bit_count;
731 uint32_t vlc = bi->partial_bit_buffer;
733 bi->partial_bit_count = bi->partial_bit_buffer = 0;
735 /* Find suitable storage space */
736 for (; size > (bits_left = put_bits_left(pb)); pb++) {
/* write the high bits that still fit and keep the low ones for later */
739 put_bits(pb, bits_left, vlc >> size);
740 vlc = vlc & ((1 << size) - 1);
742 if (pb + 1 >= pb_end) {
743 bi->partial_bit_count = size;
744 bi->partial_bit_buffer = vlc;
750 put_bits(pb, size, vlc);
752 if (bi->cur_ac >= 64)
755 /* Construct the next VLC */
757 bi->cur_ac = bi->next[prev];
758 if (bi->cur_ac < 64){
759 size = dv_rl2vlc(bi->cur_ac - prev - 1, bi->mb[bi->cur_ac], bi->sign[bi->cur_ac], &vlc);
761 size = 4; vlc = 6; /* End Of Block stamp */
/*
 * Choose the DCT mode for the 8x8 block at data. Only when
 * CODEC_FLAG_INTERLACED_DCT is set are two scores computed with
 * s->ildct_cmp: ps for the block as a whole (8 rows, reduced by 400) and is
 * as the sum over the two sets of alternating rows (4 rows each, doubled
 * line size). The result is used to index s->fdct[].
 */
767 static av_always_inline int dv_guess_dct_mode(DVVideoContext *s, uint8_t *data, int linesize) {
768 if (s->avctx->flags & CODEC_FLAG_INTERLACED_DCT) {
769 int ps = s->ildct_cmp(NULL, data, NULL, linesize, 8) - 400;
771 int is = s->ildct_cmp(NULL, data , NULL, linesize<<1, 4) +
772 s->ildct_cmp(NULL, data + linesize, NULL, linesize<<1, 4);
/*
 * Prepare the encoder state of one 8x8 block: pick the DCT mode, run the
 * forward DCT, weight the AC coefficients, assign the class number and
 * compute the bit size of each of the four coefficient areas.
 *
 * bi       - block state to fill
 * data     - top-left pixel of the block; callers pass NULL for the dummy
 *            blocks, which are encoded as all zeros
 * linesize - distance in bytes between two pixel rows
 * s        - codec context providing get_pixels, fdct and ildct_cmp
 * bias     - callers pass 0 for luma and 1 for chroma blocks
 *
 * Returns the sum of the four per-area bit sizes.
 */
780 static av_always_inline int dv_init_enc_block(EncBlockInfo* bi, uint8_t *data, int linesize, DVVideoContext *s, int bias)
783 const uint8_t* zigzag_scan;
784 LOCAL_ALIGNED_16(DCTELEM, blk, [64]);
786 /* We offer two different methods for class number assignment: the
787 method suggested in SMPTE 314M Table 22, and an improved
788 method. The SMPTE method is very conservative; it assigns class
789 3 (i.e. severe quantization) to any block where the largest AC
790 component is greater than 36. FFmpeg's DV encoder tracks AC bit
791 consumption precisely, so there is no need to bias most blocks
792 towards strongly lossy compression. Instead, we assign class 2
793 to most blocks, and use class 3 only when strictly necessary
794 (for blocks whose largest AC component exceeds 255). */
796 #if 0 /* SMPTE spec method */
797 static const int classes[] = {12, 24, 36, 0xffff};
798 #else /* improved FFmpeg method */
799 static const int classes[] = {-1, -1, 255, 0xffff};
801 int max = classes[0];
804 assert((((int)blk) & 15) == 0);
806 bi->area_q[0] = bi->area_q[1] = bi->area_q[2] = bi->area_q[3] = 0;
807 bi->partial_bit_count = 0;
808 bi->partial_bit_buffer = 0;
811 bi->dct_mode = dv_guess_dct_mode(s, data, linesize);
812 s->get_pixels(blk, data, linesize);
813 s->fdct[bi->dct_mode](blk);
815 /* We rely on the fact that encoding all zeros leads to an immediate EOB,
816 which is precisely what the spec calls for in the "dummy" blocks. */
817 memset(blk, 0, 64*sizeof(*blk));
822 zigzag_scan = bi->dct_mode ? ff_zigzag248_direct : ff_zigzag_direct;
823 weight = bi->dct_mode ? dv_weight_248 : dv_weight_88;
825 for (area = 0; area < 4; area++) {
826 bi->prev[area] = prev;
827 bi->bit_size[area] = 1; // 4 areas 4 bits for EOB :)
828 for (i = mb_area_start[area]; i < mb_area_start[area+1]; i++) {
829 int level = blk[zigzag_scan[i]];
/* single unsigned comparison: true only when level is outside [-15, 15] */
831 if (level + 15 > 30U) {
832 bi->sign[i] = (level >> 31) & 1;
833 /* weigh it and shift down into range, adding for rounding */
834 /* the extra division by a factor of 2^4 reverses the 8x expansion of the DCT
835 AND the 2x doubling of the weights */
836 level = (FFABS(level) * weight[i] + (1 << (dv_weight_bits+3))) >> (dv_weight_bits+4);
840 bi->bit_size[area] += dv_rl2vlc_size(i - prev - 1, level);
/* class number: first entry of classes[] that is not below max */
847 for (bi->cno = 0; max > classes[bi->cno]; bi->cno++);
855 for (area = 0; area < 4; area++) {
856 bi->prev[area] = prev;
857 bi->bit_size[area] = 1; // 4 areas 4 bits for EOB :)
858 for (; i < mb_area_start[area+1]; i = bi->next[i]) {
862 bi->bit_size[area] += dv_rl2vlc_size(i - prev - 1, bi->mb[i]);
871 return bi->bit_size[0] + bi->bit_size[1] + bi->bit_size[2] + bi->bit_size[3];
/*
 * Find quantiser numbers for the five macroblocks of a video segment so
 * that the AC data fits into vs_total_ac_bits.
 *
 * blks - encoder blocks of the segment; their coefficient lists and
 *        per-area bit sizes are updated in place
 * qnos - in/out: quantiser number of each of the five macroblocks
 *
 * The first loop re-evaluates the area sizes for the current quantiser
 * numbers and stops once the segment fits or all qnos have reached zero.
 * If that is not enough, the second loop unlinks every coefficient whose
 * magnitude is below a threshold that doubles each round (2, 4, 8, ...)
 * until the total fits.
 */
874 static inline void dv_guess_qnos(EncBlockInfo* blks, int* qnos)
877 int i, j, k, a, prev, a2;
880 size[0] = size[1] = size[2] = size[3] = size[4] = 1 << 24;
883 for (i = 0; i < 5; i++) {
889 for (j = 0; j < 6; j++, b++) {
890 for (a = 0; a < 4; a++) {
/* the area size is recomputed only when its quantiser shift changed */
891 if (b->area_q[a] != dv_quant_shifts[qnos[i] + dv_quant_offset[b->cno]][a]) {
892 b->bit_size[a] = 1; // 4 areas 4 bits for EOB :)
895 assert(b->next[prev] >= mb_area_start[a+1] || b->mb[prev]);
896 for (k = b->next[prev] ; k < mb_area_start[a+1]; k = b->next[k]) {
899 b->bit_size[a] += dv_rl2vlc_size(k - prev - 1, b->mb[k]);
/* a dropped coefficient lengthens the run of the next one, which may
   lie in a later area; correct that area's size accordingly */
902 if (b->next[k] >= mb_area_start[a+1] && b->next[k]<64){
903 for (a2 = a + 1; b->next[k] >= mb_area_start[a2+1]; a2++)
906 assert(b->mb[b->next[k]]);
907 b->bit_size[a2] += dv_rl2vlc_size(b->next[k] - prev - 1, b->mb[b->next[k]])
908 -dv_rl2vlc_size(b->next[k] - k - 1, b->mb[b->next[k]]);
909 assert(b->prev[a2] == k && (a2 + 1 >= 4 || b->prev[a2+1] != k));
912 b->next[prev] = b->next[k];
917 size[i] += b->bit_size[a];
920 if (vs_total_ac_bits >= size[0] + size[1] + size[2] + size[3] + size[4])
923 } while (qnos[0]|qnos[1]|qnos[2]|qnos[3]|qnos[4]);
/* last resort: drop coefficients with magnitude below a */
926 for (a = 2; a == 2 || vs_total_ac_bits < size[0]; a += a){
928 size[0] = 5 * 6 * 4; //EOB
929 for (j = 0; j < 6 *5; j++, b++) {
931 for (k = b->next[prev]; k < 64; k = b->next[k]) {
932 if (b->mb[k] < a && b->mb[k] > -a){
933 b->next[prev] = b->next[k];
935 size[0] += dv_rl2vlc_size(k - prev - 1, b->mb[k]);
/*
 * Encode one video segment (five macroblocks). Run as a job of
 * avctx->execute(); arg points to the DVwork_chunk to process.
 *
 * All luma and chroma blocks are first analysed with dv_init_enc_block().
 * If their estimated AC size exceeds vs_total_ac_bits, dv_guess_qnos()
 * lowers the quantiser numbers. The coefficients are then written in three
 * passes: each block into its own area, leftovers into free space of the
 * same macroblock, and finally into free space anywhere in the segment.
 * Unused space at the end of every area is filled with 0xff bytes.
 */
943 static int dv_encode_video_segment(AVCodecContext *avctx, void *arg)
945 DVVideoContext *s = avctx->priv_data;
946 DVwork_chunk *work_chunk = arg;
948 int mb_x, mb_y, c_offset, linesize, y_stride;
952 EncBlockInfo enc_blks[5*DV_MAX_BPM];
953 PutBitContext pbs[5*DV_MAX_BPM];
955 EncBlockInfo* enc_blk;
957 int qnos[5] = {15, 15, 15, 15, 15}; /* No quantization */
958 int* qnosp = &qnos[0];
/* start of this segment's DIF blocks (80 bytes each) in the output */
960 dif = &s->buf[work_chunk->buf_offset*80];
961 enc_blk = &enc_blks[0];
962 for (mb_index = 0; mb_index < 5; mb_index++) {
963 dv_calculate_mb_xy(s, work_chunk, mb_index, &mb_x, &mb_y);
965 /* initializing luminance blocks */
966 if ((s->sys->pix_fmt == PIX_FMT_YUV420P) ||
967 (s->sys->pix_fmt == PIX_FMT_YUV411P && mb_x >= (704 / 8)) ||
968 (s->sys->height >= 720 && mb_y != 134)) {
969 y_stride = s->picture.linesize[0] << 3;
973 y_ptr = s->picture.data[0] + ((mb_y * s->picture.linesize[0] + mb_x) << 3);
974 linesize = s->picture.linesize[0];
976 if (s->sys->video_stype == 4) { /* SD 422 */
978 dv_init_enc_block(enc_blk+0, y_ptr , linesize, s, 0) +
979 dv_init_enc_block(enc_blk+1, NULL , linesize, s, 0) +
980 dv_init_enc_block(enc_blk+2, y_ptr + 8 , linesize, s, 0) +
981 dv_init_enc_block(enc_blk+3, NULL , linesize, s, 0);
984 dv_init_enc_block(enc_blk+0, y_ptr , linesize, s, 0) +
985 dv_init_enc_block(enc_blk+1, y_ptr + 8 , linesize, s, 0) +
986 dv_init_enc_block(enc_blk+2, y_ptr + y_stride, linesize, s, 0) +
987 dv_init_enc_block(enc_blk+3, y_ptr + 8 + y_stride, linesize, s, 0);
991 /* initializing chrominance blocks */
992 c_offset = (((mb_y >> (s->sys->pix_fmt == PIX_FMT_YUV420P)) * s->picture.linesize[1] +
993 (mb_x >> ((s->sys->pix_fmt == PIX_FMT_YUV411P) ? 2 : 1))) << 3);
994 for (j = 2; j; j--) {
995 uint8_t *c_ptr = s->picture.data[j] + c_offset;
996 linesize = s->picture.linesize[j];
997 y_stride = (mb_y == 134) ? 8 : (s->picture.linesize[j] << 3);
998 if (s->sys->pix_fmt == PIX_FMT_YUV411P && mb_x >= (704 / 8)) {
/* gather two 4-pixel wide strips, eight rows apart, into one 8-wide block */
1000 uint8_t* b = scratch;
1001 for (i = 0; i < 8; i++) {
1002 d = c_ptr + (linesize << 3);
1003 b[0] = c_ptr[0]; b[1] = c_ptr[1]; b[2] = c_ptr[2]; b[3] = c_ptr[3];
1004 b[4] = d[0]; b[5] = d[1]; b[6] = d[2]; b[7] = d[3];
1012 vs_bit_size += dv_init_enc_block( enc_blk++, c_ptr , linesize, s, 1);
1013 if (s->sys->bpm == 8) {
1014 vs_bit_size += dv_init_enc_block(enc_blk++, c_ptr + y_stride, linesize, s, 1);
1019 if (vs_total_ac_bits < vs_bit_size)
1020 dv_guess_qnos(&enc_blks[0], qnosp);
1022 /* DIF encoding process */
1023 for (j=0; j<5*s->sys->bpm;) {
1029 /* First pass over individual cells only */
1030 for (i=0; i<s->sys->bpm; i++, j++) {
1031 int sz = s->sys->block_sizes[i]>>3;
1033 init_put_bits(&pbs[j], dif, sz);
/* block header: 9-bit DC value, 1-bit DCT mode, 2-bit class number */
1034 put_sbits(&pbs[j], 9, ((enc_blks[j].mb[0] >> 3) - 1024 + 2) >> 2);
1035 put_bits(&pbs[j], 1, enc_blks[j].dct_mode);
1036 put_bits(&pbs[j], 2, enc_blks[j].cno);
1038 dv_encode_ac(&enc_blks[j], &pbs[j], &pbs[j+1]);
1042 /* Second pass over each MB space */
1043 pb = &pbs[start_mb];
1044 for (i=0; i<s->sys->bpm; i++) {
1045 if (enc_blks[start_mb+i].partial_bit_count)
1046 pb = dv_encode_ac(&enc_blks[start_mb+i], pb, &pbs[start_mb+s->sys->bpm]);
1050 /* Third and final pass over the whole video segment space */
1052 for (j=0; j<5*s->sys->bpm; j++) {
1053 if (enc_blks[j].partial_bit_count)
1054 pb = dv_encode_ac(&enc_blks[j], pb, &pbs[s->sys->bpm*5]);
1055 if (enc_blks[j].partial_bit_count)
1056 av_log(avctx, AV_LOG_ERROR, "ac bitstream overflow\n");
/* flush every bit writer and pad the rest of its area with 0xff */
1059 for (j=0; j<5*s->sys->bpm; j++) {
1061 int size = pbs[j].size_in_bits >> 3;
1062 flush_put_bits(&pbs[j]);
1063 pos = put_bits_count(&pbs[j]) >> 3;
1065 av_log(avctx, AV_LOG_ERROR, "bitstream written beyond buffer size\n");
1068 memset(pbs[j].buf + pos, 0xff, size - pos);
1074 #if CONFIG_DVVIDEO_DECODER
1075 /* NOTE: exactly one frame must be given (120000 bytes for NTSC,
1076 144000 bytes for PAL - or twice those for 50Mbps) */
/*
 * Decode callback. Detects the DV profile from the packet, rejects packets
 * shorter than one frame of that profile, obtains a picture buffer and
 * decodes all video segments through avctx->execute().
 *
 * data      - receives a copy of the decoded AVFrame
 * data_size - set to sizeof(AVFrame)
 * avpkt     - input packet (data and size are read)
 *
 * Returns the profile's frame size in bytes, or -1 when no usable profile
 * is found.
 */
1077 static int dvvideo_decode_frame(AVCodecContext *avctx,
1078 void *data, int *data_size,
1081 const uint8_t *buf = avpkt->data;
1082 int buf_size = avpkt->size;
1083 DVVideoContext *s = avctx->priv_data;
1085 s->sys = ff_dv_frame_profile(s->sys, buf, buf_size);
1086 if (!s->sys || buf_size < s->sys->frame_size || dv_init_dynamic_tables(s->sys)) {
1087 av_log(avctx, AV_LOG_ERROR, "could not find dv frame profile\n");
1088 return -1; /* NOTE: we only accept several full frames */
/* release the picture of the previous call before requesting a new one */
1091 if (s->picture.data[0])
1092 avctx->release_buffer(avctx, &s->picture);
1094 s->picture.reference = 0;
1095 s->picture.key_frame = 1;
1096 s->picture.pict_type = FF_I_TYPE;
1097 avctx->pix_fmt = s->sys->pix_fmt;
1098 avctx->time_base = s->sys->time_base;
1099 avcodec_set_dimensions(avctx, s->sys->width, s->sys->height);
1100 if (avctx->get_buffer(avctx, &s->picture) < 0) {
1101 av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
1104 s->picture.interlaced_frame = 1;
1105 s->picture.top_field_first = 0;
/* one job per work chunk (video segment) */
1108 avctx->execute(avctx, dv_decode_video_segment, s->sys->work_chunks, NULL,
1109 dv_work_pool_size(s->sys), sizeof(DVwork_chunk));
1114 *data_size = sizeof(AVFrame);
1115 *(AVFrame*)data = s->picture;
1117 return s->sys->frame_size;
1119 #endif /* CONFIG_DVVIDEO_DECODER */
/*
 * Write one pack into buf: buf[0] receives pack_id and buf[1..4] the payload
 * for that pack type (header pack, video source pack or video control
 * pack); the final assignment fills buf[1..4] with 0xff.
 *
 * pack_id - type of pack to write
 * c       - codec context; the profile (pix_fmt, dsf, video_stype) and the
 *           sample aspect ratio of c->avctx are consulted
 *
 * NOTE(review): callers advance their output pointer by the return value,
 * presumably the pack size of 5 bytes (buf[0..4] are written) -- confirm.
 */
1122 static inline int dv_write_pack(enum dv_pack_type pack_id, DVVideoContext *c,
1126 * Here's what SMPTE314M says about these two:
1127 * (page 6) APTn, AP1n, AP2n, AP3n: These data shall be identical
1128 * as track application IDs (APTn = 001, AP1n =
1129 * 001, AP2n = 001, AP3n = 001), if the source signal
1130 * comes from a digital VCR. If the signal source is
1131 * unknown, all bits for these data shall be set to 1.
1132 * (page 12) STYPE: STYPE defines a signal type of video signal
1133 * 00000b = 4:1:1 compression
1134 * 00100b = 4:2:2 compression
1136 * Now, I've got two problems with these statements:
1137 * 1. it looks like APT == 111b should be a safe bet, but it isn't.
1138 * It seems that for PAL as defined in IEC 61834 we have to set
1139 * APT to 000 and for SMPTE314M to 001.
1140 * 2. It is not at all clear what STYPE is used for 4:2:0 PAL
1141 * compression scheme (if any).
1143 int apt = (c->sys->pix_fmt == PIX_FMT_YUV420P ? 0 : 1);
1146 if ((int)(av_q2d(c->avctx->sample_aspect_ratio) * c->avctx->width / c->avctx->height * 10) >= 17) /* 16:9 */
1149 buf[0] = (uint8_t)pack_id;
1151 case dv_header525: /* I can't imagine why these two weren't defined as real */
1152 case dv_header625: /* packs in SMPTE314M -- they definitely look like ones */
1153 buf[1] = 0xf8 | /* reserved -- always 1 */
1154 (apt & 0x07); /* APT: Track application ID */
1155 buf[2] = (0 << 7) | /* TF1: audio data is 0 - valid; 1 - invalid */
1156 (0x0f << 3) | /* reserved -- always 1 */
1157 (apt & 0x07); /* AP1: Audio application ID */
1158 buf[3] = (0 << 7) | /* TF2: video data is 0 - valid; 1 - invalid */
1159 (0x0f << 3) | /* reserved -- always 1 */
1160 (apt & 0x07); /* AP2: Video application ID */
1161 buf[4] = (0 << 7) | /* TF3: subcode(SSYB) is 0 - valid; 1 - invalid */
1162 (0x0f << 3) | /* reserved -- always 1 */
1163 (apt & 0x07); /* AP3: Subcode application ID */
1165 case dv_video_source:
1166 buf[1] = 0xff; /* reserved -- always 1 */
1167 buf[2] = (1 << 7) | /* B/W: 0 - b/w, 1 - color */
1168 (1 << 6) | /* following CLF is valid - 0, invalid - 1 */
1169 (3 << 4) | /* CLF: color frames ID (see ITU-R BT.470-4) */
1170 0xf; /* reserved -- always 1 */
1171 buf[3] = (3 << 6) | /* reserved -- always 1 */
1172 (c->sys->dsf << 5) | /* system: 60fields/50fields */
1173 c->sys->video_stype; /* signal type video compression */
1174 buf[4] = 0xff; /* VISC: 0xff -- no information */
1176 case dv_video_control:
1177 buf[1] = (0 << 6) | /* Copy generation management (CGMS) 0 -- free */
1178 0x3f; /* reserved -- always 1 */
1179 buf[2] = 0xc8 | /* reserved -- always b11001xxx */
1181 buf[3] = (1 << 7) | /* frame/field flag 1 -- frame, 0 -- field */
1182 (1 << 6) | /* first/second field flag 0 -- field 2, 1 -- field 1 */
1183 (1 << 5) | /* frame change flag 0 -- same picture as before, 1 -- different */
1184 (1 << 4) | /* 1 - interlaced, 0 - noninterlaced */
1185 0xc; /* reserved -- always b1100 */
1186 buf[4] = 0xff; /* reserved -- always 1 */
1189 buf[1] = buf[2] = buf[3] = buf[4] = 0xff;
1194 #if CONFIG_DVVIDEO_ENCODER
/*
 * Write the DIF block framing of a whole frame into buf. For every channel
 * and DIF sequence it emits the header block, two subcode blocks and three
 * VAUX blocks (six control blocks, pre-filled with 0xff), followed by the
 * IDs of the interleaved audio and video blocks. The 77 payload bytes after
 * each video block ID are skipped, not written; audio blocks are filled
 * with 0xff before their ID is written.
 */
1195 static void dv_format_frame(DVVideoContext* c, uint8_t* buf)
1199 for (chan = 0; chan < c->sys->n_difchan; chan++) {
1200 for (i = 0; i < c->sys->difseg_size; i++) {
1201 memset(buf, 0xff, 80 * 6); /* first 6 DIF blocks are for control data */
1203 /* DV header: 1DIF */
1204 buf += dv_write_dif_id(dv_sect_header, chan, i, 0, buf);
1205 buf += dv_write_pack((c->sys->dsf ? dv_header625 : dv_header525), c, buf);
1206 buf += 72; /* unused bytes */
1208 /* DV subcode: 2DIFs */
1209 for (j = 0; j < 2; j++) {
1210 buf += dv_write_dif_id(dv_sect_subcode, chan, i, j, buf);
1211 for (k = 0; k < 6; k++)
1212 buf += dv_write_ssyb_id(k, (i < c->sys->difseg_size/2), buf) + 5;
1213 buf += 29; /* unused bytes */
1216 /* DV VAUX: 3DIFS */
1217 for (j = 0; j < 3; j++) {
1218 buf += dv_write_dif_id(dv_sect_vaux, chan, i, j, buf);
1219 buf += dv_write_pack(dv_video_source, c, buf);
1220 buf += dv_write_pack(dv_video_control, c, buf);
1222 buf += dv_write_pack(dv_video_source, c, buf);
1223 buf += dv_write_pack(dv_video_control, c, buf);
1224 buf += 4*5 + 2; /* unused bytes */
1227 /* DV Audio/Video: 135 Video DIFs + 9 Audio DIFs */
1228 for (j = 0; j < 135; j++) {
1230 memset(buf, 0xff, 80);
1231 buf += dv_write_dif_id(dv_sect_audio, chan, i, j/15, buf);
1232 buf += 77; /* audio control & shuffled PCM audio */
1234 buf += dv_write_dif_id(dv_sect_video, chan, i, j, buf);
1235 buf += 77; /* 1 video macroblock: 1 bytes control
1236 4 * 14 bytes Y 8x8 data
1237 10 bytes Cr 8x8 data
1238 10 bytes Cb 8x8 data */
/*
 * Encode callback. Looks up the DV profile for the codec parameters, checks
 * that buf can hold one frame of that profile, encodes all video segments
 * through c->execute() and then writes the DIF framing with
 * dv_format_frame().
 *
 * buf/buf_size - output buffer and its capacity in bytes
 * data         - source picture, read as an AVFrame
 *
 * Returns the profile's frame size in bytes on success.
 */
1245 static int dvvideo_encode_frame(AVCodecContext *c, uint8_t *buf, int buf_size,
1248 DVVideoContext *s = c->priv_data;
1250 s->sys = ff_dv_codec_profile(c);
1251 if (!s->sys || buf_size < s->sys->frame_size || dv_init_dynamic_tables(s->sys))
1254 c->pix_fmt = s->sys->pix_fmt;
/* work on a copy of the caller's frame, marked as an intra key frame */
1255 s->picture = *((AVFrame *)data);
1256 s->picture.key_frame = 1;
1257 s->picture.pict_type = FF_I_TYPE;
/* one job per work chunk (video segment) */
1260 c->execute(c, dv_encode_video_segment, s->sys->work_chunks, NULL,
1261 dv_work_pool_size(s->sys), sizeof(DVwork_chunk));
1265 dv_format_frame(s, buf);
1267 return s->sys->frame_size;
/* Close callback: releases the picture buffer still held by the context,
   if there is one. */
1271 static int dvvideo_close(AVCodecContext *c)
1273 DVVideoContext *s = c->priv_data;
1275 if (s->picture.data[0])
1276 c->release_buffer(c, &s->picture);
1282 #if CONFIG_DVVIDEO_ENCODER
/* Codec descriptor of the DV encoder. The positional initialisers give the
   private context size, the init callback and the encode callback; the
   accepted pixel formats are 4:1:1, 4:2:2 and 4:2:0 planar YUV. */
1283 AVCodec dvvideo_encoder = {
1287 sizeof(DVVideoContext),
1288 dvvideo_init_encoder,
1289 dvvideo_encode_frame,
1290 .pix_fmts = (const enum PixelFormat[]) {PIX_FMT_YUV411P, PIX_FMT_YUV422P, PIX_FMT_YUV420P, PIX_FMT_NONE},
1291 .long_name = NULL_IF_CONFIG_SMALL("DV (Digital Video)"),
1293 #endif // CONFIG_DVVIDEO_ENCODER
1295 #if CONFIG_DVVIDEO_DECODER
1296 AVCodec dvvideo_decoder = {
1300 sizeof(DVVideoContext),
1304 dvvideo_decode_frame,
1308 .long_name = NULL_IF_CONFIG_SMALL("DV (Digital Video)"),