3 * Copyright (c) 2002 Fabrice Bellard.
4 * Copyright (c) 2004 Roman Shaposhnik.
7 * Copyright (c) 2003 Roman Shaposhnik.
9 * 50 Mbps (DVCPRO50) support
10 * Copyright (c) 2006 Daniel Maas <dmaas@maasdigital.com>
12 * Many thanks to Dan Dennedy <dan@dennedy.org> for providing wealth
13 * of DV technical info.
15 * This library is free software; you can redistribute it and/or
16 * modify it under the terms of the GNU Lesser General Public
17 * License as published by the Free Software Foundation; either
18 * version 2 of the License, or (at your option) any later version.
20 * This library is distributed in the hope that it will be useful,
21 * but WITHOUT ANY WARRANTY; without even the implied warranty of
22 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
23 * Lesser General Public License for more details.
25 * You should have received a copy of the GNU Lesser General Public
26 * License along with this library; if not, write to the Free Software
27 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
36 #include "mpegvideo.h"
37 #include "simple_idct.h"
/* State shared by the DV decoder and encoder for one codec instance.
   In the two-entry tables below, index 0 is the 8-8 DCT mode and index 1
   the 2-4-8 DCT mode (fdct[1] is set to dsp.fdct248 in dvvideo_init).
   Further members (e.g. sys, picture, buf) are used elsewhere in the file
   but their declarations are not visible in this excerpt. */
43 typedef struct DVVideoContext {
46 AVCodecContext *avctx;
/* coefficient scan order, indexed as [dct_mode][position] */
49 uint8_t dv_zigzag[2][64];
/* dequantization shifts, indexed as [class == 3][dct_mode][quantizer][coefficient] */
50 uint8_t dv_idct_shift[2][2][22][64];
/* DSP routines selected in dvvideo_init */
52 void (*get_pixels)(DCTELEM *block, const uint8_t *pixels, int line_size);
53 void (*fdct[2])(DCTELEM *block);
54 void (*idct_put[2])(uint8_t *dest, int line_size, DCTELEM *block);
57 /* MultiThreading - dv_anchor applies to the entire DV codec, not just the avcontext. */
58 /* One element is needed for each video segment in a DV frame; */
59 /* at most there are 2 DIF channels * 12 DIF sequences * 27 video segments (PAL 50Mbps). */
60 #define DV_ANCHOR_SIZE (2*12*27)
/* dv_anchor[i] stores the integer i converted to a pointer (see dvvideo_init).
   The array is handed to avctx->execute so that every job receives its own
   segment number, which dv_decode_mt / dv_encode_mt convert back to an int. */
62 static void* dv_anchor[DV_ANCHOR_SIZE];
/* Width in bits of the first-level VLC lookup: passed to init_vlc in
   dvvideo_init and used to index dv_rl_vlc in dv_decode_ac. */
64 #define TEX_VLC_BITS 9
/* Dimensions of the encoder lookup table dv_vlc_map ([run][level]).
   The tiny target uses a small table; the other variant is the full table
   (the preprocessor lines separating the two variants are not visible here). */
66 #ifdef DV_CODEC_TINY_TARGET
67 #define DV_VLC_MAP_RUN_SIZE 15
68 #define DV_VLC_MAP_LEV_SIZE 23
70 #define DV_VLC_MAP_RUN_SIZE 64
71 #define DV_VLC_MAP_LEV_SIZE 512 //FIXME sign was removed so this should be /2 but needs check
/* Decoder lookup table (len, level, run per entry); allocated and filled in
   dvvideo_init. */
74 /* XXX: also include quantization */
75 static RL_VLC_ELEM *dv_rl_vlc;
76 /* VLC encoding lookup table, indexed as dv_vlc_map[run][level]; allocated in dvvideo_init */
77 static struct dv_vlc_pair {
80 } (*dv_vlc_map)[DV_VLC_MAP_LEV_SIZE] = NULL;
/* Fills s->dv_idct_shift for each of the 22 quantizer indices.
   For the 8-8 mode ([..][0]) the shift is looked up in dv_quant_shifts via
   dv_88_areas, for the 2-4-8 mode ([..][1]) via dv_248_areas. In both cases
   the [0] table holds (shift + 1) and the [1] table one more than the [0]
   table. Both inner loops start at 1, so coefficient 0 is not written here.
   perm is passed dsp.idct_permutation by dvvideo_init; it is presumably used
   to compute the index j of the 8-8 table (that assignment is not visible in
   this excerpt) -- TODO confirm. */
82 static void dv_build_unquantize_tables(DVVideoContext *s, uint8_t* perm)
86 /* NOTE: max left shift is 6 */
87 for(q = 0; q < 22; q++) {
/* 8-8 DCT mode */
89 for(i = 1; i < 64; i++) {
92 s->dv_idct_shift[0][0][q][j] =
93 dv_quant_shifts[q][dv_88_areas[i]] + 1;
94 s->dv_idct_shift[1][0][q][j] = s->dv_idct_shift[0][0][q][j] + 1;
/* 2-4-8 DCT mode (indexed directly by i, no permutation) */
98 for(i = 1; i < 64; i++) {
100 s->dv_idct_shift[0][1][q][i] =
101 dv_quant_shifts[q][dv_248_areas[i]] + 1;
102 s->dv_idct_shift[1][1][q][i] = s->dv_idct_shift[0][1][q][i] + 1;
/* Initializes the DV codec:
   - allocates and fills the file-level tables dv_vlc_map (encoder),
     dv_rl_vlc (decoder) and dv_anchor (per-segment job ids),
   - selects the DSP routines and scan tables for both DCT modes,
   - builds the dequantization shift tables,
   - sets pix_fmt, bit_rate and coded_frame from the detected DV profile.
   Several lines of this function (local declarations, guards, braces,
   return statements) are not visible in this excerpt. */
107 static int dvvideo_init(AVCodecContext *avctx)
109 DVVideoContext *s = avctx->priv_data;
/* scratch copies of the VLC tables; twice NB_DV_VLC entries because the
   loop below can emit a second (negative level) entry per code */
116 uint16_t new_dv_vlc_bits[NB_DV_VLC*2];
117 uint8_t new_dv_vlc_len[NB_DV_VLC*2];
118 uint8_t new_dv_vlc_run[NB_DV_VLC*2];
119 int16_t new_dv_vlc_level[NB_DV_VLC*2];
/* encoder table of DV_VLC_MAP_RUN_SIZE x DV_VLC_MAP_LEV_SIZE pairs; the code
   below treats size == 0 as "entry not set yet" */
123 dv_vlc_map = av_mallocz_static(DV_VLC_MAP_LEV_SIZE*DV_VLC_MAP_RUN_SIZE*sizeof(struct dv_vlc_pair));
127 /* dv_anchor lets each thread know its Id */
128 for (i=0; i<DV_ANCHOR_SIZE; i++)
129 dv_anchor[i] = (void*)(size_t)i;
131 /* it's faster to include sign bit in a generic VLC parsing scheme */
132 for (i=0, j=0; i<NB_DV_VLC; i++, j++) {
133 new_dv_vlc_bits[j] = dv_vlc_bits[i];
134 new_dv_vlc_len[j] = dv_vlc_len[i];
135 new_dv_vlc_run[j] = dv_vlc_run[i];
136 new_dv_vlc_level[j] = dv_vlc_level[i];
/* non-zero level: append a sign bit (0 here, 1 for the negated copy below) */
138 if (dv_vlc_level[i]) {
139 new_dv_vlc_bits[j] <<= 1;
143 new_dv_vlc_bits[j] = (dv_vlc_bits[i] << 1) | 1;
144 new_dv_vlc_len[j] = dv_vlc_len[i] + 1;
145 new_dv_vlc_run[j] = dv_vlc_run[i];
146 new_dv_vlc_level[j] = -dv_vlc_level[i];
150 /* NOTE: as a trick, we use the fact that no codes are unused
151 to accelerate the parsing of partial codes */
152 init_vlc(&dv_vlc, TEX_VLC_BITS, j,
153 new_dv_vlc_len, 1, 1, new_dv_vlc_bits, 2, 2, 0);
/* flatten the generic VLC table into (len, level, run) entries for dv_decode_ac */
155 dv_rl_vlc = av_mallocz_static(dv_vlc.table_size * sizeof(RL_VLC_ELEM));
159 for(i = 0; i < dv_vlc.table_size; i++){
160 int code= dv_vlc.table[i][0];
161 int len = dv_vlc.table[i][1];
164 if(len<0){ //more bits needed
/* complete code: run is stored as table run + 1 */
168 run= new_dv_vlc_run[code] + 1;
169 level= new_dv_vlc_level[code];
171 dv_rl_vlc[i].len = len;
172 dv_rl_vlc[i].level = level;
173 dv_rl_vlc[i].run = run;
/* seed the encoder map from the code table; the last table entry is not
   visited, and entries outside the map dimensions or already set are
   tested for (the statements taken in those cases are not visible here) */
177 for (i = 0; i < NB_DV_VLC - 1; i++) {
178 if (dv_vlc_run[i] >= DV_VLC_MAP_RUN_SIZE)
180 #ifdef DV_CODEC_TINY_TARGET
181 if (dv_vlc_level[i] >= DV_VLC_MAP_LEV_SIZE)
185 if (dv_vlc_map[dv_vlc_run[i]][dv_vlc_level[i]].size != 0)
188 dv_vlc_map[dv_vlc_run[i]][dv_vlc_level[i]].vlc = dv_vlc_bits[i] <<
190 dv_vlc_map[dv_vlc_run[i]][dv_vlc_level[i]].size = dv_vlc_len[i] +
/* fill the still-empty (run, level) entries by concatenating the code stored
   at [run-1][0] with the level-only code stored at [0][level] */
193 for (i = 0; i < DV_VLC_MAP_RUN_SIZE; i++) {
194 #ifdef DV_CODEC_TINY_TARGET
195 for (j = 1; j < DV_VLC_MAP_LEV_SIZE; j++) {
196 if (dv_vlc_map[i][j].size == 0) {
197 dv_vlc_map[i][j].vlc = dv_vlc_map[0][j].vlc |
198 (dv_vlc_map[i-1][0].vlc << (dv_vlc_map[0][j].size));
199 dv_vlc_map[i][j].size = dv_vlc_map[i-1][0].size +
200 dv_vlc_map[0][j].size;
/* full-table variant: only the lower half of the level range is filled
   directly ... */
204 for (j = 1; j < DV_VLC_MAP_LEV_SIZE/2; j++) {
205 if (dv_vlc_map[i][j].size == 0) {
206 dv_vlc_map[i][j].vlc = dv_vlc_map[0][j].vlc |
207 (dv_vlc_map[i-1][0].vlc << (dv_vlc_map[0][j].size));
208 dv_vlc_map[i][j].size = dv_vlc_map[i-1][0].size +
209 dv_vlc_map[0][j].size;
/* ... and the entry for -j lives at index (-j) & 0x1ff with the low
   (sign) bit of the code set and the same size */
211 dv_vlc_map[i][((uint16_t)(-j))&0x1ff].vlc =
212 dv_vlc_map[i][j].vlc | 1;
213 dv_vlc_map[i][((uint16_t)(-j))&0x1ff].size =
214 dv_vlc_map[i][j].size;
220 /* Generic DSP setup */
221 dsputil_init(&dsp, avctx);
222 s->get_pixels = dsp.get_pixels;
/* 8-8 DCT mode: scan table is the zigzag order mapped through the IDCT permutation */
225 s->fdct[0] = dsp.fdct;
226 s->idct_put[0] = dsp.idct_put;
228 s->dv_zigzag[0][i] = dsp.idct_permutation[ff_zigzag_direct[i]];
/* 2-4-8 DCT mode */
231 s->fdct[1] = dsp.fdct248;
232 s->idct_put[1] = simple_idct248_put; // FIXME: need to add it to DSP
/* NOTE(review): two alternative ways of filling dv_zigzag[1] follow (a
   permuted loop and a plain memcpy); the condition choosing between them is
   not visible in this excerpt. */
234 for (i=0; i<64; i++){
235 int j= ff_zigzag248_direct[i];
236 s->dv_zigzag[1][i] = dsp.idct_permutation[(j&7) + (j&8)*4 + (j&48)/2];
239 memcpy(s->dv_zigzag[1], ff_zigzag248_direct, 64);
241 /* XXX: do it only for constant case */
242 dv_build_unquantize_tables(s, dsp.idct_permutation);
244 /* FIXME: I really don't think this should be here */
245 s->sys = dv_codec_profile(avctx);
247 av_log(avctx, AV_LOG_ERROR, "Cannot determine type of DV output stream\n");
/* publish the profile's pixel format and a bit rate computed from the frame
   size in bits (one av_rescale argument is not visible in this excerpt) */
250 avctx->pix_fmt = s->sys->pix_fmt;
251 avctx->bit_rate = av_rescale(s->sys->frame_size * 8,
253 s->sys->frame_rate_base);
254 avctx->coded_frame = &s->picture;
261 // #define printf(...) av_log(NULL, AV_LOG_ERROR, __VA_ARGS__)
/* Decoder-side state of one DCT block, kept across the decoding passes of
   dv_decode_video_segment. Some members (e.g. dct_mode, which the decoder
   assigns) are not visible in this excerpt. */
263 typedef struct BlockInfo {
/* per-block tables chosen from class, DCT mode and quantizer */
264 const uint8_t *shift_table;
265 const uint8_t *scan_table;
266 const int *iweight_table;
267 uint8_t pos; /* position in block */
/* bits of an incomplete VLC saved when a pass ran out of data; consumed at
   the start of the next dv_decode_ac call on this block */
269 uint8_t partial_bit_count;
270 uint16_t partial_bit_buffer;
274 /* block size in bits, one entry per block of a macroblock */
275 static const uint16_t block_sizes[6] = {
276 112, 112, 112, 112, 80, 80
278 /* bit budget for AC only in 5 MBs: each block size above minus the 12 bits
   (9 + 1 + 2) written ahead of the AC data, i.e. 100 and 68 bits */
279 static const int vs_total_ac_bits = (100 * 4 + 68*2) * 5;
280 /* see dv_88_areas and dv_248_areas for details; entries a and a+1 bound
   the coefficient positions scanned for area a */
281 static const int mb_area_start[5] = { 1, 6, 21, 43, 64 };
/* dv_decode_ac manipulates the bit reader's re_index/re_cache directly.
   Without ALT_BITSTREAM_READER a dummy re_index is declared so the file
   still compiles, and a warning is emitted. */
283 #ifndef ALT_BITSTREAM_READER
284 #warning only works with ALT_BITSTREAM_READER
285 static int re_index; //Hack to make it compile
/* Returns the number of bits not yet consumed from s
   (total size in bits minus the current read position). */
288 static inline int get_bits_left(GetBitContext *s)
290 return s->size_in_bits - get_bits_count(s);
/* Returns the total size of the bit reader's buffer, in bits. */
293 static inline int get_bits_size(GetBitContext *s)
295 return s->size_in_bits;
/* Returns the number of bits that can still be written to s: the span
   between s->buf and s->buf_end in bits, minus the bits already written. */
298 static inline int put_bits_left(PutBitContext* s)
300 return (s->buf_end - s->buf) * 8 - put_bits_count(s);
303 /* Decode AC coefficients of one block.
   gb    - bit reader positioned on the block's AC data
   mb    - per-block state: tables to use and the bits of a partial code
           carried over from an earlier call
   block - destination coefficients
   If a code would extend past the end of gb, the remaining bits are saved in
   mb->partial_bit_count / mb->partial_bit_buffer so a later call on another
   bit source can complete it. The main loop header, the handling of pos and
   the store into block are not visible in this excerpt. */
304 static void dv_decode_ac(GetBitContext *gb, BlockInfo *mb, DCTELEM *block)
306 int last_index = get_bits_size(gb);
307 const uint8_t *scan_table = mb->scan_table;
308 const uint8_t *shift_table = mb->shift_table;
309 const int *iweight_table = mb->iweight_table;
311 int partial_bit_count = mb->partial_bit_count;
312 int level, pos1, run, vlc_len, index;
315 UPDATE_CACHE(re, gb);
317 /* if we must parse a partial vlc, we do it here: the saved bits are put
   in front of the freshly read cache and the read index is moved back */
318 if (partial_bit_count > 0) {
319 re_cache = ((unsigned)re_cache >> partial_bit_count) |
320 (mb->partial_bit_buffer << (sizeof(re_cache)*8 - partial_bit_count));
321 re_index -= partial_bit_count;
322 mb->partial_bit_count = 0;
325 /* get the AC coefficients until last_index is reached */
328 printf("%2d: bits=%04x index=%d\n", pos, SHOW_UBITS(re, gb, 16), re_index);
330 /* our own optimized GET_RL_VLC: first-level lookup on TEX_VLC_BITS bits,
   then (for longer codes) a second lookup relative to the first entry */
331 index = NEG_USR32(re_cache, TEX_VLC_BITS);
332 vlc_len = dv_rl_vlc[index].len;
334 index = NEG_USR32((unsigned)re_cache << TEX_VLC_BITS, -vlc_len) + dv_rl_vlc[index].level;
335 vlc_len = TEX_VLC_BITS - vlc_len;
337 level = dv_rl_vlc[index].level;
338 run = dv_rl_vlc[index].run;
340 /* gotta check if we're still within gb boundaries */
341 if (re_index + vlc_len > last_index) {
342 /* should be < 16 bits otherwise a codeword could have been parsed */
343 mb->partial_bit_count = last_index - re_index;
344 mb->partial_bit_buffer = NEG_USR32(re_cache, mb->partial_bit_count);
345 re_index = last_index;
351 printf("run=%d level=%d\n", run, level);
/* dequantize: shift by the table entry for the scanned position */
357 pos1 = scan_table[pos];
358 level <<= shift_table[pos1];
360 /* unweight, round, and shift down */
361 level = (level*iweight_table[pos] + (1 << (dv_iweight_bits-1))) >> dv_iweight_bits;
365 UPDATE_CACHE(re, gb);
367 CLOSE_READER(re, gb);
/* Copies every bit still unread in gb to pb: whole chunks of MIN_CACHE_BITS
   first, then the remaining bits_left bits (a guard around the final
   put_bits, if any, is not visible in this excerpt). */
371 static inline void bit_copy(PutBitContext *pb, GetBitContext *gb)
373 int bits_left = get_bits_left(gb);
374 while (bits_left >= MIN_CACHE_BITS) {
375 put_bits(pb, MIN_CACHE_BITS, get_bits(gb, MIN_CACHE_BITS));
376 bits_left -= MIN_CACHE_BITS;
379 put_bits(pb, bits_left, get_bits(gb, bits_left));
/* Decodes one video segment: 5 macroblocks of 6 blocks each.
   Pass 1 reads the header bits of each block and decodes AC coefficients
   from the block's own space; bits left unused are collected per macroblock
   (mb_bit_buffer). Pass 2 lets unfinished blocks (pos < 64) of the same
   macroblock consume those bits; what still remains is collected for the
   whole segment (vs_bit_buffer). Pass 3 offers the segment-level bits to all
   blocks. Finally each block is inverse-transformed into s->picture.
   s          - codec state (tables, picture, profile)
   mb_pos_ptr - per-macroblock placement data (how mb_x/mb_y are derived from
                it is not visible in this excerpt)
   A further parameter (the input buffer, used below as buf_ptr) and many
   statements are on lines missing from this excerpt. */
383 /* mb_x and mb_y are in units of 8 pixels */
384 static inline void dv_decode_video_segment(DVVideoContext *s,
386 const uint16_t *mb_pos_ptr)
388 int quant, dc, dct_mode, class1, j;
389 int mb_index, mb_x, mb_y, v, last_index;
390 DCTELEM *block, *block1;
393 void (*idct_put)(uint8_t *dest, int line_size, DCTELEM *block);
395 PutBitContext pb, vs_pb;
397 BlockInfo mb_data[5 * 6], *mb, *mb1;
398 DECLARE_ALIGNED_8(DCTELEM, sblock[5*6][64]);
399 DECLARE_ALIGNED_8(uint8_t, mb_bit_buffer[80 + 4]); /* allow some slack */
400 DECLARE_ALIGNED_8(uint8_t, vs_bit_buffer[5 * 80 + 4]); /* allow some slack */
/* block edge is 8 >> lowres pixels */
401 const int log2_blocksize= 3-s->avctx->lowres;
/* sanity check of the 8-byte alignment requested above */
403 assert((((int)mb_bit_buffer)&7)==0);
404 assert((((int)vs_bit_buffer)&7)==0);
406 memset(sblock, 0, sizeof(sblock));
408 /* pass 1 : read DC and AC coefficients in blocks */
410 block1 = &sblock[0][0];
412 init_put_bits(&vs_pb, vs_bit_buffer, 5 * 80);
413 for(mb_index = 0; mb_index < 5; mb_index++, mb1 += 6, block1 += 6 * 64) {
/* quantizer number: low nibble of the macroblock's 4th byte */
415 quant = buf_ptr[3] & 0x0f;
417 init_put_bits(&pb, mb_bit_buffer, 80);
420 for(j = 0;j < 6; j++) {
421 last_index = block_sizes[j];
422 init_get_bits(&gb, buf_ptr, last_index);
/* block header: 9-bit signed DC, 1-bit DCT mode, 2-bit class */
425 dc = get_sbits(&gb, 9);
426 dct_mode = get_bits1(&gb);
427 mb->dct_mode = dct_mode;
428 mb->scan_table = s->dv_zigzag[dct_mode];
429 mb->iweight_table = dct_mode ? dv_iweight_248 : dv_iweight_88;
430 class1 = get_bits(&gb, 2);
431 mb->shift_table = s->dv_idct_shift[class1 == 3][dct_mode]
432 [quant + dv_quant_offset[class1]];
434 /* convert to unsigned because 128 is not added in the
438 buf_ptr += last_index >> 3;
440 mb->partial_bit_count = 0;
443 printf("MB block: %d, %d ", mb_index, j);
445 dv_decode_ac(&gb, mb, block);
447 /* write the remaining bits in a new buffer only if the
456 /* pass 2 : we can do it just after */
458 printf("***pass 2 size=%d MB#=%d\n", put_bits_count(&pb), mb_index);
462 init_get_bits(&gb, mb_bit_buffer, put_bits_count(&pb));
464 for(j = 0;j < 6; j++, block += 64, mb++) {
465 if (mb->pos < 64 && get_bits_left(&gb) > 0) {
466 dv_decode_ac(&gb, mb, block);
467 /* if still not finished, no need to parse other blocks */
472 /* all blocks are finished, so the extra bytes can be used at
473 the video segment level */
475 bit_copy(&vs_pb, &gb);
478 /* we need a pass over the whole video segment */
480 printf("***pass 3 size=%d\n", put_bits_count(&vs_pb));
482 block = &sblock[0][0];
484 init_get_bits(&gb, vs_bit_buffer, put_bits_count(&vs_pb));
485 flush_put_bits(&vs_pb);
486 for(mb_index = 0; mb_index < 5; mb_index++) {
487 for(j = 0;j < 6; j++) {
490 printf("start %d:%d\n", mb_index, j);
492 dv_decode_ac(&gb, mb, block);
494 if (mb->pos >= 64 && mb->pos < 127)
495 av_log(NULL, AV_LOG_ERROR, "AC EOB marker is absent pos=%d\n", mb->pos);
501 /* compute idct and place blocks */
502 block = &sblock[0][0];
504 for(mb_index = 0; mb_index < 5; mb_index++) {
/* destination addresses: mb_x/mb_y are scaled by the block size; for
   4:2:2 the luma x position is halved, chroma positions depend on the
   subsampling of the pixel format */
508 if (s->sys->pix_fmt == PIX_FMT_YUV422P) {
509 y_ptr = s->picture.data[0] + ((mb_y * s->picture.linesize[0] + (mb_x>>1))<<log2_blocksize);
510 c_offset = ((mb_y * s->picture.linesize[1] + (mb_x >> 2))<<log2_blocksize);
511 } else { /* 4:1:1 or 4:2:0 */
512 y_ptr = s->picture.data[0] + ((mb_y * s->picture.linesize[0] + mb_x)<<log2_blocksize);
513 if (s->sys->pix_fmt == PIX_FMT_YUV411P)
514 c_offset = ((mb_y * s->picture.linesize[1] + (mb_x >> 2))<<log2_blocksize);
516 c_offset = (((mb_y >> 1) * s->picture.linesize[1] + (mb_x >> 1))<<log2_blocksize);
518 for(j = 0;j < 6; j++) {
/* the 2-4-8 IDCT (index 1) is only used at full resolution */
519 idct_put = s->idct_put[mb->dct_mode && log2_blocksize==3];
520 if (s->sys->pix_fmt == PIX_FMT_YUV422P) { /* 4:2:2 */
521 if (j == 0 || j == 2) {
523 idct_put(y_ptr + ((j >> 1)<<log2_blocksize),
524 s->picture.linesize[0], block);
/* chroma: plane index is 6 - j */
527 idct_put(s->picture.data[6 - j] + c_offset,
528 s->picture.linesize[6 - j], block);
530 /* note: j=1 and j=3 are "dummy" blocks in 4:2:2 */
531 } else { /* 4:1:1 or 4:2:0 */
533 if (s->sys->pix_fmt == PIX_FMT_YUV411P && mb_x < (704 / 8)) {
534 /* NOTE: at end of line, the macroblock is handled as 420 */
535 idct_put(y_ptr + (j<<log2_blocksize), s->picture.linesize[0], block);
537 idct_put(y_ptr + (((j & 1) + (j >> 1) * s->picture.linesize[0])<<log2_blocksize),
538 s->picture.linesize[0], block);
541 if (s->sys->pix_fmt == PIX_FMT_YUV411P && mb_x >= (704 / 8)) {
/* IDCT into a temporary 8-byte-wide buffer, then copy its left and
   right halves to two vertically separated places in the chroma plane */
542 uint64_t aligned_pixels[64/8];
543 uint8_t *pixels= (uint8_t*)aligned_pixels;
544 uint8_t *c_ptr, *c_ptr1, *ptr, *ptr1;
546 /* NOTE: at end of line, the macroblock is handled as 420 */
547 idct_put(pixels, 8, block);
548 linesize = s->picture.linesize[6 - j];
549 c_ptr = s->picture.data[6 - j] + c_offset;
551 for(y = 0;y < (1<<log2_blocksize); y++) {
552 ptr1= ptr + (1<<(log2_blocksize-1));
553 c_ptr1 = c_ptr + (linesize<<log2_blocksize);
554 for(x=0; x < (1<<(log2_blocksize-1)); x++){
555 c_ptr[x]= ptr[x]; c_ptr1[x]= ptr1[x];
561 /* don't ask me why they inverted Cb and Cr ! */
562 idct_put(s->picture.data[6 - j] + c_offset,
563 s->picture.linesize[6 - j], block);
573 #ifdef DV_CODEC_TINY_TARGET
574 /* Converts run and level (where level != 0) pair into vlc, returning bit size.
   Small-table variant: a pair inside the table is looked up directly;
   otherwise the level is coded alone (table entry for run 0, or the escape
   0xfe00 | level << 1 | sign) and a run-only code is placed in front of it
   (table entry [run-1][0] for run < 16, else the 13-bit escape
   0x1f80 | (run - 1)). sign is OR-ed into the lowest bit of the level code. */
575 static always_inline int dv_rl2vlc(int run, int level, int sign, uint32_t* vlc)
578 if (run < DV_VLC_MAP_RUN_SIZE && level < DV_VLC_MAP_LEV_SIZE) {
579 *vlc = dv_vlc_map[run][level].vlc | sign;
580 size = dv_vlc_map[run][level].size;
583 if (level < DV_VLC_MAP_LEV_SIZE) {
584 *vlc = dv_vlc_map[0][level].vlc | sign;
585 size = dv_vlc_map[0][level].size;
587 *vlc = 0xfe00 | (level << 1) | sign;
591 *vlc |= ((run < 16) ? dv_vlc_map[run-1][0].vlc :
592 (0x1f80 | (run - 1))) << size;
593 size += (run < 16) ? dv_vlc_map[run-1][0].size : 13;
/* Small-table variant: returns only the bit size that dv_rl2vlc would use
   for (run, level). Pairs outside the table cost the level code (table entry
   or 16 bits for the escape) plus a run code (table entry [run-1][0] for
   run < 16, else 13 bits); the condition guarding the run part is not
   visible in this excerpt. */
600 static always_inline int dv_rl2vlc_size(int run, int level)
604 if (run < DV_VLC_MAP_RUN_SIZE && level < DV_VLC_MAP_LEV_SIZE) {
605 size = dv_vlc_map[run][level].size;
608 size = (level < DV_VLC_MAP_LEV_SIZE) ? dv_vlc_map[0][level].size : 16;
610 size += (run < 16) ? dv_vlc_map[run-1][0].size : 13;
/* Full-table variant: every (run, l) pair is looked up directly in
   dv_vlc_map. Stores the code OR-ed with sign in *vlc and returns its size
   in bits. No range check is done on run or l here. */
616 static always_inline int dv_rl2vlc(int run, int l, int sign, uint32_t* vlc)
618 *vlc = dv_vlc_map[run][l].vlc | sign;
619 return dv_vlc_map[run][l].size;
/* Full-table variant: returns the bit size stored in dv_vlc_map for (run, l). */
622 static always_inline int dv_rl2vlc_size(int run, int l)
624 return dv_vlc_map[run][l].size;
/* Encoder-side state of one DCT block. Only the partial-code members are
   visible in this excerpt; other members used elsewhere in the file (e.g.
   mb, next, sign, prev, bit_size, area_q, cno, dct_mode, cur_ac) are declared
   on lines not shown. */
628 typedef struct EncBlockInfo {
/* bits of a VLC that did not fit into the available output space, kept for
   the next dv_encode_ac call on this block */
638 uint8_t partial_bit_count;
639 uint32_t partial_bit_buffer; /* we can't use uint16_t here */
/* Writes the AC codes of block bi, starting with any partial code saved by a
   previous call, into the bit writers in the range [pb_pool, pb_end).
   When the current writer is too small, as many bits as fit are written and
   the rest moves on to the next writer; if no writer is left, the unwritten
   bits are saved in bi->partial_bit_count / bi->partial_bit_buffer.
   The enclosing loop and the return statements are not visible in this
   excerpt; callers use the returned PutBitContext as the pb_pool of the next
   call. */
642 static always_inline PutBitContext* dv_encode_ac(EncBlockInfo* bi, PutBitContext* pb_pool,
643 PutBitContext* pb_end)
647 PutBitContext* pb = pb_pool;
648 int size = bi->partial_bit_count;
649 uint32_t vlc = bi->partial_bit_buffer;
651 bi->partial_bit_count = bi->partial_bit_buffer = 0;
653 /* Find suitable storage space */
654 for (; size > (bits_left = put_bits_left(pb)); pb++) {
/* write the top bits_left bits of the code, keep the low size bits */
657 put_bits(pb, bits_left, vlc >> size);
658 vlc = vlc & ((1<<size)-1);
660 if (pb + 1 >= pb_end) {
661 bi->partial_bit_count = size;
662 bi->partial_bit_buffer = vlc;
668 put_bits(pb, size, vlc);
673 /* Construct the next VLC */
675 bi->cur_ac = bi->next[prev];
677 size = dv_rl2vlc(bi->cur_ac - prev - 1, bi->mb[bi->cur_ac], bi->sign[bi->cur_ac], &vlc);
679 size = 4; vlc = 6; /* End Of Block stamp */
/* Analyses one transformed block for the encoder.
   blk         - DCT coefficients
   bi          - block state to fill: per-area bit sizes and chain heads,
                 signs, and the class number bi->cno
   zigzag_scan - scan order used to read blk
   weight      - weighting table applied to coefficient magnitudes
   bias        - not referenced in the lines visible in this excerpt
   bi->cno is set to the first index whose classes[] entry is not below the
   largest weighted level found. Several statements of this function are not
   visible in this excerpt. */
685 static always_inline void dv_set_class_number(DCTELEM* blk, EncBlockInfo* bi,
686 const uint8_t* zigzag_scan, const int *weight, int bias)
689 /* We offer two different methods for class number assignment: the
690 method suggested in SMPTE 314M Table 22, and an improved
691 method. The SMPTE method is very conservative; it assigns class
692 3 (i.e. severe quantization) to any block where the largest AC
693 component is greater than 36. ffmpeg's DV encoder tracks AC bit
694 consumption precisely, so there is no need to bias most blocks
695 towards strongly lossy compression. Instead, we assign class 2
696 to most blocks, and use class 3 only when strictly necessary
697 (for blocks whose largest AC component exceeds 255). */
699 #if 0 /* SMPTE spec method */
700 static const int classes[] = {12, 24, 36, 0xffff};
701 #else /* improved ffmpeg method */
702 static const int classes[] = {-1, -1, 255, 0xffff};
/* first scan: coefficients area by area, accumulating the bit cost */
709 for (area = 0; area < 4; area++) {
710 bi->prev[area] = prev;
711 bi->bit_size[area] = 1; // 4 areas 4 bits for EOB :)
712 for (i=mb_area_start[area]; i<mb_area_start[area+1]; i++) {
713 int level = blk[zigzag_scan[i]];
/* unsigned compare: true only when level is outside [-15, 15] */
715 if (level+15 > 30U) {
716 bi->sign[i] = (level>>31)&1;
717 /* weigh it and shift down into range, adding for rounding */
718 /* the extra division by a factor of 2^4 reverses the 8x expansion of the DCT
719 AND the 2x doubling of the weights */
720 level = (ABS(level) * weight[i] + (1<<(dv_weight_bits+3))) >> (dv_weight_bits+4);
722 if(level>max) max= level;
723 bi->bit_size[area] += dv_rl2vlc_size(i - prev - 1, level);
/* pick the class from the largest weighted level */
730 for(bi->cno = 0; max > classes[bi->cno]; bi->cno++);
/* second scan over the chain of kept coefficients (bi->next); the condition
   under which it runs is not visible in this excerpt */
738 for (area = 0; area < 4; area++) {
739 bi->prev[area] = prev;
740 bi->bit_size[area] = 1; // 4 areas 4 bits for EOB :)
741 for (; i<mb_area_start[area+1]; i= bi->next[i]) {
745 bi->bit_size[area] += dv_rl2vlc_size(i - prev - 1, bi->mb[i]);
755 //FIXME replace this by dsputil
/* SC(x, y): the difference s[x] - s[y] XOR-ed with itself shifted right by 7.
   NOTE(review): looks like a cheap approximation of the absolute difference
   for small values -- confirm. */
756 #define SC(x, y) ((s[x] - s[y]) ^ ((s[x] - s[y]) >> 7))
/* Chooses the DCT mode for a block of pixels: compares a score built from
   differences between rows one apart (score88) with one built from rows two
   apart (score248) and returns non-zero when score88 - score248 > -10. The
   caller stores the result as dct_mode, where 1 selects the 2-4-8 transform.
   The loops advancing s over the block are not visible in this excerpt. */
757 static always_inline int dv_guess_dct_mode(DCTELEM *blk) {
763 /* Compute 8-8 score (small values give a better chance for 8-8 DCT) */
766 score88 += SC(0, 8) + SC(1, 9) + SC(2, 10) + SC(3, 11) +
767 SC(4, 12) + SC(5,13) + SC(6, 14) + SC(7, 15);
770 /* Compute 2-4-8 score (small values give a better chance for 2-4-8 DCT) */
773 score248 += SC(0, 16) + SC(1,17) + SC(2, 18) + SC(3, 19) +
774 SC(4, 20) + SC(5,21) + SC(6, 22) + SC(7, 23);
778 return (score88 - score248 > -10);
/* Chooses quantizer numbers for the 5 macroblocks of a video segment so that
   the AC data fits into vs_total_ac_bits.
   blks - the 5*6 block states of the segment
   qnos - in/out quantizer number per macroblock
   First stage: repeated until the summed sizes fit or all qnos are zero; for
   each area whose shift changes under the current qno the bit size is
   recomputed and coefficients are unlinked from the next[] chain (the
   statements that lower qnos and requantize are not visible in this
   excerpt). Second stage: while the total still exceeds the budget,
   coefficients with magnitude below a threshold a are unlinked, a doubling
   each round starting from 2. */
781 static inline void dv_guess_qnos(EncBlockInfo* blks, int* qnos)
784 int i, j, k, a, prev, a2;
/* start with sizes far above the budget */
787 size[0] = size[1] = size[2] = size[3] = size[4] = 1<<24;
790 for (i=0; i<5; i++) {
796 for (j=0; j<6; j++, b++) {
797 for (a=0; a<4; a++) {
798 if (b->area_q[a] != dv_quant_shifts[qnos[i] + dv_quant_offset[b->cno]][a]) {
799 b->bit_size[a] = 1; // 4 areas 4 bits for EOB :)
802 assert(b->next[prev] >= mb_area_start[a+1] || b->mb[prev]);
803 for (k= b->next[prev] ; k<mb_area_start[a+1]; k= b->next[k]) {
806 b->bit_size[a] += dv_rl2vlc_size(k - prev - 1, b->mb[k]);
/* the successor lies in a later area a2: its run length changes when k
   is dropped, so that area's size is corrected */
809 if(b->next[k] >= mb_area_start[a+1] && b->next[k]<64){
810 for(a2=a+1; b->next[k] >= mb_area_start[a2+1]; a2++)
813 assert(b->mb[b->next[k]]);
814 b->bit_size[a2] += dv_rl2vlc_size(b->next[k] - prev - 1, b->mb[b->next[k]])
815 -dv_rl2vlc_size(b->next[k] - k - 1, b->mb[b->next[k]]);
816 assert(b->prev[a2]==k && (a2+1 >= 4 || b->prev[a2+1]!=k));
/* unlink k from the chain */
819 b->next[prev] = b->next[k];
824 size[i] += b->bit_size[a];
827 if(vs_total_ac_bits >= size[0] + size[1] + size[2] + size[3] + size[4])
830 } while (qnos[0]|qnos[1]|qnos[2]|qnos[3]|qnos[4]);
/* fallback: drop small coefficients with a doubling threshold */
833 for(a=2; a==2 || vs_total_ac_bits < size[0]; a+=a){
835 size[0] = 5*6*4; //EOB
836 for (j=0; j<6*5; j++, b++) {
838 for (k= b->next[prev]; k<64; k= b->next[k]) {
839 if(b->mb[k] < a && b->mb[k] > -a){
840 b->next[prev] = b->next[k];
842 size[0] += dv_rl2vlc_size(k - prev - 1, b->mb[k]);
/* Encodes one video segment: 5 macroblocks of 6 blocks each.
   For every block the source pixels are located in s->picture, copied into a
   DCT block, transformed, classified (dv_set_class_number) and its 12 header
   bits written. If the summed AC size exceeds vs_total_ac_bits the quantizer
   numbers are adjusted (dv_guess_qnos). The AC data is then written in three
   passes: into each block's own space, into the space left in the same
   macroblock, and finally into the space left anywhere in the segment.
   s          - codec state (picture, profile, DSP routines)
   mb_pos_ptr - per-macroblock placement data (how mb_x/mb_y are derived from
                it is not visible in this excerpt)
   A further parameter (the output buffer, used below as dif) and many
   statements are on lines missing from this excerpt. */
850 static inline void dv_encode_video_segment(DVVideoContext *s,
852 const uint16_t *mb_pos_ptr)
854 int mb_index, i, j, v;
855 int mb_x, mb_y, c_offset, linesize;
860 DECLARE_ALIGNED_8(DCTELEM, block[64]);
861 EncBlockInfo enc_blks[5*6];
862 PutBitContext pbs[5*6];
864 EncBlockInfo* enc_blk;
/* sanity check of the 8-byte alignment requested above */
868 assert((((int)block) & 7) == 0);
870 enc_blk = &enc_blks[0];
872 for(mb_index = 0; mb_index < 5; mb_index++) {
/* source addresses: mb_x/mb_y are in units of 8 pixels; 4:2:2 uses half
   the luma x step */
876 if (s->sys->pix_fmt == PIX_FMT_YUV422P) {
877 y_ptr = s->picture.data[0] + (mb_y * s->picture.linesize[0] * 8) + (mb_x * 4);
879 y_ptr = s->picture.data[0] + (mb_y * s->picture.linesize[0] * 8) + (mb_x * 8);
881 if (s->sys->pix_fmt == PIX_FMT_YUV420P) {
882 c_offset = (((mb_y >> 1) * s->picture.linesize[1] * 8) + ((mb_x >> 1) * 8));
883 } else { /* 4:2:2 or 4:1:1 */
884 c_offset = ((mb_y * s->picture.linesize[1] * 8) + ((mb_x >> 2) * 8));
887 qnos[mb_index] = 15; /* No quantization */
/* block data starts 4 bytes into each 80-byte macroblock slot */
888 ptr = dif + mb_index*80 + 4;
889 for(j = 0;j < 6; j++) {
891 if (s->sys->pix_fmt == PIX_FMT_YUV422P) { /* 4:2:2 */
892 if (j == 0 || j == 2) {
894 data = y_ptr + ((j>>1) * 8);
895 linesize = s->picture.linesize[0];
898 data = s->picture.data[6 - j] + c_offset;
899 linesize = s->picture.linesize[6 - j];
901 /* j=1 and j=3 are "dummy" blocks, used for AC data only */
906 } else { /* 4:1:1 or 4:2:0 */
907 if (j < 4) { /* Four Y blocks */
908 /* NOTE: at end of line, the macroblock is handled as 420 */
909 if (s->sys->pix_fmt == PIX_FMT_YUV411P && mb_x < (704 / 8)) {
910 data = y_ptr + (j * 8);
912 data = y_ptr + ((j & 1) * 8) + ((j >> 1) * 8 * s->picture.linesize[0]);
914 linesize = s->picture.linesize[0];
915 } else { /* Cr and Cb blocks */
916 /* don't ask Fabrice why they inverted Cb and Cr ! */
917 data = s->picture.data[6 - j] + c_offset;
918 linesize = s->picture.linesize[6 - j];
919 if (s->sys->pix_fmt == PIX_FMT_YUV411P && mb_x >= (704 / 8))
924 /* Everything is set up -- now just copy data -> DCT block */
925 if (do_edge_wrap) { /* Edge wrap copy: 4x16 -> 8x8 */
/* each output row takes 4 pixels from data and 4 from 8 lines below */
929 d = data + 8 * linesize;
930 b[0] = data[0]; b[1] = data[1]; b[2] = data[2]; b[3] = data[3];
931 b[4] = d[0]; b[5] = d[1]; b[6] = d[2]; b[7] = d[3];
935 } else { /* Simple copy: 8x8 -> 8x8 */
937 s->get_pixels(block, data, linesize);
/* the 2-4-8 mode is only considered when interlaced DCT is requested */
940 if(s->avctx->flags & CODEC_FLAG_INTERLACED_DCT)
941 enc_blk->dct_mode = dv_guess_dct_mode(block);
943 enc_blk->dct_mode = 0;
944 enc_blk->area_q[0] = enc_blk->area_q[1] = enc_blk->area_q[2] = enc_blk->area_q[3] = 0;
945 enc_blk->partial_bit_count = 0;
946 enc_blk->partial_bit_buffer = 0;
950 /* We rely on the fact that encoding all zeros leads to an immediate EOB,
951 which is precisely what the spec calls for in the "dummy" blocks. */
952 memset(block, 0, sizeof(block));
954 s->fdct[enc_blk->dct_mode](block);
957 dv_set_class_number(block, enc_blk,
958 enc_blk->dct_mode ? ff_zigzag248_direct : ff_zigzag_direct,
959 enc_blk->dct_mode ? dv_weight_248 : dv_weight_88,
/* block header: 9-bit DC, 1-bit DCT mode, 2-bit class number */
962 init_put_bits(pb, ptr, block_sizes[j]/8);
963 put_bits(pb, 9, (uint16_t)(((enc_blk->mb[0] >> 3) - 1024 + 2) >> 2));
964 put_bits(pb, 1, enc_blk->dct_mode);
965 put_bits(pb, 2, enc_blk->cno);
967 vs_bit_size += enc_blk->bit_size[0] + enc_blk->bit_size[1] +
968 enc_blk->bit_size[2] + enc_blk->bit_size[3];
971 ptr += block_sizes[j]/8;
/* over budget: let dv_guess_qnos pick coarser quantizers */
975 if (vs_total_ac_bits < vs_bit_size)
976 dv_guess_qnos(&enc_blks[0], &qnos[0]);
/* the quantizer number goes into the 4th byte of each macroblock slot */
978 for (i=0; i<5; i++) {
979 dif[i*80 + 3] = qnos[i];
982 /* First pass over individual cells only */
983 for (j=0; j<5*6; j++)
984 dv_encode_ac(&enc_blks[j], &pbs[j], &pbs[j+1]);
986 /* Second pass over each MB space */
987 for (j=0; j<5*6; j+=6) {
989 for (i=0; i<6; i++) {
990 if (enc_blks[i+j].partial_bit_count)
991 pb=dv_encode_ac(&enc_blks[i+j], pb, &pbs[j+6]);
995 /* Third and final pass over the whole video segment space */
997 for (j=0; j<5*6; j++) {
998 if (enc_blks[j].partial_bit_count)
999 pb=dv_encode_ac(&enc_blks[j], pb, &pbs[6*5]);
/* data still pending after the last pass cannot be stored anywhere */
1000 if (enc_blks[j].partial_bit_count)
1001 av_log(NULL, AV_LOG_ERROR, "ac bitstream overflow\n");
1004 for (j=0; j<5*6; j++)
1005 flush_put_bits(&pbs[j]);
/* Job function passed to avctx->execute by dvvideo_decode_frame: decodes the
   video segment whose number is carried in sl (an integer stored in a
   pointer, see dv_anchor). The return statement is not visible in this
   excerpt. */
1008 static int dv_decode_mt(AVCodecContext *avctx, void* sl)
1010 DVVideoContext *s = avctx->priv_data;
1011 int slice = (size_t)sl;
1013 /* which DIF channel is this? */
1014 int chan = slice / (s->sys->difseg_size * 27);
1016 /* slice within the DIF channel */
1017 int chan_slice = slice % (s->sys->difseg_size * 27);
1019 /* byte offset of this channel's data */
1020 int chan_offset = chan * s->sys->difseg_size * 150 * 80;
/* the first argument is the segment's position in s->buf, computed in units
   of 80 bytes from chan_slice, plus the channel offset */
1022 dv_decode_video_segment(s, &s->buf[((chan_slice/27)*6+(chan_slice/3)+chan_slice*5+7)*80 + chan_offset],
1023 &s->sys->video_place[slice*5]);
/* Job function passed to c->execute by dvvideo_encode_frame: encodes the
   video segment whose number is carried in sl (an integer stored in a
   pointer, see dv_anchor). Uses the same addressing as dv_decode_mt. The
   return statement is not visible in this excerpt. */
1027 static int dv_encode_mt(AVCodecContext *avctx, void* sl)
1029 DVVideoContext *s = avctx->priv_data;
1030 int slice = (size_t)sl;
1032 /* which DIF channel is this? */
1033 int chan = slice / (s->sys->difseg_size * 27);
1035 /* slice within the DIF channel */
1036 int chan_slice = slice % (s->sys->difseg_size * 27);
1038 /* byte offset of this channel's data */
1039 int chan_offset = chan * s->sys->difseg_size * 150 * 80;
/* the first argument is the segment's position in s->buf, computed in units
   of 80 bytes from chan_slice, plus the channel offset */
1041 dv_encode_video_segment(s, &s->buf[((chan_slice/27)*6+(chan_slice/3)+chan_slice*5+7)*80 + chan_offset],
1042 &s->sys->video_place[slice*5]);
1046 /* NOTE: exactly one frame must be given (120000 bytes for NTSC,
1047 144000 bytes for PAL - or twice those for 50Mbps) */
/* Decodes one DV frame from buf into s->picture.
   The profile is detected from the data itself; -1 is returned when it
   cannot be determined or buf_size is smaller than the profile's frame size.
   On success *data receives a copy of the picture, *data_size is set to
   sizeof(AVFrame) and the profile's frame size is returned. Some statements
   (e.g. the failure path after get_buffer) are not visible in this excerpt. */
1048 static int dvvideo_decode_frame(AVCodecContext *avctx,
1049 void *data, int *data_size,
1050 uint8_t *buf, int buf_size)
1052 DVVideoContext *s = avctx->priv_data;
1054 s->sys = dv_frame_profile(buf);
1055 if (!s->sys || buf_size < s->sys->frame_size)
1056 return -1; /* NOTE: we only accept several full frames */
/* give back the previous picture before requesting a new one */
1058 if(s->picture.data[0])
1059 avctx->release_buffer(avctx, &s->picture);
1061 s->picture.reference = 0;
1062 s->picture.key_frame = 1;
1063 s->picture.pict_type = FF_I_TYPE;
1064 avctx->pix_fmt = s->sys->pix_fmt;
1065 avcodec_set_dimensions(avctx, s->sys->width, s->sys->height);
1066 if(avctx->get_buffer(avctx, &s->picture) < 0) {
1067 av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
1070 s->picture.interlaced_frame = 1;
1071 s->picture.top_field_first = 0;
/* one job per video segment: channels * DIF sequences * 27 */
1074 avctx->execute(avctx, dv_decode_mt, (void**)&dv_anchor[0], NULL,
1075 s->sys->n_difchan * s->sys->difseg_size * 27);
1080 *data_size = sizeof(AVFrame);
1081 *(AVFrame*)data= s->picture;
1083 return s->sys->frame_size;
/* Encodes one picture into buf as a DV frame.
   The profile is derived from the codec context; buf_size is checked against
   the profile's frame size (the statements taken on failure are not visible
   in this excerpt). The input frame passed as data is copied into
   s->picture, all video segments are encoded through c->execute, two header
   bytes are written, and the profile's frame size is returned. The remaining
   parameter(s) of the signature are on a line not shown here. */
1086 static int dvvideo_encode_frame(AVCodecContext *c, uint8_t *buf, int buf_size,
1089 DVVideoContext *s = c->priv_data;
1091 s->sys = dv_codec_profile(c);
1094 if(buf_size < s->sys->frame_size)
1097 c->pix_fmt = s->sys->pix_fmt;
1098 s->picture = *((AVFrame *)data);
1099 s->picture.key_frame = 1;
1100 s->picture.pict_type = FF_I_TYPE;
/* one job per video segment: channels * DIF sequences * 27 */
1103 c->execute(c, dv_encode_mt, (void**)&dv_anchor[0], NULL,
1104 s->sys->n_difchan * s->sys->difseg_size * 27);
1108 /* Fill in just enough of the header for dv_frame_profile() to
1109 return the correct result, so that the frame can be decoded
1110 correctly. The rest of the metadata is filled in by the dvvideo
1111 avformat. (this should probably change so that encode_frame()
1112 fills in ALL of the metadata - e.g. for Quicktime-wrapped DV
1115 /* NTSC/PAL format */
1116 buf[3] = s->sys->dsf ? 0x80 : 0x00;
1118 /* 25Mbps or 50Mbps */
1119 buf[80*5 + 48 + 3] = (s->sys->pix_fmt == PIX_FMT_YUV422P) ? 0x4 : 0x0;
1121 return s->sys->frame_size;
/* NOTE(review): only the signature is visible in this excerpt; presumably the
   codec's close handler -- confirm against the full file. */
1124 static int dvvideo_close(AVCodecContext *c)
/* Encoder registration, compiled only when CONFIG_DVVIDEO_ENCODER is defined.
   Only two initializer fields are visible in this excerpt: the private
   context size and the encode function. */
1131 #ifdef CONFIG_DVVIDEO_ENCODER
1132 AVCodec dvvideo_encoder = {
1136 sizeof(DVVideoContext),
1138 dvvideo_encode_frame,
1144 #endif // CONFIG_DVVIDEO_ENCODER
/* Decoder registration. Only two initializer fields are visible in this
   excerpt: the private context size and the decode function. */
1146 AVCodec dvvideo_decoder = {
1150 sizeof(DVVideoContext),
1154 dvvideo_decode_frame,