4 * Copyright (c) 2012 Konstantin Shishkov
6 * This file is part of Libav.
8 * Libav is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
13 * Libav is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with Libav; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23 #include "libavutil/opt.h"
24 #include "libavutil/pixdesc.h"
28 #include "bytestream.h"
30 #include "proresdata.h"
/* Chroma format codes stored in ctx->chroma_factor; encode_frame() shifts the
 * value left by 6 into the frame flags byte, and CFACTOR_Y444 selects the
 * full-width chroma paths. */
32 #define CFACTOR_Y422 2
33 #define CFACTOR_Y444 3
/* Largest slice width in macroblocks: sizes the coefficient buffers and is
 * the upper limit of the "mbs_per_slice" option. */
35 #define MAX_MBS_PER_SLICE 8
/* Profile identifiers: ctx->profile holds one of these, encode_init() uses it
 * as an index into prores_profile_info, and the "profile" option constants
 * map onto them.
 * NOTE(review): only part of the enumeration is visible in this listing. */
40 PRORES_PROFILE_PROXY = 0,
42 PRORES_PROFILE_STANDARD,
/*
 * Quantisation matrices, 64 entries each. encode_init() points
 * ctx->quant_mat at one row, chosen by ctx->quant_sel or, when that is -1,
 * by the .quant field of the active profile.
 * NOTE(review): the per-matrix designators are not visible in this listing;
 * rows are presumably indexed by the QUANT_MAT_* constants - confirm.
 */
55 static const uint8_t prores_quant_matrices[][64] = {
57 4, 7, 9, 11, 13, 14, 15, 63,
58 7, 7, 11, 12, 14, 15, 63, 63,
59 9, 11, 13, 14, 15, 63, 63, 63,
60 11, 11, 13, 14, 63, 63, 63, 63,
61 11, 13, 14, 63, 63, 63, 63, 63,
62 13, 14, 63, 63, 63, 63, 63, 63,
63 13, 63, 63, 63, 63, 63, 63, 63,
64 63, 63, 63, 63, 63, 63, 63, 63,
67 4, 5, 6, 7, 9, 11, 13, 15,
68 5, 5, 7, 8, 11, 13, 15, 17,
69 6, 7, 9, 11, 13, 15, 15, 17,
70 7, 7, 9, 11, 13, 15, 17, 19,
71 7, 9, 11, 13, 14, 16, 19, 23,
72 9, 11, 13, 14, 16, 19, 23, 29,
73 9, 11, 13, 15, 17, 21, 28, 35,
74 11, 13, 16, 17, 21, 28, 35, 41,
77 4, 4, 5, 5, 6, 7, 7, 9,
78 4, 4, 5, 6, 7, 7, 9, 9,
79 5, 5, 6, 7, 7, 9, 9, 10,
80 5, 5, 6, 7, 7, 9, 9, 10,
81 5, 6, 7, 7, 8, 9, 10, 12,
82 6, 7, 7, 8, 9, 10, 12, 15,
83 6, 7, 7, 9, 10, 11, 14, 17,
84 7, 7, 9, 10, 11, 14, 17, 21,
87 4, 4, 4, 4, 4, 4, 4, 4,
88 4, 4, 4, 4, 4, 4, 4, 4,
89 4, 4, 4, 4, 4, 4, 4, 4,
90 4, 4, 4, 4, 4, 4, 4, 5,
91 4, 4, 4, 4, 4, 4, 5, 5,
92 4, 4, 4, 4, 4, 5, 5, 6,
93 4, 4, 4, 4, 5, 5, 6, 7,
94 4, 4, 4, 4, 5, 6, 7, 7,
97 4, 4, 4, 4, 4, 4, 4, 4,
98 4, 4, 4, 4, 4, 4, 4, 4,
99 4, 4, 4, 4, 4, 4, 4, 4,
100 4, 4, 4, 4, 4, 4, 4, 4,
101 4, 4, 4, 4, 4, 4, 4, 4,
102 4, 4, 4, 4, 4, 4, 4, 4,
103 4, 4, 4, 4, 4, 4, 4, 4,
104 4, 4, 4, 4, 4, 4, 4, 4,
/*
 * Macroblock-count thresholds. When bits_per_mb is not set, encode_init()
 * compares mb_width * mb_height * pictures_per_frame against these entries
 * to pick the index used to read the profile's br_tab.
 */
108 #define NUM_MB_LIMITS 4
109 static const int prores_mb_limits[NUM_MB_LIMITS] = {
110 1620, // up to 720x576
111 2700, // up to 960x720
112 6075, // up to 1440x1080
113 9216, // up to 2048x1152
/*
 * Per-profile parameters, one entry per profile; encode_init() selects the
 * entry with ctx->profile. Fields read elsewhere in this file:
 *   .tag                  - copied to avctx->codec_tag
 *   .min_quant/.max_quant - quantiser range searched by find_slice_quant()
 *   .br_tab               - bits per macroblock, one value per
 *                           prores_mb_limits threshold
 *   .quant                - default row of prores_quant_matrices
 * NOTE(review): several member declarations and initialisers are not visible
 * in this listing.
 */
116 static const struct prores_profile {
117 const char *full_name;
121 int br_tab[NUM_MB_LIMITS];
123 } prores_profile_info[5] = {
125 .full_name = "proxy",
126 .tag = MKTAG('a', 'p', 'c', 'o'),
129 .br_tab = { 300, 242, 220, 194 },
130 .quant = QUANT_MAT_PROXY,
134 .tag = MKTAG('a', 'p', 'c', 's'),
137 .br_tab = { 720, 560, 490, 440 },
138 .quant = QUANT_MAT_LT,
141 .full_name = "standard",
142 .tag = MKTAG('a', 'p', 'c', 'n'),
145 .br_tab = { 1050, 808, 710, 632 },
146 .quant = QUANT_MAT_STANDARD,
149 .full_name = "high quality",
150 .tag = MKTAG('a', 'p', 'c', 'h'),
153 .br_tab = { 1566, 1216, 1070, 950 },
154 .quant = QUANT_MAT_HQ,
158 .tag = MKTAG('a', 'p', '4', 'h'),
161 .br_tab = { 2350, 1828, 1600, 1425 },
162 .quant = QUANT_MAT_HQ,
/* Number of trellis nodes reserved per slice column; also the size of the
 * per-quantiser slice_bits/slice_score arrays in find_slice_quant(). */
166 #define TRELLIS_WIDTH 16
/* Saturation value for trellis scores.
 * NOTE(review): the expansion is not parenthesised; the visible uses (a
 * comparison and a plain assignment) are unaffected, but other expression
 * contexts could bind differently. */
167 #define SCORE_LIMIT INT_MAX / 2
/* Number of rows in ctx->quants; quantisers at or above this value use a
 * matrix computed on the fly instead. */
176 #define MAX_STORED_Q 16
/*
 * Per-thread scratch data for the quantiser search; find_quant_thread()
 * picks the instance at ctx->tdata + threadnr.
 */
178 typedef struct ProresThreadData {
/* coefficients of one slice, per plane: 64 per block, 4 blocks per MB */
179 DECLARE_ALIGNED(16, int16_t, blocks)[MAX_PLANES][64 * 4 * MAX_MBS_PER_SLICE];
/* 16x16 sample buffer passed to get_slice_data() for edge macroblocks */
180 DECLARE_ALIGNED(16, uint16_t, emu_buf)[16 * 16];
181 int16_t custom_q[64];
/* trellis nodes, allocated per thread in encode_init() */
182 struct TrellisNode *nodes;
/*
 * Encoder private context (avctx->priv_data).
 * NOTE(review): a number of members referenced elsewhere in this file are
 * declared on lines not visible in this listing.
 */
185 typedef struct ProresContext {
/* slice coefficient and edge-emulation buffers used by encode_slice() */
187 DECLARE_ALIGNED(16, int16_t, blocks)[MAX_PLANES][64 * 4 * MAX_MBS_PER_SLICE];
188 DECLARE_ALIGNED(16, uint16_t, emu_buf)[16*16];
/* precomputed quant_mat[j] * q; row 0 holds the forced-quantiser matrix */
189 int16_t quants[MAX_STORED_Q][64];
/* matrix filled by encode_slice() when quant >= MAX_STORED_Q */
190 int16_t custom_q[64];
191 const uint8_t *quant_mat;
/* progressive or interlaced scan order, chosen in encode_init() */
192 const uint8_t *scantable;
/* forward transform callback; encode_init() sets it to prores_fdct */
194 void (*fdct)(FDCTDSPContext *fdsp, const uint16_t *src,
195 int linesize, int16_t *block);
/* picture size in 16-sample macroblock units */
198 int mb_width, mb_height;
200 int num_chroma_blocks, chroma_factor;
202 int slices_per_picture;
203 int pictures_per_frame; // 1 for progressive, 2 for interlaced
/* packet size requested by encode_frame(); increased there when a packet
 * has to be grown */
214 int frame_size_upper_bound;
217 const struct prores_profile *profile_info;
/* one ProresThreadData per avctx->thread_count, allocated in encode_init() */
221 ProresThreadData *tdata;
/*
 * Transform the macroblocks of one slice of one plane into coefficient
 * blocks via ctx->fdct.
 *
 * Visible behaviour: the loop walks mbs_per_slice macroblocks, advancing src
 * by one macroblock width per iteration. A macroblock lying fully inside the
 * w x h plane is read with the caller's linesize; otherwise samples are
 * copied into emu_buf, the last valid column is replicated to the right and
 * the last valid row is copied into further buffer rows.
 *
 * NOTE(review): several lines of this function (braces, local declarations,
 * the conditions guarding the memset and the two fdct call orderings, and
 * the pointer advances between fdct calls) are not visible in this listing;
 * the comments describe only the statements shown.
 */
224 static void get_slice_data(ProresContext *ctx, const uint16_t *src,
225 int linesize, int x, int y, int w, int h,
226 int16_t *blocks, uint16_t *emu_buf,
227 int mbs_per_slice, int blocks_per_mb, int is_chroma)
229 const uint16_t *esrc;
/* macroblock width in samples: 16 for 4 blocks per MB, 8 for 2 */
230 const int mb_width = 4 * blocks_per_mb;
234 for (i = 0; i < mbs_per_slice; i++, src += mb_width) {
/* zero the coefficients of this and all remaining macroblocks */
236 memset(blocks, 0, 64 * (mbs_per_slice - i) * blocks_per_mb
240 if (x + mb_width <= w && y + 16 <= h) {
242 elinesize = linesize;
/* emu_buf rows are 16 samples wide; elinesize is expressed in bytes */
247 elinesize = 16 * sizeof(*emu_buf);
/* number of valid columns / rows available for this macroblock */
249 bw = FFMIN(w - x, mb_width);
250 bh = FFMIN(h - y, 16);
252 for (j = 0; j < bh; j++) {
253 memcpy(emu_buf + j * 16,
254 (const uint8_t*)src + j * linesize,
256 pix = emu_buf[j * 16 + bw - 1];
257 for (k = bw; k < mb_width; k++)
258 emu_buf[j * 16 + k] = pix;
261 memcpy(emu_buf + j * 16,
262 emu_buf + (bh - 1) * 16,
263 mb_width * sizeof(*emu_buf));
266 ctx->fdct(&ctx->fdsp, esrc, elinesize, blocks);
268 if (blocks_per_mb > 2) {
269 ctx->fdct(&ctx->fdsp, esrc + 8, elinesize, blocks);
/* esrc counts uint16_t samples while elinesize is in bytes, so an offset of
 * elinesize * 4 samples is 8 rows down */
272 ctx->fdct(&ctx->fdsp, esrc + elinesize * 4, elinesize, blocks);
274 if (blocks_per_mb > 2) {
275 ctx->fdct(&ctx->fdsp, esrc + elinesize * 4 + 8, elinesize, blocks);
279 ctx->fdct(&ctx->fdsp, esrc, elinesize, blocks);
281 ctx->fdct(&ctx->fdsp, esrc + elinesize * 4, elinesize, blocks);
283 if (blocks_per_mb > 2) {
284 ctx->fdct(&ctx->fdsp, esrc + 8, elinesize, blocks);
286 ctx->fdct(&ctx->fdsp, esrc + elinesize * 4 + 8, elinesize, blocks);
/*
 * Copy the alpha samples of one slice into blocks as a
 * (16 * mbs_per_slice) x 16 array.
 *
 * Up to copy_w samples on up to copy_h rows are taken from src; each row is
 * padded on the right with its last copied sample, and rows beyond copy_h
 * are filled by copying the row above, up to 16 rows.
 *
 * NOTE(review): the branch on abits and the body of the first per-sample
 * loop are not visible in this listing.
 */
295 static void get_alpha_data(ProresContext *ctx, const uint16_t *src,
296 int linesize, int x, int y, int w, int h,
297 int16_t *blocks, int mbs_per_slice, int abits)
299 const int slice_width = 16 * mbs_per_slice;
300 int i, j, copy_w, copy_h;
/* clip the copied area to what remains of the plane */
302 copy_w = FFMIN(w - x, slice_width);
303 copy_h = FFMIN(h - y, 16);
304 for (i = 0; i < copy_h; i++) {
305 memcpy(blocks, src, copy_w * sizeof(*src));
307 for (j = 0; j < copy_w; j++)
/* widen the sample: shift left by 6 and fill the low bits from the high bits */
310 for (j = 0; j < copy_w; j++)
311 blocks[j] = (blocks[j] << 6) | (blocks[j] >> 4);
312 for (j = copy_w; j < slice_width; j++)
313 blocks[j] = blocks[copy_w - 1];
314 blocks += slice_width;
/* linesize is in bytes, src counts 16-bit samples */
315 src += linesize >> 1;
317 for (; i < 16; i++) {
318 memcpy(blocks, blocks - slice_width, slice_width * sizeof(*blocks));
319 blocks += slice_width;
/*
 * The codebook byte packs three fields: bits 0-1 hold switch_bits - 1,
 * bits 2-4 the exp-Golomb order and bits 5 and up the Rice order. Values
 * below switch_bits << rice_order take the Rice branch, larger values the
 * exp-Golomb branch.
 * NOTE(review): some statements of the Rice branch are not visible in this
 * listing.
 */
324 * Write an unsigned rice/exp golomb codeword.
326 static inline void encode_vlc_codeword(PutBitContext *pb, unsigned codebook, int val)
328 unsigned int rice_order, exp_order, switch_bits, switch_val;
331 /* number of prefix bits to switch between Rice and expGolomb */
332 switch_bits = (codebook & 3) + 1;
333 rice_order = codebook >> 5; /* rice code order */
334 exp_order = (codebook >> 2) & 7; /* exp golomb code order */
336 switch_val = switch_bits << rice_order;
338 if (val >= switch_val) {
/* exp-Golomb: rebase val, then emit a zero prefix followed by the
 * exponent + 1 significant bits of val */
339 val -= switch_val - (1 << exp_order);
340 exponent = av_log2(val);
342 put_bits(pb, exponent - exp_order + switch_bits, 0);
343 put_bits(pb, exponent + 1, val);
/* Rice: the quotient is sent as a run of zero bits, the remainder in
 * rice_order bits */
345 exponent = val >> rice_order;
348 put_bits(pb, exponent, 0);
351 put_sbits(pb, rice_order, val);
/* GET_SIGN shifts its argument right by 31 bits; for a negative 32-bit int
 * with an arithmetic right shift this yields -1, otherwise 0.
 * MAKE_CODE doubles the value and XORs it with that sign mask.
 * Both macros evaluate their argument more than once or in shifted form, so
 * arguments should be side-effect free. */
355 #define GET_SIGN(x) ((x) >> 31)
356 #define MAKE_CODE(x) (((x) << 1) ^ GET_SIGN(x))
/*
 * Write the DC coefficients of a slice.
 *
 * Each DC has 0x4000 subtracted and is divided by scale. The first value is
 * coded with FIRST_DC_CB; the loop codes the difference to the previous DC,
 * conditionally negated according to sign, and derives the codebook index
 * for the next block from the code just written (capped at 3).
 *
 * NOTE(review): the lines that initialise and update sign and prev_dc are
 * not visible in this listing.
 */
358 static void encode_dcs(PutBitContext *pb, int16_t *blocks,
359 int blocks_per_slice, int scale)
362 int codebook = 3, code, dc, prev_dc, delta, sign, new_sign;
364 prev_dc = (blocks[0] - 0x4000) / scale;
365 encode_vlc_codeword(pb, FIRST_DC_CB, MAKE_CODE(prev_dc));
370 for (i = 1; i < blocks_per_slice; i++, blocks += 64) {
371 dc = (blocks[0] - 0x4000) / scale;
372 delta = dc - prev_dc;
373 new_sign = GET_SIGN(delta);
/* negate delta when sign is -1, leave it unchanged when sign is 0 */
374 delta = (delta ^ sign) - sign;
375 code = MAKE_CODE(delta);
376 encode_vlc_codeword(pb, ff_prores_dc_codebook[codebook], code);
/* next codebook index: half the code rounded up, at most 3 */
377 codebook = (code + (code & 1)) >> 1;
378 codebook = FFMIN(codebook, 3);
/*
 * Write the AC coefficients of a slice as run / level / sign codewords.
 *
 * Coefficients are visited scan position by scan position (1..63) and, for
 * each position, across all blocks of the slice (idx advances by 64). Each
 * coefficient is divided by the qmat entry for its position. The codebooks
 * for the next run and level are chosen from the previous run (capped at 15)
 * and absolute level (capped at 9).
 *
 * NOTE(review): the zero-level test, the run counter updates and the level
 * argument of the second codeword call are not visible in this listing.
 */
384 static void encode_acs(PutBitContext *pb, int16_t *blocks,
385 int blocks_per_slice,
386 int plane_size_factor,
387 const uint8_t *scan, const int16_t *qmat)
390 int run, level, run_cb, lev_cb;
391 int max_coeffs, abs_level;
/* total number of coefficients in the slice: 64 per block */
393 max_coeffs = blocks_per_slice << 6;
/* initial codebooks */
394 run_cb = ff_prores_run_to_cb_index[4];
395 lev_cb = ff_prores_lev_to_cb_index[2];
398 for (i = 1; i < 64; i++) {
399 for (idx = scan[i]; idx < max_coeffs; idx += 64) {
400 level = blocks[idx] / qmat[scan[i]];
402 abs_level = FFABS(level);
403 encode_vlc_codeword(pb, ff_prores_ac_codebook[run_cb], run);
404 encode_vlc_codeword(pb, ff_prores_ac_codebook[lev_cb],
/* one sign bit taken from GET_SIGN(level) */
406 put_sbits(pb, 1, GET_SIGN(level));
408 run_cb = ff_prores_run_to_cb_index[FFMIN(run, 15)];
409 lev_cb = ff_prores_lev_to_cb_index[FFMIN(abs_level, 9)];
/*
 * Encode one plane of a slice: DC coefficients (scaled by qmat[0]) followed
 * by AC coefficients in ctx->scantable order.
 *
 * Returns the size of the written data, computed as the change in
 * put_bits_count(pb) shifted right by 3 (bits to bytes).
 * NOTE(review): the end of the parameter list and any statement between the
 * AC pass and the return are not visible in this listing.
 */
418 static int encode_slice_plane(ProresContext *ctx, PutBitContext *pb,
419 const uint16_t *src, int linesize,
420 int mbs_per_slice, int16_t *blocks,
421 int blocks_per_mb, int plane_size_factor,
424 int blocks_per_slice, saved_pos;
426 saved_pos = put_bits_count(pb);
427 blocks_per_slice = mbs_per_slice * blocks_per_mb;
429 encode_dcs(pb, blocks, blocks_per_slice, qmat[0]);
430 encode_acs(pb, blocks, blocks_per_slice, plane_size_factor,
431 ctx->scantable, qmat);
434 return (put_bits_count(pb) - saved_pos) >> 3;
/*
 * Write the difference between the current and the previous alpha value.
 *
 * A difference outside [-dsize, dsize], or equal to zero, is written in
 * full as abits bits; any other difference is written as dbits - 1 bits of
 * (magnitude - 1) followed by one sign bit.
 *
 * NOTE(review): the wrap-around adjustments of diff and any flag bits that
 * distinguish the two forms are not visible in this listing.
 */
437 static void put_alpha_diff(PutBitContext *pb, int cur, int prev, int abits)
439 const int mask = (1 << abits) - 1;
/* short-form width: 4 bits for 8-bit alpha, 7 bits otherwise */
440 const int dbits = (abits == 8) ? 4 : 7;
/* '-' binds tighter than '<<', so this is 1 << (dbits - 1) */
441 const int dsize = 1 << dbits - 1;
442 int diff = cur - prev;
445 if (diff >= (1 << abits) - dsize)
447 if (diff < -dsize || diff > dsize || !diff) {
449 put_bits(pb, abits, diff);
452 put_bits(pb, dbits - 1, FFABS(diff) - 1);
453 put_bits(pb, 1, diff < 0);
/*
 * Write a run length for the alpha coder; the visible statements emit the
 * run either as a 4-bit or as a 15-bit field.
 * NOTE(review): the conditions selecting between the forms, and any prefix
 * bits, are not visible in this listing.
 */
457 static void put_alpha_run(PutBitContext *pb, int run)
462 put_bits(pb, 4, run);
464 put_bits(pb, 15, run);
/*
 * Encode the alpha samples of one slice (mbs_per_slice * 256 values) as
 * alternating value differences (put_alpha_diff) and run lengths
 * (put_alpha_run). The predictor prev starts at the all-ones value for
 * abits bits.
 *
 * Returns the written size: the change in put_bits_count(pb) shifted right
 * by 3.
 * NOTE(review): the run counting logic and the end of the parameter list
 * are not visible in this listing.
 */
470 // todo alpha quantisation for high quants
471 static int encode_alpha_plane(ProresContext *ctx, PutBitContext *pb,
472 int mbs_per_slice, uint16_t *blocks,
475 const int abits = ctx->alpha_bits;
476 const int mask = (1 << abits) - 1;
477 const int num_coeffs = mbs_per_slice * 256;
478 int saved_pos = put_bits_count(pb);
479 int prev = mask, cur;
484 put_alpha_diff(pb, cur, prev, abits);
489 put_alpha_run (pb, run);
490 put_alpha_diff(pb, cur, prev, abits);
496 } while (idx < num_coeffs);
498 put_alpha_run(pb, run);
500 return (put_bits_count(pb) - saved_pos) >> 3;
/*
 * Encode all planes of one slice into pb and store the per-plane sizes in
 * sizes[].
 *
 * Quantiser matrix: ctx->quants[0] when a quantiser is forced,
 * ctx->quants[quant] for quant < MAX_STORED_Q, otherwise ctx->custom_q is
 * filled with quant_mat[i] * quant. For each plane the source pointer is
 * derived from pic->data/linesize (the stride is multiplied by
 * pictures_per_frame and line_add rows are skipped), the samples are turned
 * into ctx->blocks[0] by get_slice_data() or get_alpha_data(), and the
 * plane is written by encode_slice_plane() or encode_alpha_plane().
 * An error is logged when pb has been overrun.
 *
 * NOTE(review): parts of the parameter list, the per-plane geometry
 * (xp/yp/num_cblocks), the branch choosing between picture and alpha
 * coding, and the return statements are not visible in this listing.
 */
503 static int encode_slice(AVCodecContext *avctx, const AVFrame *pic,
505 int sizes[4], int x, int y, int quant,
508 ProresContext *ctx = avctx->priv_data;
512 int slice_width_factor = av_log2(mbs_per_slice);
513 int num_cblocks, pwidth, linesize, line_add;
514 int plane_factor, is_chroma;
517 if (ctx->pictures_per_frame == 1)
/* row offset of the current picture within the frame, derived from the
 * picture index and pic->top_field_first */
520 line_add = ctx->cur_picture_idx ^ !pic->top_field_first;
522 if (ctx->force_quant) {
523 qmat = ctx->quants[0];
524 } else if (quant < MAX_STORED_Q) {
525 qmat = ctx->quants[quant];
527 qmat = ctx->custom_q;
528 for (i = 0; i < 64; i++)
529 qmat[i] = ctx->quant_mat[i] * quant;
532 for (i = 0; i < ctx->num_planes; i++) {
/* planes 1 and 2 are chroma */
533 is_chroma = (i == 1 || i == 2);
534 plane_factor = slice_width_factor + 2;
536 plane_factor += ctx->chroma_factor - 3;
537 if (!is_chroma || ctx->chroma_factor == CFACTOR_Y444) {
541 pwidth = avctx->width;
546 pwidth = avctx->width >> 1;
/* step over pictures_per_frame source rows per coded row */
549 linesize = pic->linesize[i] * ctx->pictures_per_frame;
550 src = (const uint16_t*)(pic->data[i] + yp * linesize +
551 line_add * pic->linesize[i]) + xp;
554 get_slice_data(ctx, src, linesize, xp, yp,
555 pwidth, avctx->height / ctx->pictures_per_frame,
556 ctx->blocks[0], ctx->emu_buf,
557 mbs_per_slice, num_cblocks, is_chroma);
558 sizes[i] = encode_slice_plane(ctx, pb, src, linesize,
559 mbs_per_slice, ctx->blocks[0],
560 num_cblocks, plane_factor,
563 get_alpha_data(ctx, src, linesize, xp, yp,
564 pwidth, avctx->height / ctx->pictures_per_frame,
565 ctx->blocks[0], mbs_per_slice, ctx->alpha_bits);
566 sizes[i] = encode_alpha_plane(ctx, pb, mbs_per_slice,
567 ctx->blocks[0], quant);
569 total_size += sizes[i];
/* a negative remainder means the writer ran past the end of the buffer */
570 if (put_bits_left(pb) < 0) {
571 av_log(avctx, AV_LOG_ERROR,
572 "Underestimated required buffer size.\n");
/*
 * Return a codeword length in bits for val under the given codebook,
 * without writing anything. The codebook fields are unpacked exactly as in
 * encode_vlc_codeword(); the exp-Golomb result equals the sum of the two
 * put_bits() widths used there (zero prefix plus exponent + 1 value bits).
 */
579 static inline int estimate_vlc(unsigned codebook, int val)
581 unsigned int rice_order, exp_order, switch_bits, switch_val;
584 /* number of prefix bits to switch between Rice and expGolomb */
585 switch_bits = (codebook & 3) + 1;
586 rice_order = codebook >> 5; /* rice code order */
587 exp_order = (codebook >> 2) & 7; /* exp golomb code order */
589 switch_val = switch_bits << rice_order;
591 if (val >= switch_val) {
592 val -= switch_val - (1 << exp_order);
593 exponent = av_log2(val);
595 return exponent * 2 - exp_order + switch_bits + 1;
/* Rice form: quotient, plus rice_order, plus one more bit */
597 return (val >> rice_order) + rice_order + 1;
/*
 * Estimate the cost of the DC coefficients of a slice.
 *
 * Follows the same steps as encode_dcs() but sums estimate_vlc() lengths
 * into bits instead of writing, and adds to *error the remainder of each
 * |DC - 0x4000| divided by scale.
 *
 * NOTE(review): the end of the parameter list, the sign/prev_dc updates and
 * the return statement are not visible in this listing; the function
 * presumably returns bits.
 */
601 static int estimate_dcs(int *error, int16_t *blocks, int blocks_per_slice,
605 int codebook = 3, code, dc, prev_dc, delta, sign, new_sign;
608 prev_dc = (blocks[0] - 0x4000) / scale;
609 bits = estimate_vlc(FIRST_DC_CB, MAKE_CODE(prev_dc));
613 *error += FFABS(blocks[0] - 0x4000) % scale;
615 for (i = 1; i < blocks_per_slice; i++, blocks += 64) {
616 dc = (blocks[0] - 0x4000) / scale;
617 *error += FFABS(blocks[0] - 0x4000) % scale;
618 delta = dc - prev_dc;
619 new_sign = GET_SIGN(delta);
/* negate delta when sign is -1, leave it unchanged when sign is 0 */
620 delta = (delta ^ sign) - sign;
621 code = MAKE_CODE(delta);
622 bits += estimate_vlc(ff_prores_dc_codebook[codebook], code);
623 codebook = (code + (code & 1)) >> 1;
624 codebook = FFMIN(codebook, 3);
/*
 * Estimate the cost of the AC coefficients of a slice.
 *
 * Uses the same traversal and codebook adaptation as encode_acs() but sums
 * estimate_vlc() lengths into bits, and adds to *error the remainder of
 * each |coefficient| divided by its qmat entry.
 *
 * NOTE(review): the zero-level test, run counter updates, sign-bit
 * accounting and the return statement are not visible in this listing; the
 * function presumably returns bits.
 */
632 static int estimate_acs(int *error, int16_t *blocks, int blocks_per_slice,
633 int plane_size_factor,
634 const uint8_t *scan, const int16_t *qmat)
637 int run, level, run_cb, lev_cb;
638 int max_coeffs, abs_level;
641 max_coeffs = blocks_per_slice << 6;
642 run_cb = ff_prores_run_to_cb_index[4];
643 lev_cb = ff_prores_lev_to_cb_index[2];
646 for (i = 1; i < 64; i++) {
647 for (idx = scan[i]; idx < max_coeffs; idx += 64) {
648 level = blocks[idx] / qmat[scan[i]];
649 *error += FFABS(blocks[idx]) % qmat[scan[i]];
651 abs_level = FFABS(level);
652 bits += estimate_vlc(ff_prores_ac_codebook[run_cb], run);
653 bits += estimate_vlc(ff_prores_ac_codebook[lev_cb],
656 run_cb = ff_prores_run_to_cb_index[FFMIN(run, 15)];
657 lev_cb = ff_prores_lev_to_cb_index[FFMIN(abs_level, 9)];
/*
 * Estimate the coded size in bits of one plane of a slice, using the
 * coefficients already stored in td->blocks[plane]. DC and AC estimates are
 * summed and the total is rounded up to a multiple of 8 bits; quantisation
 * remainders are accumulated into *error by the callees.
 */
668 static int estimate_slice_plane(ProresContext *ctx, int *error, int plane,
669 const uint16_t *src, int linesize,
671 int blocks_per_mb, int plane_size_factor,
672 const int16_t *qmat, ProresThreadData *td)
674 int blocks_per_slice;
677 blocks_per_slice = mbs_per_slice * blocks_per_mb;
679 bits = estimate_dcs(error, td->blocks[plane], blocks_per_slice, qmat[0]);
680 bits += estimate_acs(error, td->blocks[plane], blocks_per_slice,
681 plane_size_factor, ctx->scantable, qmat);
/* whole bytes are used per plane */
683 return FFALIGN(bits, 8);
/*
 * Bit-cost counterpart of put_alpha_diff(): performs the same range test on
 * cur - prev.
 * NOTE(review): the diff adjustments and both return statements are not
 * visible in this listing, so the returned values cannot be documented
 * from here.
 */
686 static int est_alpha_diff(int cur, int prev, int abits)
688 const int mask = (1 << abits) - 1;
689 const int dbits = (abits == 8) ? 4 : 7;
/* '-' binds tighter than '<<', so this is 1 << (dbits - 1) */
690 const int dsize = 1 << dbits - 1;
691 int diff = cur - prev;
694 if (diff >= (1 << abits) - dsize)
696 if (diff < -dsize || diff > dsize || !diff)
/*
 * Estimate the coded size of the alpha samples of one slice
 * (mbs_per_slice * 256 values) by summing est_alpha_diff() costs; the
 * predictor prev starts at the all-ones value for abits bits.
 * NOTE(review): the run-length accounting, the end of the parameter list
 * and the return statement are not visible in this listing.
 */
702 static int estimate_alpha_plane(ProresContext *ctx, int *error,
703 const uint16_t *src, int linesize,
704 int mbs_per_slice, int quant,
707 const int abits = ctx->alpha_bits;
708 const int mask = (1 << abits) - 1;
709 const int num_coeffs = mbs_per_slice * 256;
710 int prev = mask, cur;
717 bits = est_alpha_diff(cur, prev, abits);
728 bits += est_alpha_diff(cur, prev, abits);
734 } while (idx < num_coeffs);
/*
 * Evaluate candidate quantisers for one slice and extend the rate-control
 * trellis by one column starting at node index trellis_node.
 *
 * Visible steps:
 *  1. Transform every plane of the slice into td->blocks[].
 *  2. Reset the nodes of this column for q in [min_quant, max_quant + 1].
 *  3. For each q in [min_quant, max_quant], estimate bits and error and
 *     store them in slice_bits[q] / slice_score[q].
 *  4. Fill the extra slot max_quant + 1: reuse the max_quant result when it
 *     fits in bits_per_mb * mbs_per_slice, otherwise try larger quantisers
 *     up to 127; the quantiser chosen for this slot is recorded as
 *     overquant.
 *  5. For every node of the previous column (TRELLIS_WIDTH nodes earlier)
 *     and every q, accumulate bits and score and keep, per node of this
 *     column, the transition with the lowest score. Scores saturate at
 *     SCORE_LIMIT.
 *  6. Select the node of this column with the lowest score; ties go to the
 *     higher index because the comparison is "<=".
 *
 * slice_bits/slice_score are indexed up to max_quant + 1, so the profile's
 * max_quant is assumed to be below TRELLIS_WIDTH - 1 - TODO confirm.
 *
 * NOTE(review): many lines (geometry setup, loop-exit statements, handling
 * of over-budget paths, the return value) are not visible in this listing.
 */
746 static int find_slice_quant(AVCodecContext *avctx, const AVFrame *pic,
747 int trellis_node, int x, int y, int mbs_per_slice,
748 ProresThreadData *td)
750 ProresContext *ctx = avctx->priv_data;
751 int i, q, pq, xp, yp;
753 int slice_width_factor = av_log2(mbs_per_slice);
754 int num_cblocks[MAX_PLANES], pwidth;
755 int plane_factor[MAX_PLANES], is_chroma[MAX_PLANES];
756 const int min_quant = ctx->profile_info->min_quant;
757 const int max_quant = ctx->profile_info->max_quant;
758 int error, bits, bits_limit;
759 int mbs, prev, cur, new_score;
760 int slice_bits[TRELLIS_WIDTH], slice_score[TRELLIS_WIDTH];
763 int linesize[4], line_add;
765 if (ctx->pictures_per_frame == 1)
768 line_add = ctx->cur_picture_idx ^ !pic->top_field_first;
/* macroblocks of this row covered up to and including this slice */
769 mbs = x + mbs_per_slice;
771 for (i = 0; i < ctx->num_planes; i++) {
772 is_chroma[i] = (i == 1 || i == 2);
773 plane_factor[i] = slice_width_factor + 2;
775 plane_factor[i] += ctx->chroma_factor - 3;
776 if (!is_chroma[i] || ctx->chroma_factor == CFACTOR_Y444) {
780 pwidth = avctx->width;
785 pwidth = avctx->width >> 1;
788 linesize[i] = pic->linesize[i] * ctx->pictures_per_frame;
789 src = (const uint16_t*)(pic->data[i] + yp * linesize[i] +
790 line_add * pic->linesize[i]) + xp;
793 get_slice_data(ctx, src, linesize[i], xp, yp,
794 pwidth, avctx->height / ctx->pictures_per_frame,
795 td->blocks[i], td->emu_buf,
796 mbs_per_slice, num_cblocks[i], is_chroma[i]);
798 get_alpha_data(ctx, src, linesize[i], xp, yp,
799 pwidth, avctx->height / ctx->pictures_per_frame,
800 td->blocks[i], mbs_per_slice, ctx->alpha_bits);
/* reset this column, including the extra slot at max_quant + 1 */
804 for (q = min_quant; q < max_quant + 2; q++) {
805 td->nodes[trellis_node + q].prev_node = -1;
806 td->nodes[trellis_node + q].quant = q;
809 // todo: maybe perform coarser quantising to fit into frame size when needed
810 for (q = min_quant; q <= max_quant; q++) {
/* the alpha plane, when present, is the last plane and is estimated
 * separately from the picture planes */
813 for (i = 0; i < ctx->num_planes - !!ctx->alpha_bits; i++) {
814 bits += estimate_slice_plane(ctx, &error, i,
817 num_cblocks[i], plane_factor[i],
821 bits += estimate_alpha_plane(ctx, &error, src, linesize[3],
822 mbs_per_slice, q, td->blocks[3]);
823 if (bits > 65000 * 8) {
827 slice_bits[q] = bits;
828 slice_score[q] = error;
830 if (slice_bits[max_quant] <= ctx->bits_per_mb * mbs_per_slice) {
831 slice_bits[max_quant + 1] = slice_bits[max_quant];
832 slice_score[max_quant + 1] = slice_score[max_quant] + 1;
833 overquant = max_quant;
/* max_quant does not fit the budget: try coarser quantisers */
835 for (q = max_quant + 1; q < 128; q++) {
838 if (q < MAX_STORED_Q) {
839 qmat = ctx->quants[q];
842 for (i = 0; i < 64; i++)
843 qmat[i] = ctx->quant_mat[i] * q;
845 for (i = 0; i < ctx->num_planes - !!ctx->alpha_bits; i++) {
846 bits += estimate_slice_plane(ctx, &error, i,
849 num_cblocks[i], plane_factor[i],
853 bits += estimate_alpha_plane(ctx, &error, src, linesize[3],
854 mbs_per_slice, q, td->blocks[3]);
855 if (bits <= ctx->bits_per_mb * mbs_per_slice)
859 slice_bits[max_quant + 1] = bits;
860 slice_score[max_quant + 1] = error;
863 td->nodes[trellis_node + max_quant + 1].quant = overquant;
/* cumulative bit budget for the row up to the end of this slice */
865 bits_limit = mbs * ctx->bits_per_mb;
866 for (pq = min_quant; pq < max_quant + 2; pq++) {
867 prev = trellis_node - TRELLIS_WIDTH + pq;
869 for (q = min_quant; q < max_quant + 2; q++) {
870 cur = trellis_node + q;
872 bits = td->nodes[prev].bits + slice_bits[q];
873 error = slice_score[q];
874 if (bits > bits_limit)
/* add scores only while both are below SCORE_LIMIT, otherwise saturate */
877 if (td->nodes[prev].score < SCORE_LIMIT && error < SCORE_LIMIT)
878 new_score = td->nodes[prev].score + error;
880 new_score = SCORE_LIMIT;
881 if (td->nodes[cur].prev_node == -1 ||
882 td->nodes[cur].score >= new_score) {
884 td->nodes[cur].bits = bits;
885 td->nodes[cur].score = new_score;
886 td->nodes[cur].prev_node = prev;
/* pick the node of this column with the lowest score */
891 error = td->nodes[trellis_node + min_quant].score;
892 pq = trellis_node + min_quant;
893 for (q = min_quant + 1; q < max_quant + 2; q++) {
894 if (td->nodes[trellis_node + q].score <= error) {
895 error = td->nodes[trellis_node + q].score;
896 pq = trellis_node + q;
/*
 * Worker passed to avctx->execute2() by encode_frame(): chooses the
 * quantisers for one macroblock row.
 *
 * jobnr is used as the row index y and threadnr selects the per-thread
 * scratch data. Each slice of the row is handed to find_slice_quant() with
 * trellis column (mb + 1) * TRELLIS_WIDTH; afterwards the prev_node links
 * are followed backwards from the last result to fill ctx->slice_q for the
 * row.
 *
 * NOTE(review): the body of the while loop, the remaining arguments of the
 * find_slice_quant() call and the return statement are not visible in this
 * listing.
 */
903 static int find_quant_thread(AVCodecContext *avctx, void *arg,
904 int jobnr, int threadnr)
906 ProresContext *ctx = avctx->priv_data;
907 ProresThreadData *td = ctx->tdata + threadnr;
908 int mbs_per_slice = ctx->mbs_per_slice;
909 int x, y = jobnr, mb, q = 0;
911 for (x = mb = 0; x < ctx->mb_width; x += mbs_per_slice, mb++) {
912 while (ctx->mb_width - x < mbs_per_slice)
914 q = find_slice_quant(avctx, avctx->coded_frame,
915 (mb + 1) * TRELLIS_WIDTH, x, y,
/* walk the chosen path backwards, last slice first */
919 for (x = ctx->slices_width - 1; x >= 0; x--) {
920 ctx->slice_q[x + y * ctx->slices_width] = td->nodes[q].quant;
921 q = td->nodes[q].prev_node;
/*
 * Encode one frame into pkt.
 *
 * Visible steps:
 *  - copy *pic into avctx->coded_frame and mark it as an intra key frame;
 *  - allocate a packet of frame_size_upper_bound + FF_MIN_BUFFER_SIZE bytes;
 *  - write the frame container ID and the frame header (version, vendor,
 *    dimensions, flags, colour description, optional quant matrices), then
 *    write the header size back through tmp;
 *  - for each picture of the frame: write the picture header, reserve the
 *    slice size table, run the quantiser search through execute2 unless a
 *    quantiser is forced, and encode every slice;
 *  - before each slice, grow the packet when fewer than 2 * max_slice_size
 *    bytes would remain, and rebase all pointers into the packet;
 *  - back-patch slice sizes, picture size and frame size, then set
 *    pkt->size and the key-frame flag.
 *
 * NOTE(review): many lines (declarations, error returns, some pointer
 * initialisations, remaining call arguments, the final return) are not
 * visible in this listing.
 */
927 static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
928 const AVFrame *pic, int *got_packet)
930 ProresContext *ctx = avctx->priv_data;
931 uint8_t *orig_buf, *buf, *slice_hdr, *slice_sizes, *tmp;
932 uint8_t *picture_size_pos;
934 int x, y, i, mb, q = 0;
935 int sizes[4] = { 0 };
/* slice header: 2 bytes plus a 16-bit size for every plane but the last */
936 int slice_hdr_size = 2 + 2 * (ctx->num_planes - 1);
937 int frame_size, picture_size, slice_size;
938 int pkt_size, ret, max_slice_size = 0;
941 *avctx->coded_frame = *pic;
942 avctx->coded_frame->pict_type = AV_PICTURE_TYPE_I;
943 avctx->coded_frame->key_frame = 1;
945 pkt_size = ctx->frame_size_upper_bound;
947 if ((ret = ff_alloc_packet(pkt, pkt_size + FF_MIN_BUFFER_SIZE)) < 0) {
948 av_log(avctx, AV_LOG_ERROR, "Error getting output packet.\n");
952 orig_buf = pkt->data;
955 orig_buf += 4; // frame size
956 bytestream_put_be32 (&orig_buf, FRAME_ID); // frame container ID
961 buf += 2; // frame header size will be stored here
962 bytestream_put_be16 (&buf, 0); // version 1
963 bytestream_put_buffer(&buf, ctx->vendor, 4);
964 bytestream_put_be16 (&buf, avctx->width);
965 bytestream_put_be16 (&buf, avctx->height);
967 frame_flags = ctx->chroma_factor << 6;
968 if (avctx->flags & CODEC_FLAG_INTERLACED_DCT)
969 frame_flags |= pic->top_field_first ? 0x04 : 0x08;
970 bytestream_put_byte (&buf, frame_flags);
972 bytestream_put_byte (&buf, 0); // reserved
973 bytestream_put_byte (&buf, avctx->color_primaries);
974 bytestream_put_byte (&buf, avctx->color_trc);
975 bytestream_put_byte (&buf, avctx->colorspace);
976 bytestream_put_byte (&buf, 0x40 | (ctx->alpha_bits >> 3));
977 bytestream_put_byte (&buf, 0); // reserved
978 if (ctx->quant_sel != QUANT_MAT_DEFAULT) {
979 bytestream_put_byte (&buf, 0x03); // matrix flags - both matrices are present
980 // luma quantisation matrix
981 for (i = 0; i < 64; i++)
982 bytestream_put_byte(&buf, ctx->quant_mat[i]);
983 // chroma quantisation matrix
984 for (i = 0; i < 64; i++)
985 bytestream_put_byte(&buf, ctx->quant_mat[i]);
987 bytestream_put_byte (&buf, 0x00); // matrix flags - default matrices are used
989 bytestream_put_be16 (&tmp, buf - orig_buf); // write back frame header size
/* one pass per picture: pictures_per_frame is 1 or 2 */
991 for (ctx->cur_picture_idx = 0;
992 ctx->cur_picture_idx < ctx->pictures_per_frame;
993 ctx->cur_picture_idx++) {
/* the 32-bit picture size follows the one-byte header size */
995 picture_size_pos = buf + 1;
996 bytestream_put_byte (&buf, 0x40); // picture header size (in bits)
997 buf += 4; // picture data size will be stored here
998 bytestream_put_be16 (&buf, ctx->slices_per_picture);
999 bytestream_put_byte (&buf, av_log2(ctx->mbs_per_slice) << 4); // slice width and height in MBs
1001 // seek table - will be filled during slice encoding
1003 buf += ctx->slices_per_picture * 2;
1006 if (!ctx->force_quant) {
1007 ret = avctx->execute2(avctx, find_quant_thread, NULL, NULL,
1013 for (y = 0; y < ctx->mb_height; y++) {
1014 int mbs_per_slice = ctx->mbs_per_slice;
1015 for (x = mb = 0; x < ctx->mb_width; x += mbs_per_slice, mb++) {
1016 q = ctx->force_quant ? ctx->force_quant
1017 : ctx->slice_q[mb + y * ctx->slices_width];
/* halve the slice width until it fits the remaining macroblocks of the row */
1019 while (ctx->mb_width - x < mbs_per_slice)
1020 mbs_per_slice >>= 1;
1022 bytestream_put_byte(&buf, slice_hdr_size << 3);
1024 buf += slice_hdr_size - 1;
/* ensure room for this slice, judged by the largest slice seen so far */
1025 if (pkt_size <= buf - orig_buf + 2 * max_slice_size) {
1026 uint8_t *start = pkt->data;
1027 // Recompute new size according to max_slice_size
1029 int delta = 200 + ctx->pictures_per_frame *
1030 ctx->slices_per_picture * max_slice_size -
1033 delta = FFMAX(delta, 2 * max_slice_size);
1034 ctx->frame_size_upper_bound += delta;
1037 avpriv_request_sample(avctx,
1038 "Packet too small: is %i,"
1039 " needs %i (slice: %i). "
1040 "Correct allocation",
1041 pkt_size, delta, max_slice_size);
1045 ret = av_grow_packet(pkt, delta);
/* pkt->data may have changed: rebase every pointer relative to the old start */
1051 orig_buf = pkt->data + (orig_buf - start);
1052 buf = pkt->data + (buf - start);
1053 picture_size_pos = pkt->data + (picture_size_pos - start);
1054 slice_sizes = pkt->data + (slice_sizes - start);
1055 slice_hdr = pkt->data + (slice_hdr - start);
1056 tmp = pkt->data + (tmp - start);
1058 init_put_bits(&pb, buf, (pkt_size - (buf - orig_buf)) * 8);
1059 ret = encode_slice(avctx, pic, &pb, sizes, x, y, q,
/* fill in the slice header: quantiser, then sizes of all planes but the last */
1064 bytestream_put_byte(&slice_hdr, q);
1065 slice_size = slice_hdr_size + sizes[ctx->num_planes - 1];
1066 for (i = 0; i < ctx->num_planes - 1; i++) {
1067 bytestream_put_be16(&slice_hdr, sizes[i]);
1068 slice_size += sizes[i];
1070 bytestream_put_be16(&slice_sizes, slice_size);
1071 buf += slice_size - slice_hdr_size;
1072 if (max_slice_size < slice_size)
1073 max_slice_size = slice_size;
1077 if (ctx->pictures_per_frame == 1)
1078 picture_size = buf - picture_size_pos - 6;
1080 picture_size = buf - picture_size_pos + 1;
1081 bytestream_put_be32(&picture_size_pos, picture_size);
/* orig_buf still points at the 4-byte frame size field */
1085 frame_size = buf - orig_buf;
1086 bytestream_put_be32(&orig_buf, frame_size);
1088 pkt->size = frame_size;
1089 pkt->flags |= AV_PKT_FLAG_KEY;
/*
 * Release everything allocated by encode_init(): the coded frame, the
 * trellis nodes of every thread, the thread data array and the per-slice
 * quantiser table.
 * NOTE(review): any guard around the per-thread loop (ctx->tdata may be
 * NULL when encode_init() fails early) and the return statement are not
 * visible in this listing - confirm.
 */
1095 static av_cold int encode_close(AVCodecContext *avctx)
1097 ProresContext *ctx = avctx->priv_data;
1100 av_freep(&avctx->coded_frame);
1103 for (i = 0; i < avctx->thread_count; i++)
1104 av_free(ctx->tdata[i].nodes);
1106 av_freep(&ctx->tdata);
1107 av_freep(&ctx->slice_q);
/*
 * Forward transform callback installed in ctx->fdct. The visible part
 * copies an 8x8 block of 16-bit samples from src into block, row by row.
 * NOTE(review): the statements after the copy loop (presumably the actual
 * transform through fdsp) are not visible in this listing.
 */
1112 static void prores_fdct(FDCTDSPContext *fdsp, const uint16_t *src,
1113 int linesize, int16_t *block)
1116 const uint16_t *tsrc = src;
1118 for (y = 0; y < 8; y++) {
1119 for (x = 0; x < 8; x++)
1120 block[y * 8 + x] = tsrc[x];
/* linesize is in bytes, tsrc counts 16-bit samples */
1121 tsrc += linesize >> 1;
/*
 * Initialise the encoder.
 *
 * Visible steps:
 *  - allocate avctx->coded_frame and install the transform and scan table;
 *  - validate options: mbs_per_slice must be a power of two, alpha_bits a
 *    multiple of 8 (forced to 0 for formats without alpha), the vendor
 *    string exactly 4 characters long;
 *  - derive chroma format, plane count, macroblock and slice geometry;
 *  - select the quantisation matrix;
 *  - rate-controlled mode (force_quant == 0): choose bits_per_mb,
 *    precompute ctx->quants, allocate slice_q and the per-thread trellis
 *    data;
 *  - forced-quantiser mode: reject quantisers above 64, build
 *    ctx->quants[0] and derive bits_per_mb from it;
 *  - compute frame_size_upper_bound and set the codec tag.
 *
 * NOTE(review): the error returns taken after coded_frame has been
 * allocated (power-of-two check, alpha bits, vendor length, bits_per_mb,
 * quantiser limit) do not call encode_close() in the visible lines -
 * confirm whether the caller releases coded_frame in that case.
 * NOTE(review): several lines (declarations, else branches, loop exits,
 * the final return) are not visible in this listing.
 */
1126 static av_cold int encode_init(AVCodecContext *avctx)
1128 ProresContext *ctx = avctx->priv_data;
1131 int min_quant, max_quant;
1132 int interlaced = !!(avctx->flags & CODEC_FLAG_INTERLACED_DCT);
1134 avctx->bits_per_raw_sample = 10;
1135 avctx->coded_frame = av_frame_alloc();
1136 if (!avctx->coded_frame)
1137 return AVERROR(ENOMEM);
1139 ctx->fdct = prores_fdct;
1140 ctx->scantable = interlaced ? ff_prores_interlaced_scan
1141 : ff_prores_progressive_scan;
1142 ff_fdctdsp_init(&ctx->fdsp, avctx);
1144 mps = ctx->mbs_per_slice;
/* a power of two has no bit in common with itself minus one */
1145 if (mps & (mps - 1)) {
1146 av_log(avctx, AV_LOG_ERROR,
1147 "there should be an integer power of two MBs per slice\n");
1148 return AVERROR(EINVAL);
1150 if (av_pix_fmt_desc_get(avctx->pix_fmt)->flags & AV_PIX_FMT_FLAG_ALPHA) {
1151 if (ctx->alpha_bits & 7) {
1152 av_log(avctx, AV_LOG_ERROR, "alpha bits should be 0, 8 or 16\n");
1153 return AVERROR(EINVAL);
1156 ctx->alpha_bits = 0;
1159 ctx->chroma_factor = avctx->pix_fmt == AV_PIX_FMT_YUV422P10
1162 ctx->profile_info = prores_profile_info + ctx->profile;
1163 ctx->num_planes = 3 + !!ctx->alpha_bits;
/* picture size in macroblocks, rounded up */
1165 ctx->mb_width = FFALIGN(avctx->width, 16) >> 4;
1168 ctx->mb_height = FFALIGN(avctx->height, 32) >> 5;
1170 ctx->mb_height = FFALIGN(avctx->height, 16) >> 4;
/* full-width slices plus one narrower slice per set bit of the remainder */
1172 ctx->slices_width = ctx->mb_width / mps;
1173 ctx->slices_width += av_popcount(ctx->mb_width - ctx->slices_width * mps);
1174 ctx->slices_per_picture = ctx->mb_height * ctx->slices_width;
1175 ctx->pictures_per_frame = 1 + interlaced;
/* -1 means "auto": use the matrix named by the profile */
1177 if (ctx->quant_sel == -1)
1178 ctx->quant_mat = prores_quant_matrices[ctx->profile_info->quant];
1180 ctx->quant_mat = prores_quant_matrices[ctx->quant_sel];
1182 if (strlen(ctx->vendor) != 4) {
1183 av_log(avctx, AV_LOG_ERROR, "vendor ID should be 4 bytes\n");
1184 return AVERROR_INVALIDDATA;
/* a non-zero global_quality (in lambda units) forces a fixed quantiser */
1187 ctx->force_quant = avctx->global_quality / FF_QP2LAMBDA;
1188 if (!ctx->force_quant) {
1189 if (!ctx->bits_per_mb) {
1190 for (i = 0; i < NUM_MB_LIMITS - 1; i++)
1191 if (prores_mb_limits[i] >= ctx->mb_width * ctx->mb_height *
1192 ctx->pictures_per_frame)
1194 ctx->bits_per_mb = ctx->profile_info->br_tab[i];
1195 } else if (ctx->bits_per_mb < 128) {
1196 av_log(avctx, AV_LOG_ERROR, "too few bits per MB, please set at least 128\n");
1197 return AVERROR_INVALIDDATA;
1200 min_quant = ctx->profile_info->min_quant;
1201 max_quant = ctx->profile_info->max_quant;
/* precompute scaled matrices for every stored quantiser */
1202 for (i = min_quant; i < MAX_STORED_Q; i++) {
1203 for (j = 0; j < 64; j++)
1204 ctx->quants[i][j] = ctx->quant_mat[j] * i;
1207 ctx->slice_q = av_malloc(ctx->slices_per_picture * sizeof(*ctx->slice_q));
1208 if (!ctx->slice_q) {
1209 encode_close(avctx);
1210 return AVERROR(ENOMEM);
1213 ctx->tdata = av_mallocz(avctx->thread_count * sizeof(*ctx->tdata));
1215 encode_close(avctx);
1216 return AVERROR(ENOMEM);
1219 for (j = 0; j < avctx->thread_count; j++) {
1220 ctx->tdata[j].nodes = av_malloc((ctx->slices_width + 1)
1222 * sizeof(*ctx->tdata->nodes));
1223 if (!ctx->tdata[j].nodes) {
1224 encode_close(avctx);
1225 return AVERROR(ENOMEM);
/* initialise the first trellis column, the predecessor of the first slice */
1227 for (i = min_quant; i < max_quant + 2; i++) {
1228 ctx->tdata[j].nodes[i].prev_node = -1;
1229 ctx->tdata[j].nodes[i].bits = 0;
1230 ctx->tdata[j].nodes[i].score = 0;
1236 if (ctx->force_quant > 64) {
1237 av_log(avctx, AV_LOG_ERROR, "too large quantiser, maximum is 64\n");
1238 return AVERROR_INVALIDDATA;
1241 for (j = 0; j < 64; j++) {
1242 ctx->quants[0][j] = ctx->quant_mat[j] * ctx->force_quant;
1243 ls += av_log2((1 << 11) / ctx->quants[0][j]) * 2 + 1;
1246 ctx->bits_per_mb = ls * 8;
1247 if (ctx->chroma_factor == CFACTOR_Y444)
1248 ctx->bits_per_mb += ls * 4;
1251 ctx->frame_size_upper_bound = ctx->pictures_per_frame *
1252 ctx->slices_per_picture *
1253 (2 + 2 * ctx->num_planes +
1254 (mps * ctx->bits_per_mb) / 8)
1257 if (ctx->alpha_bits) {
1258 // The alpha plane is run-coded and might exceed the bit budget.
/* '+' binds tighter than '>>': the whole bit count plus 7 is shifted right
 * by 3, i.e. rounded up to bytes */
1259 ctx->frame_size_upper_bound += ctx->pictures_per_frame *
1260 ctx->slices_per_picture *
1261 /* num pixels per slice */ (ctx->mbs_per_slice * 256 *
1262 /* bits per pixel */ (1 + ctx->alpha_bits + 1) + 7 >> 3);
1265 avctx->codec_tag = ctx->profile_info->tag;
1267 av_log(avctx, AV_LOG_DEBUG,
1268 "profile %d, %d slices, interlacing: %s, %d bits per MB\n",
1269 ctx->profile, ctx->slices_per_picture * ctx->pictures_per_frame,
1270 interlaced ? "yes" : "no", ctx->bits_per_mb);
1271 av_log(avctx, AV_LOG_DEBUG, "frame size upper bound: %d\n",
1272 ctx->frame_size_upper_bound);
/* OFFSET gives the byte offset of a ProresContext member for the option
 * table; VE is the flag set applied to every option.
 * NOTE(review): VE expands to an unparenthesised '|' expression; it is only
 * used as a complete initialiser element in the visible option table. */
1277 #define OFFSET(x) offsetof(ProresContext, x)
1278 #define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
/*
 * Private options of the encoder. Each AV_OPT_TYPE_INT / AV_OPT_TYPE_STRING
 * entry is stored in the ProresContext member named by OFFSET(); the
 * AV_OPT_TYPE_CONST entries provide named values for the "profile" and
 * "quant_mat" units.
 */
1280 static const AVOption options[] = {
1281 { "mbs_per_slice", "macroblocks per slice", OFFSET(mbs_per_slice),
1282 AV_OPT_TYPE_INT, { .i64 = 8 }, 1, MAX_MBS_PER_SLICE, VE },
1283 { "profile", NULL, OFFSET(profile), AV_OPT_TYPE_INT,
1284 { .i64 = PRORES_PROFILE_STANDARD },
1285 PRORES_PROFILE_PROXY, PRORES_PROFILE_4444, VE, "profile" },
1286 { "proxy", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_PROXY },
1287 0, 0, VE, "profile" },
1288 { "lt", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_LT },
1289 0, 0, VE, "profile" },
1290 { "standard", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_STANDARD },
1291 0, 0, VE, "profile" },
1292 { "hq", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_HQ },
1293 0, 0, VE, "profile" },
1294 { "4444", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_4444 },
1295 0, 0, VE, "profile" },
1296 { "vendor", "vendor ID", OFFSET(vendor),
1297 AV_OPT_TYPE_STRING, { .str = "Lavc" }, CHAR_MIN, CHAR_MAX, VE },
1298 { "bits_per_mb", "desired bits per macroblock", OFFSET(bits_per_mb),
1299 AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 8192, VE },
1300 { "quant_mat", "quantiser matrix", OFFSET(quant_sel), AV_OPT_TYPE_INT,
1301 { .i64 = -1 }, -1, QUANT_MAT_DEFAULT, VE, "quant_mat" },
1302 { "auto", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = -1 },
1303 0, 0, VE, "quant_mat" },
1304 { "proxy", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_PROXY },
1305 0, 0, VE, "quant_mat" },
1306 { "lt", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_LT },
1307 0, 0, VE, "quant_mat" },
1308 { "standard", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_STANDARD },
1309 0, 0, VE, "quant_mat" },
1310 { "hq", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_HQ },
1311 0, 0, VE, "quant_mat" },
1312 { "default", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_DEFAULT },
1313 0, 0, VE, "quant_mat" },
1314 { "alpha_bits", "bits for alpha plane", OFFSET(alpha_bits), AV_OPT_TYPE_INT,
1315 { .i64 = 16 }, 0, 16, VE },
/* AVClass describing the encoder's private context; referenced by
 * ff_prores_encoder.priv_class.
 * NOTE(review): not every initialiser is visible in this listing. */
1319 static const AVClass proresenc_class = {
1320 .class_name = "ProRes encoder",
1321 .item_name = av_default_item_name,
1323 .version = LIBAVUTIL_VERSION_INT,
/*
 * Codec registration entry: binds encode_init / encode_frame / encode_close
 * to AV_CODEC_ID_PRORES, declares slice-threading capability and lists the
 * accepted 10-bit pixel formats (4:2:2, 4:4:4 and 4:4:4 with alpha).
 * NOTE(review): not every initialiser is visible in this listing.
 */
1326 AVCodec ff_prores_encoder = {
1328 .long_name = NULL_IF_CONFIG_SMALL("Apple ProRes (iCodec Pro)"),
1329 .type = AVMEDIA_TYPE_VIDEO,
1330 .id = AV_CODEC_ID_PRORES,
1331 .priv_data_size = sizeof(ProresContext),
1332 .init = encode_init,
1333 .close = encode_close,
1334 .encode2 = encode_frame,
1335 .capabilities = CODEC_CAP_SLICE_THREADS,
1336 .pix_fmts = (const enum AVPixelFormat[]) {
1337 AV_PIX_FMT_YUV422P10, AV_PIX_FMT_YUV444P10,
1338 AV_PIX_FMT_YUVA444P10, AV_PIX_FMT_NONE
1340 .priv_class = &proresenc_class,