4 * Copyright (c) 2012 Konstantin Shishkov
6 * This file is part of Libav.
8 * Libav is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
13 * Libav is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with Libav; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23 #include "libavutil/opt.h"
24 #include "libavutil/pixdesc.h"
29 #include "bytestream.h"
31 #include "proresdata.h"
/* Chroma-format codes. ctx->chroma_factor is set to one of these in
 * encode_init(), compared against CFACTOR_Y444 when sizing chroma planes,
 * and written into the frame flags (shifted left by 6) in encode_frame(). */
33 #define CFACTOR_Y422 2
34 #define CFACTOR_Y444 3
/* Upper bound of the "mbs_per_slice" option; also used to size the per-plane
 * coefficient buffers (64 * 4 * MAX_MBS_PER_SLICE entries). */
36 #define MAX_MBS_PER_SLICE 8
/* Profile identifiers; ctx->profile is used as an index into
 * prores_profile_info[] in encode_init().
 * NOTE(review): the enum opener and the remaining enumerators are not
 * visible in this excerpt. */
41 PRORES_PROFILE_PROXY = 0,
43 PRORES_PROFILE_STANDARD,
/* Quantisation matrices, 64 coefficients per row. encode_init() selects one
 * row (by ctx->quant_sel, or by the profile's .quant field when quant_sel is
 * -1) and multiplies every entry by the quantiser to fill ctx->quants[].
 * NOTE(review): the per-matrix braces/labels are not visible in this
 * excerpt; five groups of 64 values are visible, presumably one per
 * QUANT_MAT_* index -- confirm against the full file. */
56 static const uint8_t prores_quant_matrices[][64] = {
58 4, 7, 9, 11, 13, 14, 15, 63,
59 7, 7, 11, 12, 14, 15, 63, 63,
60 9, 11, 13, 14, 15, 63, 63, 63,
61 11, 11, 13, 14, 63, 63, 63, 63,
62 11, 13, 14, 63, 63, 63, 63, 63,
63 13, 14, 63, 63, 63, 63, 63, 63,
64 13, 63, 63, 63, 63, 63, 63, 63,
65 63, 63, 63, 63, 63, 63, 63, 63,
68 4, 5, 6, 7, 9, 11, 13, 15,
69 5, 5, 7, 8, 11, 13, 15, 17,
70 6, 7, 9, 11, 13, 15, 15, 17,
71 7, 7, 9, 11, 13, 15, 17, 19,
72 7, 9, 11, 13, 14, 16, 19, 23,
73 9, 11, 13, 14, 16, 19, 23, 29,
74 9, 11, 13, 15, 17, 21, 28, 35,
75 11, 13, 16, 17, 21, 28, 35, 41,
78 4, 4, 5, 5, 6, 7, 7, 9,
79 4, 4, 5, 6, 7, 7, 9, 9,
80 5, 5, 6, 7, 7, 9, 9, 10,
81 5, 5, 6, 7, 7, 9, 9, 10,
82 5, 6, 7, 7, 8, 9, 10, 12,
83 6, 7, 7, 8, 9, 10, 12, 15,
84 6, 7, 7, 9, 10, 11, 14, 17,
85 7, 7, 9, 10, 11, 14, 17, 21,
88 4, 4, 4, 4, 4, 4, 4, 4,
89 4, 4, 4, 4, 4, 4, 4, 4,
90 4, 4, 4, 4, 4, 4, 4, 4,
91 4, 4, 4, 4, 4, 4, 4, 5,
92 4, 4, 4, 4, 4, 4, 5, 5,
93 4, 4, 4, 4, 4, 5, 5, 6,
94 4, 4, 4, 4, 5, 5, 6, 7,
95 4, 4, 4, 4, 5, 6, 7, 7,
98 4, 4, 4, 4, 4, 4, 4, 4,
99 4, 4, 4, 4, 4, 4, 4, 4,
100 4, 4, 4, 4, 4, 4, 4, 4,
101 4, 4, 4, 4, 4, 4, 4, 4,
102 4, 4, 4, 4, 4, 4, 4, 4,
103 4, 4, 4, 4, 4, 4, 4, 4,
104 4, 4, 4, 4, 4, 4, 4, 4,
105 4, 4, 4, 4, 4, 4, 4, 4,
/* Macroblock-count thresholds. When no bits_per_mb is configured,
 * encode_init() compares these against
 * mb_width * mb_height * pictures_per_frame to choose which entry of the
 * profile's br_tab[] becomes ctx->bits_per_mb. */
109 #define NUM_MB_LIMITS 4
110 static const int prores_mb_limits[NUM_MB_LIMITS] = {
111 1620, // up to 720x576
112 2700, // up to 960x720
113 6075, // up to 1440x1080
114 9216, // up to 2048x1152
/* Per-profile parameters, indexed by ctx->profile.
 *   full_name - human-readable profile name
 *   tag       - FourCC stored into avctx->codec_tag by encode_init()
 *   br_tab    - bits-per-macroblock values, one per prores_mb_limits[] entry
 *   quant     - index of the default row in prores_quant_matrices[]
 * NOTE(review): other parts of the file read min_quant and max_quant from
 * this struct; their declarations and initialisers, and some full_name
 * initialisers, are not visible in this excerpt. */
117 static const struct prores_profile {
118 const char *full_name;
122 int br_tab[NUM_MB_LIMITS];
124 } prores_profile_info[5] = {
126 .full_name = "proxy",
127 .tag = MKTAG('a', 'p', 'c', 'o'),
130 .br_tab = { 300, 242, 220, 194 },
131 .quant = QUANT_MAT_PROXY,
135 .tag = MKTAG('a', 'p', 'c', 's'),
138 .br_tab = { 720, 560, 490, 440 },
139 .quant = QUANT_MAT_LT,
142 .full_name = "standard",
143 .tag = MKTAG('a', 'p', 'c', 'n'),
146 .br_tab = { 1050, 808, 710, 632 },
147 .quant = QUANT_MAT_STANDARD,
150 .full_name = "high quality",
151 .tag = MKTAG('a', 'p', 'c', 'h'),
154 .br_tab = { 1566, 1216, 1070, 950 },
155 .quant = QUANT_MAT_HQ,
159 .tag = MKTAG('a', 'p', '4', 'h'),
162 .br_tab = { 2350, 1828, 1600, 1425 },
163 .quant = QUANT_MAT_HQ,
/* Number of trellis nodes reserved per slice column: find_quant_thread()
 * passes (mb + 1) * TRELLIS_WIDTH as the node base and find_slice_quant()
 * reaches the previous column with "trellis_node - TRELLIS_WIDTH + pq". */
167 #define TRELLIS_WIDTH 16
/* Score value treated as "unusable" in the trellis search.
 * NOTE(review): the expansion is not parenthesised; the uses visible in this
 * file (comparison and plain assignment) are unaffected by precedence, but
 * wrapping it as (INT_MAX / 2) would make the macro safe in any expression. */
168 #define SCORE_LIMIT INT_MAX / 2
/* Number of rows in ProresContext.quants[]; quantisers at or above this
 * value use a custom_q matrix computed on the fly instead. */
177 #define MAX_STORED_Q 16
/* Per-thread scratch state; find_quant_thread() selects an instance with
 * ctx->tdata + threadnr.
 *   blocks   - coefficient/sample storage, one row per plane
 *   emu_buf  - 16x16 buffer for edge-padded macroblocks
 *   custom_q - quantisation matrix storage for one quantiser
 *   nodes    - trellis nodes allocated in encode_init() */
179 typedef struct ProresThreadData {
180 DECLARE_ALIGNED(16, int16_t, blocks)[MAX_PLANES][64 * 4 * MAX_MBS_PER_SLICE];
181 DECLARE_ALIGNED(16, uint16_t, emu_buf)[16 * 16];
182 int16_t custom_q[64];
183 struct TrellisNode *nodes;
/* Encoder private context (avctx->priv_data).
 * NOTE(review): several members used elsewhere in the file (for example
 * dsp, mbs_per_slice, num_planes, bits_per_mb, force_quant, alpha_bits,
 * slices_width, slice_q, profile, quant_sel, vendor, cur_picture_idx) are
 * declared on lines missing from this excerpt. */
186 typedef struct ProresContext {
188 DECLARE_ALIGNED(16, int16_t, blocks)[MAX_PLANES][64 * 4 * MAX_MBS_PER_SLICE];
189 DECLARE_ALIGNED(16, uint16_t, emu_buf)[16*16];
/* Precomputed quant_mat[j] * q for q below MAX_STORED_Q; row 0 holds the
 * forced-quantiser matrix when force_quant is set. */
190 int16_t quants[MAX_STORED_Q][64];
191 int16_t custom_q[64];
192 const uint8_t *quant_mat;
193 const uint8_t *scantable;
/* Forward transform hook; encode_init() sets it to prores_fdct. */
195 void (* fdct)(DSPContext *dsp, const uint16_t *src,
196 int linesize, int16_t *block);
/* Picture size in macroblocks, rounded up in encode_init(). */
199 int mb_width, mb_height;
201 int num_chroma_blocks, chroma_factor;
203 int slices_per_picture;
204 int pictures_per_frame; // 1 for progressive, 2 for interlaced
/* Used by encode_frame() to size the output packet. */
214 int frame_size_upper_bound;
217 const struct prores_profile *profile_info;
221 ProresThreadData *tdata;
/* Fetch one slice worth of samples from a plane and transform them.
 * For each of the mbs_per_slice macroblocks the visible code either reads
 * straight from src (macroblock fully inside the w x h plane) or builds an
 * edge-padded copy in emu_buf, then calls ctx->fdct() on 8x8 sub-blocks.
 * A macroblock is 4 * blocks_per_mb samples wide and 16 rows high.
 * NOTE(review): braces, else branches and the increments of blocks and x
 * are on lines missing from this excerpt, so the exact control flow and
 * the condition guarding the memset cannot be confirmed here. */
224 static void get_slice_data(ProresContext *ctx, const uint16_t *src,
225 int linesize, int x, int y, int w, int h,
226 int16_t *blocks, uint16_t *emu_buf,
227 int mbs_per_slice, int blocks_per_mb, int is_chroma)
229 const uint16_t *esrc;
230 const int mb_width = 4 * blocks_per_mb;
234 for (i = 0; i < mbs_per_slice; i++, src += mb_width) {
/* Zero the coefficient storage of this and all remaining macroblocks. */
236 memset(blocks, 0, 64 * (mbs_per_slice - i) * blocks_per_mb
/* Macroblock lies completely inside the plane: use the source stride. */
240 if (x + mb_width <= w && y + 16 <= h) {
242 elinesize = linesize;
/* Otherwise work from emu_buf, whose rows are 16 samples wide. */
247 elinesize = 16 * sizeof(*emu_buf);
249 bw = FFMIN(w - x, mb_width);
250 bh = FFMIN(h - y, 16);
/* Copy the available rows (linesize is applied as a byte stride) and
 * repeat the last valid sample of each row up to mb_width. */
252 for (j = 0; j < bh; j++) {
253 memcpy(emu_buf + j * 16,
254 (const uint8_t*)src + j * linesize,
256 pix = emu_buf[j * 16 + bw - 1];
257 for (k = bw; k < mb_width; k++)
258 emu_buf[j * 16 + k] = pix;
/* Fill missing rows by duplicating the last valid row (bh - 1). */
261 memcpy(emu_buf + j * 16,
262 emu_buf + (bh - 1) * 16,
263 mb_width * sizeof(*emu_buf));
/* esrc is a uint16_t pointer while elinesize is in bytes, so
 * "esrc + elinesize * 4" moves down 8 rows and "esrc + 8" moves right
 * 8 samples. Two block orderings follow; presumably one is for luma and
 * one for chroma (is_chroma) -- TODO confirm, the selecting condition is
 * not visible. */
266 ctx->fdct(&ctx->dsp, esrc, elinesize, blocks);
268 if (blocks_per_mb > 2) {
269 ctx->fdct(&ctx->dsp, esrc + 8, elinesize, blocks);
272 ctx->fdct(&ctx->dsp, esrc + elinesize * 4, elinesize, blocks);
274 if (blocks_per_mb > 2) {
275 ctx->fdct(&ctx->dsp, esrc + elinesize * 4 + 8, elinesize, blocks);
279 ctx->fdct(&ctx->dsp, esrc, elinesize, blocks);
281 ctx->fdct(&ctx->dsp, esrc + elinesize * 4, elinesize, blocks);
283 if (blocks_per_mb > 2) {
284 ctx->fdct(&ctx->dsp, esrc + 8, elinesize, blocks);
286 ctx->fdct(&ctx->dsp, esrc + elinesize * 4 + 8, elinesize, blocks);
/* Copy one slice of alpha samples into blocks as a 16-row raster that is
 * 16 * mbs_per_slice samples wide, padding to the right and bottom when the
 * slice extends past the w x h plane.
 * NOTE(review): the condition on abits that chooses between the two
 * per-sample conversion loops, and the body of the first loop, are not
 * visible in this excerpt. */
295 static void get_alpha_data(ProresContext *ctx, const uint16_t *src,
296 int linesize, int x, int y, int w, int h,
297 int16_t *blocks, int mbs_per_slice, int abits)
299 const int slice_width = 16 * mbs_per_slice;
300 int i, j, copy_w, copy_h;
/* Clamp the copied area to what is available in the plane. */
302 copy_w = FFMIN(w - x, slice_width);
303 copy_h = FFMIN(h - y, 16);
304 for (i = 0; i < copy_h; i++) {
305 memcpy(blocks, src, copy_w * sizeof(*src));
307 for (j = 0; j < copy_w; j++)
/* Widen each sample with (v << 6) | (v >> 4); presumably this expands
 * 10-bit input to 16 bits -- TODO confirm. */
310 for (j = 0; j < copy_w; j++)
311 blocks[j] = (blocks[j] << 6) | (blocks[j] >> 4);
/* Pad the rest of the row with the last copied sample. */
312 for (j = copy_w; j < slice_width; j++)
313 blocks[j] = blocks[copy_w - 1];
314 blocks += slice_width;
/* linesize is in bytes, src points to 16-bit samples. */
315 src += linesize >> 1;
/* Pad missing rows by repeating the previous row. */
317 for (; i < 16; i++) {
318 memcpy(blocks, blocks - slice_width, slice_width * sizeof(*blocks));
319 blocks += slice_width;
/* The codebook descriptor packs three fields: bits 0-1 hold switch_bits - 1,
 * bits 2-4 the exp-Golomb order and bits 5 and up the Rice order. Values at
 * or above switch_bits << rice_order take the exp-Golomb path, smaller
 * values the Rice path.
 * NOTE(review): the declaration of exponent, the else line and part of the
 * Rice branch are missing from this excerpt. */
324 * Write an unsigned rice/exp golomb codeword.
326 static inline void encode_vlc_codeword(PutBitContext *pb, unsigned codebook, int val)
328 unsigned int rice_order, exp_order, switch_bits, switch_val;
331 /* number of prefix bits to switch between Rice and expGolomb */
332 switch_bits = (codebook & 3) + 1;
333 rice_order = codebook >> 5; /* rice code order */
334 exp_order = (codebook >> 2) & 7; /* exp golomb code order */
336 switch_val = switch_bits << rice_order;
338 if (val >= switch_val) {
/* Rebase val so that the first exp-Golomb value has exactly exp_order
 * bits below its leading one. */
339 val -= switch_val - (1 << exp_order);
340 exponent = av_log2(val);
/* Zero prefix, then val in exponent + 1 bits. */
342 put_bits(pb, exponent - exp_order + switch_bits, 0);
343 put_bits(pb, exponent + 1, val);
/* Rice path: quotient written as a run of zero bits. */
345 exponent = val >> rice_order;
348 put_bits(pb, exponent, 0);
/* Low rice_order bits of val as the remainder. */
351 put_sbits(pb, rice_order, val);
/* GET_SIGN shifts x right by 31 bits. For a 32-bit int this gives -1 for
 * negative x and 0 otherwise, assuming the compiler uses an arithmetic
 * right shift (implementation-defined for negative values). */
355 #define GET_SIGN(x) ((x) >> 31)
/* MAKE_CODE folds a signed value into a non-negative code:
 * 0 -> 0, -1 -> 1, 1 -> 2, -2 -> 3, ... */
356 #define MAKE_CODE(x) (((x) << 1) ^ GET_SIGN(x))
/* Write the DC coefficients of every block in a slice.
 * Each DC is biased by -0x4000 and divided by scale. The first one is coded
 * with FIRST_DC_CB; later ones are coded as differences from the previous
 * DC, using a codebook index derived from the previous code and capped at 3.
 * NOTE(review): the updates of sign and prev_dc, and the initial advance of
 * blocks past the first block, are on lines missing from this excerpt. */
358 static void encode_dcs(PutBitContext *pb, int16_t *blocks,
359 int blocks_per_slice, int scale)
362 int codebook = 3, code, dc, prev_dc, delta, sign, new_sign;
364 prev_dc = (blocks[0] - 0x4000) / scale;
365 encode_vlc_codeword(pb, FIRST_DC_CB, MAKE_CODE(prev_dc));
370 for (i = 1; i < blocks_per_slice; i++, blocks += 64) {
371 dc = (blocks[0] - 0x4000) / scale;
372 delta = dc - prev_dc;
373 new_sign = GET_SIGN(delta);
/* Negates delta when sign is -1, leaves it unchanged when sign is 0. */
374 delta = (delta ^ sign) - sign;
375 code = MAKE_CODE(delta);
376 encode_vlc_codeword(pb, ff_prores_dc_codebook[codebook], code);
/* Next codebook index: ceil(code / 2), limited to 3. */
377 codebook = (code + (code & 1)) >> 1;
378 codebook = FFMIN(codebook, 3);
/* Write the AC coefficients of a slice.
 * Coefficients are visited position by position in scan order (i = 1..63),
 * and for each position across all blocks of the slice (idx steps by 64).
 * Each coefficient is divided by qmat[scan[i]]; the visible code emits a
 * run codeword, a level codeword and a sign bit, then picks the next
 * codebooks from the run (capped at 15) and absolute level (capped at 9).
 * NOTE(review): the zero-level test, the run counter updates and the level
 * argument of the second codeword are on lines missing from this excerpt. */
384 static void encode_acs(PutBitContext *pb, int16_t *blocks,
385 int blocks_per_slice,
386 int plane_size_factor,
387 const uint8_t *scan, const int16_t *qmat)
390 int run, level, run_cb, lev_cb;
391 int max_coeffs, abs_level;
/* 64 coefficients per block. */
393 max_coeffs = blocks_per_slice << 6;
/* Initial codebooks. */
394 run_cb = ff_prores_run_to_cb_index[4];
395 lev_cb = ff_prores_lev_to_cb_index[2];
398 for (i = 1; i < 64; i++) {
399 for (idx = scan[i]; idx < max_coeffs; idx += 64) {
400 level = blocks[idx] / qmat[scan[i]];
402 abs_level = FFABS(level);
403 encode_vlc_codeword(pb, ff_prores_ac_codebook[run_cb], run);
404 encode_vlc_codeword(pb, ff_prores_ac_codebook[lev_cb],
/* One sign bit: GET_SIGN() is -1 for negative levels, 0 otherwise. */
406 put_sbits(pb, 1, GET_SIGN(level));
408 run_cb = ff_prores_run_to_cb_index[FFMIN(run, 15)];
409 lev_cb = ff_prores_lev_to_cb_index[FFMIN(abs_level, 9)];
/* Encode the DC and AC coefficients of one plane of a slice.
 * Returns the number of bytes written, computed as the growth of the bit
 * position divided by 8.
 * NOTE(review): the last parameter (qmat) and any flush between the
 * encoding calls and the size computation are on lines missing from this
 * excerpt. */
418 static int encode_slice_plane(ProresContext *ctx, PutBitContext *pb,
419 const uint16_t *src, int linesize,
420 int mbs_per_slice, int16_t *blocks,
421 int blocks_per_mb, int plane_size_factor,
424 int blocks_per_slice, saved_pos;
426 saved_pos = put_bits_count(pb);
427 blocks_per_slice = mbs_per_slice * blocks_per_mb;
/* qmat[0] is the DC quantiser. */
429 encode_dcs(pb, blocks, blocks_per_slice, qmat[0]);
430 encode_acs(pb, blocks, blocks_per_slice, plane_size_factor,
431 ctx->scantable, qmat);
434 return (put_bits_count(pb) - saved_pos) >> 3;
/* Write the difference between the current and previous alpha sample.
 * Differences that are zero or outside [-dsize, dsize] are written as a
 * full abits-wide value; others are written as a (dbits - 1)-bit magnitude
 * minus one followed by a sign bit.
 * NOTE(review): the masking/wrap adjustment of diff and the escape flag
 * bits are on lines missing from this excerpt. */
437 static void put_alpha_diff(PutBitContext *pb, int cur, int prev, int abits)
439 const int mask = (1 << abits) - 1;
/* 4-bit short form for 8-bit alpha, 7-bit otherwise. */
440 const int dbits = (abits == 8) ? 4 : 7;
/* Parses as 1 << (dbits - 1), since '-' binds tighter than '<<'. */
441 const int dsize = 1 << dbits - 1;
442 int diff = cur - prev;
445 if (diff >= (1 << abits) - dsize)
447 if (diff < -dsize || diff > dsize || !diff) {
449 put_bits(pb, abits, diff);
452 put_bits(pb, dbits - 1, FFABS(diff) - 1);
453 put_bits(pb, 1, diff < 0);
/* Write an alpha run length. The visible code has a 4-bit and a 15-bit
 * form.
 * NOTE(review): the conditions selecting between the forms and any flag
 * bits are on lines missing from this excerpt. */
457 static void put_alpha_run(PutBitContext *pb, int run)
462 put_bits(pb, 4, run);
464 put_bits(pb, 15, run);
/* Encode the alpha samples of one slice as value differences and runs.
 * The slice holds mbs_per_slice * 256 samples; the predictor starts at the
 * maximum value representable in abits bits. Returns the number of bytes
 * written (bit-position growth divided by 8).
 * NOTE(review): the run-detection logic inside the loop and any flush
 * before the size computation are on lines missing from this excerpt. */
470 // todo alpha quantisation for high quants
471 static int encode_alpha_plane(ProresContext *ctx, PutBitContext *pb,
472 const uint16_t *src, int linesize,
473 int mbs_per_slice, uint16_t *blocks,
476 const int abits = ctx->alpha_bits;
477 const int mask = (1 << abits) - 1;
478 const int num_coeffs = mbs_per_slice * 256;
479 int saved_pos = put_bits_count(pb);
480 int prev = mask, cur;
485 put_alpha_diff(pb, cur, prev, abits);
490 put_alpha_run (pb, run);
491 put_alpha_diff(pb, cur, prev, abits);
497 } while (idx < num_coeffs);
499 put_alpha_run(pb, run);
501 return (put_bits_count(pb) - saved_pos) >> 3;
/* Encode all planes of one slice located at macroblock column x, row y.
 * Stores the byte size of each plane in sizes[] and accumulates them in
 * total_size.
 * NOTE(review): part of the parameter list, the computation of xp, yp and
 * num_cblocks, the test separating colour planes from the alpha plane, and
 * the return statement are on lines missing from this excerpt. */
504 static int encode_slice(AVCodecContext *avctx, const AVFrame *pic,
506 int sizes[4], int x, int y, int quant,
509 ProresContext *ctx = avctx->priv_data;
513 int slice_width_factor = av_log2(mbs_per_slice);
514 int num_cblocks, pwidth, linesize, line_add;
515 int plane_factor, is_chroma;
/* For two pictures per frame, line_add selects which field's lines
 * are read, based on the picture index and top_field_first. */
518 if (ctx->pictures_per_frame == 1)
521 line_add = ctx->cur_picture_idx ^ !pic->top_field_first;
/* Quantisation matrix: row 0 for a forced quantiser, a stored row for
 * small quantisers, otherwise built on the fly in ctx->custom_q. */
523 if (ctx->force_quant) {
524 qmat = ctx->quants[0];
525 } else if (quant < MAX_STORED_Q) {
526 qmat = ctx->quants[quant];
528 qmat = ctx->custom_q;
529 for (i = 0; i < 64; i++)
530 qmat[i] = ctx->quant_mat[i] * quant;
533 for (i = 0; i < ctx->num_planes; i++) {
/* Planes 1 and 2 are chroma. */
534 is_chroma = (i == 1 || i == 2);
535 plane_factor = slice_width_factor + 2;
537 plane_factor += ctx->chroma_factor - 3;
/* Full-width plane (non-chroma, or 4:4:4 chroma) versus
 * half-width chroma. */
538 if (!is_chroma || ctx->chroma_factor == CFACTOR_Y444) {
542 pwidth = avctx->width;
547 pwidth = avctx->width >> 1;
/* The stride is multiplied by pictures_per_frame so that consecutive
 * rows stay within one picture of the frame. */
550 linesize = pic->linesize[i] * ctx->pictures_per_frame;
551 src = (const uint16_t*)(pic->data[i] + yp * linesize +
552 line_add * pic->linesize[i]) + xp;
/* Both paths use ctx->blocks[0] as scratch storage. */
555 get_slice_data(ctx, src, linesize, xp, yp,
556 pwidth, avctx->height / ctx->pictures_per_frame,
557 ctx->blocks[0], ctx->emu_buf,
558 mbs_per_slice, num_cblocks, is_chroma);
559 sizes[i] = encode_slice_plane(ctx, pb, src, linesize,
560 mbs_per_slice, ctx->blocks[0],
561 num_cblocks, plane_factor,
564 get_alpha_data(ctx, src, linesize, xp, yp,
565 pwidth, avctx->height / ctx->pictures_per_frame,
566 ctx->blocks[0], mbs_per_slice, ctx->alpha_bits);
567 sizes[i] = encode_alpha_plane(ctx, pb, src, linesize,
568 mbs_per_slice, ctx->blocks[0],
571 total_size += sizes[i];
/* Return the number of bits a codeword would take, without writing it.
 * The codebook descriptor is decoded the same way as in
 * encode_vlc_codeword(). The exp-Golomb size equals the two put_bits()
 * widths used there added together:
 * (exponent - exp_order + switch_bits) + (exponent + 1).
 * NOTE(review): the declaration of exponent and the else line are missing
 * from this excerpt. */
576 static inline int estimate_vlc(unsigned codebook, int val)
578 unsigned int rice_order, exp_order, switch_bits, switch_val;
581 /* number of prefix bits to switch between Rice and expGolomb */
582 switch_bits = (codebook & 3) + 1;
583 rice_order = codebook >> 5; /* rice code order */
584 exp_order = (codebook >> 2) & 7; /* exp golomb code order */
586 switch_val = switch_bits << rice_order;
588 if (val >= switch_val) {
589 val -= switch_val - (1 << exp_order);
590 exponent = av_log2(val);
592 return exponent * 2 - exp_order + switch_bits + 1;
/* Rice size: quotient bits, remainder bits and one more bit. */
594 return (val >> rice_order) + rice_order + 1;
/* Estimate the bit cost of the DC coefficients of a slice, following the
 * same steps as encode_dcs(), and add the quantisation remainders
 * (|dc - 0x4000| % scale) to *error.
 * NOTE(review): the scale parameter line, the updates of sign and prev_dc,
 * the initial advance of blocks and the return statement are on lines
 * missing from this excerpt. */
598 static int estimate_dcs(int *error, int16_t *blocks, int blocks_per_slice,
602 int codebook = 3, code, dc, prev_dc, delta, sign, new_sign;
605 prev_dc = (blocks[0] - 0x4000) / scale;
606 bits = estimate_vlc(FIRST_DC_CB, MAKE_CODE(prev_dc));
610 *error += FFABS(blocks[0] - 0x4000) % scale;
612 for (i = 1; i < blocks_per_slice; i++, blocks += 64) {
613 dc = (blocks[0] - 0x4000) / scale;
614 *error += FFABS(blocks[0] - 0x4000) % scale;
615 delta = dc - prev_dc;
616 new_sign = GET_SIGN(delta);
/* Negates delta when sign is -1. */
617 delta = (delta ^ sign) - sign;
618 code = MAKE_CODE(delta);
619 bits += estimate_vlc(ff_prores_dc_codebook[codebook], code);
620 codebook = (code + (code & 1)) >> 1;
621 codebook = FFMIN(codebook, 3);
/* Estimate the bit cost of the AC coefficients of a slice, following the
 * same traversal as encode_acs(), and add the quantisation remainders
 * (|coeff| % qmat[scan[i]]) to *error.
 * NOTE(review): the zero-level test, run counter updates, the level
 * argument of the second estimate and the return statement are on lines
 * missing from this excerpt. */
629 static int estimate_acs(int *error, int16_t *blocks, int blocks_per_slice,
630 int plane_size_factor,
631 const uint8_t *scan, const int16_t *qmat)
634 int run, level, run_cb, lev_cb;
635 int max_coeffs, abs_level;
638 max_coeffs = blocks_per_slice << 6;
639 run_cb = ff_prores_run_to_cb_index[4];
640 lev_cb = ff_prores_lev_to_cb_index[2];
643 for (i = 1; i < 64; i++) {
644 for (idx = scan[i]; idx < max_coeffs; idx += 64) {
645 level = blocks[idx] / qmat[scan[i]];
646 *error += FFABS(blocks[idx]) % qmat[scan[i]];
648 abs_level = FFABS(level);
649 bits += estimate_vlc(ff_prores_ac_codebook[run_cb], run);
650 bits += estimate_vlc(ff_prores_ac_codebook[lev_cb],
653 run_cb = ff_prores_run_to_cb_index[FFMIN(run, 15)];
654 lev_cb = ff_prores_lev_to_cb_index[FFMIN(abs_level, 9)];
/* Estimate the size in bits of one colour plane of a slice for the given
 * quantisation matrix, reading coefficients from td->blocks[plane] and
 * accumulating quantisation error into *error. The result is rounded up to
 * a multiple of 8 bits.
 * NOTE(review): one parameter line and the declaration of bits are missing
 * from this excerpt. */
665 static int estimate_slice_plane(ProresContext *ctx, int *error, int plane,
666 const uint16_t *src, int linesize,
668 int blocks_per_mb, int plane_size_factor,
669 const int16_t *qmat, ProresThreadData *td)
671 int blocks_per_slice;
674 blocks_per_slice = mbs_per_slice * blocks_per_mb;
676 bits = estimate_dcs(error, td->blocks[plane], blocks_per_slice, qmat[0]);
677 bits += estimate_acs(error, td->blocks[plane], blocks_per_slice,
678 plane_size_factor, ctx->scantable, qmat);
680 return FFALIGN(bits, 8);
/* Estimate the bit cost of one alpha difference; uses the same constants
 * and range tests as put_alpha_diff().
 * NOTE(review): the adjustment of diff and both return statements are on
 * lines missing from this excerpt. */
683 static int est_alpha_diff(int cur, int prev, int abits)
685 const int mask = (1 << abits) - 1;
686 const int dbits = (abits == 8) ? 4 : 7;
/* Parses as 1 << (dbits - 1). */
687 const int dsize = 1 << dbits - 1;
688 int diff = cur - prev;
691 if (diff >= (1 << abits) - dsize)
693 if (diff < -dsize || diff > dsize || !diff)
/* Estimate the bit cost of the alpha plane of one slice
 * (mbs_per_slice * 256 samples), starting from a predictor equal to the
 * maximum abits-wide value.
 * NOTE(review): the last parameter, the run handling inside the loop and
 * the return statement are on lines missing from this excerpt. */
699 static int estimate_alpha_plane(ProresContext *ctx, int *error,
700 const uint16_t *src, int linesize,
701 int mbs_per_slice, int quant,
704 const int abits = ctx->alpha_bits;
705 const int mask = (1 << abits) - 1;
706 const int num_coeffs = mbs_per_slice * 256;
707 int prev = mask, cur;
714 bits = est_alpha_diff(cur, prev, abits);
725 bits += est_alpha_diff(cur, prev, abits);
731 } while (idx < num_coeffs);
/* Choose a quantiser for one slice as a step of a trellis search.
 * Visible steps:
 *   1. load and transform every plane of the slice into td->blocks[];
 *   2. reset the trellis nodes of this column;
 *   3. estimate bits and error for each quantiser in
 *      [min_quant, max_quant];
 *   4. fill the extra slot max_quant + 1 with an "overquant" candidate,
 *      searching quantisers up to 127 if max_quant does not fit
 *      bits_per_mb * mbs_per_slice;
 *   5. connect every node of the previous column to every node of this
 *      one, keeping the lowest score per node;
 *   6. pick the node of this column with the lowest score.
 * NOTE(review): many lines (initialisation of bits/error, qmat arguments,
 * break/continue statements, the return) are missing from this excerpt. */
743 static int find_slice_quant(AVCodecContext *avctx, const AVFrame *pic,
744 int trellis_node, int x, int y, int mbs_per_slice,
745 ProresThreadData *td)
747 ProresContext *ctx = avctx->priv_data;
748 int i, q, pq, xp, yp;
750 int slice_width_factor = av_log2(mbs_per_slice);
751 int num_cblocks[MAX_PLANES], pwidth;
752 int plane_factor[MAX_PLANES], is_chroma[MAX_PLANES];
753 const int min_quant = ctx->profile_info->min_quant;
754 const int max_quant = ctx->profile_info->max_quant;
755 int error, bits, bits_limit;
756 int mbs, prev, cur, new_score;
757 int slice_bits[TRELLIS_WIDTH], slice_score[TRELLIS_WIDTH];
760 int linesize[4], line_add;
762 if (ctx->pictures_per_frame == 1)
765 line_add = ctx->cur_picture_idx ^ !pic->top_field_first;
/* Macroblocks in this row up to and including this slice; used for the
 * cumulative bit budget below. */
766 mbs = x + mbs_per_slice;
/* Step 1: fetch all planes into the per-thread buffers. */
768 for (i = 0; i < ctx->num_planes; i++) {
769 is_chroma[i] = (i == 1 || i == 2);
770 plane_factor[i] = slice_width_factor + 2;
772 plane_factor[i] += ctx->chroma_factor - 3;
773 if (!is_chroma[i] || ctx->chroma_factor == CFACTOR_Y444) {
777 pwidth = avctx->width;
782 pwidth = avctx->width >> 1;
785 linesize[i] = pic->linesize[i] * ctx->pictures_per_frame;
786 src = (const uint16_t*)(pic->data[i] + yp * linesize[i] +
787 line_add * pic->linesize[i]) + xp;
790 get_slice_data(ctx, src, linesize[i], xp, yp,
791 pwidth, avctx->height / ctx->pictures_per_frame,
792 td->blocks[i], td->emu_buf,
793 mbs_per_slice, num_cblocks[i], is_chroma[i]);
795 get_alpha_data(ctx, src, linesize[i], xp, yp,
796 pwidth, avctx->height / ctx->pictures_per_frame,
797 td->blocks[i], mbs_per_slice, ctx->alpha_bits);
/* Step 2: reset this column's nodes, including the max_quant + 1 slot. */
801 for (q = min_quant; q < max_quant + 2; q++) {
802 td->nodes[trellis_node + q].prev_node = -1;
803 td->nodes[trellis_node + q].quant = q;
806 // todo: maybe perform coarser quantising to fit into frame size when needed
/* Step 3: cost of each regular quantiser. */
807 for (q = min_quant; q <= max_quant; q++) {
/* Colour planes only; alpha, when present, is estimated separately. */
810 for (i = 0; i < ctx->num_planes - !!ctx->alpha_bits; i++) {
811 bits += estimate_slice_plane(ctx, &error, i,
814 num_cblocks[i], plane_factor[i],
818 bits += estimate_alpha_plane(ctx, &error, src, linesize[3],
819 mbs_per_slice, q, td->blocks[3]);
820 if (bits > 65000 * 8) {
824 slice_bits[q] = bits;
825 slice_score[q] = error;
/* Step 4: if max_quant already fits the budget, reuse it for the extra
 * slot with a slightly worse score ... */
827 if (slice_bits[max_quant] <= ctx->bits_per_mb * mbs_per_slice) {
828 slice_bits[max_quant + 1] = slice_bits[max_quant];
829 slice_score[max_quant + 1] = slice_score[max_quant] + 1;
830 overquant = max_quant;
/* ... otherwise try larger quantisers until the slice fits. */
832 for (q = max_quant + 1; q < 128; q++) {
835 if (q < MAX_STORED_Q) {
836 qmat = ctx->quants[q];
839 for (i = 0; i < 64; i++)
840 qmat[i] = ctx->quant_mat[i] * q;
842 for (i = 0; i < ctx->num_planes - !!ctx->alpha_bits; i++) {
843 bits += estimate_slice_plane(ctx, &error, i,
846 num_cblocks[i], plane_factor[i],
850 bits += estimate_alpha_plane(ctx, &error, src, linesize[3],
851 mbs_per_slice, q, td->blocks[3]);
852 if (bits <= ctx->bits_per_mb * mbs_per_slice)
856 slice_bits[max_quant + 1] = bits;
857 slice_score[max_quant + 1] = error;
860 td->nodes[trellis_node + max_quant + 1].quant = overquant;
/* Step 5: cumulative budget for the row so far, then relax all edges from
 * the previous column. */
862 bits_limit = mbs * ctx->bits_per_mb;
863 for (pq = min_quant; pq < max_quant + 2; pq++) {
864 prev = trellis_node - TRELLIS_WIDTH + pq;
866 for (q = min_quant; q < max_quant + 2; q++) {
867 cur = trellis_node + q;
869 bits = td->nodes[prev].bits + slice_bits[q];
870 error = slice_score[q];
871 if (bits > bits_limit)
/* Avoid overflow when either score is already at the limit. */
874 if (td->nodes[prev].score < SCORE_LIMIT && error < SCORE_LIMIT)
875 new_score = td->nodes[prev].score + error;
877 new_score = SCORE_LIMIT;
878 if (td->nodes[cur].prev_node == -1 ||
879 td->nodes[cur].score >= new_score) {
881 td->nodes[cur].bits = bits;
882 td->nodes[cur].score = new_score;
883 td->nodes[cur].prev_node = prev;
/* Step 6: lowest-score node of this column; ties go to the larger q
 * because of the "<=" comparison. */
888 error = td->nodes[trellis_node + min_quant].score;
889 pq = trellis_node + min_quant;
890 for (q = min_quant + 1; q < max_quant + 2; q++) {
891 if (td->nodes[trellis_node + q].score <= error) {
892 error = td->nodes[trellis_node + q].score;
893 pq = trellis_node + q;
/* Worker run through avctx->execute2(): chooses quantisers for every slice
 * of macroblock row jobnr using the per-thread data ctx->tdata[threadnr].
 * The row is walked left to right calling find_slice_quant(), then the
 * trellis is traced back from the last chosen node to fill ctx->slice_q[].
 * NOTE(review): the body of the while loop, the remaining arguments of the
 * find_slice_quant() call and the return statement are missing from this
 * excerpt. */
900 static int find_quant_thread(AVCodecContext *avctx, void *arg,
901 int jobnr, int threadnr)
903 ProresContext *ctx = avctx->priv_data;
904 ProresThreadData *td = ctx->tdata + threadnr;
905 int mbs_per_slice = ctx->mbs_per_slice;
906 int x, y = jobnr, mb, q = 0;
908 for (x = mb = 0; x < ctx->mb_width; x += mbs_per_slice, mb++) {
/* Adjust the slice width while it does not fit the rest of the row. */
909 while (ctx->mb_width - x < mbs_per_slice)
/* Column 0 of the trellis is the start state, so slice mb uses column
 * mb + 1. */
911 q = find_slice_quant(avctx, avctx->coded_frame,
912 (mb + 1) * TRELLIS_WIDTH, x, y,
/* Trace back through prev_node links, last slice first. */
916 for (x = ctx->slices_width - 1; x >= 0; x--) {
917 ctx->slice_q[x + y * ctx->slices_width] = td->nodes[q].quant;
918 q = td->nodes[q].prev_node;
/* Encode one frame into pkt.
 * Visible steps: copy the input frame properties into avctx->coded_frame
 * and mark it as an intra key frame; allocate an output packet of
 * frame_size_upper_bound + FF_MIN_BUFFER_SIZE bytes; write the frame
 * header; for each picture write a picture header, reserve the
 * slice-size table, optionally run the quantiser search, and encode all
 * slices; finally back-patch the picture and frame sizes.
 * NOTE(review): several declarations, assignments (for example buf, tmp,
 * slice_sizes, slice_hdr), error returns and the final return are on lines
 * missing from this excerpt. */
924 static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
925 const AVFrame *pic, int *got_packet)
927 ProresContext *ctx = avctx->priv_data;
928 uint8_t *orig_buf, *buf, *slice_hdr, *slice_sizes, *tmp;
929 uint8_t *picture_size_pos;
931 int x, y, i, mb, q = 0;
932 int sizes[4] = { 0 };
/* One byte of header size, one byte of quantiser, and a 16-bit size for
 * every plane except the last. */
933 int slice_hdr_size = 2 + 2 * (ctx->num_planes - 1);
934 int frame_size, picture_size, slice_size;
938 *avctx->coded_frame = *pic;
939 avctx->coded_frame->pict_type = AV_PICTURE_TYPE_I;
940 avctx->coded_frame->key_frame = 1;
942 pkt_size = ctx->frame_size_upper_bound + FF_MIN_BUFFER_SIZE;
944 if ((ret = ff_alloc_packet(pkt, pkt_size)) < 0) {
945 av_log(avctx, AV_LOG_ERROR, "Error getting output packet.\n");
949 orig_buf = pkt->data;
952 orig_buf += 4; // frame size
953 bytestream_put_be32 (&orig_buf, FRAME_ID); // frame container ID
958 buf += 2; // frame header size will be stored here
959 bytestream_put_be16 (&buf, 0); // version 1
960 bytestream_put_buffer(&buf, ctx->vendor, 4);
961 bytestream_put_be16 (&buf, avctx->width);
962 bytestream_put_be16 (&buf, avctx->height);
/* Chroma format in the top two bits; field order bits only when
 * interlaced coding is requested. */
964 frame_flags = ctx->chroma_factor << 6;
965 if (avctx->flags & CODEC_FLAG_INTERLACED_DCT)
966 frame_flags |= pic->top_field_first ? 0x04 : 0x08;
967 bytestream_put_byte (&buf, frame_flags);
969 bytestream_put_byte (&buf, 0); // reserved
970 bytestream_put_byte (&buf, avctx->color_primaries);
971 bytestream_put_byte (&buf, avctx->color_trc);
972 bytestream_put_byte (&buf, avctx->colorspace);
/* alpha_bits >> 3 yields 0, 1 or 2 for 0, 8 or 16 alpha bits. */
973 bytestream_put_byte (&buf, 0x40 | (ctx->alpha_bits >> 3));
974 bytestream_put_byte (&buf, 0); // reserved
975 if (ctx->quant_sel != QUANT_MAT_DEFAULT) {
976 bytestream_put_byte (&buf, 0x03); // matrix flags - both matrices are present
977 // luma quantisation matrix
978 for (i = 0; i < 64; i++)
979 bytestream_put_byte(&buf, ctx->quant_mat[i]);
980 // chroma quantisation matrix
/* The same matrix is written for chroma as for luma. */
981 for (i = 0; i < 64; i++)
982 bytestream_put_byte(&buf, ctx->quant_mat[i]);
984 bytestream_put_byte (&buf, 0x00); // matrix flags - default matrices are used
986 bytestream_put_be16 (&tmp, buf - orig_buf); // write back frame header size
988 for (ctx->cur_picture_idx = 0;
989 ctx->cur_picture_idx < ctx->pictures_per_frame;
990 ctx->cur_picture_idx++) {
/* Remember where the 32-bit picture size goes (after the header-size
 * byte) so it can be patched once the picture is complete. */
992 picture_size_pos = buf + 1;
993 bytestream_put_byte (&buf, 0x40); // picture header size (in bits)
994 buf += 4; // picture data size will be stored here
995 bytestream_put_be16 (&buf, ctx->slices_per_picture);
996 bytestream_put_byte (&buf, av_log2(ctx->mbs_per_slice) << 4); // slice width and height in MBs
998 // seek table - will be filled during slice encoding
/* Two bytes per slice. */
1000 buf += ctx->slices_per_picture * 2;
/* Without a forced quantiser, run the per-row quantiser search. */
1003 if (!ctx->force_quant) {
1004 ret = avctx->execute2(avctx, find_quant_thread, NULL, NULL,
1010 for (y = 0; y < ctx->mb_height; y++) {
1011 int mbs_per_slice = ctx->mbs_per_slice;
1012 for (x = mb = 0; x < ctx->mb_width; x += mbs_per_slice, mb++) {
1013 q = ctx->force_quant ? ctx->force_quant
1014 : ctx->slice_q[mb + y * ctx->slices_width];
/* Halve the slice width until it fits the rest of the row. */
1016 while (ctx->mb_width - x < mbs_per_slice)
1017 mbs_per_slice >>= 1;
/* Slice header size in bits; the rest of the header is filled in
 * after the slice is encoded. */
1019 bytestream_put_byte(&buf, slice_hdr_size << 3);
1021 buf += slice_hdr_size - 1;
1022 init_put_bits(&pb, buf, (pkt_size - (buf - orig_buf)) * 8);
1023 encode_slice(avctx, pic, &pb, sizes, x, y, q, mbs_per_slice);
1025 bytestream_put_byte(&slice_hdr, q);
/* The last plane's size is counted in the slice total but not
 * stored in the slice header. */
1026 slice_size = slice_hdr_size + sizes[ctx->num_planes - 1];
1027 for (i = 0; i < ctx->num_planes - 1; i++) {
1028 bytestream_put_be16(&slice_hdr, sizes[i]);
1029 slice_size += sizes[i];
1031 bytestream_put_be16(&slice_sizes, slice_size);
1032 buf += slice_size - slice_hdr_size;
1036 if (ctx->pictures_per_frame == 1)
1037 picture_size = buf - picture_size_pos - 6;
1039 picture_size = buf - picture_size_pos + 1;
1040 bytestream_put_be32(&picture_size_pos, picture_size);
/* Patch the total frame size at the position kept in orig_buf. */
1044 frame_size = buf - orig_buf;
1045 bytestream_put_be32(&orig_buf, frame_size);
1047 pkt->size = frame_size;
1048 pkt->flags |= AV_PKT_FLAG_KEY;
/* Release everything allocated by encode_init(): the coded frame, each
 * thread's trellis nodes, the per-thread data array and the slice
 * quantiser table. Also called by encode_init() on allocation failure.
 * NOTE(review): ctx->tdata is indexed in the loop below; any NULL guard
 * around it would be on a line missing from this excerpt -- confirm one
 * exists, since encode_init() can call this before tdata is allocated. */
1054 static av_cold int encode_close(AVCodecContext *avctx)
1056 ProresContext *ctx = avctx->priv_data;
1059 av_freep(&avctx->coded_frame);
1062 for (i = 0; i < avctx->thread_count; i++)
1063 av_free(ctx->tdata[i].nodes);
1065 av_freep(&ctx->tdata);
1066 av_freep(&ctx->slice_q);
/* Copy an 8x8 block of 16-bit samples from src into block in row-major
 * order; linesize is the source stride in bytes. Installed as ctx->fdct by
 * encode_init().
 * NOTE(review): the declarations of x and y and whatever follows the copy
 * loop (presumably the actual transform call through dsp) are on lines
 * missing from this excerpt. */
1071 static void prores_fdct(DSPContext *dsp, const uint16_t *src,
1072 int linesize, int16_t *block)
1075 const uint16_t *tsrc = src;
1077 for (y = 0; y < 8; y++) {
1078 for (x = 0; x < 8; x++)
1079 block[y * 8 + x] = tsrc[x];
/* Advance one row: byte stride converted to 16-bit elements. */
1080 tsrc += linesize >> 1;
/* Initialise the encoder: validate options, derive picture and slice
 * geometry, select the quantisation matrix, set up either rate-controlled
 * mode (trellis buffers) or forced-quantiser mode, and compute the
 * worst-case frame size.
 * Returns AVERROR(EINVAL), AVERROR_INVALIDDATA or AVERROR(ENOMEM) on the
 * visible failure paths.
 * NOTE(review): several lines (some conditions, else branches, the final
 * return) are missing from this excerpt. The early error returns visible
 * here do not release avctx->coded_frame themselves; confirm whether the
 * caller invokes encode_close() on init failure. */
1085 static av_cold int encode_init(AVCodecContext *avctx)
1087 ProresContext *ctx = avctx->priv_data;
1090 int min_quant, max_quant;
1091 int interlaced = !!(avctx->flags & CODEC_FLAG_INTERLACED_DCT);
1093 avctx->bits_per_raw_sample = 10;
1094 avctx->coded_frame = av_frame_alloc();
1095 if (!avctx->coded_frame)
1096 return AVERROR(ENOMEM);
1098 ctx->fdct = prores_fdct;
1099 ctx->scantable = interlaced ? ff_prores_interlaced_scan
1100 : ff_prores_progressive_scan;
1101 ff_dsputil_init(&ctx->dsp, avctx);
/* mps & (mps - 1) is non-zero unless mps is a power of two. */
1103 mps = ctx->mbs_per_slice;
1104 if (mps & (mps - 1)) {
1105 av_log(avctx, AV_LOG_ERROR,
1106 "there should be an integer power of two MBs per slice\n");
1107 return AVERROR(EINVAL);
/* NOTE(review): the descriptor pointer is dereferenced without a NULL
 * check. */
1109 if (av_pix_fmt_desc_get(avctx->pix_fmt)->flags & AV_PIX_FMT_FLAG_ALPHA) {
/* Only multiples of 8 pass; the option range limits this to 0, 8, 16. */
1110 if (ctx->alpha_bits & 7) {
1111 av_log(avctx, AV_LOG_ERROR, "alpha bits should be 0, 8 or 16\n");
1112 return AVERROR(EINVAL);
/* Formats without alpha never encode an alpha plane. */
1115 ctx->alpha_bits = 0;
1118 ctx->chroma_factor = avctx->pix_fmt == AV_PIX_FMT_YUV422P10
1121 ctx->profile_info = prores_profile_info + ctx->profile;
1122 ctx->num_planes = 3 + !!ctx->alpha_bits;
/* Round dimensions up to whole macroblocks. Two roundings are visible for
 * the height (32 and 16); presumably the 32-line one is for interlaced
 * input -- the condition is not visible here. */
1124 ctx->mb_width = FFALIGN(avctx->width, 16) >> 4;
1127 ctx->mb_height = FFALIGN(avctx->height, 32) >> 5;
1129 ctx->mb_height = FFALIGN(avctx->height, 16) >> 4;
/* Full-width slices plus one narrower slice for every set bit of the
 * leftover macroblock count. */
1131 ctx->slices_width = ctx->mb_width / mps;
1132 ctx->slices_width += av_popcount(ctx->mb_width - ctx->slices_width * mps);
1133 ctx->slices_per_picture = ctx->mb_height * ctx->slices_width;
1134 ctx->pictures_per_frame = 1 + interlaced;
/* quant_sel == -1 means "use the profile's default matrix". */
1136 if (ctx->quant_sel == -1)
1137 ctx->quant_mat = prores_quant_matrices[ctx->profile_info->quant];
1139 ctx->quant_mat = prores_quant_matrices[ctx->quant_sel];
/* Exactly four vendor bytes are written to the frame header. */
1141 if (strlen(ctx->vendor) != 4) {
1142 av_log(avctx, AV_LOG_ERROR, "vendor ID should be 4 bytes\n");
1143 return AVERROR_INVALIDDATA;
/* A non-zero global_quality selects fixed-quantiser mode. */
1146 ctx->force_quant = avctx->global_quality / FF_QP2LAMBDA;
1147 if (!ctx->force_quant) {
1148 if (!ctx->bits_per_mb) {
/* Choose the bit budget from the profile table by picture size. */
1149 for (i = 0; i < NUM_MB_LIMITS - 1; i++)
1150 if (prores_mb_limits[i] >= ctx->mb_width * ctx->mb_height *
1151 ctx->pictures_per_frame)
1153 ctx->bits_per_mb = ctx->profile_info->br_tab[i];
1154 } else if (ctx->bits_per_mb < 128) {
1155 av_log(avctx, AV_LOG_ERROR, "too few bits per MB, please set at least 128\n");
1156 return AVERROR_INVALIDDATA;
1159 min_quant = ctx->profile_info->min_quant;
1160 max_quant = ctx->profile_info->max_quant;
/* Precompute scaled matrices for the stored quantiser range. */
1161 for (i = min_quant; i < MAX_STORED_Q; i++) {
1162 for (j = 0; j < 64; j++)
1163 ctx->quants[i][j] = ctx->quant_mat[j] * i;
1166 ctx->slice_q = av_malloc(ctx->slices_per_picture * sizeof(*ctx->slice_q));
1167 if (!ctx->slice_q) {
1168 encode_close(avctx);
1169 return AVERROR(ENOMEM);
1172 ctx->tdata = av_mallocz(avctx->thread_count * sizeof(*ctx->tdata));
1174 encode_close(avctx);
1175 return AVERROR(ENOMEM);
1178 for (j = 0; j < avctx->thread_count; j++) {
1179 ctx->tdata[j].nodes = av_malloc((ctx->slices_width + 1)
1181 * sizeof(*ctx->tdata->nodes));
1182 if (!ctx->tdata[j].nodes) {
1183 encode_close(avctx);
1184 return AVERROR(ENOMEM);
/* Initialise the start column of the trellis. */
1186 for (i = min_quant; i < max_quant + 2; i++) {
1187 ctx->tdata[j].nodes[i].prev_node = -1;
1188 ctx->tdata[j].nodes[i].bits = 0;
1189 ctx->tdata[j].nodes[i].score = 0;
/* Fixed-quantiser mode. */
1195 if (ctx->force_quant > 64) {
1196 av_log(avctx, AV_LOG_ERROR, "too large quantiser, maximum is 64\n");
1197 return AVERROR_INVALIDDATA;
/* Row 0 holds the forced matrix; ls accumulates a per-block size
 * estimate from the log2 of (1 << 11) / quantiser for each coefficient. */
1200 for (j = 0; j < 64; j++) {
1201 ctx->quants[0][j] = ctx->quant_mat[j] * ctx->force_quant;
1202 ls += av_log2((1 << 11) / ctx->quants[0][j]) * 2 + 1;
1205 ctx->bits_per_mb = ls * 8;
1206 if (ctx->chroma_factor == CFACTOR_Y444)
1207 ctx->bits_per_mb += ls * 4;
1208 if (ctx->num_planes == 4)
1209 ctx->bits_per_mb += ls * 4;
/* Worst-case output size: per slice, header bytes plus the bit budget of
 * a full-width slice in bytes. */
1212 ctx->frame_size_upper_bound = ctx->pictures_per_frame *
1213 ctx->slices_per_picture *
1214 (2 + 2 * ctx->num_planes +
1215 (mps * ctx->bits_per_mb) / 8)
1218 avctx->codec_tag = ctx->profile_info->tag;
1220 av_log(avctx, AV_LOG_DEBUG,
1221 "profile %d, %d slices, interlacing: %s, %d bits per MB\n",
1222 ctx->profile, ctx->slices_per_picture * ctx->pictures_per_frame,
1223 interlaced ? "yes" : "no", ctx->bits_per_mb);
1224 av_log(avctx, AV_LOG_DEBUG, "frame size upper bound: %d\n",
1225 ctx->frame_size_upper_bound);
/* Private encoder options. OFFSET() locates the target field inside
 * ProresContext; VE marks an option as a video encoding parameter.
 * The AV_OPT_TYPE_CONST entries give symbolic names to values of the
 * option that shares their unit string ("profile" or "quant_mat"). */
1230 #define OFFSET(x) offsetof(ProresContext, x)
1231 #define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
1233 static const AVOption options[] = {
/* Must also be a power of two; enforced in encode_init(). */
1234 { "mbs_per_slice", "macroblocks per slice", OFFSET(mbs_per_slice),
1235 AV_OPT_TYPE_INT, { .i64 = 8 }, 1, MAX_MBS_PER_SLICE, VE },
1236 { "profile", NULL, OFFSET(profile), AV_OPT_TYPE_INT,
1237 { .i64 = PRORES_PROFILE_STANDARD },
1238 PRORES_PROFILE_PROXY, PRORES_PROFILE_4444, VE, "profile" },
1239 { "proxy", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_PROXY },
1240 0, 0, VE, "profile" },
1241 { "lt", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_LT },
1242 0, 0, VE, "profile" },
1243 { "standard", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_STANDARD },
1244 0, 0, VE, "profile" },
1245 { "hq", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_HQ },
1246 0, 0, VE, "profile" },
1247 { "4444", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_4444 },
1248 0, 0, VE, "profile" },
/* encode_init() requires the string to be exactly four characters. */
1249 { "vendor", "vendor ID", OFFSET(vendor),
1250 AV_OPT_TYPE_STRING, { .str = "Lavc" }, CHAR_MIN, CHAR_MAX, VE },
/* 0 lets encode_init() choose from the profile table; other values below
 * 128 are rejected there. */
1251 { "bits_per_mb", "desired bits per macroblock", OFFSET(bits_per_mb),
1252 AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 8192, VE },
/* -1 ("auto") uses the matrix named by the selected profile. */
1253 { "quant_mat", "quantiser matrix", OFFSET(quant_sel), AV_OPT_TYPE_INT,
1254 { .i64 = -1 }, -1, QUANT_MAT_DEFAULT, VE, "quant_mat" },
1255 { "auto", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = -1 },
1256 0, 0, VE, "quant_mat" },
1257 { "proxy", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_PROXY },
1258 0, 0, VE, "quant_mat" },
1259 { "lt", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_LT },
1260 0, 0, VE, "quant_mat" },
1261 { "standard", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_STANDARD },
1262 0, 0, VE, "quant_mat" },
1263 { "hq", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_HQ },
1264 0, 0, VE, "quant_mat" },
1265 { "default", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_DEFAULT },
1266 0, 0, VE, "quant_mat" },
/* Forced to 0 by encode_init() when the pixel format has no alpha. */
1267 { "alpha_bits", "bits for alpha plane", OFFSET(alpha_bits), AV_OPT_TYPE_INT,
1268 { .i64 = 16 }, 0, 16, VE },
/* AVClass describing the encoder's private context; referenced from
 * ff_prores_encoder.priv_class.
 * NOTE(review): the initialiser on original line 1275 (presumably the
 * .option member pointing at options[]) is missing from this excerpt. */
1272 static const AVClass proresenc_class = {
1273 .class_name = "ProRes encoder",
1274 .item_name = av_default_item_name,
1276 .version = LIBAVUTIL_VERSION_INT,
/* Codec registration entry: wires encode_init(), encode_frame() and
 * encode_close() into the AVCodec interface, declares slice-threading
 * capability and lists the accepted 10-bit pixel formats (4:2:2, 4:4:4 and
 * 4:4:4 with alpha).
 * NOTE(review): the .name initialiser is on a line missing from this
 * excerpt. */
1279 AVCodec ff_prores_encoder = {
1281 .long_name = NULL_IF_CONFIG_SMALL("Apple ProRes (iCodec Pro)"),
1282 .type = AVMEDIA_TYPE_VIDEO,
1283 .id = AV_CODEC_ID_PRORES,
1284 .priv_data_size = sizeof(ProresContext),
1285 .init = encode_init,
1286 .close = encode_close,
1287 .encode2 = encode_frame,
1288 .capabilities = CODEC_CAP_SLICE_THREADS,
1289 .pix_fmts = (const enum AVPixelFormat[]) {
1290 AV_PIX_FMT_YUV422P10, AV_PIX_FMT_YUV444P10,
1291 AV_PIX_FMT_YUVA444P10, AV_PIX_FMT_NONE
1293 .priv_class = &proresenc_class,