4 * Copyright (c) 2012 Konstantin Shishkov
6 * This encoder appears to be based on Anatoliy Wasserman's encoder,
7 * considering the similarities in the bugs.
9 * This file is part of FFmpeg.
11 * FFmpeg is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public
13 * License as published by the Free Software Foundation; either
14 * version 2.1 of the License, or (at your option) any later version.
16 * FFmpeg is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with FFmpeg; if not, write to the Free Software
23 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
26 #include "libavutil/opt.h"
27 #include "libavutil/pixdesc.h"
31 #include "bytestream.h"
33 #include "proresdata.h"
35 #define CFACTOR_Y422 2
36 #define CFACTOR_Y444 3
38 #define MAX_MBS_PER_SLICE 8
/*
 * Encoder profile identifiers. AUTO (-1) makes encode_init choose a profile
 * from the pixel format; the non-negative values are used there as an index
 * into prores_profile_info.
 */
43 PRORES_PROFILE_AUTO = -1,
44 PRORES_PROFILE_PROXY = 0,
46 PRORES_PROFILE_STANDARD,
49 PRORES_PROFILE_4444XQ,
/*
 * Quantisation matrices, 64 coefficients each (written as 8 rows of 8).
 * encode_init points ctx->quant_mat at one of them, selected either by the
 * quant_mat option or by the .quant field of the active profile.
 * NOTE(review): the groups are presumably in QUANT_MAT_* enumerator order;
 * the enumeration is not visible here, so confirm before relying on it.
 */
60 static const uint8_t prores_quant_matrices[][64] = {
62 4, 7, 9, 11, 13, 14, 15, 63,
63 7, 7, 11, 12, 14, 15, 63, 63,
64 9, 11, 13, 14, 15, 63, 63, 63,
65 11, 11, 13, 14, 63, 63, 63, 63,
66 11, 13, 14, 63, 63, 63, 63, 63,
67 13, 14, 63, 63, 63, 63, 63, 63,
68 13, 63, 63, 63, 63, 63, 63, 63,
69 63, 63, 63, 63, 63, 63, 63, 63,
72 4, 5, 6, 7, 9, 11, 13, 15,
73 5, 5, 7, 8, 11, 13, 15, 17,
74 6, 7, 9, 11, 13, 15, 15, 17,
75 7, 7, 9, 11, 13, 15, 17, 19,
76 7, 9, 11, 13, 14, 16, 19, 23,
77 9, 11, 13, 14, 16, 19, 23, 29,
78 9, 11, 13, 15, 17, 21, 28, 35,
79 11, 13, 16, 17, 21, 28, 35, 41,
82 4, 4, 5, 5, 6, 7, 7, 9,
83 4, 4, 5, 6, 7, 7, 9, 9,
84 5, 5, 6, 7, 7, 9, 9, 10,
85 5, 5, 6, 7, 7, 9, 9, 10,
86 5, 6, 7, 7, 8, 9, 10, 12,
87 6, 7, 7, 8, 9, 10, 12, 15,
88 6, 7, 7, 9, 10, 11, 14, 17,
89 7, 7, 9, 10, 11, 14, 17, 21,
92 4, 4, 4, 4, 4, 4, 4, 4,
93 4, 4, 4, 4, 4, 4, 4, 4,
94 4, 4, 4, 4, 4, 4, 4, 4,
95 4, 4, 4, 4, 4, 4, 4, 5,
96 4, 4, 4, 4, 4, 4, 5, 5,
97 4, 4, 4, 4, 4, 5, 5, 6,
98 4, 4, 4, 4, 5, 5, 6, 7,
99 4, 4, 4, 4, 5, 6, 7, 7,
102 4, 4, 4, 4, 4, 4, 4, 4,
103 4, 4, 4, 4, 4, 4, 4, 4,
104 4, 4, 4, 4, 4, 4, 4, 4,
105 4, 4, 4, 4, 4, 4, 4, 4,
106 4, 4, 4, 4, 4, 4, 4, 4,
107 4, 4, 4, 4, 4, 4, 4, 4,
108 4, 4, 4, 4, 4, 4, 4, 4,
109 4, 4, 4, 4, 4, 4, 4, 4,
/*
 * Macroblock-count thresholds, one per frame-size class. encode_init compares
 * them against mb_width * mb_height * pictures_per_frame while choosing the
 * index it then uses to read the profile's br_tab.
 */
113 #define NUM_MB_LIMITS 4
114 static const int prores_mb_limits[NUM_MB_LIMITS] = {
115 1620, // up to 720x576
116 2700, // up to 960x720
117 6075, // up to 1440x1080
118 9216, // up to 2048x1152
/*
 * Per-profile parameters, indexed by ctx->profile in encode_init.
 * Of the members visible here:
 *   full_name - printable profile name
 *   tag       - value copied to avctx->codec_tag
 *   br_tab    - bits-per-macroblock budget for each prores_mb_limits class
 *   quant     - index into prores_quant_matrices used when the quant_mat
 *               option is left at -1
 * encode_init and find_slice_quant also read min_quant and max_quant from
 * these entries; those members are not visible in this listing.
 */
121 static const struct prores_profile {
122 const char *full_name;
126 int br_tab[NUM_MB_LIMITS];
128 } prores_profile_info[6] = {
130 .full_name = "proxy",
131 .tag = MKTAG('a', 'p', 'c', 'o'),
134 .br_tab = { 300, 242, 220, 194 },
135 .quant = QUANT_MAT_PROXY,
139 .tag = MKTAG('a', 'p', 'c', 's'),
142 .br_tab = { 720, 560, 490, 440 },
143 .quant = QUANT_MAT_LT,
146 .full_name = "standard",
147 .tag = MKTAG('a', 'p', 'c', 'n'),
150 .br_tab = { 1050, 808, 710, 632 },
151 .quant = QUANT_MAT_STANDARD,
154 .full_name = "high quality",
155 .tag = MKTAG('a', 'p', 'c', 'h'),
158 .br_tab = { 1566, 1216, 1070, 950 },
159 .quant = QUANT_MAT_HQ,
163 .tag = MKTAG('a', 'p', '4', 'h'),
166 .br_tab = { 2350, 1828, 1600, 1425 },
167 .quant = QUANT_MAT_HQ,
170 .full_name = "4444XQ",
171 .tag = MKTAG('a', 'p', '4', 'x'),
174 .br_tab = { 3525, 2742, 2400, 2137 },
175 .quant = QUANT_MAT_HQ,
179 #define TRELLIS_WIDTH 16
/*
 * Saturation value for trellis scores. It is half of INT_MAX so that adding
 * two scores that are both below the limit cannot overflow an int.
 * The replacement list is parenthesised so the macro behaves as a single
 * operand next to any operator (e.g. `x % SCORE_LIMIT`).
 */
#define SCORE_LIMIT (INT_MAX / 2)
189 #define MAX_STORED_Q 16
/*
 * Scratch state owned by one worker thread; find_quant_thread selects its
 * instance with ctx->tdata + threadnr.
 *   blocks   - coefficient storage per plane, sized for MAX_MBS_PER_SLICE
 *              macroblocks of four 64-coefficient blocks each
 *   emu_buf  - 16x16 sample buffer passed to get_slice_data for macroblocks
 *              that cross the picture edge
 *   custom_q - quantiser matrix storage (its users are not visible here)
 *   nodes    - trellis nodes used by find_slice_quant
 */
191 typedef struct ProresThreadData {
192 DECLARE_ALIGNED(16, int16_t, blocks)[MAX_PLANES][64 * 4 * MAX_MBS_PER_SLICE];
193 DECLARE_ALIGNED(16, uint16_t, emu_buf)[16 * 16];
194 int16_t custom_q[64];
195 struct TrellisNode *nodes;
/*
 * Encoder private context (avctx->priv_data). Only part of the member list
 * is visible in this listing.
 */
198 typedef struct ProresContext {
/* Scratch buffers used by encode_slice, which runs outside the worker-thread path. */
200 DECLARE_ALIGNED(16, int16_t, blocks)[MAX_PLANES][64 * 4 * MAX_MBS_PER_SLICE];
201 DECLARE_ALIGNED(16, uint16_t, emu_buf)[16*16];
/* quants[q][i] = quant_mat[i] * q, filled in encode_init; slot 0 holds the forced-quantiser matrix. */
202 int16_t quants[MAX_STORED_Q][64];
/* Matrix built on the fly by encode_slice when quant >= MAX_STORED_Q. */
203 int16_t custom_q[64];
204 const uint8_t *quant_mat;
/* Interlaced or progressive scan order, chosen in encode_init. */
205 const uint8_t *scantable;
/* Forward transform hook; encode_init sets it to prores_fdct. */
207 void (*fdct)(FDCTDSPContext *fdsp, const uint16_t *src,
208 ptrdiff_t linesize, int16_t *block);
/* Picture size in 16x16 macroblocks (height is per field when interlaced). */
212 int mb_width, mb_height;
214 int num_chroma_blocks, chroma_factor;
216 int slices_per_picture;
217 int pictures_per_frame; // 1 for progressive, 2 for interlaced
/* Packet size estimate computed in encode_init and enlarged by encode_frame when a packet has to grow. */
228 int frame_size_upper_bound;
231 const struct prores_profile *profile_info;
/* Array of avctx->thread_count per-thread scratch structures. */
235 ProresThreadData *tdata;
/*
 * Transform the samples of one slice plane into coefficient blocks.
 *
 * Walks mbs_per_slice macroblocks starting at (x, y) in a plane of size
 * w x h, feeding 8x8 sub-blocks to ctx->fdct. A macroblock is 4 *
 * blocks_per_mb samples wide and 16 rows high. Macroblocks that are not
 * completely inside the plane are first copied into emu_buf with the last
 * valid column and row replicated.
 *
 * NOTE(review): several lines of this function (local declarations, the
 * advance of `blocks` between fdct calls, the branch on is_chroma) are not
 * visible in this listing; the comments below cover only what is shown.
 */
238 static void get_slice_data(ProresContext *ctx, const uint16_t *src,
239 ptrdiff_t linesize, int x, int y, int w, int h,
240 int16_t *blocks, uint16_t *emu_buf,
241 int mbs_per_slice, int blocks_per_mb, int is_chroma)
243 const uint16_t *esrc;
244 const int mb_width = 4 * blocks_per_mb;
248 for (i = 0; i < mbs_per_slice; i++, src += mb_width) {
/* Zero the coefficients of all macroblocks that remain in the slice. */
250 memset(blocks, 0, 64 * (mbs_per_slice - i) * blocks_per_mb
/* Macroblock lies fully inside the plane: use the source stride. */
254 if (x + mb_width <= w && y + 16 <= h) {
256 elinesize = linesize;
/* Otherwise work from emu_buf, whose rows are 16 samples apart. */
261 elinesize = 16 * sizeof(*emu_buf);
/* Size of the part of the macroblock that holds real samples. */
263 bw = FFMIN(w - x, mb_width);
264 bh = FFMIN(h - y, 16);
266 for (j = 0; j < bh; j++) {
267 memcpy(emu_buf + j * 16,
268 (const uint8_t*)src + j * linesize,
/* Replicate the last valid sample of the row to the right edge. */
270 pix = emu_buf[j * 16 + bw - 1];
271 for (k = bw; k < mb_width; k++)
272 emu_buf[j * 16 + k] = pix;
/* Fill a missing row with a copy of the last valid row. */
275 memcpy(emu_buf + j * 16,
276 emu_buf + (bh - 1) * 16,
277 mb_width * sizeof(*emu_buf));
/*
 * First ordering of the sub-blocks: top-left, top-right, bottom-left,
 * bottom-right (the right-hand ones only when blocks_per_mb > 2).
 */
280 ctx->fdct(&ctx->fdsp, esrc, elinesize, blocks);
282 if (blocks_per_mb > 2) {
283 ctx->fdct(&ctx->fdsp, esrc + 8, elinesize, blocks);
286 ctx->fdct(&ctx->fdsp, esrc + elinesize * 4, elinesize, blocks);
288 if (blocks_per_mb > 2) {
289 ctx->fdct(&ctx->fdsp, esrc + elinesize * 4 + 8, elinesize, blocks);
/*
 * Second ordering: top-left, bottom-left, then the right-hand column.
 * NOTE(review): presumably selected by is_chroma; the condition is not visible.
 */
293 ctx->fdct(&ctx->fdsp, esrc, elinesize, blocks);
295 ctx->fdct(&ctx->fdsp, esrc + elinesize * 4, elinesize, blocks);
297 if (blocks_per_mb > 2) {
298 ctx->fdct(&ctx->fdsp, esrc + 8, elinesize, blocks);
300 ctx->fdct(&ctx->fdsp, esrc + elinesize * 4 + 8, elinesize, blocks);
/*
 * Copy the alpha samples of one slice into `blocks` as 16 rows of
 * 16 * mbs_per_slice values, padding to the right and downwards when the
 * slice extends past the w x h plane.
 *
 * NOTE(review): the lines that choose between the per-sample conversions
 * (presumably depending on abits) are not visible in this listing.
 */
309 static void get_alpha_data(ProresContext *ctx, const uint16_t *src,
310 ptrdiff_t linesize, int x, int y, int w, int h,
311 int16_t *blocks, int mbs_per_slice, int abits)
313 const int slice_width = 16 * mbs_per_slice;
314 int i, j, copy_w, copy_h;
/* Clamp the copied area to the part of the slice inside the plane. */
316 copy_w = FFMIN(w - x, slice_width);
317 copy_h = FFMIN(h - y, 16);
318 for (i = 0; i < copy_h; i++) {
319 memcpy(blocks, src, copy_w * sizeof(*src));
321 for (j = 0; j < copy_w; j++)
324 for (j = 0; j < copy_w; j++)
325 blocks[j] = (blocks[j] << 6) | (blocks[j] >> 4);
/* Pad the row by repeating its last copied sample. */
326 for (j = copy_w; j < slice_width; j++)
327 blocks[j] = blocks[copy_w - 1];
328 blocks += slice_width;
/* src is a uint16_t pointer, so the stride is halved (linesize presumably in bytes). */
329 src += linesize >> 1;
/* Pad missing rows by duplicating the previous row. */
331 for (; i < 16; i++) {
332 memcpy(blocks, blocks - slice_width, slice_width * sizeof(*blocks));
333 blocks += slice_width;
338 * Write an unsigned rice/exp golomb codeword.
/*
 * The codebook descriptor packs three fields: bits 0-1 hold switch_bits - 1,
 * bits 2-4 the exp-Golomb order and the bits from 5 upwards the Rice order.
 * Values at or above switch_bits << rice_order are written with the
 * exp-Golomb form, smaller values with the Rice form.
 * NOTE(review): some lines of the Rice branch are not visible in this listing.
 */
340 static inline void encode_vlc_codeword(PutBitContext *pb, unsigned codebook, int val)
342 unsigned int rice_order, exp_order, switch_bits, switch_val;
345 /* number of prefix bits to switch between Rice and expGolomb */
346 switch_bits = (codebook & 3) + 1;
347 rice_order = codebook >> 5; /* rice code order */
348 exp_order = (codebook >> 2) & 7; /* exp golomb code order */
350 switch_val = switch_bits << rice_order;
352 if (val >= switch_val) {
/* Rebase so the smallest exp-Golomb value equals 1 << exp_order. */
353 val -= switch_val - (1 << exp_order);
354 exponent = av_log2(val);
/* Zero prefix, then the value itself in exponent + 1 bits. */
356 put_bits(pb, exponent - exp_order + switch_bits, 0);
357 put_bits(pb, exponent + 1, val);
/* Rice form: the quotient val >> rice_order gives the length of the zero run. */
359 exponent = val >> rice_order;
362 put_bits(pb, exponent, 0);
/* Low rice_order bits of val. */
365 put_sbits(pb, rice_order, val);
/*
 * GET_SIGN shifts its argument right by 31; for a 32-bit int with an
 * arithmetic shift this gives 0 for non-negative and -1 for negative values.
 * MAKE_CODE doubles the value and XORs it with that sign, producing the
 * non-negative code handed to the VLC writer and estimator.
 */
369 #define GET_SIGN(x) ((x) >> 31)
370 #define MAKE_CODE(x) ((((x)) * 2) ^ GET_SIGN(x))
/*
 * Write the DC coefficients of all blocks in a slice plane.
 *
 * Each DC has 0x4000 subtracted and is divided by `scale`. The first one is
 * coded directly with FIRST_DC_CB; the following ones are coded as
 * differences from the previous DC, with the codebook adapted from the
 * previous code.
 * NOTE(review): the lines that initialise and update `sign` and `prev_dc`
 * inside the loop are not visible in this listing.
 */
372 static void encode_dcs(PutBitContext *pb, int16_t *blocks,
373 int blocks_per_slice, int scale)
376 int codebook = 3, code, dc, prev_dc, delta, sign, new_sign;
378 prev_dc = (blocks[0] - 0x4000) / scale;
379 encode_vlc_codeword(pb, FIRST_DC_CB, MAKE_CODE(prev_dc));
384 for (i = 1; i < blocks_per_slice; i++, blocks += 64) {
385 dc = (blocks[0] - 0x4000) / scale;
386 delta = dc - prev_dc;
387 new_sign = GET_SIGN(delta);
/* Negates delta when `sign` is -1 and leaves it unchanged when it is 0. */
388 delta = (delta ^ sign) - sign;
389 code = MAKE_CODE(delta);
390 encode_vlc_codeword(pb, ff_prores_dc_codebook[codebook], code);
/* Next codebook index is ceil(code / 2), capped at 3. */
391 codebook = (code + (code & 1)) >> 1;
392 codebook = FFMIN(codebook, 3);
/*
 * Write the AC coefficients of a slice plane as (run, level, sign) triples.
 *
 * Coefficients are visited position by position in scan order, and for each
 * position across every block of the slice (idx advances by 64). Each one is
 * divided by the matching qmat entry. The run and level codebooks are
 * re-selected from the run and level just coded.
 * NOTE(review): the zero-level test, the run counter updates and the level
 * argument of the second codeword are not visible in this listing.
 */
398 static void encode_acs(PutBitContext *pb, int16_t *blocks,
399 int blocks_per_slice,
400 int plane_size_factor,
401 const uint8_t *scan, const int16_t *qmat)
404 int run, level, run_cb, lev_cb;
405 int max_coeffs, abs_level;
/* Total number of coefficients in the slice plane (64 per block). */
407 max_coeffs = blocks_per_slice << 6;
/* Initial codebooks correspond to a previous run of 4 and level of 2. */
408 run_cb = ff_prores_run_to_cb_index[4];
409 lev_cb = ff_prores_lev_to_cb_index[2];
412 for (i = 1; i < 64; i++) {
413 for (idx = scan[i]; idx < max_coeffs; idx += 64) {
414 level = blocks[idx] / qmat[scan[i]];
416 abs_level = FFABS(level);
417 encode_vlc_codeword(pb, ff_prores_ac_codebook[run_cb], run);
418 encode_vlc_codeword(pb, ff_prores_ac_codebook[lev_cb],
/* One bit carrying the sign of the level. */
420 put_sbits(pb, 1, GET_SIGN(level));
/* Table lookups are clamped to a run of 15 and a level of 9. */
422 run_cb = ff_prores_run_to_cb_index[FFMIN(run, 15)];
423 lev_cb = ff_prores_lev_to_cb_index[FFMIN(abs_level, 9)];
/*
 * Write the DC and AC data of one slice plane to pb.
 * Returns the number of bits written since entry, shifted right by 3 (bytes).
 * NOTE(review): the end of the parameter list (which supplies qmat) and any
 * flushing of pb before the return are not visible in this listing.
 */
432 static int encode_slice_plane(ProresContext *ctx, PutBitContext *pb,
433 const uint16_t *src, ptrdiff_t linesize,
434 int mbs_per_slice, int16_t *blocks,
435 int blocks_per_mb, int plane_size_factor,
438 int blocks_per_slice, saved_pos;
440 saved_pos = put_bits_count(pb);
441 blocks_per_slice = mbs_per_slice * blocks_per_mb;
/* qmat[0] is the DC quantiser. */
443 encode_dcs(pb, blocks, blocks_per_slice, qmat[0]);
444 encode_acs(pb, blocks, blocks_per_slice, plane_size_factor,
445 ctx->scantable, qmat);
448 return (put_bits_count(pb) - saved_pos) >> 3;
/*
 * Write the difference between the current and previous alpha value.
 *
 * The difference is reduced modulo 2^abits. A zero difference, or one whose
 * magnitude exceeds dsize, is followed by the raw abits-wide value; other
 * differences are written as (magnitude - 1) in dbits - 1 bits plus a sign bit.
 * NOTE(review): the flag bits that distinguish the two forms and the
 * adjustment applied when diff wraps are not visible in this listing.
 */
451 static void put_alpha_diff(PutBitContext *pb, int cur, int prev, int abits)
453 const int dbits = (abits == 8) ? 4 : 7;
/* `-` binds tighter than `<<`, so this is 1 << (dbits - 1). */
454 const int dsize = 1 << dbits - 1;
455 int diff = cur - prev;
457 diff = av_mod_uintp2(diff, abits);
458 if (diff >= (1 << abits) - dsize)
460 if (diff < -dsize || diff > dsize || !diff) {
462 put_bits(pb, abits, diff);
465 put_bits(pb, dbits - 1, FFABS(diff) - 1);
466 put_bits(pb, 1, diff < 0);
/*
 * Write a run length for the alpha plane, using either a 4-bit or a 15-bit
 * field for the run value.
 * NOTE(review): the conditions and prefix bits that select between the two
 * forms are not visible in this listing.
 */
470 static void put_alpha_run(PutBitContext *pb, int run)
475 put_bits(pb, 4, run);
477 put_bits(pb, 15, run);
483 // todo alpha quantisation for high quants
/*
 * Write the alpha samples of one slice (mbs_per_slice * 256 values) as
 * difference codes interleaved with run lengths.
 * Returns the number of bits written since entry, shifted right by 3 (bytes).
 * NOTE(review): the loop body that reads samples, counts runs and updates
 * `prev`, and any flushing of pb, are not visible in this listing.
 */
484 static int encode_alpha_plane(ProresContext *ctx, PutBitContext *pb,
485 int mbs_per_slice, uint16_t *blocks,
488 const int abits = ctx->alpha_bits;
489 const int mask = (1 << abits) - 1;
490 const int num_coeffs = mbs_per_slice * 256;
491 int saved_pos = put_bits_count(pb);
/* The predictor starts at the all-ones value for the alpha depth. */
492 int prev = mask, cur;
497 put_alpha_diff(pb, cur, prev, abits);
502 put_alpha_run (pb, run);
503 put_alpha_diff(pb, cur, prev, abits);
509 } while (idx < num_coeffs);
511 put_alpha_run(pb, run);
513 return (put_bits_count(pb) - saved_pos) >> 3;
/*
 * Encode every plane of one slice into pb and record the per-plane byte
 * counts in sizes[].
 *
 * x and y locate the slice and quant is the quantiser chosen for it.
 * NOTE(review): parts of the parameter list, the computation of xp / yp /
 * num_cblocks, the branch between colour and alpha planes and the return
 * statements are not visible in this listing.
 */
516 static int encode_slice(AVCodecContext *avctx, const AVFrame *pic,
518 int sizes[4], int x, int y, int quant,
521 ProresContext *ctx = avctx->priv_data;
525 int slice_width_factor = av_log2(mbs_per_slice);
526 int num_cblocks, pwidth, line_add;
528 int plane_factor, is_chroma;
/* Interlaced frames: choose which field's lines this picture starts from. */
531 if (ctx->pictures_per_frame == 1)
534 line_add = ctx->cur_picture_idx ^ !pic->top_field_first;
/*
 * Quantiser matrix: slot 0 when a quantiser is forced, a precomputed
 * matrix for small quantisers, otherwise one built in ctx->custom_q.
 */
536 if (ctx->force_quant) {
537 qmat = ctx->quants[0];
538 } else if (quant < MAX_STORED_Q) {
539 qmat = ctx->quants[quant];
541 qmat = ctx->custom_q;
542 for (i = 0; i < 64; i++)
543 qmat[i] = ctx->quant_mat[i] * quant;
546 for (i = 0; i < ctx->num_planes; i++) {
/* Planes 1 and 2 are the chroma planes. */
547 is_chroma = (i == 1 || i == 2);
548 plane_factor = slice_width_factor + 2;
550 plane_factor += ctx->chroma_factor - 3;
/* Full-width planes: luma, alpha, and chroma when the input is 4:4:4. */
551 if (!is_chroma || ctx->chroma_factor == CFACTOR_Y444) {
555 pwidth = avctx->width;
/* Horizontally subsampled chroma uses half the width. */
560 pwidth = avctx->width >> 1;
/* With two pictures per frame the stride skips the other field's lines. */
563 linesize = pic->linesize[i] * ctx->pictures_per_frame;
564 src = (const uint16_t*)(pic->data[i] + yp * linesize +
565 line_add * pic->linesize[i]) + xp;
568 get_slice_data(ctx, src, linesize, xp, yp,
569 pwidth, avctx->height / ctx->pictures_per_frame,
570 ctx->blocks[0], ctx->emu_buf,
571 mbs_per_slice, num_cblocks, is_chroma);
572 sizes[i] = encode_slice_plane(ctx, pb, src, linesize,
573 mbs_per_slice, ctx->blocks[0],
574 num_cblocks, plane_factor,
577 get_alpha_data(ctx, src, linesize, xp, yp,
578 pwidth, avctx->height / ctx->pictures_per_frame,
579 ctx->blocks[0], mbs_per_slice, ctx->alpha_bits);
580 sizes[i] = encode_alpha_plane(ctx, pb, mbs_per_slice,
581 ctx->blocks[0], quant);
583 total_size += sizes[i];
/* A negative remaining-bit count means the output buffer was too small. */
584 if (put_bits_left(pb) < 0) {
585 av_log(avctx, AV_LOG_ERROR,
586 "Underestimated required buffer size.\n");
/*
 * Return the number of bits encode_vlc_codeword would use for `val` with
 * the given codebook descriptor, without writing anything.
 *
 * The descriptor is decoded exactly as in encode_vlc_codeword. The
 * exp-Golomb form costs a zero prefix of exponent - exp_order + switch_bits
 * bits plus exponent + 1 value bits; the other form costs
 * (val >> rice_order) + rice_order + 1 bits.
 */
593 static inline int estimate_vlc(unsigned codebook, int val)
595 unsigned int rice_order, exp_order, switch_bits, switch_val;
598 /* number of prefix bits to switch between Rice and expGolomb */
599 switch_bits = (codebook & 3) + 1;
600 rice_order = codebook >> 5; /* rice code order */
601 exp_order = (codebook >> 2) & 7; /* exp golomb code order */
603 switch_val = switch_bits << rice_order;
605 if (val >= switch_val) {
606 val -= switch_val - (1 << exp_order);
607 exponent = av_log2(val);
609 return exponent * 2 - exp_order + switch_bits + 1;
611 return (val >> rice_order) + rice_order + 1;
/*
 * Estimate the bits needed for the DC coefficients of a slice plane and add
 * the quantisation remainders to *error.
 *
 * Follows the same steps as encode_dcs, calling estimate_vlc instead of
 * writing bits.
 * NOTE(review): the end of the parameter list (which supplies `scale`), the
 * updates of `sign` / `prev_dc` and the return statement are not visible in
 * this listing.
 */
615 static int estimate_dcs(int *error, int16_t *blocks, int blocks_per_slice,
619 int codebook = 3, code, dc, prev_dc, delta, sign, new_sign;
622 prev_dc = (blocks[0] - 0x4000) / scale;
623 bits = estimate_vlc(FIRST_DC_CB, MAKE_CODE(prev_dc));
/* The error term is the remainder discarded by the division. */
627 *error += FFABS(blocks[0] - 0x4000) % scale;
629 for (i = 1; i < blocks_per_slice; i++, blocks += 64) {
630 dc = (blocks[0] - 0x4000) / scale;
631 *error += FFABS(blocks[0] - 0x4000) % scale;
632 delta = dc - prev_dc;
633 new_sign = GET_SIGN(delta);
634 delta = (delta ^ sign) - sign;
635 code = MAKE_CODE(delta);
636 bits += estimate_vlc(ff_prores_dc_codebook[codebook], code);
/* Next codebook index is ceil(code / 2), capped at 3. */
637 codebook = (code + (code & 1)) >> 1;
638 codebook = FFMIN(codebook, 3);
/*
 * Estimate the bits needed for the AC coefficients of a slice plane and add
 * the quantisation remainders to *error.
 *
 * Visits coefficients in the same order as encode_acs and uses the same
 * codebook adaptation, calling estimate_vlc instead of writing bits.
 * NOTE(review): the zero-level test, the run counter updates, the sign-bit
 * cost and the return statement are not visible in this listing.
 */
646 static int estimate_acs(int *error, int16_t *blocks, int blocks_per_slice,
647 int plane_size_factor,
648 const uint8_t *scan, const int16_t *qmat)
651 int run, level, run_cb, lev_cb;
652 int max_coeffs, abs_level;
655 max_coeffs = blocks_per_slice << 6;
656 run_cb = ff_prores_run_to_cb_index[4];
657 lev_cb = ff_prores_lev_to_cb_index[2];
660 for (i = 1; i < 64; i++) {
661 for (idx = scan[i]; idx < max_coeffs; idx += 64) {
662 level = blocks[idx] / qmat[scan[i]];
/* The error term is the remainder discarded by the division. */
663 *error += FFABS(blocks[idx]) % qmat[scan[i]];
665 abs_level = FFABS(level);
666 bits += estimate_vlc(ff_prores_ac_codebook[run_cb], run);
667 bits += estimate_vlc(ff_prores_ac_codebook[lev_cb],
670 run_cb = ff_prores_run_to_cb_index[FFMIN(run, 15)];
671 lev_cb = ff_prores_lev_to_cb_index[FFMIN(abs_level, 9)];
/*
 * Estimate the coded size in bits of one slice plane for the matrix qmat,
 * using the coefficients already stored in td->blocks[plane].
 * The quantisation error is accumulated into *error; the result is rounded
 * up to a multiple of 8 bits.
 */
682 static int estimate_slice_plane(ProresContext *ctx, int *error, int plane,
683 const uint16_t *src, ptrdiff_t linesize,
685 int blocks_per_mb, int plane_size_factor,
686 const int16_t *qmat, ProresThreadData *td)
688 int blocks_per_slice;
691 blocks_per_slice = mbs_per_slice * blocks_per_mb;
693 bits = estimate_dcs(error, td->blocks[plane], blocks_per_slice, qmat[0]);
694 bits += estimate_acs(error, td->blocks[plane], blocks_per_slice,
695 plane_size_factor, ctx->scantable, qmat);
697 return FFALIGN(bits, 8);
/*
 * Bit-cost counterpart of put_alpha_diff: classifies the wrapped difference
 * between cur and prev with the same thresholds.
 * NOTE(review): the returned bit counts and the adjustment applied when diff
 * wraps are not visible in this listing.
 */
700 static int est_alpha_diff(int cur, int prev, int abits)
702 const int dbits = (abits == 8) ? 4 : 7;
/* `-` binds tighter than `<<`, so this is 1 << (dbits - 1). */
703 const int dsize = 1 << dbits - 1;
704 int diff = cur - prev;
706 diff = av_mod_uintp2(diff, abits);
707 if (diff >= (1 << abits) - dsize)
709 if (diff < -dsize || diff > dsize || !diff)
/*
 * Estimate the coded size of the alpha data of one slice
 * (mbs_per_slice * 256 samples) by summing est_alpha_diff costs.
 * NOTE(review): the sample loop body, the run-length costs and the return
 * statement are not visible in this listing.
 */
715 static int estimate_alpha_plane(ProresContext *ctx,
716 const uint16_t *src, ptrdiff_t linesize,
717 int mbs_per_slice, int16_t *blocks)
719 const int abits = ctx->alpha_bits;
720 const int mask = (1 << abits) - 1;
721 const int num_coeffs = mbs_per_slice * 256;
/* The predictor starts at the all-ones value for the alpha depth. */
722 int prev = mask, cur;
728 bits = est_alpha_diff(cur, prev, abits);
739 bits += est_alpha_diff(cur, prev, abits);
745 } while (idx < num_coeffs);
/*
 * Evaluate every candidate quantiser for the slice at (x, y) and extend the
 * rate/distortion trellis stored in td->nodes by one column.
 *
 * trellis_node is the index of the first node of this slice's column; the
 * previous column starts TRELLIS_WIDTH entries earlier. Candidates are the
 * profile's min_quant..max_quant plus one extra slot (max_quant + 1) holding
 * an "overquant" fallback.
 * find_quant_thread uses the return value as the node index from which it
 * back-tracks through prev_node.
 * NOTE(review): a number of lines (local declarations, xp / yp / num_cblocks
 * computation, penalties, the return statement) are not visible in this
 * listing; the comments below cover only what is shown.
 */
757 static int find_slice_quant(AVCodecContext *avctx,
758 int trellis_node, int x, int y, int mbs_per_slice,
759 ProresThreadData *td)
761 ProresContext *ctx = avctx->priv_data;
762 int i, q, pq, xp, yp;
764 int slice_width_factor = av_log2(mbs_per_slice);
765 int num_cblocks[MAX_PLANES], pwidth;
766 int plane_factor[MAX_PLANES], is_chroma[MAX_PLANES];
767 const int min_quant = ctx->profile_info->min_quant;
768 const int max_quant = ctx->profile_info->max_quant;
769 int error, bits, bits_limit;
770 int mbs, prev, cur, new_score;
771 int slice_bits[TRELLIS_WIDTH], slice_score[TRELLIS_WIDTH];
774 int linesize[4], line_add;
/* Interlaced frames: choose which field's lines this picture starts from. */
777 if (ctx->pictures_per_frame == 1)
780 line_add = ctx->cur_picture_idx ^ !ctx->pic->top_field_first;
/* Macroblocks in this row up to and including this slice; scales bits_limit below. */
781 mbs = x + mbs_per_slice;
/* Transform every plane once; the per-quantiser passes reuse td->blocks. */
783 for (i = 0; i < ctx->num_planes; i++) {
784 is_chroma[i] = (i == 1 || i == 2);
785 plane_factor[i] = slice_width_factor + 2;
787 plane_factor[i] += ctx->chroma_factor - 3;
788 if (!is_chroma[i] || ctx->chroma_factor == CFACTOR_Y444) {
792 pwidth = avctx->width;
797 pwidth = avctx->width >> 1;
800 linesize[i] = ctx->pic->linesize[i] * ctx->pictures_per_frame;
801 src = (const uint16_t *)(ctx->pic->data[i] + yp * linesize[i] +
802 line_add * ctx->pic->linesize[i]) + xp;
805 get_slice_data(ctx, src, linesize[i], xp, yp,
806 pwidth, avctx->height / ctx->pictures_per_frame,
807 td->blocks[i], td->emu_buf,
808 mbs_per_slice, num_cblocks[i], is_chroma[i]);
810 get_alpha_data(ctx, src, linesize[i], xp, yp,
811 pwidth, avctx->height / ctx->pictures_per_frame,
812 td->blocks[i], mbs_per_slice, ctx->alpha_bits);
/* Reset this column's nodes, including the extra overquant slot. */
816 for (q = min_quant; q < max_quant + 2; q++) {
817 td->nodes[trellis_node + q].prev_node = -1;
818 td->nodes[trellis_node + q].quant = q;
822 alpha_bits = estimate_alpha_plane(ctx, src, linesize[3],
823 mbs_per_slice, td->blocks[3]);
824 // todo: maybe perform coarser quantising to fit into frame size when needed
/* Cost (bits) and distortion (error) of each regular quantiser; the alpha plane is excluded from this loop. */
825 for (q = min_quant; q <= max_quant; q++) {
828 for (i = 0; i < ctx->num_planes - !!ctx->alpha_bits; i++) {
829 bits += estimate_slice_plane(ctx, &error, i,
832 num_cblocks[i], plane_factor[i],
835 if (bits > 65000 * 8)
838 slice_bits[q] = bits;
839 slice_score[q] = error;
/*
 * Overquant slot: if max_quant already fits the per-slice budget, reuse its
 * figures (score one higher); otherwise try quantisers up to 127 until the
 * estimate fits the budget.
 */
841 if (slice_bits[max_quant] <= ctx->bits_per_mb * mbs_per_slice) {
842 slice_bits[max_quant + 1] = slice_bits[max_quant];
843 slice_score[max_quant + 1] = slice_score[max_quant] + 1;
844 overquant = max_quant;
846 for (q = max_quant + 1; q < 128; q++) {
849 if (q < MAX_STORED_Q) {
850 qmat = ctx->quants[q];
853 for (i = 0; i < 64; i++)
854 qmat[i] = ctx->quant_mat[i] * q;
856 for (i = 0; i < ctx->num_planes - !!ctx->alpha_bits; i++) {
857 bits += estimate_slice_plane(ctx, &error, i,
860 num_cblocks[i], plane_factor[i],
863 if (bits <= ctx->bits_per_mb * mbs_per_slice)
867 slice_bits[max_quant + 1] = bits;
868 slice_score[max_quant + 1] = error;
871 td->nodes[trellis_node + max_quant + 1].quant = overquant;
/* Cumulative bit budget for all macroblocks up to the end of this slice. */
873 bits_limit = mbs * ctx->bits_per_mb;
/* Try every (previous node, current quantiser) transition. */
874 for (pq = min_quant; pq < max_quant + 2; pq++) {
875 prev = trellis_node - TRELLIS_WIDTH + pq;
877 for (q = min_quant; q < max_quant + 2; q++) {
878 cur = trellis_node + q;
880 bits = td->nodes[prev].bits + slice_bits[q];
881 error = slice_score[q];
882 if (bits > bits_limit)
/* Saturating addition: scores never exceed SCORE_LIMIT. */
885 if (td->nodes[prev].score < SCORE_LIMIT && error < SCORE_LIMIT)
886 new_score = td->nodes[prev].score + error;
888 new_score = SCORE_LIMIT;
/* Keep this transition if the node is unset or the score is not worse. */
889 if (td->nodes[cur].prev_node == -1 ||
890 td->nodes[cur].score >= new_score) {
892 td->nodes[cur].bits = bits;
893 td->nodes[cur].score = new_score;
894 td->nodes[cur].prev_node = prev;
/* Pick the node with the lowest score; ties go to the higher quantiser. */
899 error = td->nodes[trellis_node + min_quant].score;
900 pq = trellis_node + min_quant;
901 for (q = min_quant + 1; q < max_quant + 2; q++) {
902 if (td->nodes[trellis_node + q].score <= error) {
903 error = td->nodes[trellis_node + q].score;
904 pq = trellis_node + q;
/*
 * Worker run once per macroblock row (jobnr is the row): builds the trellis
 * across all slices of the row, then walks it backwards to store the chosen
 * quantiser of each slice in ctx->slice_q.
 * NOTE(review): the body of the slice-width loop, the trailing arguments of
 * find_slice_quant and the return statement are not visible in this listing.
 */
911 static int find_quant_thread(AVCodecContext *avctx, void *arg,
912 int jobnr, int threadnr)
914 ProresContext *ctx = avctx->priv_data;
915 ProresThreadData *td = ctx->tdata + threadnr;
916 int mbs_per_slice = ctx->mbs_per_slice;
917 int x, y = jobnr, mb, q = 0;
919 for (x = mb = 0; x < ctx->mb_width; x += mbs_per_slice, mb++) {
920 while (ctx->mb_width - x < mbs_per_slice)
/* Column mb + 1 of the trellis; column 0 is the start state set up in encode_init. */
922 q = find_slice_quant(avctx,
923 (mb + 1) * TRELLIS_WIDTH, x, y,
/* Back-track from the last node, filling slice_q from right to left. */
927 for (x = ctx->slices_width - 1; x >= 0; x--) {
928 ctx->slice_q[x + y * ctx->slices_width] = td->nodes[q].quant;
929 q = td->nodes[q].prev_node;
/*
 * Encode one frame into pkt.
 *
 * Writes the frame header, then for each picture (one for progressive, two
 * for interlaced) a picture header, a slice size table and the slices
 * themselves. Size fields are reserved first and filled in once the sizes
 * are known. The packet is grown when the remaining space may not hold
 * another slice.
 * NOTE(review): several lines (local declarations, error returns, some
 * pointer set-up, the final return) are not visible in this listing.
 */
935 static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
936 const AVFrame *pic, int *got_packet)
938 ProresContext *ctx = avctx->priv_data;
939 uint8_t *orig_buf, *buf, *slice_hdr, *slice_sizes, *tmp;
940 uint8_t *picture_size_pos;
942 int x, y, i, mb, q = 0;
943 int sizes[4] = { 0 };
/* Quantiser field (2 bytes written in total before the sizes) plus a 16-bit size for every plane except the last. */
944 int slice_hdr_size = 2 + 2 * (ctx->num_planes - 1);
945 int frame_size, picture_size, slice_size;
/* Initial per-slice size guess derived from the frame size estimate. */
947 int max_slice_size = (ctx->frame_size_upper_bound - 200) / (ctx->pictures_per_frame * ctx->slices_per_picture + 1);
951 pkt_size = ctx->frame_size_upper_bound;
953 if ((ret = ff_alloc_packet2(avctx, pkt, pkt_size + AV_INPUT_BUFFER_MIN_SIZE, 0)) < 0)
956 orig_buf = pkt->data;
/* Frame container: size field (filled in at the end) and identifier. */
959 orig_buf += 4; // frame size
960 bytestream_put_be32 (&orig_buf, FRAME_ID); // frame container ID
/* Frame header. */
965 buf += 2; // frame header size will be stored here
966 bytestream_put_be16 (&buf, 0); // version 1
967 bytestream_put_buffer(&buf, ctx->vendor, 4);
968 bytestream_put_be16 (&buf, avctx->width);
969 bytestream_put_be16 (&buf, avctx->height);
/* Chroma format in the top bits, field order flags when interlaced. */
971 frame_flags = ctx->chroma_factor << 6;
972 if (avctx->flags & AV_CODEC_FLAG_INTERLACED_DCT)
973 frame_flags |= pic->top_field_first ? 0x04 : 0x08;
974 bytestream_put_byte (&buf, frame_flags);
976 bytestream_put_byte (&buf, 0); // reserved
977 bytestream_put_byte (&buf, pic->color_primaries);
978 bytestream_put_byte (&buf, pic->color_trc);
979 bytestream_put_byte (&buf, pic->colorspace);
/* Low bits carry alpha_bits / 8 (0, 1 or 2). */
980 bytestream_put_byte (&buf, 0x40 | (ctx->alpha_bits >> 3));
981 bytestream_put_byte (&buf, 0); // reserved
/* Custom matrices are stored explicitly; the same matrix is written for luma and chroma. */
982 if (ctx->quant_sel != QUANT_MAT_DEFAULT) {
983 bytestream_put_byte (&buf, 0x03); // matrix flags - both matrices are present
984 // luma quantisation matrix
985 for (i = 0; i < 64; i++)
986 bytestream_put_byte(&buf, ctx->quant_mat[i]);
987 // chroma quantisation matrix
988 for (i = 0; i < 64; i++)
989 bytestream_put_byte(&buf, ctx->quant_mat[i]);
991 bytestream_put_byte (&buf, 0x00); // matrix flags - default matrices are used
993 bytestream_put_be16 (&tmp, buf - orig_buf); // write back frame header size
995 for (ctx->cur_picture_idx = 0;
996 ctx->cur_picture_idx < ctx->pictures_per_frame;
997 ctx->cur_picture_idx++) {
/* Picture header; the size field sits one byte after its start. */
999 picture_size_pos = buf + 1;
1000 bytestream_put_byte (&buf, 0x40); // picture header size (in bits)
1001 buf += 4; // picture data size will be stored here
1002 bytestream_put_be16 (&buf, ctx->slices_per_picture);
1003 bytestream_put_byte (&buf, av_log2(ctx->mbs_per_slice) << 4); // slice width and height in MBs
1005 // seek table - will be filled during slice encoding
1007 buf += ctx->slices_per_picture * 2;
/* Without a forced quantiser, run the per-row quantiser search first. */
1010 if (!ctx->force_quant) {
1011 ret = avctx->execute2(avctx, find_quant_thread, (void*)pic, NULL,
1017 for (y = 0; y < ctx->mb_height; y++) {
1018 int mbs_per_slice = ctx->mbs_per_slice;
1019 for (x = mb = 0; x < ctx->mb_width; x += mbs_per_slice, mb++) {
1020 q = ctx->force_quant ? ctx->force_quant
1021 : ctx->slice_q[mb + y * ctx->slices_width];
/* Halve the slice width until it fits in what is left of the row. */
1023 while (ctx->mb_width - x < mbs_per_slice)
1024 mbs_per_slice >>= 1;
/* Slice header size in bits. */
1026 bytestream_put_byte(&buf, slice_hdr_size << 3);
1028 buf += slice_hdr_size - 1;
/* Grow the packet when fewer than two worst-case slices of space remain. */
1029 if (pkt_size <= buf - orig_buf + 2 * max_slice_size) {
1030 uint8_t *start = pkt->data;
1031 // Recompute new size according to max_slice_size
1033 int delta = 200 + (ctx->pictures_per_frame *
1034 ctx->slices_per_picture + 1) *
1035 max_slice_size - pkt_size;
1037 delta = FFMAX(delta, 2 * max_slice_size);
1038 ctx->frame_size_upper_bound += delta;
1041 avpriv_request_sample(avctx,
1042 "Packet too small: is %i,"
1043 " needs %i (slice: %i). "
1044 "Correct allocation",
1045 pkt_size, delta, max_slice_size);
1049 ret = av_grow_packet(pkt, delta);
/* Re-base every saved pointer onto the packet's current data buffer. */
1055 orig_buf = pkt->data + (orig_buf - start);
1056 buf = pkt->data + (buf - start);
1057 picture_size_pos = pkt->data + (picture_size_pos - start);
1058 slice_sizes = pkt->data + (slice_sizes - start);
1059 slice_hdr = pkt->data + (slice_hdr - start);
1060 tmp = pkt->data + (tmp - start);
1062 init_put_bits(&pb, buf, (pkt_size - (buf - orig_buf)));
1063 ret = encode_slice(avctx, pic, &pb, sizes, x, y, q,
/* Fill in the slice header: quantiser, then a size for all planes but the last. */
1068 bytestream_put_byte(&slice_hdr, q);
1069 slice_size = slice_hdr_size + sizes[ctx->num_planes - 1];
1070 for (i = 0; i < ctx->num_planes - 1; i++) {
1071 bytestream_put_be16(&slice_hdr, sizes[i]);
1072 slice_size += sizes[i];
/* Seek table entry for this slice. */
1074 bytestream_put_be16(&slice_sizes, slice_size);
1075 buf += slice_size - slice_hdr_size;
/* Track the largest slice so far for the space check above. */
1076 if (max_slice_size < slice_size)
1077 max_slice_size = slice_size;
/* Picture size is measured from the start of the picture header. */
1081 picture_size = buf - (picture_size_pos - 1);
1082 bytestream_put_be32(&picture_size_pos, picture_size);
/* Write the total frame size back into the field reserved at the start. */
1086 frame_size = buf - orig_buf;
1087 bytestream_put_be32(&orig_buf, frame_size);
1089 pkt->size = frame_size;
1090 pkt->flags |= AV_PKT_FLAG_KEY;
/*
 * Release the per-thread trellis node arrays, the thread data array and the
 * slice quantiser table. encode_init also calls this on allocation failure.
 * NOTE(review): any guard on ctx->tdata before the loop and the return
 * statement are not visible in this listing.
 */
1096 static av_cold int encode_close(AVCodecContext *avctx)
1098 ProresContext *ctx = avctx->priv_data;
1102 for (i = 0; i < avctx->thread_count; i++)
1103 av_freep(&ctx->tdata[i].nodes);
1105 av_freep(&ctx->tdata);
1106 av_freep(&ctx->slice_q);
/*
 * Copy an 8x8 group of samples from src into block in row-major order.
 * NOTE(review): the statement that follows the copy (presumably the forward
 * DCT call through fdsp) is not visible in this listing.
 */
1111 static void prores_fdct(FDCTDSPContext *fdsp, const uint16_t *src,
1112 ptrdiff_t linesize, int16_t *block)
1115 const uint16_t *tsrc = src;
1117 for (y = 0; y < 8; y++) {
1118 for (x = 0; x < 8; x++)
1119 block[y * 8 + x] = tsrc[x];
/* tsrc is a uint16_t pointer, so the stride is halved (linesize presumably in bytes). */
1120 tsrc += linesize >> 1;
/*
 * Validate the options, derive the picture/slice geometry and rate
 * parameters, precompute the quantiser matrices and allocate the quantiser
 * search state.
 *
 * Returns a negative AVERROR code on invalid options or allocation failure.
 * NOTE(review): several lines (local declarations, some branches, the
 * final return) are not visible in this listing.
 */
1125 static av_cold int encode_init(AVCodecContext *avctx)
1127 ProresContext *ctx = avctx->priv_data;
1130 int min_quant, max_quant;
1131 int interlaced = !!(avctx->flags & AV_CODEC_FLAG_INTERLACED_DCT);
1133 avctx->bits_per_raw_sample = 10;
1134 #if FF_API_CODED_FRAME
1135 FF_DISABLE_DEPRECATION_WARNINGS
1136 avctx->coded_frame->pict_type = AV_PICTURE_TYPE_I;
1137 avctx->coded_frame->key_frame = 1;
1138 FF_ENABLE_DEPRECATION_WARNINGS
1141 ctx->fdct = prores_fdct;
1142 ctx->scantable = interlaced ? ff_prores_interlaced_scan
1143 : ff_prores_progressive_scan;
1144 ff_fdctdsp_init(&ctx->fdsp, avctx);
/* mps & (mps - 1) is non-zero exactly when mps is not a power of two. */
1146 mps = ctx->mbs_per_slice;
1147 if (mps & (mps - 1)) {
1148 av_log(avctx, AV_LOG_ERROR,
1149 "there should be an integer power of two MBs per slice\n");
1150 return AVERROR(EINVAL);
/* Automatic profile: 4444 for alpha or non-subsampled chroma input, HQ otherwise. */
1152 if (ctx->profile == PRORES_PROFILE_AUTO) {
1153 const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(avctx->pix_fmt);
1154 ctx->profile = (desc->flags & AV_PIX_FMT_FLAG_ALPHA ||
1155 !(desc->log2_chroma_w + desc->log2_chroma_h))
1156 ? PRORES_PROFILE_4444 : PRORES_PROFILE_HQ;
1157 av_log(avctx, AV_LOG_INFO, "Autoselected %s. It can be overridden "
1158 "through -profile option.\n", ctx->profile == PRORES_PROFILE_4444
1159 ? "4:4:4:4 profile because of the used input colorspace"
1160 : "HQ profile to keep best quality");
/* Alpha input is only encoded by the 4444 and 4444XQ profiles. */
1162 if (av_pix_fmt_desc_get(avctx->pix_fmt)->flags & AV_PIX_FMT_FLAG_ALPHA) {
1163 if (ctx->profile != PRORES_PROFILE_4444 &&
1164 ctx->profile != PRORES_PROFILE_4444XQ) {
1165 // force alpha and warn
1166 av_log(avctx, AV_LOG_WARNING, "Profile selected will not "
1167 "encode alpha. Override with -profile if needed.\n");
1168 ctx->alpha_bits = 0;
1170 if (ctx->alpha_bits & 7) {
1171 av_log(avctx, AV_LOG_ERROR, "alpha bits should be 0, 8 or 16\n");
1172 return AVERROR(EINVAL);
1174 avctx->bits_per_coded_sample = 32;
1176 ctx->alpha_bits = 0;
1179 ctx->chroma_factor = avctx->pix_fmt == AV_PIX_FMT_YUV422P10
1182 ctx->profile_info = prores_profile_info + ctx->profile;
1183 ctx->num_planes = 3 + !!ctx->alpha_bits;
/* Dimensions in 16x16 macroblocks, rounded up. */
1185 ctx->mb_width = FFALIGN(avctx->width, 16) >> 4;
/* Two alternatives for the height: aligned to 32 and divided by 32, or aligned to 16 and divided by 16. */
1188 ctx->mb_height = FFALIGN(avctx->height, 32) >> 5;
1190 ctx->mb_height = FFALIGN(avctx->height, 16) >> 4;
/*
 * Slices per row: full-width slices plus one extra slice per set bit of the
 * remaining macroblock count (the leftover is split into power-of-two slices).
 */
1192 ctx->slices_width = ctx->mb_width / mps;
1193 ctx->slices_width += av_popcount(ctx->mb_width - ctx->slices_width * mps);
1194 ctx->slices_per_picture = ctx->mb_height * ctx->slices_width;
1195 ctx->pictures_per_frame = 1 + interlaced;
/* quant_sel == -1 means "use the matrix of the selected profile". */
1197 if (ctx->quant_sel == -1)
1198 ctx->quant_mat = prores_quant_matrices[ctx->profile_info->quant];
1200 ctx->quant_mat = prores_quant_matrices[ctx->quant_sel];
/* NOTE(review): the other option checks here return AVERROR(EINVAL); this one returns AVERROR_INVALIDDATA. */
1202 if (strlen(ctx->vendor) != 4) {
1203 av_log(avctx, AV_LOG_ERROR, "vendor ID should be 4 bytes\n");
1204 return AVERROR_INVALIDDATA;
/* A non-zero global_quality forces a fixed quantiser and disables the search. */
1207 ctx->force_quant = avctx->global_quality / FF_QP2LAMBDA;
1208 if (!ctx->force_quant) {
/* No explicit budget: take it from the profile table by frame-size class. */
1209 if (!ctx->bits_per_mb) {
1210 for (i = 0; i < NUM_MB_LIMITS - 1; i++)
1211 if (prores_mb_limits[i] >= ctx->mb_width * ctx->mb_height *
1212 ctx->pictures_per_frame)
1214 ctx->bits_per_mb = ctx->profile_info->br_tab[i];
/* The default budget is multiplied by 20 when alpha is encoded. */
1215 if (ctx->alpha_bits)
1216 ctx->bits_per_mb *= 20;
1217 } else if (ctx->bits_per_mb < 128) {
1218 av_log(avctx, AV_LOG_ERROR, "too few bits per MB, please set at least 128\n");
1219 return AVERROR_INVALIDDATA;
/* Precompute the scaled matrices for the quantisers below MAX_STORED_Q. */
1222 min_quant = ctx->profile_info->min_quant;
1223 max_quant = ctx->profile_info->max_quant;
1224 for (i = min_quant; i < MAX_STORED_Q; i++) {
1225 for (j = 0; j < 64; j++)
1226 ctx->quants[i][j] = ctx->quant_mat[j] * i;
/* One chosen quantiser per slice of a picture. */
1229 ctx->slice_q = av_malloc(ctx->slices_per_picture * sizeof(*ctx->slice_q));
1230 if (!ctx->slice_q) {
1231 encode_close(avctx);
1232 return AVERROR(ENOMEM);
1235 ctx->tdata = av_mallocz(avctx->thread_count * sizeof(*ctx->tdata));
1237 encode_close(avctx);
1238 return AVERROR(ENOMEM);
/* Each thread gets a trellis with one column per slice of a row plus a start column. */
1241 for (j = 0; j < avctx->thread_count; j++) {
1242 ctx->tdata[j].nodes = av_malloc((ctx->slices_width + 1)
1244 * sizeof(*ctx->tdata->nodes));
1245 if (!ctx->tdata[j].nodes) {
1246 encode_close(avctx);
1247 return AVERROR(ENOMEM);
/* Start column: no predecessor, zero bits and zero score. */
1249 for (i = min_quant; i < max_quant + 2; i++) {
1250 ctx->tdata[j].nodes[i].prev_node = -1;
1251 ctx->tdata[j].nodes[i].bits = 0;
1252 ctx->tdata[j].nodes[i].score = 0;
/* Forced-quantiser path. */
1258 if (ctx->force_quant > 64) {
1259 av_log(avctx, AV_LOG_ERROR, "too large quantiser, maximum is 64\n");
1260 return AVERROR_INVALIDDATA;
/* Slot 0 holds the forced matrix; ls accumulates a per-coefficient bit estimate. */
1263 for (j = 0; j < 64; j++) {
1264 ctx->quants[0][j] = ctx->quant_mat[j] * ctx->force_quant;
1265 ls += av_log2((1 << 11) / ctx->quants[0][j]) * 2 + 1;
1268 ctx->bits_per_mb = ls * 8;
1269 if (ctx->chroma_factor == CFACTOR_Y444)
1270 ctx->bits_per_mb += ls * 4;
/* Size estimate: (slice count + 1) times a per-slice header and payload budget. */
1273 ctx->frame_size_upper_bound = (ctx->pictures_per_frame *
1274 ctx->slices_per_picture + 1) *
1275 (2 + 2 * ctx->num_planes +
1276 (mps * ctx->bits_per_mb) / 8)
1279 if (ctx->alpha_bits) {
1280 // The alpha plane is run-coded and might exceed the bit budget.
1281 ctx->frame_size_upper_bound += (ctx->pictures_per_frame *
1282 ctx->slices_per_picture + 1) *
1283 /* num pixels per slice */ (ctx->mbs_per_slice * 256 *
1284 /* bits per pixel */ (1 + ctx->alpha_bits + 1) + 7 >> 3);
1287 avctx->codec_tag = ctx->profile_info->tag;
1289 av_log(avctx, AV_LOG_DEBUG,
1290 "profile %d, %d slices, interlacing: %s, %d bits per MB\n",
1291 ctx->profile, ctx->slices_per_picture * ctx->pictures_per_frame,
1292 interlaced ? "yes" : "no", ctx->bits_per_mb);
1293 av_log(avctx, AV_LOG_DEBUG, "frame size upper bound: %d\n",
1294 ctx->frame_size_upper_bound);
/*
 * Private encoder options. OFFSET gives the location of the backing member
 * inside ProresContext; VE marks an option as a video encoding parameter.
 * Named constants are attached to their option through the shared unit
 * string ("profile" or "quant_mat").
 */
1299 #define OFFSET(x) offsetof(ProresContext, x)
1300 #define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
1302 static const AVOption options[] = {
/* Range 1..MAX_MBS_PER_SLICE; encode_init additionally requires a power of two. */
1303 { "mbs_per_slice", "macroblocks per slice", OFFSET(mbs_per_slice),
1304 AV_OPT_TYPE_INT, { .i64 = 8 }, 1, MAX_MBS_PER_SLICE, VE },
1305 { "profile", NULL, OFFSET(profile), AV_OPT_TYPE_INT,
1306 { .i64 = PRORES_PROFILE_AUTO },
1307 PRORES_PROFILE_AUTO, PRORES_PROFILE_4444XQ, VE, "profile" },
1308 { "auto", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_AUTO },
1309 0, 0, VE, "profile" },
1310 { "proxy", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_PROXY },
1311 0, 0, VE, "profile" },
1312 { "lt", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_LT },
1313 0, 0, VE, "profile" },
1314 { "standard", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_STANDARD },
1315 0, 0, VE, "profile" },
1316 { "hq", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_HQ },
1317 0, 0, VE, "profile" },
1318 { "4444", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_4444 },
1319 0, 0, VE, "profile" },
1320 { "4444xq", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_4444XQ },
1321 0, 0, VE, "profile" },
/* encode_init rejects vendor strings that are not exactly 4 characters long. */
1322 { "vendor", "vendor ID", OFFSET(vendor),
1323 AV_OPT_TYPE_STRING, { .str = "Lavc" }, CHAR_MIN, CHAR_MAX, VE },
/* 0 lets encode_init take the budget from the profile table; other values below 128 are rejected there. */
1324 { "bits_per_mb", "desired bits per macroblock", OFFSET(bits_per_mb),
1325 AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 8192, VE },
/* -1 selects the matrix associated with the chosen profile. */
1326 { "quant_mat", "quantiser matrix", OFFSET(quant_sel), AV_OPT_TYPE_INT,
1327 { .i64 = -1 }, -1, QUANT_MAT_DEFAULT, VE, "quant_mat" },
1328 { "auto", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = -1 },
1329 0, 0, VE, "quant_mat" },
1330 { "proxy", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_PROXY },
1331 0, 0, VE, "quant_mat" },
1332 { "lt", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_LT },
1333 0, 0, VE, "quant_mat" },
1334 { "standard", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_STANDARD },
1335 0, 0, VE, "quant_mat" },
1336 { "hq", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_HQ },
1337 0, 0, VE, "quant_mat" },
1338 { "default", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_DEFAULT },
1339 0, 0, VE, "quant_mat" },
/* encode_init accepts only multiples of 8 (0, 8 or 16) for inputs with alpha. */
1340 { "alpha_bits", "bits for alpha plane", OFFSET(alpha_bits), AV_OPT_TYPE_INT,
1341 { .i64 = 16 }, 0, 16, VE },
/*
 * AVClass describing the encoder's private context; referenced from
 * ff_prores_ks_encoder through .priv_class.
 */
1345 static const AVClass proresenc_class = {
1346 .class_name = "ProRes encoder",
1347 .item_name = av_default_item_name,
1349 .version = LIBAVUTIL_VERSION_INT,
/*
 * Codec registration entry for the "prores_ks" encoder: wires up the
 * init / encode / close callbacks, advertises slice and frame threading and
 * intra-only coding, and lists the accepted 10-bit pixel formats.
 */
1352 AVCodec ff_prores_ks_encoder = {
1353 .name = "prores_ks",
1354 .long_name = NULL_IF_CONFIG_SMALL("Apple ProRes (iCodec Pro)"),
1355 .type = AVMEDIA_TYPE_VIDEO,
1356 .id = AV_CODEC_ID_PRORES,
1357 .priv_data_size = sizeof(ProresContext),
1358 .init = encode_init,
1359 .close = encode_close,
1360 .encode2 = encode_frame,
1361 .capabilities = AV_CODEC_CAP_SLICE_THREADS | AV_CODEC_CAP_FRAME_THREADS | AV_CODEC_CAP_INTRA_ONLY,
/* The list is terminated by AV_PIX_FMT_NONE. */
1362 .pix_fmts = (const enum AVPixelFormat[]) {
1363 AV_PIX_FMT_YUV422P10, AV_PIX_FMT_YUV444P10,
1364 AV_PIX_FMT_YUVA444P10, AV_PIX_FMT_NONE
1366 .priv_class = &proresenc_class,