4 * Copyright (c) 2012 Konstantin Shishkov
6 * This encoder appears to be based on Anatoliy Wasserman's encoder,
7 * considering the similarities in the bugs.
9 * This file is part of FFmpeg.
11 * FFmpeg is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public
13 * License as published by the Free Software Foundation; either
14 * version 2.1 of the License, or (at your option) any later version.
16 * FFmpeg is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with FFmpeg; if not, write to the Free Software
23 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
26 #include "libavutil/opt.h"
27 #include "libavutil/pixdesc.h"
31 #include "bytestream.h"
33 #include "proresdata.h"
/* Chroma sampling codes stored in ProresContext.chroma_factor; encode_frame()
 * shifts the value into the top bits of the frame-flags byte. */
35 #define CFACTOR_Y422 2
36 #define CFACTOR_Y444 3
/* Largest accepted value of the mbs_per_slice option; also sizes the
 * per-plane slice buffers (64 * 4 * MAX_MBS_PER_SLICE entries). */
38 #define MAX_MBS_PER_SLICE 8
/* Profile identifiers.  AUTO is the default of the profile option and is
 * replaced by a concrete profile in encode_init(); the resolved value is then
 * used as an index into prores_profile_info[]. */
43 PRORES_PROFILE_AUTO = -1,
44 PRORES_PROFILE_PROXY = 0,
46 PRORES_PROFILE_STANDARD,
49 PRORES_PROFILE_4444XQ,
/* Base quantisation matrices, 64 entries each.  encode_init() selects one row
 * (either the profile default or the quant_mat option) and multiplies every
 * entry by the quantiser to build the working tables.
 * NOTE(review): rows are presumably ordered like the QUANT_MAT_* enumerators,
 * whose definition is not part of this listing - confirm. */
60 static const uint8_t prores_quant_matrices[][64] = {
62 4, 7, 9, 11, 13, 14, 15, 63,
63 7, 7, 11, 12, 14, 15, 63, 63,
64 9, 11, 13, 14, 15, 63, 63, 63,
65 11, 11, 13, 14, 63, 63, 63, 63,
66 11, 13, 14, 63, 63, 63, 63, 63,
67 13, 14, 63, 63, 63, 63, 63, 63,
68 13, 63, 63, 63, 63, 63, 63, 63,
69 63, 63, 63, 63, 63, 63, 63, 63,
72 4, 5, 6, 7, 9, 11, 13, 15,
73 5, 5, 7, 8, 11, 13, 15, 17,
74 6, 7, 9, 11, 13, 15, 15, 17,
75 7, 7, 9, 11, 13, 15, 17, 19,
76 7, 9, 11, 13, 14, 16, 19, 23,
77 9, 11, 13, 14, 16, 19, 23, 29,
78 9, 11, 13, 15, 17, 21, 28, 35,
79 11, 13, 16, 17, 21, 28, 35, 41,
82 4, 4, 5, 5, 6, 7, 7, 9,
83 4, 4, 5, 6, 7, 7, 9, 9,
84 5, 5, 6, 7, 7, 9, 9, 10,
85 5, 5, 6, 7, 7, 9, 9, 10,
86 5, 6, 7, 7, 8, 9, 10, 12,
87 6, 7, 7, 8, 9, 10, 12, 15,
88 6, 7, 7, 9, 10, 11, 14, 17,
89 7, 7, 9, 10, 11, 14, 17, 21,
92 4, 4, 4, 4, 4, 4, 4, 4,
93 4, 4, 4, 4, 4, 4, 4, 4,
94 4, 4, 4, 4, 4, 4, 4, 4,
95 4, 4, 4, 4, 4, 4, 4, 5,
96 4, 4, 4, 4, 4, 4, 5, 5,
97 4, 4, 4, 4, 4, 5, 5, 6,
98 4, 4, 4, 4, 5, 5, 6, 7,
99 4, 4, 4, 4, 5, 6, 7, 7,
/* last visible matrix: flat, every entry is 4 */
102 4, 4, 4, 4, 4, 4, 4, 4,
103 4, 4, 4, 4, 4, 4, 4, 4,
104 4, 4, 4, 4, 4, 4, 4, 4,
105 4, 4, 4, 4, 4, 4, 4, 4,
106 4, 4, 4, 4, 4, 4, 4, 4,
107 4, 4, 4, 4, 4, 4, 4, 4,
108 4, 4, 4, 4, 4, 4, 4, 4,
109 4, 4, 4, 4, 4, 4, 4, 4,
/* Macroblock-count thresholds.  When no bits_per_mb option is given,
 * encode_init() walks this table against mb_width * mb_height *
 * pictures_per_frame and uses the resulting index to pick a br_tab[] entry. */
113 #define NUM_MB_LIMITS 4
114 static const int prores_mb_limits[NUM_MB_LIMITS] = {
115 1620, // up to 720x576
116 2700, // up to 960x720
117 6075, // up to 1440x1080
118 9216, // up to 2048x1152
/* Per-profile parameters, indexed by the resolved PRORES_PROFILE_* value
 * (encode_init() computes prores_profile_info + ctx->profile).
 *   tag    - FourCC copied to avctx->codec_tag
 *   br_tab - default bits per macroblock, one entry per prores_mb_limits[] slot
 *   quant  - default row of prores_quant_matrices[]
 * Other members (for example min_quant and max_quant, which are read through
 * profile_info elsewhere) are not visible in this listing. */
121 static const struct prores_profile {
122 const char *full_name;
126 int br_tab[NUM_MB_LIMITS];
128 } prores_profile_info[6] = {
130 .full_name = "proxy",
131 .tag = MKTAG('a', 'p', 'c', 'o'),
134 .br_tab = { 300, 242, 220, 194 },
135 .quant = QUANT_MAT_PROXY,
139 .tag = MKTAG('a', 'p', 'c', 's'),
142 .br_tab = { 720, 560, 490, 440 },
143 .quant = QUANT_MAT_LT,
146 .full_name = "standard",
147 .tag = MKTAG('a', 'p', 'c', 'n'),
150 .br_tab = { 1050, 808, 710, 632 },
151 .quant = QUANT_MAT_STANDARD,
154 .full_name = "high quality",
155 .tag = MKTAG('a', 'p', 'c', 'h'),
158 .br_tab = { 1566, 1216, 1070, 950 },
159 .quant = QUANT_MAT_HQ,
163 .tag = MKTAG('a', 'p', '4', 'h'),
166 .br_tab = { 2350, 1828, 1600, 1425 },
167 .quant = QUANT_MAT_HQ,
170 .full_name = "4444XQ",
171 .tag = MKTAG('a', 'p', '4', 'x'),
174 .br_tab = { 3525, 2742, 2400, 2137 },
175 .quant = QUANT_MAT_HQ,
/* Number of trellis node slots reserved per slice column: node indices are
 * formed as column * TRELLIS_WIDTH + quantiser, and the per-quantiser
 * slice_bits[] / slice_score[] arrays in find_slice_quant() have this size. */
179 #define TRELLIS_WIDTH 16
/* Saturation value for trellis scores in find_slice_quant().  The replacement
 * list is parenthesised so the macro always expands as one operand, whatever
 * operators surround it at the point of use. */
#define SCORE_LIMIT (INT_MAX / 2)
/* Number of precomputed quantiser tables kept in ProresContext.quants;
 * quantisers at or above this value get a table built on demand. */
189 #define MAX_STORED_Q 16
/* Scratch state for one worker.  encode_init() allocates avctx->thread_count
 * of these and find_quant_thread() selects one by thread number. */
191 typedef struct ProresThreadData {
/* per-plane slice data filled by get_slice_data() / get_alpha_data() */
192 DECLARE_ALIGNED(16, int16_t, blocks)[MAX_PLANES][64 * 4 * MAX_MBS_PER_SLICE];
/* 16x16 staging area used when a macroblock crosses the picture edge */
193 DECLARE_ALIGNED(16, uint16_t, emu_buf)[16 * 16];
194 int16_t custom_q[64];
/* trellis nodes used by find_slice_quant(); allocated in encode_init() */
195 struct TrellisNode *nodes;
/* Encoder private context (avctx->priv_data).  Only the members present in
 * this listing are annotated. */
198 typedef struct ProresContext {
/* slice buffers used on the final encoding pass in encode_slice() */
200 DECLARE_ALIGNED(16, int16_t, blocks)[MAX_PLANES][64 * 4 * MAX_MBS_PER_SLICE];
201 DECLARE_ALIGNED(16, uint16_t, emu_buf)[16*16];
/* quants[q][j] = quant_mat[j] * q, filled in encode_init(); row 0 holds the
 * forced-quantiser table when a fixed quantiser is requested */
202 int16_t quants[MAX_STORED_Q][64];
/* table built on demand in encode_slice() for quantisers >= MAX_STORED_Q */
203 int16_t custom_q[64];
/* selected row of prores_quant_matrices[] */
204 const uint8_t *quant_mat;
/* interlaced or progressive scan order, chosen in encode_init() */
205 const uint8_t *scantable;
/* 8x8 block loader/transform; encode_init() points it at prores_fdct() */
207 void (*fdct)(FDCTDSPContext *fdsp, const uint16_t *src,
208 ptrdiff_t linesize, int16_t *block);
/* picture size in 16x16 macroblocks */
212 int mb_width, mb_height;
214 int num_chroma_blocks, chroma_factor;
/* mb_height * slices_width */
216 int slices_per_picture;
217 int pictures_per_frame; // 1 for progressive, 2 for interlaced
/* packet size requested in encode_frame(); may be enlarged there */
228 int frame_size_upper_bound;
231 const struct prores_profile *profile_info;
/* one entry per thread */
235 ProresThreadData *tdata;
/*
 * Load one plane of one slice into blocks, 8x8 block by 8x8 block, through
 * ctx->fdct.
 *
 * The slice is walked as mbs_per_slice macroblocks, each 4 * blocks_per_mb
 * samples wide and 16 rows tall, starting at sample position (x, y) of a
 * w x h plane.  A macroblock that lies fully inside the plane is read from
 * src; otherwise its visible part is staged in emu_buf and padded by
 * repeating the last valid column and row.
 *
 * linesize is the byte stride of src.
 *
 * NOTE(review): several statements of this function are not present in this
 * listing (the guard around the memset, the branch choosing between the two
 * block orders, the pointer advances after each fdct call).  The remarks
 * below describe the visible lines only.
 */
238 static void get_slice_data(ProresContext *ctx, const uint16_t *src,
239 ptrdiff_t linesize, int x, int y, int w, int h,
240 int16_t *blocks, uint16_t *emu_buf,
241 int mbs_per_slice, int blocks_per_mb, int is_chroma)
243 const uint16_t *esrc;
244 const int mb_width = 4 * blocks_per_mb;
/* src moves one macroblock to the right per iteration */
248 for (i = 0; i < mbs_per_slice; i++, src += mb_width) {
/* zero the blocks of all macroblocks not yet processed */
250 memset(blocks, 0, 64 * (mbs_per_slice - i) * blocks_per_mb
/* macroblock entirely inside the plane: use the source stride directly */
254 if (x + mb_width <= w && y + 16 <= h) {
256 elinesize = linesize;
/* edge case: emu_buf rows are 16 samples long, stride expressed in bytes */
261 elinesize = 16 * sizeof(*emu_buf);
/* visible width and height of this macroblock */
263 bw = FFMIN(w - x, mb_width);
264 bh = FFMIN(h - y, 16);
266 for (j = 0; j < bh; j++) {
267 memcpy(emu_buf + j * 16,
268 (const uint8_t*)src + j * linesize,
/* repeat the last valid sample up to the macroblock width */
270 pix = emu_buf[j * 16 + bw - 1];
271 for (k = bw; k < mb_width; k++)
272 emu_buf[j * 16 + k] = pix;
/* fill row j with a copy of the last valid row */
275 memcpy(emu_buf + j * 16,
276 emu_buf + (bh - 1) * 16,
277 mb_width * sizeof(*emu_buf));
/* First visible block order: top-left, top-right, bottom-left, bottom-right.
 * elinesize is a byte stride while esrc points to 16-bit samples, so
 * esrc + elinesize * 4 is eight rows further down. */
280 ctx->fdct(&ctx->fdsp, esrc, elinesize, blocks);
282 if (blocks_per_mb > 2) {
283 ctx->fdct(&ctx->fdsp, esrc + 8, elinesize, blocks);
286 ctx->fdct(&ctx->fdsp, esrc + elinesize * 4, elinesize, blocks);
288 if (blocks_per_mb > 2) {
289 ctx->fdct(&ctx->fdsp, esrc + elinesize * 4 + 8, elinesize, blocks);
/* Second visible block order: left column first, then the right column.
 * NOTE(review): presumably selected by is_chroma - confirm against the
 * branch that is not shown here. */
293 ctx->fdct(&ctx->fdsp, esrc, elinesize, blocks);
295 ctx->fdct(&ctx->fdsp, esrc + elinesize * 4, elinesize, blocks);
297 if (blocks_per_mb > 2) {
298 ctx->fdct(&ctx->fdsp, esrc + 8, elinesize, blocks);
300 ctx->fdct(&ctx->fdsp, esrc + elinesize * 4 + 8, elinesize, blocks);
/*
 * Copy the alpha samples of one slice into blocks as 16 rows of
 * 16 * mbs_per_slice samples.
 *
 * Only the part of the slice inside the w x h plane is read from src; the
 * remainder of each row repeats the last copied sample and the remaining
 * rows repeat the previous row.  linesize is the byte stride of src.
 *
 * NOTE(review): the abits-dependent branch is only partly visible in this
 * listing (the body of the first per-sample loop is missing).
 */
309 static void get_alpha_data(ProresContext *ctx, const uint16_t *src,
310 ptrdiff_t linesize, int x, int y, int w, int h,
311 int16_t *blocks, int mbs_per_slice, int abits)
313 const int slice_width = 16 * mbs_per_slice;
314 int i, j, copy_w, copy_h;
/* clamp the copied area to what exists in the plane */
316 copy_w = FFMIN(w - x, slice_width);
317 copy_h = FFMIN(h - y, 16);
318 for (i = 0; i < copy_h; i++) {
319 memcpy(blocks, src, copy_w * sizeof(*src));
321 for (j = 0; j < copy_w; j++)
/* widen each sample: shift up by 6 and refill the low bits from the top */
324 for (j = 0; j < copy_w; j++)
325 blocks[j] = (blocks[j] << 6) | (blocks[j] >> 4);
/* pad to the full slice width with the last valid sample */
326 for (j = copy_w; j < slice_width; j++)
327 blocks[j] = blocks[copy_w - 1];
328 blocks += slice_width;
/* byte stride converted to 16-bit sample units */
329 src += linesize >> 1;
/* pad missing rows by duplicating the row above */
331 for (; i < 16; i++) {
332 memcpy(blocks, blocks - slice_width, slice_width * sizeof(*blocks));
333 blocks += slice_width;
338 * Write an unsigned rice/exp golomb codeword.
/*
 * Emit val with the variable-length code described by codebook.
 *
 * codebook packs three fields: bits 0-1 hold switch_bits - 1, bits 2-4 the
 * exp-Golomb order and bits 5 and up the Rice order.  Values at or above
 * switch_bits << rice_order take the exp-Golomb form, smaller values the
 * Rice form.
 *
 * NOTE(review): a few statements of the Rice branch are not visible in this
 * listing.
 */
340 static inline void encode_vlc_codeword(PutBitContext *pb, unsigned codebook, int val)
342 unsigned int rice_order, exp_order, switch_bits, switch_val;
345 /* number of prefix bits to switch between Rice and expGolomb */
346 switch_bits = (codebook & 3) + 1;
347 rice_order = codebook >> 5; /* rice code order */
348 exp_order = (codebook >> 2) & 7; /* exp golomb code order */
350 switch_val = switch_bits << rice_order;
352 if (val >= switch_val) {
/* rebase so the smallest exp-Golomb value has exponent exp_order */
353 val -= switch_val - (1 << exp_order);
354 exponent = av_log2(val);
/* zero prefix, then the value itself in exponent + 1 bits */
356 put_bits(pb, exponent - exp_order + switch_bits, 0);
357 put_bits(pb, exponent + 1, val);
/* Rice form: the quotient is sent as a run of zero bits */
359 exponent = val >> rice_order;
362 put_bits(pb, exponent, 0);
/* low rice_order bits of val */
365 put_sbits(pb, rice_order, val);
/* GET_SIGN shifts its argument right by 31 bits.
 * NOTE(review): this presumably relies on a 32-bit int and an arithmetic
 * right shift, giving -1 for negative values and 0 otherwise - confirm for
 * the supported compilers.
 * MAKE_CODE doubles the value and XORs it with that sign mask, which under
 * the assumption above maps 0, -1, 1, -2, ... to 0, 1, 2, 3, ... */
369 #define GET_SIGN(x) ((x) >> 31)
370 #define MAKE_CODE(x) ((((x)) * 2) ^ GET_SIGN(x))
/*
 * Write the DC coefficients of blocks_per_slice blocks (one every 64
 * entries of blocks).
 *
 * Each DC has 0x4000 subtracted and is divided by scale.  The first one is
 * coded directly with FIRST_DC_CB; the others are coded as differences from
 * the previous DC using a codebook chosen from the previous code.
 *
 * NOTE(review): the statements that update sign and prev_dc between
 * iterations are not visible in this listing.
 */
372 static void encode_dcs(PutBitContext *pb, int16_t *blocks,
373 int blocks_per_slice, int scale)
376 int codebook = 3, code, dc, prev_dc, delta, sign, new_sign;
378 prev_dc = (blocks[0] - 0x4000) / scale;
379 encode_vlc_codeword(pb, FIRST_DC_CB, MAKE_CODE(prev_dc));
384 for (i = 1; i < blocks_per_slice; i++, blocks += 64) {
385 dc = (blocks[0] - 0x4000) / scale;
386 delta = dc - prev_dc;
387 new_sign = GET_SIGN(delta);
/* negates delta when sign is -1, leaves it unchanged when sign is 0 */
388 delta = (delta ^ sign) - sign;
389 code = MAKE_CODE(delta);
390 encode_vlc_codeword(pb, ff_prores_dc_codebook[codebook], code);
/* next codebook index: half of the code rounded up, capped at 3 */
391 codebook = (code + (code & 1)) >> 1;
392 codebook = FFMIN(codebook, 3);
/*
 * Write the AC coefficients of a slice as run/level pairs.
 *
 * Coefficients are visited position by position in scan order (positions
 * 1..63), and for each position across every block of the slice (stride 64).
 * Each coefficient is divided by the matching qmat entry.  The run and level
 * codebooks adapt to the previously coded run and level.
 *
 * NOTE(review): the test for a non-zero level and the run counter updates
 * are not visible in this listing.
 */
398 static void encode_acs(PutBitContext *pb, int16_t *blocks,
399 int blocks_per_slice,
400 int plane_size_factor,
401 const uint8_t *scan, const int16_t *qmat)
404 int run, level, run_cb, lev_cb;
405 int max_coeffs, abs_level;
/* total number of coefficients in the slice: 64 per block */
407 max_coeffs = blocks_per_slice << 6;
/* initial codebook selectors */
408 run_cb = ff_prores_run_to_cb_index[4];
409 lev_cb = ff_prores_lev_to_cb_index[2];
412 for (i = 1; i < 64; i++) {
413 for (idx = scan[i]; idx < max_coeffs; idx += 64) {
414 level = blocks[idx] / qmat[scan[i]];
416 abs_level = FFABS(level);
417 encode_vlc_codeword(pb, ff_prores_ac_codebook[run_cb], run);
418 encode_vlc_codeword(pb, ff_prores_ac_codebook[lev_cb],
/* one sign bit */
420 put_sbits(pb, 1, GET_SIGN(level));
/* select the next codebooks; lookup indices are capped at 15 and 9 */
422 run_cb = ff_prores_run_to_cb_index[FFMIN(run, 15)];
423 lev_cb = ff_prores_lev_to_cb_index[FFMIN(abs_level, 9)];
/*
 * Encode the DC and AC coefficients of one plane of a slice into pb.
 *
 * Returns the number of bytes added to pb, computed from the bit positions
 * before and after encoding.
 * NOTE(review): the bit writer is presumably flushed before the final count
 * is taken; that statement is not visible in this listing.
 */
432 static int encode_slice_plane(ProresContext *ctx, PutBitContext *pb,
433 const uint16_t *src, ptrdiff_t linesize,
434 int mbs_per_slice, int16_t *blocks,
435 int blocks_per_mb, int plane_size_factor,
438 int blocks_per_slice, saved_pos;
440 saved_pos = put_bits_count(pb);
441 blocks_per_slice = mbs_per_slice * blocks_per_mb;
/* the DC quantiser is the first matrix entry */
443 encode_dcs(pb, blocks, blocks_per_slice, qmat[0]);
444 encode_acs(pb, blocks, blocks_per_slice, plane_size_factor,
445 ctx->scantable, qmat);
448 return (put_bits_count(pb) - saved_pos) >> 3;
/*
 * Write the change from prev to cur for one alpha sample of abits bits.
 *
 * The difference is reduced modulo 2^abits.  Two forms are visible: a raw
 * abits-wide value, used when the difference is zero or outside
 * [-dsize, dsize], and a short form made of dbits - 1 magnitude bits
 * (magnitude minus one) followed by a sign bit.
 *
 * NOTE(review): the flag bit choosing between the two forms and the wrap of
 * large differences to negative values are not visible in this listing.
 */
451 static void put_alpha_diff(PutBitContext *pb, int cur, int prev, int abits)
453 const int dbits = (abits == 8) ? 4 : 7;
/* parsed as 1 << (dbits - 1): subtraction binds tighter than the shift */
454 const int dsize = 1 << dbits - 1;
455 int diff = cur - prev;
457 diff = av_mod_uintp2(diff, abits);
458 if (diff >= (1 << abits) - dsize)
460 if (diff < -dsize || diff > dsize || !diff) {
462 put_bits(pb, abits, diff);
465 put_bits(pb, dbits - 1, FFABS(diff) - 1);
466 put_bits(pb, 1, diff < 0);
/*
 * Write a run length for the alpha plane.  Two encodings are visible, a
 * 4-bit and a 15-bit field.
 * NOTE(review): the conditions selecting between them and any prefix bits
 * are not visible in this listing.
 */
470 static void put_alpha_run(PutBitContext *pb, int run)
475 put_bits(pb, 4, run);
477 put_bits(pb, 15, run);
483 // todo alpha quantisation for high quants
/*
 * Encode the alpha samples of one slice (mbs_per_slice * 256 values) as a
 * sequence of sample differences and run lengths.
 *
 * The predictor starts at the maximum value representable in
 * ctx->alpha_bits bits.  Returns the number of bytes added to pb.
 *
 * NOTE(review): the loop body that reads samples and counts runs is mostly
 * absent from this listing.
 */
484 static int encode_alpha_plane(ProresContext *ctx, PutBitContext *pb,
485 int mbs_per_slice, uint16_t *blocks,
488 const int abits = ctx->alpha_bits;
489 const int mask = (1 << abits) - 1;
/* 16x16 samples per macroblock */
490 const int num_coeffs = mbs_per_slice * 256;
491 int saved_pos = put_bits_count(pb);
492 int prev = mask, cur;
497 put_alpha_diff(pb, cur, prev, abits);
502 put_alpha_run (pb, run);
503 put_alpha_diff(pb, cur, prev, abits);
509 } while (idx < num_coeffs);
511 put_alpha_run(pb, run);
513 return (put_bits_count(pb) - saved_pos) >> 3;
/*
 * Encode every plane of the slice at macroblock position (x, y) with
 * quantiser quant, storing the byte size of plane i in sizes[i].
 *
 * Picture planes go through get_slice_data() + encode_slice_plane(), the
 * alpha plane through get_alpha_data() + encode_alpha_plane().  An error is
 * logged when the bit writer has run past its buffer.
 *
 * NOTE(review): the return statements and the computation of xp, yp and
 * num_cblocks are not visible in this listing.
 */
516 static int encode_slice(AVCodecContext *avctx, const AVFrame *pic,
518 int sizes[4], int x, int y, int quant,
521 ProresContext *ctx = avctx->priv_data;
525 int slice_width_factor = av_log2(mbs_per_slice);
526 int num_cblocks, pwidth, line_add;
528 int plane_factor, is_chroma;
/* line_add is the index of the first source line for the current picture;
 * for two pictures per frame it depends on the picture index and the field
 * order of pic */
531 if (ctx->pictures_per_frame == 1)
534 line_add = ctx->cur_picture_idx ^ !pic->top_field_first;
/* pick the quantiser table: the forced one, a precomputed one, or build
 * one in custom_q for quantisers >= MAX_STORED_Q */
536 if (ctx->force_quant) {
537 qmat = ctx->quants[0];
538 } else if (quant < MAX_STORED_Q) {
539 qmat = ctx->quants[quant];
541 qmat = ctx->custom_q;
542 for (i = 0; i < 64; i++)
543 qmat[i] = ctx->quant_mat[i] * quant;
546 for (i = 0; i < ctx->num_planes; i++) {
/* planes 1 and 2 are chroma */
547 is_chroma = (i == 1 || i == 2);
548 plane_factor = slice_width_factor + 2;
550 plane_factor += ctx->chroma_factor - 3;
/* full-width plane (non-chroma, or chroma in 4:4:4) versus half-width */
551 if (!is_chroma || ctx->chroma_factor == CFACTOR_Y444) {
555 pwidth = avctx->width;
560 pwidth = avctx->width >> 1;
/* with two pictures per frame the stride skips every other line */
563 linesize = pic->linesize[i] * ctx->pictures_per_frame;
564 src = (const uint16_t*)(pic->data[i] + yp * linesize +
565 line_add * pic->linesize[i]) + xp;
568 get_slice_data(ctx, src, linesize, xp, yp,
569 pwidth, avctx->height / ctx->pictures_per_frame,
570 ctx->blocks[0], ctx->emu_buf,
571 mbs_per_slice, num_cblocks, is_chroma);
572 sizes[i] = encode_slice_plane(ctx, pb, src, linesize,
573 mbs_per_slice, ctx->blocks[0],
574 num_cblocks, plane_factor,
577 get_alpha_data(ctx, src, linesize, xp, yp,
578 pwidth, avctx->height / ctx->pictures_per_frame,
579 ctx->blocks[0], mbs_per_slice, ctx->alpha_bits);
580 sizes[i] = encode_alpha_plane(ctx, pb, mbs_per_slice,
581 ctx->blocks[0], quant);
583 total_size += sizes[i];
/* negative space left means the output buffer was too small */
584 if (put_bits_left(pb) < 0) {
585 av_log(avctx, AV_LOG_ERROR,
586 "Underestimated required buffer size.\n");
/*
 * Return the number of bits encode_vlc_codeword() spends on val with the
 * given codebook, without writing anything.
 *
 * The codebook fields are decoded exactly as in encode_vlc_codeword().
 */
593 static inline int estimate_vlc(unsigned codebook, int val)
595 unsigned int rice_order, exp_order, switch_bits, switch_val;
598 /* number of prefix bits to switch between Rice and expGolomb */
599 switch_bits = (codebook & 3) + 1;
600 rice_order = codebook >> 5; /* rice code order */
601 exp_order = (codebook >> 2) & 7; /* exp golomb code order */
603 switch_val = switch_bits << rice_order;
605 if (val >= switch_val) {
606 val -= switch_val - (1 << exp_order);
607 exponent = av_log2(val);
/* prefix of exponent - exp_order + switch_bits bits plus exponent + 1
 * value bits */
609 return exponent * 2 - exp_order + switch_bits + 1;
/* Rice form: quotient, plus rice_order remainder bits, plus one more bit */
611 return (val >> rice_order) + rice_order + 1;
/*
 * Estimate the bit cost of the DC coefficients of a slice and add the
 * quantisation remainders to *error.
 *
 * Follows the same prediction and codebook adaptation as encode_dcs(), but
 * sums estimate_vlc() results instead of writing bits.
 *
 * NOTE(review): the updates of sign and prev_dc and the return statement are
 * not visible in this listing.
 */
615 static int estimate_dcs(int *error, int16_t *blocks, int blocks_per_slice,
619 int codebook = 3, code, dc, prev_dc, delta, sign, new_sign;
622 prev_dc = (blocks[0] - 0x4000) / scale;
623 bits = estimate_vlc(FIRST_DC_CB, MAKE_CODE(prev_dc));
/* remainder lost by the division above */
627 *error += FFABS(blocks[0] - 0x4000) % scale;
629 for (i = 1; i < blocks_per_slice; i++, blocks += 64) {
630 dc = (blocks[0] - 0x4000) / scale;
631 *error += FFABS(blocks[0] - 0x4000) % scale;
632 delta = dc - prev_dc;
633 new_sign = GET_SIGN(delta);
634 delta = (delta ^ sign) - sign;
635 code = MAKE_CODE(delta);
636 bits += estimate_vlc(ff_prores_dc_codebook[codebook], code);
637 codebook = (code + (code & 1)) >> 1;
638 codebook = FFMIN(codebook, 3);
/*
 * Estimate the bit cost of the AC coefficients of a slice and add the
 * quantisation remainders to *error.
 *
 * Uses the same traversal and codebook adaptation as encode_acs().
 *
 * NOTE(review): the non-zero test, the run counter updates and the return
 * statement are not visible in this listing.
 */
646 static int estimate_acs(int *error, int16_t *blocks, int blocks_per_slice,
647 int plane_size_factor,
648 const uint8_t *scan, const int16_t *qmat)
651 int run, level, run_cb, lev_cb;
652 int max_coeffs, abs_level;
655 max_coeffs = blocks_per_slice << 6;
656 run_cb = ff_prores_run_to_cb_index[4];
657 lev_cb = ff_prores_lev_to_cb_index[2];
660 for (i = 1; i < 64; i++) {
661 for (idx = scan[i]; idx < max_coeffs; idx += 64) {
662 level = blocks[idx] / qmat[scan[i]];
/* remainder lost by the division above */
663 *error += FFABS(blocks[idx]) % qmat[scan[i]];
665 abs_level = FFABS(level);
666 bits += estimate_vlc(ff_prores_ac_codebook[run_cb], run);
667 bits += estimate_vlc(ff_prores_ac_codebook[lev_cb],
670 run_cb = ff_prores_run_to_cb_index[FFMIN(run, 15)];
671 lev_cb = ff_prores_lev_to_cb_index[FFMIN(abs_level, 9)];
/*
 * Estimate the size in bits of one picture plane of a slice for the
 * quantiser table qmat, reading the coefficients from td->blocks[plane].
 *
 * Quantisation remainders are accumulated into *error.  The result is
 * rounded up to a multiple of 8 bits.
 */
682 static int estimate_slice_plane(ProresContext *ctx, int *error, int plane,
683 const uint16_t *src, ptrdiff_t linesize,
685 int blocks_per_mb, int plane_size_factor,
686 const int16_t *qmat, ProresThreadData *td)
688 int blocks_per_slice;
691 blocks_per_slice = mbs_per_slice * blocks_per_mb;
693 bits = estimate_dcs(error, td->blocks[plane], blocks_per_slice, qmat[0]);
694 bits += estimate_acs(error, td->blocks[plane], blocks_per_slice,
695 plane_size_factor, ctx->scantable, qmat);
697 return FFALIGN(bits, 8);
/*
 * Bit-cost counterpart of put_alpha_diff(): classifies the difference
 * between cur and prev in the same way.
 * NOTE(review): the returned bit counts are on lines not visible in this
 * listing.
 */
700 static int est_alpha_diff(int cur, int prev, int abits)
702 const int dbits = (abits == 8) ? 4 : 7;
/* parsed as 1 << (dbits - 1) */
703 const int dsize = 1 << dbits - 1;
704 int diff = cur - prev;
706 diff = av_mod_uintp2(diff, abits);
707 if (diff >= (1 << abits) - dsize)
709 if (diff < -dsize || diff > dsize || !diff)
/*
 * Estimate the bit cost of the alpha plane of one slice
 * (mbs_per_slice * 256 samples), starting from a predictor equal to the
 * maximum value representable in ctx->alpha_bits bits.
 * NOTE(review): the run accounting and the return statement are not visible
 * in this listing.
 */
715 static int estimate_alpha_plane(ProresContext *ctx, int *error,
716 const uint16_t *src, ptrdiff_t linesize,
717 int mbs_per_slice, int quant,
720 const int abits = ctx->alpha_bits;
721 const int mask = (1 << abits) - 1;
722 const int num_coeffs = mbs_per_slice * 256;
723 int prev = mask, cur;
730 bits = est_alpha_diff(cur, prev, abits);
741 bits += est_alpha_diff(cur, prev, abits);
747 } while (idx < num_coeffs);
/*
 * Rate-control step for one slice: estimate size and error for every
 * candidate quantiser and extend the trellis by one column.
 *
 * trellis_node is the index of the first node of this slice's column in
 * td->nodes; the previous column starts TRELLIS_WIDTH entries earlier.
 * Candidate slots are min_quant..max_quant of the profile plus one extra
 * slot (max_quant + 1) holding a coarser fallback quantiser.
 *
 * NOTE(review): a number of statements (including the return) are not
 * visible in this listing.  The visible tail tracks the lowest-score node of
 * the column in pq, which is presumably the return value - confirm.
 */
759 static int find_slice_quant(AVCodecContext *avctx,
760 int trellis_node, int x, int y, int mbs_per_slice,
761 ProresThreadData *td)
763 ProresContext *ctx = avctx->priv_data;
764 int i, q, pq, xp, yp;
766 int slice_width_factor = av_log2(mbs_per_slice);
767 int num_cblocks[MAX_PLANES], pwidth;
768 int plane_factor[MAX_PLANES], is_chroma[MAX_PLANES];
769 const int min_quant = ctx->profile_info->min_quant;
770 const int max_quant = ctx->profile_info->max_quant;
771 int error, bits, bits_limit;
772 int mbs, prev, cur, new_score;
773 int slice_bits[TRELLIS_WIDTH], slice_score[TRELLIS_WIDTH];
776 int linesize[4], line_add;
778 if (ctx->pictures_per_frame == 1)
781 line_add = ctx->cur_picture_idx ^ !ctx->pic->top_field_first;
/* macroblocks of this row up to and including this slice */
782 mbs = x + mbs_per_slice;
/* load every plane of the slice once; all quantisers reuse td->blocks */
784 for (i = 0; i < ctx->num_planes; i++) {
785 is_chroma[i] = (i == 1 || i == 2);
786 plane_factor[i] = slice_width_factor + 2;
788 plane_factor[i] += ctx->chroma_factor - 3;
789 if (!is_chroma[i] || ctx->chroma_factor == CFACTOR_Y444) {
793 pwidth = avctx->width;
798 pwidth = avctx->width >> 1;
801 linesize[i] = ctx->pic->linesize[i] * ctx->pictures_per_frame;
802 src = (const uint16_t *)(ctx->pic->data[i] + yp * linesize[i] +
803 line_add * ctx->pic->linesize[i]) + xp;
806 get_slice_data(ctx, src, linesize[i], xp, yp,
807 pwidth, avctx->height / ctx->pictures_per_frame,
808 td->blocks[i], td->emu_buf,
809 mbs_per_slice, num_cblocks[i], is_chroma[i]);
811 get_alpha_data(ctx, src, linesize[i], xp, yp,
812 pwidth, avctx->height / ctx->pictures_per_frame,
813 td->blocks[i], mbs_per_slice, ctx->alpha_bits);
/* reset this column: no predecessor yet, each node tagged with its
 * quantiser */
817 for (q = min_quant; q < max_quant + 2; q++) {
818 td->nodes[trellis_node + q].prev_node = -1;
819 td->nodes[trellis_node + q].quant = q;
822 // todo: maybe perform coarser quantising to fit into frame size when needed
/* cost of every regular quantiser; alpha is estimated separately from the
 * picture planes */
823 for (q = min_quant; q <= max_quant; q++) {
826 for (i = 0; i < ctx->num_planes - !!ctx->alpha_bits; i++) {
827 bits += estimate_slice_plane(ctx, &error, i,
830 num_cblocks[i], plane_factor[i],
834 bits += estimate_alpha_plane(ctx, &error, src, linesize[3],
835 mbs_per_slice, q, td->blocks[3]);
836 if (bits > 65000 * 8)
839 slice_bits[q] = bits;
840 slice_score[q] = error;
/* Extra slot: if max_quant already fits the per-slice budget, reuse its
 * size with a score one higher; otherwise try coarser quantisers up to 127
 * against the same budget. */
842 if (slice_bits[max_quant] <= ctx->bits_per_mb * mbs_per_slice) {
843 slice_bits[max_quant + 1] = slice_bits[max_quant];
844 slice_score[max_quant + 1] = slice_score[max_quant] + 1;
845 overquant = max_quant;
847 for (q = max_quant + 1; q < 128; q++) {
850 if (q < MAX_STORED_Q) {
851 qmat = ctx->quants[q];
854 for (i = 0; i < 64; i++)
855 qmat[i] = ctx->quant_mat[i] * q;
857 for (i = 0; i < ctx->num_planes - !!ctx->alpha_bits; i++) {
858 bits += estimate_slice_plane(ctx, &error, i,
861 num_cblocks[i], plane_factor[i],
865 bits += estimate_alpha_plane(ctx, &error, src, linesize[3],
866 mbs_per_slice, q, td->blocks[3]);
867 if (bits <= ctx->bits_per_mb * mbs_per_slice)
871 slice_bits[max_quant + 1] = bits;
872 slice_score[max_quant + 1] = error;
/* the extra node records the quantiser that was actually used */
875 td->nodes[trellis_node + max_quant + 1].quant = overquant;
/* cumulative bit budget for the row up to the end of this slice */
877 bits_limit = mbs * ctx->bits_per_mb;
/* try every (previous node, current quantiser) transition */
878 for (pq = min_quant; pq < max_quant + 2; pq++) {
879 prev = trellis_node - TRELLIS_WIDTH + pq;
881 for (q = min_quant; q < max_quant + 2; q++) {
882 cur = trellis_node + q;
884 bits = td->nodes[prev].bits + slice_bits[q];
885 error = slice_score[q];
886 if (bits > bits_limit)
/* scores saturate at SCORE_LIMIT instead of being summed further */
889 if (td->nodes[prev].score < SCORE_LIMIT && error < SCORE_LIMIT)
890 new_score = td->nodes[prev].score + error;
892 new_score = SCORE_LIMIT;
/* take the transition if the node is unset or the new score is not worse */
893 if (td->nodes[cur].prev_node == -1 ||
894 td->nodes[cur].score >= new_score) {
896 td->nodes[cur].bits = bits;
897 td->nodes[cur].score = new_score;
898 td->nodes[cur].prev_node = prev;
/* pick the node of this column with the lowest score; on ties the later
 * slot wins because of the <= comparison */
903 error = td->nodes[trellis_node + min_quant].score;
904 pq = trellis_node + min_quant;
905 for (q = min_quant + 1; q < max_quant + 2; q++) {
906 if (td->nodes[trellis_node + q].score <= error) {
907 error = td->nodes[trellis_node + q].score;
908 pq = trellis_node + q;
/*
 * Worker for avctx->execute2(): choose quantisers for one macroblock row.
 *
 * jobnr is the row index and threadnr selects the scratch data.  Slices are
 * processed left to right, slice number mb using trellis column mb + 1.
 * The chosen path is then followed backwards through prev_node to fill
 * ctx->slice_q for the row.
 *
 * NOTE(review): the body of the while loop is not visible in this listing;
 * encode_frame() has the same loop and halves mbs_per_slice there.
 */
915 static int find_quant_thread(AVCodecContext *avctx, void *arg,
916 int jobnr, int threadnr)
918 ProresContext *ctx = avctx->priv_data;
919 ProresThreadData *td = ctx->tdata + threadnr;
920 int mbs_per_slice = ctx->mbs_per_slice;
921 int x, y = jobnr, mb, q = 0;
923 for (x = mb = 0; x < ctx->mb_width; x += mbs_per_slice, mb++) {
924 while (ctx->mb_width - x < mbs_per_slice)
926 q = find_slice_quant(avctx,
927 (mb + 1) * TRELLIS_WIDTH, x, y,
/* walk the chosen path from the last slice back to the first */
931 for (x = ctx->slices_width - 1; x >= 0; x--) {
932 ctx->slice_q[x + y * ctx->slices_width] = td->nodes[q].quant;
933 q = td->nodes[q].prev_node;
/*
 * Encode one frame into pkt.
 *
 * Visible layout written to the packet: a 4-byte frame size and the frame
 * container ID, then a frame header (header size, version, vendor, width,
 * height, flags, colour description, alpha info, optional quant matrices),
 * then for each picture a picture header, a slice size table and the
 * slices.  Sizes that are only known afterwards are written back through
 * saved pointers.
 *
 * The packet is allocated with ctx->frame_size_upper_bound bytes plus
 * AV_INPUT_BUFFER_MIN_SIZE and is grown when too little room remains for the
 * next slices.  On completion the packet is marked as a key frame.
 *
 * NOTE(review): error returns, some assignments (for example where tmp,
 * slice_hdr and slice_sizes are first set) and the final return are not
 * visible in this listing.
 */
939 static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
940 const AVFrame *pic, int *got_packet)
942 ProresContext *ctx = avctx->priv_data;
943 uint8_t *orig_buf, *buf, *slice_hdr, *slice_sizes, *tmp;
944 uint8_t *picture_size_pos;
946 int x, y, i, mb, q = 0;
947 int sizes[4] = { 0 };
/* 2 bytes plus one 16-bit size for every plane except the last */
948 int slice_hdr_size = 2 + 2 * (ctx->num_planes - 1);
949 int frame_size, picture_size, slice_size;
/* initial per-slice estimate derived from the frame size bound; raised
 * below when a larger slice is produced */
951 int max_slice_size = (ctx->frame_size_upper_bound - 200) / (ctx->pictures_per_frame * ctx->slices_per_picture + 1);
955 pkt_size = ctx->frame_size_upper_bound;
957 if ((ret = ff_alloc_packet2(avctx, pkt, pkt_size + AV_INPUT_BUFFER_MIN_SIZE, 0)) < 0)
960 orig_buf = pkt->data;
963 orig_buf += 4; // frame size
964 bytestream_put_be32 (&orig_buf, FRAME_ID); // frame container ID
969 buf += 2; // frame header size will be stored here
970 bytestream_put_be16 (&buf, 0); // version 1
971 bytestream_put_buffer(&buf, ctx->vendor, 4);
972 bytestream_put_be16 (&buf, avctx->width);
973 bytestream_put_be16 (&buf, avctx->height);
/* chroma format in the top bits, field order bits when interlaced */
975 frame_flags = ctx->chroma_factor << 6;
976 if (avctx->flags & AV_CODEC_FLAG_INTERLACED_DCT)
977 frame_flags |= pic->top_field_first ? 0x04 : 0x08;
978 bytestream_put_byte (&buf, frame_flags);
980 bytestream_put_byte (&buf, 0); // reserved
981 bytestream_put_byte (&buf, pic->color_primaries);
982 bytestream_put_byte (&buf, pic->color_trc);
983 bytestream_put_byte (&buf, pic->colorspace);
/* low bits carry alpha_bits / 8 (0, 1 or 2) */
984 bytestream_put_byte (&buf, 0x40 | (ctx->alpha_bits >> 3));
985 bytestream_put_byte (&buf, 0); // reserved
/* the same matrix is stored for both luma and chroma */
986 if (ctx->quant_sel != QUANT_MAT_DEFAULT) {
987 bytestream_put_byte (&buf, 0x03); // matrix flags - both matrices are present
988 // luma quantisation matrix
989 for (i = 0; i < 64; i++)
990 bytestream_put_byte(&buf, ctx->quant_mat[i]);
991 // chroma quantisation matrix
992 for (i = 0; i < 64; i++)
993 bytestream_put_byte(&buf, ctx->quant_mat[i]);
995 bytestream_put_byte (&buf, 0x00); // matrix flags - default matrices are used
997 bytestream_put_be16 (&tmp, buf - orig_buf); // write back frame header size
/* one pass per picture (two when interlaced) */
999 for (ctx->cur_picture_idx = 0;
1000 ctx->cur_picture_idx < ctx->pictures_per_frame;
1001 ctx->cur_picture_idx++) {
/* the 32-bit picture size goes right after the header-size byte */
1003 picture_size_pos = buf + 1;
1004 bytestream_put_byte (&buf, 0x40); // picture header size (in bits)
1005 buf += 4; // picture data size will be stored here
1006 bytestream_put_be16 (&buf, ctx->slices_per_picture);
1007 bytestream_put_byte (&buf, av_log2(ctx->mbs_per_slice) << 4); // slice width and height in MBs
1009 // seek table - will be filled during slice encoding
1011 buf += ctx->slices_per_picture * 2;
/* run the per-row quantiser search unless a fixed quantiser is forced */
1014 if (!ctx->force_quant) {
1015 ret = avctx->execute2(avctx, find_quant_thread, (void*)pic, NULL,
1021 for (y = 0; y < ctx->mb_height; y++) {
1022 int mbs_per_slice = ctx->mbs_per_slice;
1023 for (x = mb = 0; x < ctx->mb_width; x += mbs_per_slice, mb++) {
1024 q = ctx->force_quant ? ctx->force_quant
1025 : ctx->slice_q[mb + y * ctx->slices_width];
/* halve the slice width until it fits into the rest of the row */
1027 while (ctx->mb_width - x < mbs_per_slice)
1028 mbs_per_slice >>= 1;
1030 bytestream_put_byte(&buf, slice_hdr_size << 3);
1032 buf += slice_hdr_size - 1;
/* less than two maximum-size slices of room left: enlarge the packet */
1033 if (pkt_size <= buf - orig_buf + 2 * max_slice_size) {
1034 uint8_t *start = pkt->data;
1035 // Recompute new size according to max_slice_size
1037 int delta = 200 + (ctx->pictures_per_frame *
1038 ctx->slices_per_picture + 1) *
1039 max_slice_size - pkt_size;
1041 delta = FFMAX(delta, 2 * max_slice_size);
1042 ctx->frame_size_upper_bound += delta;
1045 avpriv_request_sample(avctx,
1046 "Packet too small: is %i,"
1047 " needs %i (slice: %i). "
1048 "Correct allocation",
1049 pkt_size, delta, max_slice_size);
1053 ret = av_grow_packet(pkt, delta);
/* pkt->data may have moved: rebase every saved pointer on it */
1059 orig_buf = pkt->data + (orig_buf - start);
1060 buf = pkt->data + (buf - start);
1061 picture_size_pos = pkt->data + (picture_size_pos - start);
1062 slice_sizes = pkt->data + (slice_sizes - start);
1063 slice_hdr = pkt->data + (slice_hdr - start);
1064 tmp = pkt->data + (tmp - start);
1066 init_put_bits(&pb, buf, (pkt_size - (buf - orig_buf)));
1067 ret = encode_slice(avctx, pic, &pb, sizes, x, y, q,
/* slice header: quantiser, then the size of every plane but the last */
1072 bytestream_put_byte(&slice_hdr, q);
1073 slice_size = slice_hdr_size + sizes[ctx->num_planes - 1];
1074 for (i = 0; i < ctx->num_planes - 1; i++) {
1075 bytestream_put_be16(&slice_hdr, sizes[i]);
1076 slice_size += sizes[i];
/* fill this slice's entry of the size table */
1078 bytestream_put_be16(&slice_sizes, slice_size);
1079 buf += slice_size - slice_hdr_size;
1080 if (max_slice_size < slice_size)
1081 max_slice_size = slice_size;
/* picture size counts from the header-size byte */
1085 picture_size = buf - (picture_size_pos - 1);
1086 bytestream_put_be32(&picture_size_pos, picture_size);
1090 frame_size = buf - orig_buf;
1091 bytestream_put_be32(&orig_buf, frame_size);
1093 pkt->size = frame_size;
1094 pkt->flags |= AV_PKT_FLAG_KEY;
/*
 * Release the per-thread trellis node arrays, the thread data array and the
 * per-slice quantiser array.
 *
 * NOTE(review): encode_init() calls this function on its allocation failure
 * paths, where ctx->tdata may still be NULL, while the loop below
 * dereferences ctx->tdata.  Confirm that a NULL guard exists on a line not
 * present in this listing.
 */
1100 static av_cold int encode_close(AVCodecContext *avctx)
1102 ProresContext *ctx = avctx->priv_data;
1106 for (i = 0; i < avctx->thread_count; i++)
1107 av_freep(&ctx->tdata[i].nodes);
1109 av_freep(&ctx->tdata);
1110 av_freep(&ctx->slice_q);
/*
 * Copy an 8x8 block of 16-bit samples from src into block (row-major,
 * 64 entries).  linesize is the source stride in bytes.
 * NOTE(review): presumably followed by the forward DCT from fdsp; that call
 * is not visible in this listing.
 */
1115 static void prores_fdct(FDCTDSPContext *fdsp, const uint16_t *src,
1116 ptrdiff_t linesize, int16_t *block)
1119 const uint16_t *tsrc = src;
1121 for (y = 0; y < 8; y++) {
1122 for (x = 0; x < 8; x++)
1123 block[y * 8 + x] = tsrc[x];
/* byte stride converted to 16-bit sample units */
1124 tsrc += linesize >> 1;
/*
 * Initialise the encoder: validate options, resolve the profile, derive the
 * picture and slice geometry, build quantiser tables, allocate rate-control
 * state and compute the upper bound on the encoded frame size.
 *
 * Visible error returns: AVERROR(EINVAL) for a non power-of-two
 * mbs_per_slice or an invalid alpha_bits value, AVERROR(ENOMEM) on
 * allocation failure, and AVERROR_INVALIDDATA for a bad vendor length, a
 * too small bits_per_mb or a too large forced quantiser.
 *
 * NOTE(review): all of these are option validation failures, yet two
 * different error codes are used; consider whether the AVERROR_INVALIDDATA
 * cases should also report AVERROR(EINVAL).
 * NOTE(review): several statements (some else branches, the final return)
 * are not visible in this listing.
 */
1129 static av_cold int encode_init(AVCodecContext *avctx)
1131 ProresContext *ctx = avctx->priv_data;
1134 int min_quant, max_quant;
1135 int interlaced = !!(avctx->flags & AV_CODEC_FLAG_INTERLACED_DCT);
1137 avctx->bits_per_raw_sample = 10;
1138 #if FF_API_CODED_FRAME
1139 FF_DISABLE_DEPRECATION_WARNINGS
1140 avctx->coded_frame->pict_type = AV_PICTURE_TYPE_I;
1141 avctx->coded_frame->key_frame = 1;
1142 FF_ENABLE_DEPRECATION_WARNINGS
1145 ctx->fdct = prores_fdct;
1146 ctx->scantable = interlaced ? ff_prores_interlaced_scan
1147 : ff_prores_progressive_scan;
1148 ff_fdctdsp_init(&ctx->fdsp, avctx);
1150 mps = ctx->mbs_per_slice;
/* mps & (mps - 1) is non-zero unless mps is a power of two */
1151 if (mps & (mps - 1)) {
1152 av_log(avctx, AV_LOG_ERROR,
1153 "there should be an integer power of two MBs per slice\n");
1154 return AVERROR(EINVAL);
/* automatic profile: 4444 for formats with alpha or without chroma
 * subsampling, HQ otherwise */
1156 if (ctx->profile == PRORES_PROFILE_AUTO) {
1157 const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(avctx->pix_fmt);
1158 ctx->profile = (desc->flags & AV_PIX_FMT_FLAG_ALPHA ||
1159 !(desc->log2_chroma_w + desc->log2_chroma_h))
1160 ? PRORES_PROFILE_4444 : PRORES_PROFILE_HQ;
1161 av_log(avctx, AV_LOG_INFO, "Autoselected %s. It can be overridden "
1162 "through -profile option.\n", ctx->profile == PRORES_PROFILE_4444
1163 ? "4:4:4:4 profile because of the used input colorspace"
1164 : "HQ profile to keep best quality");
/* alpha is only kept for the 4444 and 4444XQ profiles */
1166 if (av_pix_fmt_desc_get(avctx->pix_fmt)->flags & AV_PIX_FMT_FLAG_ALPHA) {
1167 if (ctx->profile != PRORES_PROFILE_4444 &&
1168 ctx->profile != PRORES_PROFILE_4444XQ) {
1169 // force alpha and warn
1170 av_log(avctx, AV_LOG_WARNING, "Profile selected will not "
1171 "encode alpha. Override with -profile if needed.\n");
1172 ctx->alpha_bits = 0;
/* only multiples of 8 are accepted */
1174 if (ctx->alpha_bits & 7) {
1175 av_log(avctx, AV_LOG_ERROR, "alpha bits should be 0, 8 or 16\n");
1176 return AVERROR(EINVAL);
1178 avctx->bits_per_coded_sample = 32;
1180 ctx->alpha_bits = 0;
1183 ctx->chroma_factor = avctx->pix_fmt == AV_PIX_FMT_YUV422P10
1186 ctx->profile_info = prores_profile_info + ctx->profile;
/* three picture planes plus an optional alpha plane */
1187 ctx->num_planes = 3 + !!ctx->alpha_bits;
/* size in 16x16 macroblocks, rounded up */
1189 ctx->mb_width = FFALIGN(avctx->width, 16) >> 4;
/* two visible variants: height counted per 32 lines or per 16 lines */
1192 ctx->mb_height = FFALIGN(avctx->height, 32) >> 5;
1194 ctx->mb_height = FFALIGN(avctx->height, 16) >> 4;
/* full-width slices per row, plus one slice for every set bit of the
 * remaining macroblock count */
1196 ctx->slices_width = ctx->mb_width / mps;
1197 ctx->slices_width += av_popcount(ctx->mb_width - ctx->slices_width * mps);
1198 ctx->slices_per_picture = ctx->mb_height * ctx->slices_width;
1199 ctx->pictures_per_frame = 1 + interlaced;
/* quant_sel of -1 means: use the default matrix of the profile */
1201 if (ctx->quant_sel == -1)
1202 ctx->quant_mat = prores_quant_matrices[ctx->profile_info->quant];
1204 ctx->quant_mat = prores_quant_matrices[ctx->quant_sel];
/* encode_frame() copies exactly four vendor bytes into the header */
1206 if (strlen(ctx->vendor) != 4) {
1207 av_log(avctx, AV_LOG_ERROR, "vendor ID should be 4 bytes\n");
1208 return AVERROR_INVALIDDATA;
/* a non-zero global_quality selects a fixed quantiser */
1211 ctx->force_quant = avctx->global_quality / FF_QP2LAMBDA;
1212 if (!ctx->force_quant) {
/* no explicit budget: choose one from the profile by picture size */
1213 if (!ctx->bits_per_mb) {
1214 for (i = 0; i < NUM_MB_LIMITS - 1; i++)
1215 if (prores_mb_limits[i] >= ctx->mb_width * ctx->mb_height *
1216 ctx->pictures_per_frame)
1218 ctx->bits_per_mb = ctx->profile_info->br_tab[i];
/* the budget is multiplied by 20 when alpha is encoded */
1219 if (ctx->alpha_bits)
1220 ctx->bits_per_mb *= 20;
1221 } else if (ctx->bits_per_mb < 128) {
1222 av_log(avctx, AV_LOG_ERROR, "too few bits per MB, please set at least 128\n");
1223 return AVERROR_INVALIDDATA;
/* precompute quants[q][j] = quant_mat[j] * q for the stored quantisers */
1226 min_quant = ctx->profile_info->min_quant;
1227 max_quant = ctx->profile_info->max_quant;
1228 for (i = min_quant; i < MAX_STORED_Q; i++) {
1229 for (j = 0; j < 64; j++)
1230 ctx->quants[i][j] = ctx->quant_mat[j] * i;
1233 ctx->slice_q = av_malloc(ctx->slices_per_picture * sizeof(*ctx->slice_q));
1234 if (!ctx->slice_q) {
1235 encode_close(avctx);
1236 return AVERROR(ENOMEM);
1239 ctx->tdata = av_mallocz(avctx->thread_count * sizeof(*ctx->tdata));
1241 encode_close(avctx);
1242 return AVERROR(ENOMEM);
/* per-thread trellis storage, sized from slices_width + 1 */
1245 for (j = 0; j < avctx->thread_count; j++) {
1246 ctx->tdata[j].nodes = av_malloc((ctx->slices_width + 1)
1248 * sizeof(*ctx->tdata->nodes));
1249 if (!ctx->tdata[j].nodes) {
1250 encode_close(avctx);
1251 return AVERROR(ENOMEM);
/* the first trellis column is the zero-cost starting point */
1253 for (i = min_quant; i < max_quant + 2; i++) {
1254 ctx->tdata[j].nodes[i].prev_node = -1;
1255 ctx->tdata[j].nodes[i].bits = 0;
1256 ctx->tdata[j].nodes[i].score = 0;
/* fixed quantiser: validate it, build its table in quants[0] and derive
 * bits_per_mb from the table */
1262 if (ctx->force_quant > 64) {
1263 av_log(avctx, AV_LOG_ERROR, "too large quantiser, maximum is 64\n");
1264 return AVERROR_INVALIDDATA;
1267 for (j = 0; j < 64; j++) {
1268 ctx->quants[0][j] = ctx->quant_mat[j] * ctx->force_quant;
1269 ls += av_log2((1 << 11) / ctx->quants[0][j]) * 2 + 1;
1272 ctx->bits_per_mb = ls * 8;
1273 if (ctx->chroma_factor == CFACTOR_Y444)
1274 ctx->bits_per_mb += ls * 4;
/* per-slice header bytes plus the slice bit budget, for every slice of
 * every picture plus one */
1277 ctx->frame_size_upper_bound = (ctx->pictures_per_frame *
1278 ctx->slices_per_picture + 1) *
1279 (2 + 2 * ctx->num_planes +
1280 (mps * ctx->bits_per_mb) / 8)
1283 if (ctx->alpha_bits) {
1284 // The alpha plane is run-coded and might exceed the bit budget.
1285 ctx->frame_size_upper_bound += (ctx->pictures_per_frame *
1286 ctx->slices_per_picture + 1) *
1287 /* num pixels per slice */ (ctx->mbs_per_slice * 256 *
1288 /* bits per pixel */ (1 + ctx->alpha_bits + 1) + 7 >> 3);
1291 avctx->codec_tag = ctx->profile_info->tag;
1293 av_log(avctx, AV_LOG_DEBUG,
1294 "profile %d, %d slices, interlacing: %s, %d bits per MB\n",
1295 ctx->profile, ctx->slices_per_picture * ctx->pictures_per_frame,
1296 interlaced ? "yes" : "no", ctx->bits_per_mb);
1297 av_log(avctx, AV_LOG_DEBUG, "frame size upper bound: %d\n",
1298 ctx->frame_size_upper_bound);
/* OFFSET gives the byte offset of a ProresContext member for the option
 * table; VE is the flag combination shared by every option below. */
1303 #define OFFSET(x) offsetof(ProresContext, x)
1304 #define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
/* Private encoder options.  Each entry that names a ProresContext member
 * stores its value there; AV_OPT_TYPE_CONST entries are named values for
 * the option whose unit string they share. */
1306 static const AVOption options[] = {
/* range 1..MAX_MBS_PER_SLICE; encode_init() also requires a power of two */
1307 { "mbs_per_slice", "macroblocks per slice", OFFSET(mbs_per_slice),
1308 AV_OPT_TYPE_INT, { .i64 = 8 }, 1, MAX_MBS_PER_SLICE, VE },
1309 { "profile", NULL, OFFSET(profile), AV_OPT_TYPE_INT,
1310 { .i64 = PRORES_PROFILE_AUTO },
1311 PRORES_PROFILE_AUTO, PRORES_PROFILE_4444XQ, VE, "profile" },
1312 { "auto", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_AUTO },
1313 0, 0, VE, "profile" },
1314 { "proxy", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_PROXY },
1315 0, 0, VE, "profile" },
1316 { "lt", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_LT },
1317 0, 0, VE, "profile" },
1318 { "standard", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_STANDARD },
1319 0, 0, VE, "profile" },
1320 { "hq", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_HQ },
1321 0, 0, VE, "profile" },
1322 { "4444", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_4444 },
1323 0, 0, VE, "profile" },
1324 { "4444xq", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_4444XQ },
1325 0, 0, VE, "profile" },
/* encode_init() rejects values that are not exactly four characters long */
1326 { "vendor", "vendor ID", OFFSET(vendor),
1327 AV_OPT_TYPE_STRING, { .str = "Lavc" }, CHAR_MIN, CHAR_MAX, VE },
/* 0 lets encode_init() choose a value from the profile table */
1328 { "bits_per_mb", "desired bits per macroblock", OFFSET(bits_per_mb),
1329 AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 8192, VE },
/* -1 selects the default matrix of the profile */
1330 { "quant_mat", "quantiser matrix", OFFSET(quant_sel), AV_OPT_TYPE_INT,
1331 { .i64 = -1 }, -1, QUANT_MAT_DEFAULT, VE, "quant_mat" },
1332 { "auto", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = -1 },
1333 0, 0, VE, "quant_mat" },
1334 { "proxy", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_PROXY },
1335 0, 0, VE, "quant_mat" },
1336 { "lt", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_LT },
1337 0, 0, VE, "quant_mat" },
1338 { "standard", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_STANDARD },
1339 0, 0, VE, "quant_mat" },
1340 { "hq", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_HQ },
1341 0, 0, VE, "quant_mat" },
1342 { "default", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_DEFAULT },
1343 0, 0, VE, "quant_mat" },
/* encode_init() rejects values that are not a multiple of 8 */
1344 { "alpha_bits", "bits for alpha plane", OFFSET(alpha_bits), AV_OPT_TYPE_INT,
1345 { .i64 = 16 }, 0, 16, VE },
/* AVClass for the encoder private context; referenced by the priv_class
 * member of ff_prores_ks_encoder. */
1349 static const AVClass proresenc_class = {
1350 .class_name = "ProRes encoder",
1351 .item_name = av_default_item_name,
1353 .version = LIBAVUTIL_VERSION_INT,
/* Codec registration entry for the prores_ks encoder: wires up the init,
 * encode and close callbacks, the private context size and class, and lists
 * the accepted pixel formats (terminated by AV_PIX_FMT_NONE). */
1356 AVCodec ff_prores_ks_encoder = {
1357 .name = "prores_ks",
1358 .long_name = NULL_IF_CONFIG_SMALL("Apple ProRes (iCodec Pro)"),
1359 .type = AVMEDIA_TYPE_VIDEO,
1360 .id = AV_CODEC_ID_PRORES,
1361 .priv_data_size = sizeof(ProresContext),
1362 .init = encode_init,
1363 .close = encode_close,
1364 .encode2 = encode_frame,
1365 .capabilities = AV_CODEC_CAP_SLICE_THREADS | AV_CODEC_CAP_FRAME_THREADS | AV_CODEC_CAP_INTRA_ONLY,
1366 .pix_fmts = (const enum AVPixelFormat[]) {
1367 AV_PIX_FMT_YUV422P10, AV_PIX_FMT_YUV444P10,
1368 AV_PIX_FMT_YUVA444P10, AV_PIX_FMT_NONE
1370 .priv_class = &proresenc_class,