4 * Copyright (c) 2012 Konstantin Shishkov
6 * This encoder appears to be based on Anatoliy Wasserman's encoder,
7 * considering the similarities in the bugs.
9 * This file is part of FFmpeg.
11 * FFmpeg is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public
13 * License as published by the Free Software Foundation; either
14 * version 2.1 of the License, or (at your option) any later version.
16 * FFmpeg is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with FFmpeg; if not, write to the Free Software
23 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
26 #include "libavutil/opt.h"
27 #include "libavutil/pixdesc.h"
31 #include "bytestream.h"
33 #include "proresdata.h"
/* Chroma format codes kept in ProresContext.chroma_factor; encode_frame()
 * shifts this value into the top bits of the frame flags byte. */
35 #define CFACTOR_Y422 2
36 #define CFACTOR_Y444 3
/* Upper limit of the mbs_per_slice option; it also sizes the per-plane
 * coefficient buffers (64 * 4 * MAX_MBS_PER_SLICE entries each). */
38 #define MAX_MBS_PER_SLICE 8
/* Profile identifiers (only part of the enumeration is visible here).
 * Non-negative values are used by encode_init() as indices into
 * prores_profile_info[]; AUTO (-1) lets encode_init() pick a profile from
 * the input pixel format. */
43 PRORES_PROFILE_AUTO = -1,
44 PRORES_PROFILE_PROXY = 0,
46 PRORES_PROFILE_STANDARD,
/* Quantisation matrices, 64 entries each (five 8x8 tables are visible).
 * encode_init() selects one row by index: the quant_mat option when it is
 * set, otherwise the .quant field of the chosen profile.
 * NOTE(review): the QUANT_MAT_* enumeration is not visible in this excerpt;
 * the row order presumably follows it - confirm before reordering. */
59 static const uint8_t prores_quant_matrices[][64] = {
61 4, 7, 9, 11, 13, 14, 15, 63,
62 7, 7, 11, 12, 14, 15, 63, 63,
63 9, 11, 13, 14, 15, 63, 63, 63,
64 11, 11, 13, 14, 63, 63, 63, 63,
65 11, 13, 14, 63, 63, 63, 63, 63,
66 13, 14, 63, 63, 63, 63, 63, 63,
67 13, 63, 63, 63, 63, 63, 63, 63,
68 63, 63, 63, 63, 63, 63, 63, 63,
71 4, 5, 6, 7, 9, 11, 13, 15,
72 5, 5, 7, 8, 11, 13, 15, 17,
73 6, 7, 9, 11, 13, 15, 15, 17,
74 7, 7, 9, 11, 13, 15, 17, 19,
75 7, 9, 11, 13, 14, 16, 19, 23,
76 9, 11, 13, 14, 16, 19, 23, 29,
77 9, 11, 13, 15, 17, 21, 28, 35,
78 11, 13, 16, 17, 21, 28, 35, 41,
81 4, 4, 5, 5, 6, 7, 7, 9,
82 4, 4, 5, 6, 7, 7, 9, 9,
83 5, 5, 6, 7, 7, 9, 9, 10,
84 5, 5, 6, 7, 7, 9, 9, 10,
85 5, 6, 7, 7, 8, 9, 10, 12,
86 6, 7, 7, 8, 9, 10, 12, 15,
87 6, 7, 7, 9, 10, 11, 14, 17,
88 7, 7, 9, 10, 11, 14, 17, 21,
91 4, 4, 4, 4, 4, 4, 4, 4,
92 4, 4, 4, 4, 4, 4, 4, 4,
93 4, 4, 4, 4, 4, 4, 4, 4,
94 4, 4, 4, 4, 4, 4, 4, 5,
95 4, 4, 4, 4, 4, 4, 5, 5,
96 4, 4, 4, 4, 4, 5, 5, 6,
97 4, 4, 4, 4, 5, 5, 6, 7,
98 4, 4, 4, 4, 5, 6, 7, 7,
/* flat matrix: every coefficient uses the same quantiser step of 4 */
101 4, 4, 4, 4, 4, 4, 4, 4,
102 4, 4, 4, 4, 4, 4, 4, 4,
103 4, 4, 4, 4, 4, 4, 4, 4,
104 4, 4, 4, 4, 4, 4, 4, 4,
105 4, 4, 4, 4, 4, 4, 4, 4,
106 4, 4, 4, 4, 4, 4, 4, 4,
107 4, 4, 4, 4, 4, 4, 4, 4,
108 4, 4, 4, 4, 4, 4, 4, 4,
/* Macroblock-count thresholds. When the user gives no bits_per_mb,
 * encode_init() scans this table against
 * mb_width * mb_height * pictures_per_frame and uses the resulting index to
 * pick an entry of the profile's br_tab[]. */
112 #define NUM_MB_LIMITS 4
113 static const int prores_mb_limits[NUM_MB_LIMITS] = {
114 1620, // up to 720x576
115 2700, // up to 960x720
116 6075, // up to 1440x1080
117 9216, // up to 2048x1152
/* Per-profile parameters, indexed by the profile number.
 *   full_name - human readable profile name
 *   tag       - FourCC that encode_init() stores in avctx->codec_tag
 *   br_tab    - default bits per macroblock, one entry per prores_mb_limits[]
 *               size class
 *   quant     - index of the default matrix in prores_quant_matrices[]
 * NOTE(review): further members (min_quant and max_quant are read elsewhere
 * in this file) and some initialisers are not visible in this excerpt. */
120 static const struct prores_profile {
121 const char *full_name;
125 int br_tab[NUM_MB_LIMITS];
127 } prores_profile_info[5] = {
129 .full_name = "proxy",
130 .tag = MKTAG('a', 'p', 'c', 'o'),
133 .br_tab = { 300, 242, 220, 194 },
134 .quant = QUANT_MAT_PROXY,
138 .tag = MKTAG('a', 'p', 'c', 's'),
141 .br_tab = { 720, 560, 490, 440 },
142 .quant = QUANT_MAT_LT,
145 .full_name = "standard",
146 .tag = MKTAG('a', 'p', 'c', 'n'),
149 .br_tab = { 1050, 808, 710, 632 },
150 .quant = QUANT_MAT_STANDARD,
153 .full_name = "high quality",
154 .tag = MKTAG('a', 'p', 'c', 'h'),
157 .br_tab = { 1566, 1216, 1070, 950 },
158 .quant = QUANT_MAT_HQ,
162 .tag = MKTAG('a', 'p', '4', 'h'),
165 .br_tab = { 2350, 1828, 1600, 1425 },
166 .quant = QUANT_MAT_HQ,
/* Stride, in nodes, between the trellis node groups of consecutive slices:
 * the nodes of slice n start at index (n + 1) * TRELLIS_WIDTH. */
#define TRELLIS_WIDTH 16
/* Saturation value for trellis scores. The expansion is parenthesised so the
 * macro keeps its value inside larger expressions (e.g. x % SCORE_LIMIT). */
#define SCORE_LIMIT (INT_MAX / 2)
/* Number of pre-scaled quantiser matrices cached in ProresContext.quants[];
 * larger quantisers are computed on demand into a custom_q buffer. */
#define MAX_STORED_Q 16
/* Scratch data used during quantiser search; encode_init() allocates
 * avctx->thread_count of these and find_quant_thread() picks one by its
 * threadnr argument.
 *   blocks   - per-plane sample/coefficient buffers for the current slice
 *   emu_buf  - 16x16 scratch area for edge-padded macroblocks
 *   custom_q - quantiser matrix built on the fly
 *   nodes    - trellis nodes, TRELLIS_WIDTH per slice column */
182 typedef struct ProresThreadData {
183 DECLARE_ALIGNED(16, int16_t, blocks)[MAX_PLANES][64 * 4 * MAX_MBS_PER_SLICE];
184 DECLARE_ALIGNED(16, uint16_t, emu_buf)[16 * 16];
185 int16_t custom_q[64];
186 struct TrellisNode *nodes;
/* Encoder private context (avctx->priv_data).
 * NOTE(review): only part of the member list is visible in this excerpt. */
189 typedef struct ProresContext {
/* buffers used by encode_slice() while writing the bitstream */
191 DECLARE_ALIGNED(16, int16_t, blocks)[MAX_PLANES][64 * 4 * MAX_MBS_PER_SLICE];
192 DECLARE_ALIGNED(16, uint16_t, emu_buf)[16*16];
/* quants[q][i] = quant_mat[i] * q, precomputed by encode_init() */
193 int16_t quants[MAX_STORED_Q][64];
/* matrix built on demand when the slice quantiser is >= MAX_STORED_Q */
194 int16_t custom_q[64];
195 const uint8_t *quant_mat;
196 const uint8_t *scantable;
/* block transform callback; encode_init() points it at prores_fdct() */
198 void (*fdct)(FDCTDSPContext *fdsp, const uint16_t *src,
199 int linesize, int16_t *block);
/* picture size in 16x16 macroblocks */
202 int mb_width, mb_height;
204 int num_chroma_blocks, chroma_factor;
206 int slices_per_picture;
207 int pictures_per_frame; // 1 for progressive, 2 for interlaced
/* packet size requested from ff_alloc_packet2(); may grow in encode_frame() */
218 int frame_size_upper_bound;
221 const struct prores_profile *profile_info;
/* one ProresThreadData per thread */
225 ProresThreadData *tdata;
/*
 * Produce the coefficient blocks for one slice of one plane.
 *
 * Walks mbs_per_slice macroblocks starting at sample position (x, y) and
 * runs ctx->fdct on each 8x8 block. Macroblocks lying entirely inside the
 * w x h plane are read straight from src; the others are first copied into
 * emu_buf with edge replication.
 *
 * NOTE(review): several lines of this function (local declarations, pointer
 * advances, branch heads) are not visible in this excerpt.
 */
228 static void get_slice_data(ProresContext *ctx, const uint16_t *src,
229 int linesize, int x, int y, int w, int h,
230 int16_t *blocks, uint16_t *emu_buf,
231 int mbs_per_slice, int blocks_per_mb, int is_chroma)
233 const uint16_t *esrc;
/* macroblock width in samples for this plane: 16 with 4 blocks, 8 with 2 */
234 const int mb_width = 4 * blocks_per_mb;
238 for (i = 0; i < mbs_per_slice; i++, src += mb_width) {
/* zero the blocks of the remaining macroblocks (the guarding condition is
 * not visible here) */
240 memset(blocks, 0, 64 * (mbs_per_slice - i) * blocks_per_mb
/* macroblock fits inside the picture: transform the source in place */
244 if (x + mb_width <= w && y + 16 <= h) {
246 elinesize = linesize;
/* partial macroblock: build a padded copy in emu_buf, whose rows are
 * 16 samples apart (elinesize is expressed in bytes) */
251 elinesize = 16 * sizeof(*emu_buf);
253 bw = FFMIN(w - x, mb_width);
254 bh = FFMIN(h - y, 16);
256 for (j = 0; j < bh; j++) {
257 memcpy(emu_buf + j * 16,
258 (const uint8_t*)src + j * linesize,
/* replicate the last valid sample up to the macroblock's right edge */
260 pix = emu_buf[j * 16 + bw - 1];
261 for (k = bw; k < mb_width; k++)
262 emu_buf[j * 16 + k] = pix;
/* fill a missing bottom row with a copy of the last valid row */
265 memcpy(emu_buf + j * 16,
266 emu_buf + (bh - 1) * 16,
267 mb_width * sizeof(*emu_buf));
/* first ordering of the 8x8 blocks: top-left, top-right, bottom-left,
 * bottom-right. NOTE(review): presumably chosen via is_chroma - the
 * branch head is not visible. */
270 ctx->fdct(&ctx->fdsp, esrc, elinesize, blocks);
272 if (blocks_per_mb > 2) {
273 ctx->fdct(&ctx->fdsp, esrc + 8, elinesize, blocks);
276 ctx->fdct(&ctx->fdsp, esrc + elinesize * 4, elinesize, blocks);
278 if (blocks_per_mb > 2) {
279 ctx->fdct(&ctx->fdsp, esrc + elinesize * 4 + 8, elinesize, blocks);
/* second ordering: top-left, bottom-left, then the right-hand column */
283 ctx->fdct(&ctx->fdsp, esrc, elinesize, blocks);
285 ctx->fdct(&ctx->fdsp, esrc + elinesize * 4, elinesize, blocks);
287 if (blocks_per_mb > 2) {
288 ctx->fdct(&ctx->fdsp, esrc + 8, elinesize, blocks);
290 ctx->fdct(&ctx->fdsp, esrc + elinesize * 4 + 8, elinesize, blocks);
/*
 * Copy the alpha samples of one slice (16 rows of 16 * mbs_per_slice
 * samples) from src into blocks, padding past the picture edges.
 *
 * Samples to the right of the picture repeat the last copied sample of the
 * row; rows below the picture repeat the previously written row.
 * NOTE(review): the abits-dependent branch heads are not visible in this
 * excerpt.
 */
299 static void get_alpha_data(ProresContext *ctx, const uint16_t *src,
300 int linesize, int x, int y, int w, int h,
301 int16_t *blocks, int mbs_per_slice, int abits)
303 const int slice_width = 16 * mbs_per_slice;
304 int i, j, copy_w, copy_h;
/* clip the copied area to what the picture actually contains */
306 copy_w = FFMIN(w - x, slice_width);
307 copy_h = FFMIN(h - y, 16);
308 for (i = 0; i < copy_h; i++) {
309 memcpy(blocks, src, copy_w * sizeof(*src));
311 for (j = 0; j < copy_w; j++)
314 for (j = 0; j < copy_w; j++)
/* widen a 10-bit sample to 16 bits by replicating its top bits */
315 blocks[j] = (blocks[j] << 6) | (blocks[j] >> 4);
316 for (j = copy_w; j < slice_width; j++)
317 blocks[j] = blocks[copy_w - 1];
318 blocks += slice_width;
/* linesize is in bytes, src points to 16-bit samples */
319 src += linesize >> 1;
321 for (; i < 16; i++) {
322 memcpy(blocks, blocks - slice_width, slice_width * sizeof(*blocks));
323 blocks += slice_width;
328 * Write an unsigned rice/exp golomb codeword.
/* The codebook descriptor packs three fields: bits 0-1 hold switch_bits - 1,
 * bits 2-4 the exp-Golomb order and the bits from 5 upwards the Rice order.
 * Values of at least switch_bits << rice_order use the exp-Golomb form, the
 * smaller ones the Rice form.
 * NOTE(review): some statements of the Rice branch are not visible here. */
330 static inline void encode_vlc_codeword(PutBitContext *pb, unsigned codebook, int val)
332 unsigned int rice_order, exp_order, switch_bits, switch_val;
335 /* number of prefix bits to switch between Rice and expGolomb */
336 switch_bits = (codebook & 3) + 1;
337 rice_order = codebook >> 5; /* rice code order */
338 exp_order = (codebook >> 2) & 7; /* exp golomb code order */
340 switch_val = switch_bits << rice_order;
342 if (val >= switch_val) {
/* rebase the value so that the exp-Golomb range starts at 1 << exp_order */
343 val -= switch_val - (1 << exp_order);
344 exponent = av_log2(val);
/* zero prefix, then the value itself in exponent + 1 bits */
346 put_bits(pb, exponent - exp_order + switch_bits, 0);
347 put_bits(pb, exponent + 1, val);
/* Rice form: quotient as a run of zero bits, remainder in rice_order bits */
349 exponent = val >> rice_order;
352 put_bits(pb, exponent, 0);
355 put_sbits(pb, rice_order, val);
/* GET_SIGN(x): 0 for non-negative x, -1 (all bits set) for negative x; this
 * relies on a 32-bit int and an arithmetic right shift.
 * MAKE_CODE(x): fold a signed value into a non-negative code
 * (0 -> 0, -1 -> 1, 1 -> 2, -2 -> 3, ...). */
359 #define GET_SIGN(x) ((x) >> 31)
360 #define MAKE_CODE(x) (((x) << 1) ^ GET_SIGN(x))
/*
 * Write the DC coefficients of all blocks of a slice.
 *
 * Each DC has 0x4000 subtracted and is divided by scale. The first one is
 * coded directly with FIRST_DC_CB; the following ones are coded as
 * differences from the previous DC, with a codebook that adapts to the
 * previous code.
 * NOTE(review): the updates of sign/prev_dc and the initial pointer advance
 * are not visible in this excerpt.
 */
362 static void encode_dcs(PutBitContext *pb, int16_t *blocks,
363 int blocks_per_slice, int scale)
366 int codebook = 3, code, dc, prev_dc, delta, sign, new_sign;
368 prev_dc = (blocks[0] - 0x4000) / scale;
369 encode_vlc_codeword(pb, FIRST_DC_CB, MAKE_CODE(prev_dc));
374 for (i = 1; i < blocks_per_slice; i++, blocks += 64) {
375 dc = (blocks[0] - 0x4000) / scale;
376 delta = dc - prev_dc;
377 new_sign = GET_SIGN(delta);
/* negate delta when sign is -1, leave it unchanged when sign is 0 */
378 delta = (delta ^ sign) - sign;
379 code = MAKE_CODE(delta);
380 encode_vlc_codeword(pb, ff_prores_dc_codebook[codebook], code);
/* next codebook index: ceil(code / 2), capped at 3 */
381 codebook = (code + (code & 1)) >> 1;
382 codebook = FFMIN(codebook, 3);
/*
 * Write the AC coefficients of a slice as (run, level, sign) triples.
 *
 * Coefficients are visited one scan position at a time across all blocks of
 * the slice (the inner loop steps by 64), divided by the matching qmat
 * entry, and the run/level codebooks adapt to the previously coded values.
 * NOTE(review): the zero-level test and the run bookkeeping are not visible
 * in this excerpt.
 */
388 static void encode_acs(PutBitContext *pb, int16_t *blocks,
389 int blocks_per_slice,
390 int plane_size_factor,
391 const uint8_t *scan, const int16_t *qmat)
394 int run, level, run_cb, lev_cb;
395 int max_coeffs, abs_level;
/* total number of coefficients in the slice: 64 per block */
397 max_coeffs = blocks_per_slice << 6;
/* initial codebook selectors */
398 run_cb = ff_prores_run_to_cb_index[4];
399 lev_cb = ff_prores_lev_to_cb_index[2];
/* position 0 is the DC coefficient, handled by encode_dcs() */
402 for (i = 1; i < 64; i++) {
403 for (idx = scan[i]; idx < max_coeffs; idx += 64) {
404 level = blocks[idx] / qmat[scan[i]];
406 abs_level = FFABS(level);
407 encode_vlc_codeword(pb, ff_prores_ac_codebook[run_cb], run);
408 encode_vlc_codeword(pb, ff_prores_ac_codebook[lev_cb],
/* one sign bit: set for negative levels */
410 put_sbits(pb, 1, GET_SIGN(level));
/* table lookups are clamped to the last entry of each index table */
412 run_cb = ff_prores_run_to_cb_index[FFMIN(run, 15)];
413 lev_cb = ff_prores_lev_to_cb_index[FFMIN(abs_level, 9)];
/*
 * Write one colour plane of a slice (DC coefficients, then AC coefficients)
 * and return the number of bytes produced, computed from the bit position
 * before and after.
 * NOTE(review): part of the parameter list and the statement between the
 * coefficient writers and the return are not visible in this excerpt.
 */
422 static int encode_slice_plane(ProresContext *ctx, PutBitContext *pb,
423 const uint16_t *src, int linesize,
424 int mbs_per_slice, int16_t *blocks,
425 int blocks_per_mb, int plane_size_factor,
428 int blocks_per_slice, saved_pos;
430 saved_pos = put_bits_count(pb);
431 blocks_per_slice = mbs_per_slice * blocks_per_mb;
/* the DC quantiser is the first matrix entry */
433 encode_dcs(pb, blocks, blocks_per_slice, qmat[0]);
434 encode_acs(pb, blocks, blocks_per_slice, plane_size_factor,
435 ctx->scantable, qmat);
438 return (put_bits_count(pb) - saved_pos) >> 3;
/*
 * Write the change from prev to cur for one alpha sample.
 *
 * A zero difference, or one outside [-dsize, dsize], is written with abits
 * bits; a small non-zero difference is written as magnitude - 1 in
 * dbits - 1 bits followed by a sign bit.
 * NOTE(review): the wrap-around adjustments of diff and the flag bit that
 * separates both forms are not visible in this excerpt.
 */
441 static void put_alpha_diff(PutBitContext *pb, int cur, int prev, int abits)
443 const int mask = (1 << abits) - 1;
444 const int dbits = (abits == 8) ? 4 : 7;
/* '-' binds tighter than '<<': this is 1 << (dbits - 1) */
445 const int dsize = 1 << dbits - 1;
446 int diff = cur - prev;
449 if (diff >= (1 << abits) - dsize)
451 if (diff < -dsize || diff > dsize || !diff) {
453 put_bits(pb, abits, diff);
456 put_bits(pb, dbits - 1, FFABS(diff) - 1);
457 put_bits(pb, 1, diff < 0);
/*
 * Write the length of a run of identical alpha samples, using either a
 * 4-bit or a 15-bit field.
 * NOTE(review): the conditions choosing between both forms and any flag
 * bits are not visible in this excerpt.
 */
461 static void put_alpha_run(PutBitContext *pb, int run)
466 put_bits(pb, 4, run);
468 put_bits(pb, 15, run);
474 // todo alpha quantisation for high quants
/*
 * Write the alpha plane of a slice as sample differences and run lengths and
 * return the number of bytes produced.
 *
 * The slice holds mbs_per_slice * 256 samples and the predictor starts at
 * the maximum alpha value (mask).
 * NOTE(review): the sample fetch and the run counting inside the loop are
 * not visible in this excerpt.
 */
475 static int encode_alpha_plane(ProresContext *ctx, PutBitContext *pb,
476 int mbs_per_slice, uint16_t *blocks,
479 const int abits = ctx->alpha_bits;
480 const int mask = (1 << abits) - 1;
481 const int num_coeffs = mbs_per_slice * 256;
482 int saved_pos = put_bits_count(pb);
483 int prev = mask, cur;
488 put_alpha_diff(pb, cur, prev, abits);
493 put_alpha_run (pb, run);
494 put_alpha_diff(pb, cur, prev, abits);
500 } while (idx < num_coeffs);
/* flush the run that is still pending after the last sample */
502 put_alpha_run(pb, run);
504 return (put_bits_count(pb) - saved_pos) >> 3;
/*
 * Encode every plane of one slice into pb with the given quantiser.
 *
 * The byte size of each plane is stored in sizes[] and added to total_size;
 * an error is logged when the bit writer runs out of space.
 * NOTE(review): part of the parameter list, the computation of xp/yp and
 * num_cblocks, and the return statements are not visible in this excerpt.
 */
507 static int encode_slice(AVCodecContext *avctx, const AVFrame *pic,
509 int sizes[4], int x, int y, int quant,
512 ProresContext *ctx = avctx->priv_data;
516 int slice_width_factor = av_log2(mbs_per_slice);
517 int num_cblocks, pwidth, linesize, line_add;
518 int plane_factor, is_chroma;
/* line_add selects the first source line of the picture being coded */
521 if (ctx->pictures_per_frame == 1)
524 line_add = ctx->cur_picture_idx ^ !pic->top_field_first;
/* pick the quantiser matrix: the forced one, a cached one, or one built
 * on the fly for quantisers beyond the cache */
526 if (ctx->force_quant) {
527 qmat = ctx->quants[0];
528 } else if (quant < MAX_STORED_Q) {
529 qmat = ctx->quants[quant];
531 qmat = ctx->custom_q;
532 for (i = 0; i < 64; i++)
533 qmat[i] = ctx->quant_mat[i] * quant;
536 for (i = 0; i < ctx->num_planes; i++) {
537 is_chroma = (i == 1 || i == 2);
538 plane_factor = slice_width_factor + 2;
540 plane_factor += ctx->chroma_factor - 3;
/* luma, alpha and 4:4:4 chroma use the full width; otherwise it is halved */
541 if (!is_chroma || ctx->chroma_factor == CFACTOR_Y444) {
545 pwidth = avctx->width;
550 pwidth = avctx->width >> 1;
/* with two pictures per frame every other line belongs to this picture */
553 linesize = pic->linesize[i] * ctx->pictures_per_frame;
554 src = (const uint16_t*)(pic->data[i] + yp * linesize +
555 line_add * pic->linesize[i]) + xp;
/* colour planes: transform, then entropy-code the coefficients */
558 get_slice_data(ctx, src, linesize, xp, yp,
559 pwidth, avctx->height / ctx->pictures_per_frame,
560 ctx->blocks[0], ctx->emu_buf,
561 mbs_per_slice, num_cblocks, is_chroma);
562 sizes[i] = encode_slice_plane(ctx, pb, src, linesize,
563 mbs_per_slice, ctx->blocks[0],
564 num_cblocks, plane_factor,
/* alpha plane: samples are copied and coded without a transform */
567 get_alpha_data(ctx, src, linesize, xp, yp,
568 pwidth, avctx->height / ctx->pictures_per_frame,
569 ctx->blocks[0], mbs_per_slice, ctx->alpha_bits);
570 sizes[i] = encode_alpha_plane(ctx, pb, mbs_per_slice,
571 ctx->blocks[0], quant);
573 total_size += sizes[i];
574 if (put_bits_left(pb) < 0) {
575 av_log(avctx, AV_LOG_ERROR,
576 "Underestimated required buffer size.\n");
/*
 * Return the length in bits of the codeword for val in the given codebook,
 * without writing anything. The codebook descriptor is decoded exactly as
 * in encode_vlc_codeword().
 */
583 static inline int estimate_vlc(unsigned codebook, int val)
585 unsigned int rice_order, exp_order, switch_bits, switch_val;
588 /* number of prefix bits to switch between Rice and expGolomb */
589 switch_bits = (codebook & 3) + 1;
590 rice_order = codebook >> 5; /* rice code order */
591 exp_order = (codebook >> 2) & 7; /* exp golomb code order */
593 switch_val = switch_bits << rice_order;
595 if (val >= switch_val) {
596 val -= switch_val - (1 << exp_order);
597 exponent = av_log2(val);
/* prefix of (exponent - exp_order + switch_bits) bits plus exponent + 1
 * value bits */
599 return exponent * 2 - exp_order + switch_bits + 1;
/* Rice form: quotient bits, one extra bit, rice_order remainder bits */
601 return (val >> rice_order) + rice_order + 1;
/*
 * Estimate the bits needed for the DC coefficients of a slice, following
 * the same steps as encode_dcs(), and add the quantisation remainders to
 * *error.
 * NOTE(review): part of the parameter list, the sign/prev_dc updates and
 * the return statement are not visible in this excerpt.
 */
605 static int estimate_dcs(int *error, int16_t *blocks, int blocks_per_slice,
609 int codebook = 3, code, dc, prev_dc, delta, sign, new_sign;
612 prev_dc = (blocks[0] - 0x4000) / scale;
613 bits = estimate_vlc(FIRST_DC_CB, MAKE_CODE(prev_dc));
/* the division remainder is what quantisation discards */
617 *error += FFABS(blocks[0] - 0x4000) % scale;
619 for (i = 1; i < blocks_per_slice; i++, blocks += 64) {
620 dc = (blocks[0] - 0x4000) / scale;
621 *error += FFABS(blocks[0] - 0x4000) % scale;
622 delta = dc - prev_dc;
623 new_sign = GET_SIGN(delta);
624 delta = (delta ^ sign) - sign;
625 code = MAKE_CODE(delta);
626 bits += estimate_vlc(ff_prores_dc_codebook[codebook], code);
627 codebook = (code + (code & 1)) >> 1;
628 codebook = FFMIN(codebook, 3);
/*
 * Estimate the bits needed for the AC coefficients of a slice, following
 * the same traversal as encode_acs(), and add the quantisation remainders
 * to *error.
 * NOTE(review): the zero-level test, the run bookkeeping and the return
 * statement are not visible in this excerpt.
 */
636 static int estimate_acs(int *error, int16_t *blocks, int blocks_per_slice,
637 int plane_size_factor,
638 const uint8_t *scan, const int16_t *qmat)
641 int run, level, run_cb, lev_cb;
642 int max_coeffs, abs_level;
645 max_coeffs = blocks_per_slice << 6;
646 run_cb = ff_prores_run_to_cb_index[4];
647 lev_cb = ff_prores_lev_to_cb_index[2];
650 for (i = 1; i < 64; i++) {
651 for (idx = scan[i]; idx < max_coeffs; idx += 64) {
652 level = blocks[idx] / qmat[scan[i]];
/* remainder of the division = information lost by quantising */
653 *error += FFABS(blocks[idx]) % qmat[scan[i]];
655 abs_level = FFABS(level);
656 bits += estimate_vlc(ff_prores_ac_codebook[run_cb], run);
657 bits += estimate_vlc(ff_prores_ac_codebook[lev_cb],
660 run_cb = ff_prores_run_to_cb_index[FFMIN(run, 15)];
661 lev_cb = ff_prores_lev_to_cb_index[FFMIN(abs_level, 9)];
/*
 * Estimate the size in bits of one colour plane of a slice for the matrix
 * qmat, reading the coefficients from td->blocks[plane]. Quantisation error
 * is accumulated into *error. The result is rounded up to a whole number of
 * bytes (a multiple of 8 bits).
 */
672 static int estimate_slice_plane(ProresContext *ctx, int *error, int plane,
673 const uint16_t *src, int linesize,
675 int blocks_per_mb, int plane_size_factor,
676 const int16_t *qmat, ProresThreadData *td)
678 int blocks_per_slice;
681 blocks_per_slice = mbs_per_slice * blocks_per_mb;
683 bits = estimate_dcs(error, td->blocks[plane], blocks_per_slice, qmat[0]);
684 bits += estimate_acs(error, td->blocks[plane], blocks_per_slice,
685 plane_size_factor, ctx->scantable, qmat);
687 return FFALIGN(bits, 8);
/*
 * Bit-count counterpart of put_alpha_diff(): classifies the difference
 * between cur and prev with the same tests.
 * NOTE(review): the wrap-around adjustments and both return statements are
 * not visible in this excerpt.
 */
690 static int est_alpha_diff(int cur, int prev, int abits)
692 const int mask = (1 << abits) - 1;
693 const int dbits = (abits == 8) ? 4 : 7;
/* '-' binds tighter than '<<': this is 1 << (dbits - 1) */
694 const int dsize = 1 << dbits - 1;
695 int diff = cur - prev;
698 if (diff >= (1 << abits) - dsize)
700 if (diff < -dsize || diff > dsize || !diff)
/*
 * Estimate the size in bits of the alpha plane of a slice, walking the
 * mbs_per_slice * 256 samples like encode_alpha_plane() does, with the
 * predictor starting at the maximum alpha value.
 * NOTE(review): part of the parameter list, the run accounting and the
 * return statement are not visible in this excerpt.
 */
706 static int estimate_alpha_plane(ProresContext *ctx, int *error,
707 const uint16_t *src, int linesize,
708 int mbs_per_slice, int quant,
711 const int abits = ctx->alpha_bits;
712 const int mask = (1 << abits) - 1;
713 const int num_coeffs = mbs_per_slice * 256;
714 int prev = mask, cur;
721 bits = est_alpha_diff(cur, prev, abits);
732 bits += est_alpha_diff(cur, prev, abits);
738 } while (idx < num_coeffs);
/*
 * Evaluate the candidate quantisers for one slice and extend the trellis.
 *
 * Steps visible in this excerpt:
 *  1. load every plane of the slice into td->blocks[];
 *  2. reset the trellis nodes of this slice (they start at trellis_node);
 *  3. estimate size and error for each quantiser in [min_quant, max_quant];
 *  4. if even max_quant exceeds the bit budget, try quantisers up to 127
 *     and keep the result in the extra slot max_quant + 1;
 *  5. connect every node of the previous slice to every node of this one,
 *     keeping the lowest accumulated score per node;
 *  6. pick the node of this slice with the lowest score.
 * NOTE(review): several lines (initialisations, penalty handling and the
 * return statement) are not visible in this excerpt.
 */
750 static int find_slice_quant(AVCodecContext *avctx, const AVFrame *pic,
751 int trellis_node, int x, int y, int mbs_per_slice,
752 ProresThreadData *td)
754 ProresContext *ctx = avctx->priv_data;
755 int i, q, pq, xp, yp;
757 int slice_width_factor = av_log2(mbs_per_slice);
758 int num_cblocks[MAX_PLANES], pwidth;
759 int plane_factor[MAX_PLANES], is_chroma[MAX_PLANES];
760 const int min_quant = ctx->profile_info->min_quant;
761 const int max_quant = ctx->profile_info->max_quant;
762 int error, bits, bits_limit;
763 int mbs, prev, cur, new_score;
764 int slice_bits[TRELLIS_WIDTH], slice_score[TRELLIS_WIDTH];
767 int linesize[4], line_add;
769 if (ctx->pictures_per_frame == 1)
772 line_add = ctx->cur_picture_idx ^ !pic->top_field_first;
/* macroblocks of this row covered up to and including this slice */
773 mbs = x + mbs_per_slice;
/* step 1: fetch the data of every plane */
775 for (i = 0; i < ctx->num_planes; i++) {
776 is_chroma[i] = (i == 1 || i == 2);
777 plane_factor[i] = slice_width_factor + 2;
779 plane_factor[i] += ctx->chroma_factor - 3;
780 if (!is_chroma[i] || ctx->chroma_factor == CFACTOR_Y444) {
784 pwidth = avctx->width;
789 pwidth = avctx->width >> 1;
792 linesize[i] = pic->linesize[i] * ctx->pictures_per_frame;
793 src = (const uint16_t*)(pic->data[i] + yp * linesize[i] +
794 line_add * pic->linesize[i]) + xp;
797 get_slice_data(ctx, src, linesize[i], xp, yp,
798 pwidth, avctx->height / ctx->pictures_per_frame,
799 td->blocks[i], td->emu_buf,
800 mbs_per_slice, num_cblocks[i], is_chroma[i]);
802 get_alpha_data(ctx, src, linesize[i], xp, yp,
803 pwidth, avctx->height / ctx->pictures_per_frame,
804 td->blocks[i], mbs_per_slice, ctx->alpha_bits);
/* step 2: mark the nodes of this slice as not yet reached */
808 for (q = min_quant; q < max_quant + 2; q++) {
809 td->nodes[trellis_node + q].prev_node = -1;
810 td->nodes[trellis_node + q].quant = q;
813 // todo: maybe perform coarser quantising to fit into frame size when needed
/* step 3: cost of each regular quantiser */
814 for (q = min_quant; q <= max_quant; q++) {
/* colour planes only; alpha is estimated separately below */
817 for (i = 0; i < ctx->num_planes - !!ctx->alpha_bits; i++) {
818 bits += estimate_slice_plane(ctx, &error, i,
821 num_cblocks[i], plane_factor[i],
825 bits += estimate_alpha_plane(ctx, &error, src, linesize[3],
826 mbs_per_slice, q, td->blocks[3]);
827 if (bits > 65000 * 8)
830 slice_bits[q] = bits;
831 slice_score[q] = error;
/* step 4: the extra slot reuses max_quant when that already fits the
 * budget, otherwise a coarser quantiser is searched */
833 if (slice_bits[max_quant] <= ctx->bits_per_mb * mbs_per_slice) {
834 slice_bits[max_quant + 1] = slice_bits[max_quant];
835 slice_score[max_quant + 1] = slice_score[max_quant] + 1;
836 overquant = max_quant;
838 for (q = max_quant + 1; q < 128; q++) {
841 if (q < MAX_STORED_Q) {
842 qmat = ctx->quants[q];
845 for (i = 0; i < 64; i++)
846 qmat[i] = ctx->quant_mat[i] * q;
848 for (i = 0; i < ctx->num_planes - !!ctx->alpha_bits; i++) {
849 bits += estimate_slice_plane(ctx, &error, i,
852 num_cblocks[i], plane_factor[i],
856 bits += estimate_alpha_plane(ctx, &error, src, linesize[3],
857 mbs_per_slice, q, td->blocks[3]);
858 if (bits <= ctx->bits_per_mb * mbs_per_slice)
862 slice_bits[max_quant + 1] = bits;
863 slice_score[max_quant + 1] = error;
866 td->nodes[trellis_node + max_quant + 1].quant = overquant;
/* step 5: relax the edges from the previous slice's nodes; the budget
 * covers all macroblocks of the row so far */
868 bits_limit = mbs * ctx->bits_per_mb;
869 for (pq = min_quant; pq < max_quant + 2; pq++) {
870 prev = trellis_node - TRELLIS_WIDTH + pq;
872 for (q = min_quant; q < max_quant + 2; q++) {
873 cur = trellis_node + q;
875 bits = td->nodes[prev].bits + slice_bits[q];
876 error = slice_score[q];
877 if (bits > bits_limit)
/* saturate instead of letting the accumulated score overflow */
880 if (td->nodes[prev].score < SCORE_LIMIT && error < SCORE_LIMIT)
881 new_score = td->nodes[prev].score + error;
883 new_score = SCORE_LIMIT;
884 if (td->nodes[cur].prev_node == -1 ||
885 td->nodes[cur].score >= new_score) {
887 td->nodes[cur].bits = bits;
888 td->nodes[cur].score = new_score;
889 td->nodes[cur].prev_node = prev;
/* step 6: best node of this slice; ties go to the higher quantiser */
894 error = td->nodes[trellis_node + min_quant].score;
895 pq = trellis_node + min_quant;
896 for (q = min_quant + 1; q < max_quant + 2; q++) {
897 if (td->nodes[trellis_node + q].score <= error) {
898 error = td->nodes[trellis_node + q].score;
899 pq = trellis_node + q;
/*
 * execute2() job: choose the quantisers for one macroblock row.
 *
 * jobnr is the row index and threadnr selects the per-thread scratch data.
 * find_slice_quant() is run for each slice from left to right; the chain of
 * prev_node links is then followed backwards from the last returned node to
 * fill ctx->slice_q[] for the row.
 * NOTE(review): the loop body that shrinks mbs_per_slice and the return
 * statement are not visible in this excerpt.
 */
906 static int find_quant_thread(AVCodecContext *avctx, void *arg,
907 int jobnr, int threadnr)
909 ProresContext *ctx = avctx->priv_data;
910 ProresThreadData *td = ctx->tdata + threadnr;
911 int mbs_per_slice = ctx->mbs_per_slice;
912 int x, y = jobnr, mb, q = 0;
914 for (x = mb = 0; x < ctx->mb_width; x += mbs_per_slice, mb++) {
915 while (ctx->mb_width - x < mbs_per_slice)
/* the nodes of slice mb start at index (mb + 1) * TRELLIS_WIDTH */
917 q = find_slice_quant(avctx, avctx->coded_frame,
918 (mb + 1) * TRELLIS_WIDTH, x, y,
/* backtrack from the last slice to the first */
922 for (x = ctx->slices_width - 1; x >= 0; x--) {
923 ctx->slice_q[x + y * ctx->slices_width] = td->nodes[q].quant;
924 q = td->nodes[q].prev_node;
/*
 * Encode one frame into pkt.
 *
 * Writes the frame header, then for each picture (one, or two when
 * interlaced) a picture header, a table of slice sizes and the slices
 * themselves. Size fields are back-patched once the sizes are known. The
 * packet is enlarged when the remaining space could be too small for the
 * coming slices.
 * NOTE(review): a number of lines (declarations, error checks, return
 * statements) are not visible in this excerpt.
 */
930 static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
931 const AVFrame *pic, int *got_packet)
933 ProresContext *ctx = avctx->priv_data;
934 uint8_t *orig_buf, *buf, *slice_hdr, *slice_sizes, *tmp;
935 uint8_t *picture_size_pos;
937 int x, y, i, mb, q = 0;
938 int sizes[4] = { 0 };
/* header size byte + quantiser byte + a 16-bit size for each plane but the
 * last */
939 int slice_hdr_size = 2 + 2 * (ctx->num_planes - 1);
940 int frame_size, picture_size, slice_size;
/* initial guess; raised below whenever a larger slice is produced */
942 int max_slice_size = (ctx->frame_size_upper_bound - 200) / (ctx->pictures_per_frame * ctx->slices_per_picture + 1);
945 *avctx->coded_frame = *pic;
946 avctx->coded_frame->pict_type = AV_PICTURE_TYPE_I;
947 avctx->coded_frame->key_frame = 1;
949 pkt_size = ctx->frame_size_upper_bound;
951 if ((ret = ff_alloc_packet2(avctx, pkt, pkt_size + FF_MIN_BUFFER_SIZE)) < 0)
954 orig_buf = pkt->data;
/* frame container: the size field is skipped now and filled in at the end */
957 orig_buf += 4; // frame size
958 bytestream_put_be32 (&orig_buf, FRAME_ID); // frame container ID
963 buf += 2; // frame header size will be stored here
964 bytestream_put_be16 (&buf, 0); // version 1
965 bytestream_put_buffer(&buf, ctx->vendor, 4);
966 bytestream_put_be16 (&buf, avctx->width);
967 bytestream_put_be16 (&buf, avctx->height);
/* chroma format in the top bits, field order in bits 2-3 when interlaced */
969 frame_flags = ctx->chroma_factor << 6;
970 if (avctx->flags & CODEC_FLAG_INTERLACED_DCT)
971 frame_flags |= pic->top_field_first ? 0x04 : 0x08;
972 bytestream_put_byte (&buf, frame_flags);
974 bytestream_put_byte (&buf, 0); // reserved
975 bytestream_put_byte (&buf, avctx->color_primaries);
976 bytestream_put_byte (&buf, avctx->color_trc);
977 bytestream_put_byte (&buf, avctx->colorspace);
/* low bits: alpha depth in bytes (0, 1 or 2) */
978 bytestream_put_byte (&buf, 0x40 | (ctx->alpha_bits >> 3));
979 bytestream_put_byte (&buf, 0); // reserved
980 if (ctx->quant_sel != QUANT_MAT_DEFAULT) {
981 bytestream_put_byte (&buf, 0x03); // matrix flags - both matrices are present
982 // luma quantisation matrix
983 for (i = 0; i < 64; i++)
984 bytestream_put_byte(&buf, ctx->quant_mat[i]);
985 // chroma quantisation matrix
/* the same matrix is written for chroma */
986 for (i = 0; i < 64; i++)
987 bytestream_put_byte(&buf, ctx->quant_mat[i]);
989 bytestream_put_byte (&buf, 0x00); // matrix flags - default matrices are used
991 bytestream_put_be16 (&tmp, buf - orig_buf); // write back frame header size
993 for (ctx->cur_picture_idx = 0;
994 ctx->cur_picture_idx < ctx->pictures_per_frame;
995 ctx->cur_picture_idx++) {
/* the picture size field follows the one-byte header size */
997 picture_size_pos = buf + 1;
998 bytestream_put_byte (&buf, 0x40); // picture header size (in bits)
999 buf += 4; // picture data size will be stored here
1000 bytestream_put_be16 (&buf, ctx->slices_per_picture);
1001 bytestream_put_byte (&buf, av_log2(ctx->mbs_per_slice) << 4); // slice width and height in MBs
1003 // seek table - will be filled during slice encoding
1005 buf += ctx->slices_per_picture * 2;
/* without a forced quantiser, pick the per-slice quantisers first */
1008 if (!ctx->force_quant) {
1009 ret = avctx->execute2(avctx, find_quant_thread, NULL, NULL,
1015 for (y = 0; y < ctx->mb_height; y++) {
1016 int mbs_per_slice = ctx->mbs_per_slice;
1017 for (x = mb = 0; x < ctx->mb_width; x += mbs_per_slice, mb++) {
1018 q = ctx->force_quant ? ctx->force_quant
1019 : ctx->slice_q[mb + y * ctx->slices_width];
/* narrower slices at the right edge: halve until the slice fits */
1021 while (ctx->mb_width - x < mbs_per_slice)
1022 mbs_per_slice >>= 1;
1024 bytestream_put_byte(&buf, slice_hdr_size << 3);
1026 buf += slice_hdr_size - 1;
/* less than two maximum-size slices of room left: grow the packet */
1027 if (pkt_size <= buf - orig_buf + 2 * max_slice_size) {
1028 uint8_t *start = pkt->data;
1029 // Recompute new size according to max_slice_size
1031 int delta = 200 + (ctx->pictures_per_frame *
1032 ctx->slices_per_picture + 1) *
1033 max_slice_size - pkt_size;
1035 delta = FFMAX(delta, 2 * max_slice_size);
1036 ctx->frame_size_upper_bound += delta;
1039 avpriv_request_sample(avctx,
1040 "Packet too small: is %i,"
1041 " needs %i (slice: %i). "
1042 "Correct allocation",
1043 pkt_size, delta, max_slice_size);
1047 ret = av_grow_packet(pkt, delta);
/* pkt->data may have moved: rebase every pointer into the buffer */
1053 orig_buf = pkt->data + (orig_buf - start);
1054 buf = pkt->data + (buf - start);
1055 picture_size_pos = pkt->data + (picture_size_pos - start);
1056 slice_sizes = pkt->data + (slice_sizes - start);
1057 slice_hdr = pkt->data + (slice_hdr - start);
1058 tmp = pkt->data + (tmp - start);
1060 init_put_bits(&pb, buf, (pkt_size - (buf - orig_buf)));
1061 ret = encode_slice(avctx, pic, &pb, sizes, x, y, q,
/* complete the slice header: quantiser, then the size of every plane
 * except the last */
1066 bytestream_put_byte(&slice_hdr, q);
1067 slice_size = slice_hdr_size + sizes[ctx->num_planes - 1];
1068 for (i = 0; i < ctx->num_planes - 1; i++) {
1069 bytestream_put_be16(&slice_hdr, sizes[i]);
1070 slice_size += sizes[i];
1072 bytestream_put_be16(&slice_sizes, slice_size);
1073 buf += slice_size - slice_hdr_size;
1074 if (max_slice_size < slice_size)
1075 max_slice_size = slice_size;
/* picture size counts from the picture header's first byte */
1079 picture_size = buf - (picture_size_pos - 1);
1080 bytestream_put_be32(&picture_size_pos, picture_size);
1084 frame_size = buf - orig_buf;
1085 bytestream_put_be32(&orig_buf, frame_size);
1087 pkt->size = frame_size;
1088 pkt->flags |= AV_PKT_FLAG_KEY;
/*
 * Release everything encode_init() allocated: the coded frame, the trellis
 * nodes of every thread, the thread data array and the slice quantiser
 * table. It is also called from encode_init() on allocation failure.
 * NOTE(review): any guard around the per-thread loop and the return
 * statement are not visible in this excerpt.
 */
1094 static av_cold int encode_close(AVCodecContext *avctx)
1096 ProresContext *ctx = avctx->priv_data;
1099 av_freep(&avctx->coded_frame);
1102 for (i = 0; i < avctx->thread_count; i++)
1103 av_freep(&ctx->tdata[i].nodes);
1105 av_freep(&ctx->tdata);
1106 av_freep(&ctx->slice_q);
/*
 * Default ctx->fdct callback: copy an 8x8 block of 16-bit samples from src
 * into block, row by row.
 * NOTE(review): the statement that follows the copy (presumably the actual
 * transform through fdsp) is not visible in this excerpt.
 */
1111 static void prores_fdct(FDCTDSPContext *fdsp, const uint16_t *src,
1112 int linesize, int16_t *block)
1115 const uint16_t *tsrc = src;
1117 for (y = 0; y < 8; y++) {
1118 for (x = 0; x < 8; x++)
1119 block[y * 8 + x] = tsrc[x];
/* linesize is in bytes, tsrc points to 16-bit samples */
1120 tsrc += linesize >> 1;
/*
 * Validate the options and set up the encoder context.
 *
 * Chooses profile, alpha depth, chroma format and quantisation matrix,
 * derives the slice layout, and then either prepares the trellis search
 * (no forced quantiser) or precomputes the single forced matrix. Finally it
 * computes the upper bound used to size output packets.
 * Returns a negative AVERROR code on invalid options or allocation failure.
 * NOTE(review): several lines (declarations, branch heads and the final
 * return) are not visible in this excerpt.
 */
1125 static av_cold int encode_init(AVCodecContext *avctx)
1127 ProresContext *ctx = avctx->priv_data;
1130 int min_quant, max_quant;
1131 int interlaced = !!(avctx->flags & CODEC_FLAG_INTERLACED_DCT);
1133 avctx->bits_per_raw_sample = 10;
1134 avctx->coded_frame = av_frame_alloc();
1135 if (!avctx->coded_frame)
1136 return AVERROR(ENOMEM);
1138 ctx->fdct = prores_fdct;
1139 ctx->scantable = interlaced ? ff_prores_interlaced_scan
1140 : ff_prores_progressive_scan;
1141 ff_fdctdsp_init(&ctx->fdsp, avctx);
1143 mps = ctx->mbs_per_slice;
/* a power of two has exactly one bit set, so mps & (mps - 1) is 0 */
1144 if (mps & (mps - 1)) {
1145 av_log(avctx, AV_LOG_ERROR,
1146 "there should be an integer power of two MBs per slice\n");
1147 return AVERROR(EINVAL);
/* auto profile: 4444 for input with alpha or without chroma subsampling,
 * HQ otherwise */
1149 if (ctx->profile == PRORES_PROFILE_AUTO) {
1150 const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(avctx->pix_fmt);
1151 ctx->profile = (desc->flags & AV_PIX_FMT_FLAG_ALPHA ||
1152 !(desc->log2_chroma_w + desc->log2_chroma_h))
1153 ? PRORES_PROFILE_4444 : PRORES_PROFILE_HQ;
1154 av_log(avctx, AV_LOG_INFO, "Autoselected %s. It can be overridden "
1155 "through -profile option.\n", ctx->profile == PRORES_PROFILE_4444
1156 ? "4:4:4:4 profile because of the used input colorspace"
1157 : "HQ profile to keep best quality");
1159 if (av_pix_fmt_desc_get(avctx->pix_fmt)->flags & AV_PIX_FMT_FLAG_ALPHA) {
1160 if (ctx->profile != PRORES_PROFILE_4444) {
1161 // non-4444 profile selected: warn and disable alpha (alpha_bits = 0)
1162 av_log(avctx, AV_LOG_WARNING, "Profile selected will not "
1163 "encode alpha. Override with -profile if needed.\n");
1164 ctx->alpha_bits = 0;
/* only multiples of 8 are accepted; the option range caps the value at 16 */
1166 if (ctx->alpha_bits & 7) {
1167 av_log(avctx, AV_LOG_ERROR, "alpha bits should be 0, 8 or 16\n");
1168 return AVERROR(EINVAL);
1171 ctx->alpha_bits = 0;
1174 ctx->chroma_factor = avctx->pix_fmt == AV_PIX_FMT_YUV422P10
1177 ctx->profile_info = prores_profile_info + ctx->profile;
1178 ctx->num_planes = 3 + !!ctx->alpha_bits;
/* picture size in macroblocks, rounded up */
1180 ctx->mb_width = FFALIGN(avctx->width, 16) >> 4;
1183 ctx->mb_height = FFALIGN(avctx->height, 32) >> 5;
1185 ctx->mb_height = FFALIGN(avctx->height, 16) >> 4;
/* full-width slices, plus one shorter slice for every set bit of the
 * leftover macroblock count */
1187 ctx->slices_width = ctx->mb_width / mps;
1188 ctx->slices_width += av_popcount(ctx->mb_width - ctx->slices_width * mps);
1189 ctx->slices_per_picture = ctx->mb_height * ctx->slices_width;
1190 ctx->pictures_per_frame = 1 + interlaced;
/* -1 means "use the matrix of the selected profile" */
1192 if (ctx->quant_sel == -1)
1193 ctx->quant_mat = prores_quant_matrices[ctx->profile_info->quant];
1195 ctx->quant_mat = prores_quant_matrices[ctx->quant_sel];
1197 if (strlen(ctx->vendor) != 4) {
1198 av_log(avctx, AV_LOG_ERROR, "vendor ID should be 4 bytes\n");
1199 return AVERROR_INVALIDDATA;
/* a non-zero global_quality forces a single quantiser for every slice */
1202 ctx->force_quant = avctx->global_quality / FF_QP2LAMBDA;
1203 if (!ctx->force_quant) {
1204 if (!ctx->bits_per_mb) {
/* default bit budget: find the size class of the frame */
1205 for (i = 0; i < NUM_MB_LIMITS - 1; i++)
1206 if (prores_mb_limits[i] >= ctx->mb_width * ctx->mb_height *
1207 ctx->pictures_per_frame)
1209 ctx->bits_per_mb = ctx->profile_info->br_tab[i];
1210 } else if (ctx->bits_per_mb < 128) {
1211 av_log(avctx, AV_LOG_ERROR, "too few bits per MB, please set at least 128\n");
1212 return AVERROR_INVALIDDATA;
1215 min_quant = ctx->profile_info->min_quant;
1216 max_quant = ctx->profile_info->max_quant;
/* cache the scaled matrices for the quantisers below MAX_STORED_Q */
1217 for (i = min_quant; i < MAX_STORED_Q; i++) {
1218 for (j = 0; j < 64; j++)
1219 ctx->quants[i][j] = ctx->quant_mat[j] * i;
1222 ctx->slice_q = av_malloc(ctx->slices_per_picture * sizeof(*ctx->slice_q));
1223 if (!ctx->slice_q) {
1224 encode_close(avctx);
1225 return AVERROR(ENOMEM);
1228 ctx->tdata = av_mallocz(avctx->thread_count * sizeof(*ctx->tdata));
1230 encode_close(avctx);
1231 return AVERROR(ENOMEM);
1234 for (j = 0; j < avctx->thread_count; j++) {
/* one node group per slice column plus a leading start group */
1235 ctx->tdata[j].nodes = av_malloc((ctx->slices_width + 1)
1237 * sizeof(*ctx->tdata->nodes));
1238 if (!ctx->tdata[j].nodes) {
1239 encode_close(avctx);
1240 return AVERROR(ENOMEM);
/* start group: zero cost, no predecessor */
1242 for (i = min_quant; i < max_quant + 2; i++) {
1243 ctx->tdata[j].nodes[i].prev_node = -1;
1244 ctx->tdata[j].nodes[i].bits = 0;
1245 ctx->tdata[j].nodes[i].score = 0;
1251 if (ctx->force_quant > 64) {
1252 av_log(avctx, AV_LOG_ERROR, "too large quantiser, maximum is 64\n");
1253 return AVERROR_INVALIDDATA;
/* forced quantiser: the only matrix needed lives in quants[0]; ls
 * accumulates a per-block bit estimate derived from the quantiser steps */
1256 for (j = 0; j < 64; j++) {
1257 ctx->quants[0][j] = ctx->quant_mat[j] * ctx->force_quant;
1258 ls += av_log2((1 << 11) / ctx->quants[0][j]) * 2 + 1;
1261 ctx->bits_per_mb = ls * 8;
1262 if (ctx->chroma_factor == CFACTOR_Y444)
1263 ctx->bits_per_mb += ls * 4;
/* (number of slices in the frame + 1) times the per-slice header and
 * payload budget */
1266 ctx->frame_size_upper_bound = (ctx->pictures_per_frame *
1267 ctx->slices_per_picture + 1) *
1268 (2 + 2 * ctx->num_planes +
1269 (mps * ctx->bits_per_mb) / 8)
1272 if (ctx->alpha_bits) {
1273 // The alpha plane is run-coded and might exceed the bit budget.
1274 ctx->frame_size_upper_bound += (ctx->pictures_per_frame *
1275 ctx->slices_per_picture + 1) *
1276 /* num pixels per slice */ (ctx->mbs_per_slice * 256 *
1277 /* bits per pixel */ (1 + ctx->alpha_bits + 1) + 7 >> 3);
1280 avctx->codec_tag = ctx->profile_info->tag;
1282 av_log(avctx, AV_LOG_DEBUG,
1283 "profile %d, %d slices, interlacing: %s, %d bits per MB\n",
1284 ctx->profile, ctx->slices_per_picture * ctx->pictures_per_frame,
1285 interlaced ? "yes" : "no", ctx->bits_per_mb);
1286 av_log(avctx, AV_LOG_DEBUG, "frame size upper bound: %d\n",
1287 ctx->frame_size_upper_bound);
/* OFFSET(x): byte offset of member x inside ProresContext.
 * VE: flag combination used by every option below. The expansion is not
 * parenthesised, so it should only be used as a complete initialiser. */
1292 #define OFFSET(x) offsetof(ProresContext, x)
1293 #define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
/* Private encoder options. The "profile" and "quant_mat" entries are
 * followed by named constants sharing the same unit string. */
1295 static const AVOption options[] = {
1296 { "mbs_per_slice", "macroblocks per slice", OFFSET(mbs_per_slice),
1297 AV_OPT_TYPE_INT, { .i64 = 8 }, 1, MAX_MBS_PER_SLICE, VE },
1298 { "profile", NULL, OFFSET(profile), AV_OPT_TYPE_INT,
1299 { .i64 = PRORES_PROFILE_AUTO },
1300 PRORES_PROFILE_AUTO, PRORES_PROFILE_4444, VE, "profile" },
1301 { "auto", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_AUTO },
1302 0, 0, VE, "profile" },
1303 { "proxy", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_PROXY },
1304 0, 0, VE, "profile" },
1305 { "lt", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_LT },
1306 0, 0, VE, "profile" },
1307 { "standard", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_STANDARD },
1308 0, 0, VE, "profile" },
1309 { "hq", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_HQ },
1310 0, 0, VE, "profile" },
1311 { "4444", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_4444 },
1312 0, 0, VE, "profile" },
/* encode_init() rejects vendor strings that are not exactly 4 bytes long */
1313 { "vendor", "vendor ID", OFFSET(vendor),
1314 AV_OPT_TYPE_STRING, { .str = "Lavc" }, CHAR_MIN, CHAR_MAX, VE },
/* 0 lets encode_init() derive the value from the profile and frame size */
1315 { "bits_per_mb", "desired bits per macroblock", OFFSET(bits_per_mb),
1316 AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 8192, VE },
/* -1 selects the matrix of the chosen profile */
1317 { "quant_mat", "quantiser matrix", OFFSET(quant_sel), AV_OPT_TYPE_INT,
1318 { .i64 = -1 }, -1, QUANT_MAT_DEFAULT, VE, "quant_mat" },
1319 { "auto", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = -1 },
1320 0, 0, VE, "quant_mat" },
1321 { "proxy", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_PROXY },
1322 0, 0, VE, "quant_mat" },
1323 { "lt", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_LT },
1324 0, 0, VE, "quant_mat" },
1325 { "standard", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_STANDARD },
1326 0, 0, VE, "quant_mat" },
1327 { "hq", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_HQ },
1328 0, 0, VE, "quant_mat" },
1329 { "default", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_DEFAULT },
1330 0, 0, VE, "quant_mat" },
/* encode_init() accepts only 0, 8 or 16 */
1331 { "alpha_bits", "bits for alpha plane", OFFSET(alpha_bits), AV_OPT_TYPE_INT,
1332 { .i64 = 16 }, 0, 16, VE },
/* AVClass describing the encoder's private context; referenced by the
 * codec definition through .priv_class.
 * NOTE(review): the line hooking up the option table is not visible in this
 * excerpt. */
1336 static const AVClass proresenc_class = {
1337 .class_name = "ProRes encoder",
1338 .item_name = av_default_item_name,
1340 .version = LIBAVUTIL_VERSION_INT,
/* Codec registration entry for the "prores_ks" encoder: it wires up the
 * init/encode/close callbacks, declares slice threading and lists the
 * accepted 10-bit pixel formats (4:2:2, 4:4:4 and 4:4:4 with alpha). */
1343 AVCodec ff_prores_ks_encoder = {
1344 .name = "prores_ks",
1345 .long_name = NULL_IF_CONFIG_SMALL("Apple ProRes (iCodec Pro)"),
1346 .type = AVMEDIA_TYPE_VIDEO,
1347 .id = AV_CODEC_ID_PRORES,
1348 .priv_data_size = sizeof(ProresContext),
1349 .init = encode_init,
1350 .close = encode_close,
1351 .encode2 = encode_frame,
1352 .capabilities = CODEC_CAP_SLICE_THREADS,
1353 .pix_fmts = (const enum AVPixelFormat[]) {
1354 AV_PIX_FMT_YUV422P10, AV_PIX_FMT_YUV444P10,
1355 AV_PIX_FMT_YUVA444P10, AV_PIX_FMT_NONE
1357 .priv_class = &proresenc_class,