4 * Copyright (c) 2012 Konstantin Shishkov
6 * This encoder appears to be based on Anatoliy Wasserman's encoder, considering
7 * the similarities in the bugs.
9 * This file is part of FFmpeg.
11 * FFmpeg is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public
13 * License as published by the Free Software Foundation; either
14 * version 2.1 of the License, or (at your option) any later version.
16 * FFmpeg is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with FFmpeg; if not, write to the Free Software
23 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
26 #include "libavutil/opt.h"
27 #include "libavutil/pixdesc.h"
31 #include "bytestream.h"
33 #include "proresdata.h"
/* Chroma format codes stored in ctx->chroma_factor; the value is also written
 * into the frame flags (chroma_factor << 6) by encode_frame(). */
35 #define CFACTOR_Y422 2
36 #define CFACTOR_Y444 3
/* Upper limit of the "mbs_per_slice" option; also sizes the per-slice
 * coefficient buffers (64 * 4 * MAX_MBS_PER_SLICE entries per plane). */
38 #define MAX_MBS_PER_SLICE 8
/* Values of the "profile" option. PRORES_PROFILE_AUTO is replaced in
 * encode_init() by PRORES_PROFILE_HQ or PRORES_PROFILE_4444 depending on the
 * pixel format. NOTE(review): the remaining enumerators are not visible in
 * this excerpt. */
43 PRORES_PROFILE_AUTO = -1,
44 PRORES_PROFILE_PROXY = 0,
46 PRORES_PROFILE_STANDARD,
/* Base quantisation matrices, 64 entries each. encode_init() selects one row
 * through ctx->profile_info->quant or the "quant_mat" option and multiplies
 * it by the quantiser to build the effective tables.
 * NOTE(review): rows are presumably in QUANT_MAT_* enum order; the enum and
 * the per-row labels are not visible here - confirm. */
59 static const uint8_t prores_quant_matrices[][64] = {
61 4, 7, 9, 11, 13, 14, 15, 63,
62 7, 7, 11, 12, 14, 15, 63, 63,
63 9, 11, 13, 14, 15, 63, 63, 63,
64 11, 11, 13, 14, 63, 63, 63, 63,
65 11, 13, 14, 63, 63, 63, 63, 63,
66 13, 14, 63, 63, 63, 63, 63, 63,
67 13, 63, 63, 63, 63, 63, 63, 63,
68 63, 63, 63, 63, 63, 63, 63, 63,
71 4, 5, 6, 7, 9, 11, 13, 15,
72 5, 5, 7, 8, 11, 13, 15, 17,
73 6, 7, 9, 11, 13, 15, 15, 17,
74 7, 7, 9, 11, 13, 15, 17, 19,
75 7, 9, 11, 13, 14, 16, 19, 23,
76 9, 11, 13, 14, 16, 19, 23, 29,
77 9, 11, 13, 15, 17, 21, 28, 35,
78 11, 13, 16, 17, 21, 28, 35, 41,
81 4, 4, 5, 5, 6, 7, 7, 9,
82 4, 4, 5, 6, 7, 7, 9, 9,
83 5, 5, 6, 7, 7, 9, 9, 10,
84 5, 5, 6, 7, 7, 9, 9, 10,
85 5, 6, 7, 7, 8, 9, 10, 12,
86 6, 7, 7, 8, 9, 10, 12, 15,
87 6, 7, 7, 9, 10, 11, 14, 17,
88 7, 7, 9, 10, 11, 14, 17, 21,
91 4, 4, 4, 4, 4, 4, 4, 4,
92 4, 4, 4, 4, 4, 4, 4, 4,
93 4, 4, 4, 4, 4, 4, 4, 4,
94 4, 4, 4, 4, 4, 4, 4, 5,
95 4, 4, 4, 4, 4, 4, 5, 5,
96 4, 4, 4, 4, 4, 5, 5, 6,
97 4, 4, 4, 4, 5, 5, 6, 7,
98 4, 4, 4, 4, 5, 6, 7, 7,
101 4, 4, 4, 4, 4, 4, 4, 4,
102 4, 4, 4, 4, 4, 4, 4, 4,
103 4, 4, 4, 4, 4, 4, 4, 4,
104 4, 4, 4, 4, 4, 4, 4, 4,
105 4, 4, 4, 4, 4, 4, 4, 4,
106 4, 4, 4, 4, 4, 4, 4, 4,
107 4, 4, 4, 4, 4, 4, 4, 4,
108 4, 4, 4, 4, 4, 4, 4, 4,
/* Picture size classes expressed as macroblock counts. When bits_per_mb is not
 * set, encode_init() compares these against
 * mb_width * mb_height * pictures_per_frame to choose which br_tab entry of
 * the profile to use. */
112 #define NUM_MB_LIMITS 4
113 static const int prores_mb_limits[NUM_MB_LIMITS] = {
114 1620, // up to 720x576
115 2700, // up to 960x720
116 6075, // up to 1440x1080
117 9216, // up to 2048x1152
/* Per-profile parameters, indexed by ctx->profile in encode_init():
 *  - tag:    written to avctx->codec_tag
 *  - br_tab: default bits per macroblock, one entry per prores_mb_limits class
 *  - quant:  default row of prores_quant_matrices
 * min_quant / max_quant are also read from this struct elsewhere in the file,
 * but their initialisers are not visible in this excerpt. */
120 static const struct prores_profile {
121 const char *full_name;
125 int br_tab[NUM_MB_LIMITS];
127 } prores_profile_info[5] = {
129 .full_name = "proxy",
130 .tag = MKTAG('a', 'p', 'c', 'o'),
133 .br_tab = { 300, 242, 220, 194 },
134 .quant = QUANT_MAT_PROXY,
138 .tag = MKTAG('a', 'p', 'c', 's'),
141 .br_tab = { 720, 560, 490, 440 },
142 .quant = QUANT_MAT_LT,
145 .full_name = "standard",
146 .tag = MKTAG('a', 'p', 'c', 'n'),
149 .br_tab = { 1050, 808, 710, 632 },
150 .quant = QUANT_MAT_STANDARD,
153 .full_name = "high quality",
154 .tag = MKTAG('a', 'p', 'c', 'h'),
157 .br_tab = { 1566, 1216, 1070, 950 },
158 .quant = QUANT_MAT_HQ,
162 .tag = MKTAG('a', 'p', '4', 'h'),
165 .br_tab = { 2350, 1828, 1600, 1425 },
166 .quant = QUANT_MAT_HQ,
/* Number of trellis nodes reserved per slice column; node indices are formed
 * as (slice_index + 1) * TRELLIS_WIDTH + quant. */
170 #define TRELLIS_WIDTH 16
/* Saturation value for trellis scores.
 * NOTE(review): the expansion is not parenthesized; the uses visible in this
 * file ("< SCORE_LIMIT", "= SCORE_LIMIT") are unaffected, but wrapping it in
 * parentheses would be safer. */
171 #define SCORE_LIMIT INT_MAX / 2
/* Number of quantiser values for which a scaled matrix is cached in
 * ProresContext.quants; larger quantisers use custom_q instead. */
180 #define MAX_STORED_Q 16
/* Per-thread scratch state used by the quantiser search; find_quant_thread()
 * picks the entry ctx->tdata[threadnr]. */
182 typedef struct ProresThreadData {
/* coefficient (or alpha sample) storage for one slice, one row per plane */
183 DECLARE_ALIGNED(16, int16_t, blocks)[MAX_PLANES][64 * 4 * MAX_MBS_PER_SLICE];
/* 16x16 staging area used by get_slice_data() for edge macroblocks */
184 DECLARE_ALIGNED(16, uint16_t, emu_buf)[16 * 16];
/* scaled matrix for quantisers that have no cached table */
185 int16_t custom_q[64];
/* trellis nodes, allocated in encode_init() */
186 struct TrellisNode *nodes;
/* Encoder private context (avctx->priv_data). Only part of the member list
 * is visible in this excerpt. */
189 typedef struct ProresContext {
/* slice coefficient storage and edge staging buffer used by encode_slice() */
191 DECLARE_ALIGNED(16, int16_t, blocks)[MAX_PLANES][64 * 4 * MAX_MBS_PER_SLICE];
192 DECLARE_ALIGNED(16, uint16_t, emu_buf)[16*16];
/* quant_mat scaled by each quantiser index, filled in encode_init() */
193 int16_t quants[MAX_STORED_Q][64];
/* scratch matrix for quantisers >= MAX_STORED_Q */
194 int16_t custom_q[64];
/* selected row of prores_quant_matrices */
195 const uint8_t *quant_mat;
/* progressive or interlaced scan order, chosen in encode_init() */
196 const uint8_t *scantable;
/* forward transform callback; set to prores_fdct in encode_init() */
198 void (*fdct)(FDCTDSPContext *fdsp, const uint16_t *src,
199 int linesize, int16_t *block);
/* picture size in 16x16 macroblocks */
203 int mb_width, mb_height;
205 int num_chroma_blocks, chroma_factor;
207 int slices_per_picture;
208 int pictures_per_frame; // 1 for progressive, 2 for interlaced
/* packet allocation size computed in encode_init(); may grow in encode_frame() */
219 int frame_size_upper_bound;
222 const struct prores_profile *profile_info;
/* array of avctx->thread_count per-thread work areas */
226 ProresThreadData *tdata;
/*
 * Fetch the samples of one slice plane and run the forward transform on each
 * 8x8 block, storing the coefficients in blocks.
 *
 * src/linesize   first sample of the slice and source stride in bytes
 * x, y, w, h     slice position and plane dimensions in samples
 * emu_buf        16x16 staging buffer for macroblocks crossing the plane edge
 * blocks_per_mb  blocks per macroblock; the macroblock is 4 * blocks_per_mb
 *                samples wide
 *
 * NOTE(review): several lines of this function (branch conditions, pointer
 * advances) are missing from this excerpt; comments below describe only what
 * is visible.
 */
229 static void get_slice_data(ProresContext *ctx, const uint16_t *src,
230 int linesize, int x, int y, int w, int h,
231 int16_t *blocks, uint16_t *emu_buf,
232 int mbs_per_slice, int blocks_per_mb, int is_chroma)
234 const uint16_t *esrc;
235 const int mb_width = 4 * blocks_per_mb;
/* one iteration per macroblock; src advances by one macroblock width */
239 for (i = 0; i < mbs_per_slice; i++, src += mb_width) {
/* zero the coefficients of all remaining macroblocks of the slice
 * (the guarding condition is not visible here) */
241 memset(blocks, 0, 64 * (mbs_per_slice - i) * blocks_per_mb
/* macroblock lies completely inside the plane: use the source stride */
245 if (x + mb_width <= w && y + 16 <= h) {
247 elinesize = linesize;
/* otherwise stage the macroblock in emu_buf, whose stride is 16 samples */
252 elinesize = 16 * sizeof(*emu_buf);
/* number of valid columns / rows available in the source */
254 bw = FFMIN(w - x, mb_width);
255 bh = FFMIN(h - y, 16);
257 for (j = 0; j < bh; j++) {
258 memcpy(emu_buf + j * 16,
259 (const uint8_t*)src + j * linesize,
/* pad to the right by repeating the last valid sample of the row */
261 pix = emu_buf[j * 16 + bw - 1];
262 for (k = bw; k < mb_width; k++)
263 emu_buf[j * 16 + k] = pix;
/* pad downwards by repeating the last valid row */
266 memcpy(emu_buf + j * 16,
267 emu_buf + (bh - 1) * 16,
268 mb_width * sizeof(*emu_buf));
/* elinesize is in bytes and esrc points to 16-bit samples, so
 * "esrc + elinesize * 4" is 8 rows further down and "esrc + 8" is
 * 8 columns to the right. The two groups of calls below visit the
 * blocks in a different order; presumably one group is for luma and
 * the other for chroma (selected by is_chroma) - confirm. */
271 ctx->fdct(&ctx->fdsp, esrc, elinesize, blocks);
273 if (blocks_per_mb > 2) {
274 ctx->fdct(&ctx->fdsp, esrc + 8, elinesize, blocks);
277 ctx->fdct(&ctx->fdsp, esrc + elinesize * 4, elinesize, blocks);
279 if (blocks_per_mb > 2) {
280 ctx->fdct(&ctx->fdsp, esrc + elinesize * 4 + 8, elinesize, blocks);
284 ctx->fdct(&ctx->fdsp, esrc, elinesize, blocks);
286 ctx->fdct(&ctx->fdsp, esrc + elinesize * 4, elinesize, blocks);
288 if (blocks_per_mb > 2) {
289 ctx->fdct(&ctx->fdsp, esrc + 8, elinesize, blocks);
291 ctx->fdct(&ctx->fdsp, esrc + elinesize * 4 + 8, elinesize, blocks);
/*
 * Copy the alpha samples of one slice (16 rows of 16 * mbs_per_slice samples)
 * into blocks, padding to the right and downwards where the slice extends
 * past the plane.
 *
 * NOTE(review): the selection between the two per-sample conversion loops
 * (presumably on abits) is not visible in this excerpt.
 */
300 static void get_alpha_data(ProresContext *ctx, const uint16_t *src,
301 int linesize, int x, int y, int w, int h,
302 int16_t *blocks, int mbs_per_slice, int abits)
304 const int slice_width = 16 * mbs_per_slice;
305 int i, j, copy_w, copy_h;
/* clamp the copied area to what is available in the plane */
307 copy_w = FFMIN(w - x, slice_width);
308 copy_h = FFMIN(h - y, 16);
309 for (i = 0; i < copy_h; i++) {
310 memcpy(blocks, src, copy_w * sizeof(*src));
312 for (j = 0; j < copy_w; j++)
315 for (j = 0; j < copy_w; j++)
/* widen the sample by replicating its top bits into the low bits */
316 blocks[j] = (blocks[j] << 6) | (blocks[j] >> 4);
/* pad to the right with the last valid sample */
317 for (j = copy_w; j < slice_width; j++)
318 blocks[j] = blocks[copy_w - 1];
319 blocks += slice_width;
/* linesize is in bytes, src points to 16-bit samples */
320 src += linesize >> 1;
/* pad downwards by repeating the previous row */
322 for (; i < 16; i++) {
323 memcpy(blocks, blocks - slice_width, slice_width * sizeof(*blocks));
324 blocks += slice_width;
329 * Write an unsigned rice/exp golomb codeword.
/*
 * The codebook byte packs three fields: bits 0-1 hold switch_bits - 1,
 * bits 2-4 the exp-Golomb order and bits 5+ the Rice order. Values at or
 * above switch_val take the exp-Golomb path, smaller values the Rice path.
 * NOTE(review): some statements of the Rice path are not visible in this
 * excerpt.
 */
331 static inline void encode_vlc_codeword(PutBitContext *pb, unsigned codebook, int val)
333 unsigned int rice_order, exp_order, switch_bits, switch_val;
336 /* number of prefix bits to switch between Rice and expGolomb */
337 switch_bits = (codebook & 3) + 1;
338 rice_order = codebook >> 5; /* rice code order */
339 exp_order = (codebook >> 2) & 7; /* exp golomb code order */
341 switch_val = switch_bits << rice_order;
343 if (val >= switch_val) {
/* rebase the value so that the smallest escaped value is 1 << exp_order */
344 val -= switch_val - (1 << exp_order);
345 exponent = av_log2(val);
/* zero prefix, then the value itself in exponent + 1 bits */
347 put_bits(pb, exponent - exp_order + switch_bits, 0);
348 put_bits(pb, exponent + 1, val);
/* Rice path: the quotient is sent as a run of zero bits */
350 exponent = val >> rice_order;
353 put_bits(pb, exponent, 0);
356 put_sbits(pb, rice_order, val);
/* Sign mask of x: with 32-bit int and an arithmetic right shift this is -1 for
 * negative x and 0 otherwise. */
360 #define GET_SIGN(x) ((x) >> 31)
/* Map a signed value to a non-negative code: x >= 0 gives 2 * x,
 * x < 0 gives -2 * x - 1. */
361 #define MAKE_CODE(x) (((x) << 1) ^ GET_SIGN(x))
/*
 * Write the DC coefficients of all blocks of a slice plane.
 * The first DC is coded on its own with FIRST_DC_CB; the following ones are
 * coded as differences to the previous DC with a codebook that adapts to the
 * size of the previous code.
 *
 * blocks            coefficients, 64 per block, DC at index 0
 * blocks_per_slice  number of blocks to code
 * scale             quantiser step for DC
 *
 * NOTE(review): the statements updating prev_dc and sign are not visible in
 * this excerpt.
 */
363 static void encode_dcs(PutBitContext *pb, int16_t *blocks,
364 int blocks_per_slice, int scale)
367 int codebook = 3, code, dc, prev_dc, delta, sign, new_sign;
/* DC values are offset by 0x4000 before being divided by scale */
369 prev_dc = (blocks[0] - 0x4000) / scale;
370 encode_vlc_codeword(pb, FIRST_DC_CB, MAKE_CODE(prev_dc));
375 for (i = 1; i < blocks_per_slice; i++, blocks += 64) {
376 dc = (blocks[0] - 0x4000) / scale;
377 delta = dc - prev_dc;
378 new_sign = GET_SIGN(delta);
/* negate delta when sign is -1, leave it unchanged when sign is 0 */
379 delta = (delta ^ sign) - sign;
380 code = MAKE_CODE(delta);
381 encode_vlc_codeword(pb, ff_prores_dc_codebook[codebook], code);
/* next codebook index is ceil(code / 2), capped at 3 */
382 codebook = (code + (code & 1)) >> 1;
383 codebook = FFMIN(codebook, 3);
/*
 * Write the AC coefficients of a slice plane as (run, level, sign) triples.
 * Coefficients are visited position by position in scan order, and for each
 * position across all blocks of the slice (stride 64).
 *
 * scan  scan order table (position -> coefficient index)
 * qmat  quantisation matrix, indexed by coefficient index
 *
 * NOTE(review): the test for a non-zero level, the run counter updates and
 * the level argument are not visible in this excerpt.
 */
389 static void encode_acs(PutBitContext *pb, int16_t *blocks,
390 int blocks_per_slice,
391 int plane_size_factor,
392 const uint8_t *scan, const int16_t *qmat)
395 int run, level, run_cb, lev_cb;
396 int max_coeffs, abs_level;
/* 64 coefficients per block */
398 max_coeffs = blocks_per_slice << 6;
/* initial codebook selectors */
399 run_cb = ff_prores_run_to_cb_index[4];
400 lev_cb = ff_prores_lev_to_cb_index[2];
/* position 0 is the DC coefficient, handled by encode_dcs() */
403 for (i = 1; i < 64; i++) {
404 for (idx = scan[i]; idx < max_coeffs; idx += 64) {
405 level = blocks[idx] / qmat[scan[i]];
407 abs_level = FFABS(level);
408 encode_vlc_codeword(pb, ff_prores_ac_codebook[run_cb], run);
409 encode_vlc_codeword(pb, ff_prores_ac_codebook[lev_cb],
/* one sign bit: GET_SIGN() is 0 or -1 */
411 put_sbits(pb, 1, GET_SIGN(level));
/* the next codebooks depend on the run and level just coded */
413 run_cb = ff_prores_run_to_cb_index[FFMIN(run, 15)];
414 lev_cb = ff_prores_lev_to_cb_index[FFMIN(abs_level, 9)];
/*
 * Encode the DC and AC coefficients of one slice plane into pb.
 * Returns the number of bytes added to pb, computed from the bit position
 * before and after coding.
 * NOTE(review): the tail of the parameter list (qmat) and any flush between
 * coding and the return are not visible in this excerpt.
 */
423 static int encode_slice_plane(ProresContext *ctx, PutBitContext *pb,
424 const uint16_t *src, int linesize,
425 int mbs_per_slice, int16_t *blocks,
426 int blocks_per_mb, int plane_size_factor,
429 int blocks_per_slice, saved_pos;
431 saved_pos = put_bits_count(pb);
432 blocks_per_slice = mbs_per_slice * blocks_per_mb;
/* qmat[0] is the quantiser step applied to DC */
434 encode_dcs(pb, blocks, blocks_per_slice, qmat[0]);
435 encode_acs(pb, blocks, blocks_per_slice, plane_size_factor,
436 ctx->scantable, qmat);
439 return (put_bits_count(pb) - saved_pos) >> 3;
/*
 * Write the difference between the current and previous alpha sample.
 * Small non-zero differences are written as a magnitude of dbits - 1 bits
 * followed by a sign bit; zero or large differences are written in abits bits.
 * NOTE(review): the wrap-around adjustment and the flag bits selecting the
 * two forms are not visible in this excerpt.
 */
442 static void put_alpha_diff(PutBitContext *pb, int cur, int prev, int abits)
/* short form size: 4 bits for 8-bit alpha, 7 bits otherwise */
444 const int dbits = (abits == 8) ? 4 : 7;
/* "-" binds tighter than "<<": this is 1 << (dbits - 1) */
445 const int dsize = 1 << dbits - 1;
446 int diff = cur - prev;
/* reduce the difference modulo 2^abits */
448 diff = av_mod_uintp2(diff, abits);
449 if (diff >= (1 << abits) - dsize)
451 if (diff < -dsize || diff > dsize || !diff) {
453 put_bits(pb, abits, diff);
456 put_bits(pb, dbits - 1, FFABS(diff) - 1);
457 put_bits(pb, 1, diff < 0);
/*
 * Write the length of a run of identical alpha samples, using either a 4-bit
 * or a 15-bit field.
 * NOTE(review): the conditions selecting between the two forms and any
 * prefix bits are not visible in this excerpt.
 */
461 static void put_alpha_run(PutBitContext *pb, int run)
466 put_bits(pb, 4, run);
468 put_bits(pb, 15, run);
474 // todo alpha quantisation for high quants
/*
 * Encode the alpha samples of one slice (mbs_per_slice * 256 values) as
 * sample differences and run lengths.
 * Returns the number of bytes added to pb.
 * NOTE(review): the sample fetching and run counting statements are not
 * visible in this excerpt.
 */
475 static int encode_alpha_plane(ProresContext *ctx, PutBitContext *pb,
476 int mbs_per_slice, uint16_t *blocks,
479 const int abits = ctx->alpha_bits;
480 const int mask = (1 << abits) - 1;
481 const int num_coeffs = mbs_per_slice * 256;
482 int saved_pos = put_bits_count(pb);
/* the predictor starts at the maximum alpha value */
483 int prev = mask, cur;
488 put_alpha_diff(pb, cur, prev, abits);
493 put_alpha_run (pb, run);
494 put_alpha_diff(pb, cur, prev, abits);
500 } while (idx < num_coeffs);
502 put_alpha_run(pb, run);
504 return (put_bits_count(pb) - saved_pos) >> 3;
/*
 * Encode all planes of one slice with the given quantiser.
 *
 * sizes  receives the coded size in bytes of every plane
 * x, y   slice position in macroblocks
 * quant  quantiser for this slice
 *
 * Logs an error when the bit writer runs out of space.
 * NOTE(review): parts of the parameter list, the per-plane geometry
 * assignments and the return statements are not visible in this excerpt.
 */
507 static int encode_slice(AVCodecContext *avctx, const AVFrame *pic,
509 int sizes[4], int x, int y, int quant,
512 ProresContext *ctx = avctx->priv_data;
516 int slice_width_factor = av_log2(mbs_per_slice);
517 int num_cblocks, pwidth, linesize, line_add;
518 int plane_factor, is_chroma;
/* for interlaced coding, line_add selects which field's lines are read */
521 if (ctx->pictures_per_frame == 1)
524 line_add = ctx->cur_picture_idx ^ !pic->top_field_first;
/* choose the quantisation matrix: fixed quantiser table, cached table, or
 * one built on the fly in custom_q */
526 if (ctx->force_quant) {
527 qmat = ctx->quants[0];
528 } else if (quant < MAX_STORED_Q) {
529 qmat = ctx->quants[quant];
531 qmat = ctx->custom_q;
532 for (i = 0; i < 64; i++)
533 qmat[i] = ctx->quant_mat[i] * quant;
536 for (i = 0; i < ctx->num_planes; i++) {
/* planes 1 and 2 are chroma */
537 is_chroma = (i == 1 || i == 2);
538 plane_factor = slice_width_factor + 2;
540 plane_factor += ctx->chroma_factor - 3;
/* full-width planes versus horizontally subsampled chroma */
541 if (!is_chroma || ctx->chroma_factor == CFACTOR_Y444) {
545 pwidth = avctx->width;
550 pwidth = avctx->width >> 1;
/* with two pictures per frame every other line belongs to this picture */
553 linesize = pic->linesize[i] * ctx->pictures_per_frame;
554 src = (const uint16_t*)(pic->data[i] + yp * linesize +
555 line_add * pic->linesize[i]) + xp;
/* transform-coded plane */
558 get_slice_data(ctx, src, linesize, xp, yp,
559 pwidth, avctx->height / ctx->pictures_per_frame,
560 ctx->blocks[0], ctx->emu_buf,
561 mbs_per_slice, num_cblocks, is_chroma);
562 sizes[i] = encode_slice_plane(ctx, pb, src, linesize,
563 mbs_per_slice, ctx->blocks[0],
564 num_cblocks, plane_factor,
/* alpha plane */
567 get_alpha_data(ctx, src, linesize, xp, yp,
568 pwidth, avctx->height / ctx->pictures_per_frame,
569 ctx->blocks[0], mbs_per_slice, ctx->alpha_bits);
570 sizes[i] = encode_alpha_plane(ctx, pb, mbs_per_slice,
571 ctx->blocks[0], quant);
573 total_size += sizes[i];
574 if (put_bits_left(pb) < 0) {
575 av_log(avctx, AV_LOG_ERROR,
576 "Underestimated required buffer size.\n");
/*
 * Return the number of bits a codeword would take, using the same codebook
 * field layout and switch threshold as encode_vlc_codeword(), without
 * writing anything.
 */
583 static inline int estimate_vlc(unsigned codebook, int val)
585 unsigned int rice_order, exp_order, switch_bits, switch_val;
588 /* number of prefix bits to switch between Rice and expGolomb */
589 switch_bits = (codebook & 3) + 1;
590 rice_order = codebook >> 5; /* rice code order */
591 exp_order = (codebook >> 2) & 7; /* exp golomb code order */
593 switch_val = switch_bits << rice_order;
595 if (val >= switch_val) {
596 val -= switch_val - (1 << exp_order);
597 exponent = av_log2(val);
/* (exponent - exp_order + switch_bits) prefix bits plus
 * (exponent + 1) value bits */
599 return exponent * 2 - exp_order + switch_bits + 1;
/* Rice path: quotient bits, one extra bit and rice_order remainder bits */
601 return (val >> rice_order) + rice_order + 1;
/*
 * Estimate the bit cost of the DC coefficients of a slice plane, using the
 * same prediction and codebook adaptation as encode_dcs(), and add the
 * quantisation remainders to *error.
 * NOTE(review): the tail of the parameter list (scale), the prev_dc/sign
 * updates and the return statement are not visible in this excerpt.
 */
605 static int estimate_dcs(int *error, int16_t *blocks, int blocks_per_slice,
609 int codebook = 3, code, dc, prev_dc, delta, sign, new_sign;
612 prev_dc = (blocks[0] - 0x4000) / scale;
613 bits = estimate_vlc(FIRST_DC_CB, MAKE_CODE(prev_dc));
/* the remainder of the division is what quantisation discards */
617 *error += FFABS(blocks[0] - 0x4000) % scale;
619 for (i = 1; i < blocks_per_slice; i++, blocks += 64) {
620 dc = (blocks[0] - 0x4000) / scale;
621 *error += FFABS(blocks[0] - 0x4000) % scale;
622 delta = dc - prev_dc;
623 new_sign = GET_SIGN(delta);
/* negate delta when sign is -1, leave it unchanged when sign is 0 */
624 delta = (delta ^ sign) - sign;
625 code = MAKE_CODE(delta);
626 bits += estimate_vlc(ff_prores_dc_codebook[codebook], code);
/* next codebook index is ceil(code / 2), capped at 3 */
627 codebook = (code + (code & 1)) >> 1;
628 codebook = FFMIN(codebook, 3);
/*
 * Estimate the bit cost of the AC coefficients of a slice plane, walking the
 * coefficients in the same order as encode_acs(), and add the quantisation
 * remainders to *error.
 * NOTE(review): the non-zero test, run counter updates and the return
 * statement are not visible in this excerpt.
 */
636 static int estimate_acs(int *error, int16_t *blocks, int blocks_per_slice,
637 int plane_size_factor,
638 const uint8_t *scan, const int16_t *qmat)
641 int run, level, run_cb, lev_cb;
642 int max_coeffs, abs_level;
/* 64 coefficients per block */
645 max_coeffs = blocks_per_slice << 6;
646 run_cb = ff_prores_run_to_cb_index[4];
647 lev_cb = ff_prores_lev_to_cb_index[2];
/* position 0 is the DC coefficient, handled by estimate_dcs() */
650 for (i = 1; i < 64; i++) {
651 for (idx = scan[i]; idx < max_coeffs; idx += 64) {
652 level = blocks[idx] / qmat[scan[i]];
/* remainder discarded by the division above */
653 *error += FFABS(blocks[idx]) % qmat[scan[i]];
655 abs_level = FFABS(level);
656 bits += estimate_vlc(ff_prores_ac_codebook[run_cb], run);
657 bits += estimate_vlc(ff_prores_ac_codebook[lev_cb],
660 run_cb = ff_prores_run_to_cb_index[FFMIN(run, 15)];
661 lev_cb = ff_prores_lev_to_cb_index[FFMIN(abs_level, 9)];
/*
 * Estimate the coded size in bits of one slice plane for the matrix qmat,
 * reading the coefficients from td->blocks[plane]. The result is rounded up
 * to a whole number of bytes (a multiple of 8 bits). Quantisation error is
 * accumulated into *error.
 */
672 static int estimate_slice_plane(ProresContext *ctx, int *error, int plane,
673 const uint16_t *src, int linesize,
675 int blocks_per_mb, int plane_size_factor,
676 const int16_t *qmat, ProresThreadData *td)
678 int blocks_per_slice;
681 blocks_per_slice = mbs_per_slice * blocks_per_mb;
683 bits = estimate_dcs(error, td->blocks[plane], blocks_per_slice, qmat[0]);
684 bits += estimate_acs(error, td->blocks[plane], blocks_per_slice,
685 plane_size_factor, ctx->scantable, qmat);
687 return FFALIGN(bits, 8);
/*
 * Estimate the number of bits put_alpha_diff() would use for the same
 * arguments; the classification of the difference mirrors that function.
 * NOTE(review): the wrap-around adjustment and the return statements are not
 * visible in this excerpt.
 */
690 static int est_alpha_diff(int cur, int prev, int abits)
692 const int dbits = (abits == 8) ? 4 : 7;
/* "-" binds tighter than "<<": this is 1 << (dbits - 1) */
693 const int dsize = 1 << dbits - 1;
694 int diff = cur - prev;
696 diff = av_mod_uintp2(diff, abits);
697 if (diff >= (1 << abits) - dsize)
699 if (diff < -dsize || diff > dsize || !diff)
/*
 * Estimate the coded size of the alpha samples of one slice
 * (mbs_per_slice * 256 values), summing est_alpha_diff() results.
 * NOTE(review): the run handling, the blocks parameter and the return
 * statement are not visible in this excerpt.
 */
705 static int estimate_alpha_plane(ProresContext *ctx, int *error,
706 const uint16_t *src, int linesize,
707 int mbs_per_slice, int quant,
710 const int abits = ctx->alpha_bits;
711 const int mask = (1 << abits) - 1;
712 const int num_coeffs = mbs_per_slice * 256;
/* the predictor starts at the maximum alpha value */
713 int prev = mask, cur;
720 bits = est_alpha_diff(cur, prev, abits);
731 bits += est_alpha_diff(cur, prev, abits);
737 } while (idx < num_coeffs);
/*
 * Evaluate the candidate quantisers for one slice and update the trellis
 * nodes of this slice column.
 *
 * trellis_node   index of the first node of this slice column in td->nodes;
 *                the previous column starts TRELLIS_WIDTH nodes earlier
 * x, y           slice position in macroblocks
 * mbs_per_slice  slice width in macroblocks
 * td             per-thread scratch data
 *
 * Steps visible below: load and transform the slice data of every plane,
 * estimate bits and error for each quantiser from min_quant to max_quant,
 * derive one extra "overquant" candidate stored at index max_quant + 1,
 * then extend the best paths from the previous column under the cumulative
 * bit limit and select the node with the lowest score.
 * NOTE(review): several statements (resets of bits/error, assignments of
 * xp/yp/num_cblocks, the return value) are not visible in this excerpt.
 */
749 static int find_slice_quant(AVCodecContext *avctx,
750 int trellis_node, int x, int y, int mbs_per_slice,
751 ProresThreadData *td)
753 ProresContext *ctx = avctx->priv_data;
754 int i, q, pq, xp, yp;
756 int slice_width_factor = av_log2(mbs_per_slice);
757 int num_cblocks[MAX_PLANES], pwidth;
758 int plane_factor[MAX_PLANES], is_chroma[MAX_PLANES];
759 const int min_quant = ctx->profile_info->min_quant;
760 const int max_quant = ctx->profile_info->max_quant;
761 int error, bits, bits_limit;
762 int mbs, prev, cur, new_score;
763 int slice_bits[TRELLIS_WIDTH], slice_score[TRELLIS_WIDTH];
766 int linesize[4], line_add;
/* for interlaced coding, line_add selects which field's lines are read */
768 if (ctx->pictures_per_frame == 1)
771 line_add = ctx->cur_picture_idx ^ !ctx->pic->top_field_first;
/* macroblocks coded in this row up to and including this slice */
772 mbs = x + mbs_per_slice;
/* fetch the data of every plane into td->blocks */
774 for (i = 0; i < ctx->num_planes; i++) {
775 is_chroma[i] = (i == 1 || i == 2);
776 plane_factor[i] = slice_width_factor + 2;
778 plane_factor[i] += ctx->chroma_factor - 3;
779 if (!is_chroma[i] || ctx->chroma_factor == CFACTOR_Y444) {
783 pwidth = avctx->width;
788 pwidth = avctx->width >> 1;
791 linesize[i] = ctx->pic->linesize[i] * ctx->pictures_per_frame;
792 src = (const uint16_t *)(ctx->pic->data[i] + yp * linesize[i] +
793 line_add * ctx->pic->linesize[i]) + xp;
796 get_slice_data(ctx, src, linesize[i], xp, yp,
797 pwidth, avctx->height / ctx->pictures_per_frame,
798 td->blocks[i], td->emu_buf,
799 mbs_per_slice, num_cblocks[i], is_chroma[i]);
801 get_alpha_data(ctx, src, linesize[i], xp, yp,
802 pwidth, avctx->height / ctx->pictures_per_frame,
803 td->blocks[i], mbs_per_slice, ctx->alpha_bits);
/* reset the nodes of this column; index max_quant + 1 is the overquant slot */
807 for (q = min_quant; q < max_quant + 2; q++) {
808 td->nodes[trellis_node + q].prev_node = -1;
809 td->nodes[trellis_node + q].quant = q;
812 // todo: maybe perform coarser quantising to fit into frame size when needed
/* cost of the slice for every regular quantiser */
813 for (q = min_quant; q <= max_quant; q++) {
/* the alpha plane, when present, is the last plane and is estimated separately */
816 for (i = 0; i < ctx->num_planes - !!ctx->alpha_bits; i++) {
817 bits += estimate_slice_plane(ctx, &error, i,
820 num_cblocks[i], plane_factor[i],
824 bits += estimate_alpha_plane(ctx, &error, src, linesize[3],
825 mbs_per_slice, q, td->blocks[3]);
826 if (bits > 65000 * 8)
829 slice_bits[q] = bits;
830 slice_score[q] = error;
/* if max_quant already fits the per-slice budget, reuse it as overquant */
832 if (slice_bits[max_quant] <= ctx->bits_per_mb * mbs_per_slice) {
833 slice_bits[max_quant + 1] = slice_bits[max_quant];
834 slice_score[max_quant + 1] = slice_score[max_quant] + 1;
835 overquant = max_quant;
/* otherwise look for a larger quantiser that fits the budget */
837 for (q = max_quant + 1; q < 128; q++) {
840 if (q < MAX_STORED_Q) {
841 qmat = ctx->quants[q];
844 for (i = 0; i < 64; i++)
845 qmat[i] = ctx->quant_mat[i] * q;
847 for (i = 0; i < ctx->num_planes - !!ctx->alpha_bits; i++) {
848 bits += estimate_slice_plane(ctx, &error, i,
851 num_cblocks[i], plane_factor[i],
855 bits += estimate_alpha_plane(ctx, &error, src, linesize[3],
856 mbs_per_slice, q, td->blocks[3]);
857 if (bits <= ctx->bits_per_mb * mbs_per_slice)
861 slice_bits[max_quant + 1] = bits;
862 slice_score[max_quant + 1] = error;
865 td->nodes[trellis_node + max_quant + 1].quant = overquant;
/* cumulative bit budget for the row up to this slice */
867 bits_limit = mbs * ctx->bits_per_mb;
/* try every (previous quantiser, current quantiser) pair */
868 for (pq = min_quant; pq < max_quant + 2; pq++) {
869 prev = trellis_node - TRELLIS_WIDTH + pq;
871 for (q = min_quant; q < max_quant + 2; q++) {
872 cur = trellis_node + q;
874 bits = td->nodes[prev].bits + slice_bits[q];
875 error = slice_score[q];
876 if (bits > bits_limit)
/* add the scores without exceeding SCORE_LIMIT */
879 if (td->nodes[prev].score < SCORE_LIMIT && error < SCORE_LIMIT)
880 new_score = td->nodes[prev].score + error;
882 new_score = SCORE_LIMIT;
/* keep this path if the node is unset or the new score is not worse */
883 if (td->nodes[cur].prev_node == -1 ||
884 td->nodes[cur].score >= new_score) {
886 td->nodes[cur].bits = bits;
887 td->nodes[cur].score = new_score;
888 td->nodes[cur].prev_node = prev;
/* pick the node of this column with the lowest score; on ties the higher
 * quantiser index wins because of "<=" */
893 error = td->nodes[trellis_node + min_quant].score;
894 pq = trellis_node + min_quant;
895 for (q = min_quant + 1; q < max_quant + 2; q++) {
896 if (td->nodes[trellis_node + q].score <= error) {
897 error = td->nodes[trellis_node + q].score;
898 pq = trellis_node + q;
/*
 * Worker passed to avctx->execute2(): chooses the quantisers of one row of
 * slices. jobnr is the macroblock row, threadnr selects the scratch data.
 * The row is processed left to right with find_slice_quant(), then the
 * trellis is walked backwards through prev_node to store the chosen
 * quantiser of every slice in ctx->slice_q.
 * NOTE(review): the body of the while loop and the return statement are not
 * visible in this excerpt.
 */
905 static int find_quant_thread(AVCodecContext *avctx, void *arg,
906 int jobnr, int threadnr)
908 ProresContext *ctx = avctx->priv_data;
909 ProresThreadData *td = ctx->tdata + threadnr;
910 int mbs_per_slice = ctx->mbs_per_slice;
911 int x, y = jobnr, mb, q = 0;
913 for (x = mb = 0; x < ctx->mb_width; x += mbs_per_slice, mb++) {
/* runs while fewer macroblocks remain than the current slice width */
914 while (ctx->mb_width - x < mbs_per_slice)
/* column mb uses the nodes starting at (mb + 1) * TRELLIS_WIDTH; the
 * first TRELLIS_WIDTH nodes are the start state set up in encode_init() */
916 q = find_slice_quant(avctx,
917 (mb + 1) * TRELLIS_WIDTH, x, y,
/* backtrack from the last slice to the first */
921 for (x = ctx->slices_width - 1; x >= 0; x--) {
922 ctx->slice_q[x + y * ctx->slices_width] = td->nodes[q].quant;
923 q = td->nodes[q].prev_node;
/*
 * Encode one frame into pkt.
 *
 * Layout written below: 4-byte frame size, frame container ID, frame header
 * (version, vendor, dimensions, flags, colour description, optional
 * quantisation matrices), then for every picture a picture header, a table
 * of slice sizes and the slices themselves. Size fields are reserved first
 * and filled in once the sizes are known.
 *
 * The packet is allocated from ctx->frame_size_upper_bound and grown with
 * av_grow_packet() if the remaining space gets too small.
 * NOTE(review): error checks, some pointer initialisations and the return
 * statements are not visible in this excerpt.
 */
929 static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
930 const AVFrame *pic, int *got_packet)
932 ProresContext *ctx = avctx->priv_data;
933 uint8_t *orig_buf, *buf, *slice_hdr, *slice_sizes, *tmp;
934 uint8_t *picture_size_pos;
936 int x, y, i, mb, q = 0;
937 int sizes[4] = { 0 };
/* one header-size byte and one quantiser byte, plus a 16-bit size for every
 * plane except the last */
938 int slice_hdr_size = 2 + 2 * (ctx->num_planes - 1);
939 int frame_size, picture_size, slice_size;
/* initial guess of the largest slice, derived from the frame size bound */
941 int max_slice_size = (ctx->frame_size_upper_bound - 200) / (ctx->pictures_per_frame * ctx->slices_per_picture + 1);
945 pkt_size = ctx->frame_size_upper_bound;
947 if ((ret = ff_alloc_packet2(avctx, pkt, pkt_size + AV_INPUT_BUFFER_MIN_SIZE, 0)) < 0)
950 orig_buf = pkt->data;
953 orig_buf += 4; // frame size
954 bytestream_put_be32 (&orig_buf, FRAME_ID); // frame container ID
959 buf += 2; // frame header size will be stored here
960 bytestream_put_be16 (&buf, 0); // version 1
961 bytestream_put_buffer(&buf, ctx->vendor, 4);
962 bytestream_put_be16 (&buf, avctx->width);
963 bytestream_put_be16 (&buf, avctx->height);
/* chroma format in the top bits, field order in bits 2-3 when interlaced */
965 frame_flags = ctx->chroma_factor << 6;
966 if (avctx->flags & AV_CODEC_FLAG_INTERLACED_DCT)
967 frame_flags |= pic->top_field_first ? 0x04 : 0x08;
968 bytestream_put_byte (&buf, frame_flags);
970 bytestream_put_byte (&buf, 0); // reserved
971 bytestream_put_byte (&buf, avctx->color_primaries);
972 bytestream_put_byte (&buf, avctx->color_trc);
973 bytestream_put_byte (&buf, avctx->colorspace);
/* low bits carry alpha_bits / 8 (0, 1 or 2) */
974 bytestream_put_byte (&buf, 0x40 | (ctx->alpha_bits >> 3));
975 bytestream_put_byte (&buf, 0); // reserved
976 if (ctx->quant_sel != QUANT_MAT_DEFAULT) {
977 bytestream_put_byte (&buf, 0x03); // matrix flags - both matrices are present
978 // luma quantisation matrix
979 for (i = 0; i < 64; i++)
980 bytestream_put_byte(&buf, ctx->quant_mat[i]);
981 // chroma quantisation matrix
/* the same matrix is written for luma and chroma */
982 for (i = 0; i < 64; i++)
983 bytestream_put_byte(&buf, ctx->quant_mat[i]);
985 bytestream_put_byte (&buf, 0x00); // matrix flags - default matrices are used
987 bytestream_put_be16 (&tmp, buf - orig_buf); // write back frame header size
/* one pass per picture: 1 for progressive, 2 for interlaced */
989 for (ctx->cur_picture_idx = 0;
990 ctx->cur_picture_idx < ctx->pictures_per_frame;
991 ctx->cur_picture_idx++) {
/* the picture size field follows the header-size byte */
993 picture_size_pos = buf + 1;
994 bytestream_put_byte (&buf, 0x40); // picture header size (in bits)
995 buf += 4; // picture data size will be stored here
996 bytestream_put_be16 (&buf, ctx->slices_per_picture);
997 bytestream_put_byte (&buf, av_log2(ctx->mbs_per_slice) << 4); // slice width and height in MBs
999 // seek table - will be filled during slice encoding
1001 buf += ctx->slices_per_picture * 2;
/* without a fixed quantiser, run the per-row quantiser search first */
1004 if (!ctx->force_quant) {
1005 ret = avctx->execute2(avctx, find_quant_thread, (void*)pic, NULL,
1011 for (y = 0; y < ctx->mb_height; y++) {
1012 int mbs_per_slice = ctx->mbs_per_slice;
1013 for (x = mb = 0; x < ctx->mb_width; x += mbs_per_slice, mb++) {
1014 q = ctx->force_quant ? ctx->force_quant
1015 : ctx->slice_q[mb + y * ctx->slices_width];
/* halve the slice width until it fits the remaining macroblocks */
1017 while (ctx->mb_width - x < mbs_per_slice)
1018 mbs_per_slice >>= 1;
/* slice header size is stored in bits */
1020 bytestream_put_byte(&buf, slice_hdr_size << 3);
1022 buf += slice_hdr_size - 1;
/* grow the packet when less than two maximum-size slices of room remain */
1023 if (pkt_size <= buf - orig_buf + 2 * max_slice_size) {
1024 uint8_t *start = pkt->data;
1025 // Recompute new size according to max_slice_size
1027 int delta = 200 + (ctx->pictures_per_frame *
1028 ctx->slices_per_picture + 1) *
1029 max_slice_size - pkt_size;
1031 delta = FFMAX(delta, 2 * max_slice_size);
1032 ctx->frame_size_upper_bound += delta;
1035 avpriv_request_sample(avctx,
1036 "Packet too small: is %i,"
1037 " needs %i (slice: %i). "
1038 "Correct allocation",
1039 pkt_size, delta, max_slice_size);
1043 ret = av_grow_packet(pkt, delta);
/* pkt->data may have moved: rebase every pointer into the packet */
1049 orig_buf = pkt->data + (orig_buf - start);
1050 buf = pkt->data + (buf - start);
1051 picture_size_pos = pkt->data + (picture_size_pos - start);
1052 slice_sizes = pkt->data + (slice_sizes - start);
1053 slice_hdr = pkt->data + (slice_hdr - start);
1054 tmp = pkt->data + (tmp - start);
1056 init_put_bits(&pb, buf, (pkt_size - (buf - orig_buf)));
1057 ret = encode_slice(avctx, pic, &pb, sizes, x, y, q,
/* fill in the slice header: quantiser and the sizes of all planes but
 * the last */
1062 bytestream_put_byte(&slice_hdr, q);
1063 slice_size = slice_hdr_size + sizes[ctx->num_planes - 1];
1064 for (i = 0; i < ctx->num_planes - 1; i++) {
1065 bytestream_put_be16(&slice_hdr, sizes[i]);
1066 slice_size += sizes[i];
/* entry in the slice size table written after the picture header */
1068 bytestream_put_be16(&slice_sizes, slice_size);
1069 buf += slice_size - slice_hdr_size;
/* remember the largest slice seen for later space checks */
1070 if (max_slice_size < slice_size)
1071 max_slice_size = slice_size;
/* picture size is counted from the header-size byte */
1075 picture_size = buf - (picture_size_pos - 1);
1076 bytestream_put_be32(&picture_size_pos, picture_size);
1080 frame_size = buf - orig_buf;
1081 bytestream_put_be32(&orig_buf, frame_size);
1083 pkt->size = frame_size;
1084 pkt->flags |= AV_PKT_FLAG_KEY;
/*
 * Release the buffers allocated by encode_init(): the trellis nodes of every
 * thread, the per-thread data array and the slice quantiser table.
 * Also called from encode_init() on allocation failure.
 * NOTE(review): any guard around the loop (ctx->tdata may be NULL when an
 * early allocation fails) and the return statement are not visible here.
 */
1090 static av_cold int encode_close(AVCodecContext *avctx)
1092 ProresContext *ctx = avctx->priv_data;
1096 for (i = 0; i < avctx->thread_count; i++)
1097 av_freep(&ctx->tdata[i].nodes);
1099 av_freep(&ctx->tdata);
1100 av_freep(&ctx->slice_q);
/*
 * Transform callback installed in ctx->fdct: copies an 8x8 block of 16-bit
 * samples from src into block, row by row.
 * NOTE(review): the statement that performs the actual transform on block
 * (presumably through fdsp) is not visible in this excerpt.
 */
1105 static void prores_fdct(FDCTDSPContext *fdsp, const uint16_t *src,
1106 int linesize, int16_t *block)
1109 const uint16_t *tsrc = src;
1111 for (y = 0; y < 8; y++) {
1112 for (x = 0; x < 8; x++)
1113 block[y * 8 + x] = tsrc[x];
/* linesize is in bytes, tsrc points to 16-bit samples */
1114 tsrc += linesize >> 1;
/*
 * Initialise the encoder: validate the options, resolve the profile, derive
 * the picture geometry in macroblocks and slices, select the quantisation
 * matrix, prepare either the rate-controlled path (cached matrices, slice
 * quantiser table, per-thread trellis storage) or the fixed-quantiser path,
 * and compute the packet size upper bound.
 *
 * Returns AVERROR(EINVAL) or AVERROR_INVALIDDATA for rejected options and
 * AVERROR(ENOMEM) on allocation failure.
 * NOTE(review): several statements (else branches, a loop break, the
 * success return) are not visible in this excerpt.
 */
1119 static av_cold int encode_init(AVCodecContext *avctx)
1121 ProresContext *ctx = avctx->priv_data;
1124 int min_quant, max_quant;
1125 int interlaced = !!(avctx->flags & AV_CODEC_FLAG_INTERLACED_DCT);
1127 avctx->bits_per_raw_sample = 10;
1128 #if FF_API_CODED_FRAME
1129 FF_DISABLE_DEPRECATION_WARNINGS
1130 avctx->coded_frame->pict_type = AV_PICTURE_TYPE_I;
1131 avctx->coded_frame->key_frame = 1;
1132 FF_ENABLE_DEPRECATION_WARNINGS
1135 ctx->fdct = prores_fdct;
1136 ctx->scantable = interlaced ? ff_prores_interlaced_scan
1137 : ff_prores_progressive_scan;
1138 ff_fdctdsp_init(&ctx->fdsp, avctx);
/* mps & (mps - 1) is non-zero unless mps is a power of two */
1140 mps = ctx->mbs_per_slice;
1141 if (mps & (mps - 1)) {
1142 av_log(avctx, AV_LOG_ERROR,
1143 "there should be an integer power of two MBs per slice\n");
1144 return AVERROR(EINVAL);
/* automatic profile: 4444 for formats with alpha or without chroma
 * subsampling, HQ otherwise */
1146 if (ctx->profile == PRORES_PROFILE_AUTO) {
1147 const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(avctx->pix_fmt);
1148 ctx->profile = (desc->flags & AV_PIX_FMT_FLAG_ALPHA ||
1149 !(desc->log2_chroma_w + desc->log2_chroma_h))
1150 ? PRORES_PROFILE_4444 : PRORES_PROFILE_HQ;
1151 av_log(avctx, AV_LOG_INFO, "Autoselected %s. It can be overridden "
1152 "through -profile option.\n", ctx->profile == PRORES_PROFILE_4444
1153 ? "4:4:4:4 profile because of the used input colorspace"
1154 : "HQ profile to keep best quality");
1156 if (av_pix_fmt_desc_get(avctx->pix_fmt)->flags & AV_PIX_FMT_FLAG_ALPHA) {
1157 if (ctx->profile != PRORES_PROFILE_4444) {
1158 // alpha is dropped for profiles other than 4444: warn and disable it
1159 av_log(avctx, AV_LOG_WARNING, "Profile selected will not "
1160 "encode alpha. Override with -profile if needed.\n");
1161 ctx->alpha_bits = 0;
/* any of the low three bits set means the value is not a multiple of 8 */
1163 if (ctx->alpha_bits & 7) {
1164 av_log(avctx, AV_LOG_ERROR, "alpha bits should be 0, 8 or 16\n");
1165 return AVERROR(EINVAL);
1167 avctx->bits_per_coded_sample = 32;
1169 ctx->alpha_bits = 0;
1172 ctx->chroma_factor = avctx->pix_fmt == AV_PIX_FMT_YUV422P10
1175 ctx->profile_info = prores_profile_info + ctx->profile;
1176 ctx->num_planes = 3 + !!ctx->alpha_bits;
/* picture width in 16-sample macroblocks, rounded up */
1178 ctx->mb_width = FFALIGN(avctx->width, 16) >> 4;
/* two height computations: 32-line units or 16-line units; presumably the
 * former is for interlaced coding - the condition is not visible here */
1181 ctx->mb_height = FFALIGN(avctx->height, 32) >> 5;
1183 ctx->mb_height = FFALIGN(avctx->height, 16) >> 4;
/* full-width slices plus one narrower slice per set bit of the remainder */
1185 ctx->slices_width = ctx->mb_width / mps;
1186 ctx->slices_width += av_popcount(ctx->mb_width - ctx->slices_width * mps);
1187 ctx->slices_per_picture = ctx->mb_height * ctx->slices_width;
1188 ctx->pictures_per_frame = 1 + interlaced;
/* quant_sel == -1 means "use the profile's default matrix" */
1190 if (ctx->quant_sel == -1)
1191 ctx->quant_mat = prores_quant_matrices[ctx->profile_info->quant];
1193 ctx->quant_mat = prores_quant_matrices[ctx->quant_sel];
/* exactly four bytes of the vendor string are written to the frame header */
1195 if (strlen(ctx->vendor) != 4) {
1196 av_log(avctx, AV_LOG_ERROR, "vendor ID should be 4 bytes\n");
1197 return AVERROR_INVALIDDATA;
/* a non-zero global_quality selects a fixed quantiser */
1200 ctx->force_quant = avctx->global_quality / FF_QP2LAMBDA;
1201 if (!ctx->force_quant) {
/* no explicit bits_per_mb: take the profile default for this picture size */
1202 if (!ctx->bits_per_mb) {
1203 for (i = 0; i < NUM_MB_LIMITS - 1; i++)
1204 if (prores_mb_limits[i] >= ctx->mb_width * ctx->mb_height *
1205 ctx->pictures_per_frame)
1207 ctx->bits_per_mb = ctx->profile_info->br_tab[i];
1208 } else if (ctx->bits_per_mb < 128) {
1209 av_log(avctx, AV_LOG_ERROR, "too few bits per MB, please set at least 128\n");
1210 return AVERROR_INVALIDDATA;
/* precompute the scaled matrices for quantisers below MAX_STORED_Q */
1213 min_quant = ctx->profile_info->min_quant;
1214 max_quant = ctx->profile_info->max_quant;
1215 for (i = min_quant; i < MAX_STORED_Q; i++) {
1216 for (j = 0; j < 64; j++)
1217 ctx->quants[i][j] = ctx->quant_mat[j] * i;
/* one quantiser per slice of a picture */
1220 ctx->slice_q = av_malloc(ctx->slices_per_picture * sizeof(*ctx->slice_q));
1221 if (!ctx->slice_q) {
1222 encode_close(avctx);
1223 return AVERROR(ENOMEM);
1226 ctx->tdata = av_mallocz(avctx->thread_count * sizeof(*ctx->tdata));
1228 encode_close(avctx);
1229 return AVERROR(ENOMEM);
/* per-thread trellis storage, sized from slices_width + 1 columns */
1232 for (j = 0; j < avctx->thread_count; j++) {
1233 ctx->tdata[j].nodes = av_malloc((ctx->slices_width + 1)
1235 * sizeof(*ctx->tdata->nodes));
1236 if (!ctx->tdata[j].nodes) {
1237 encode_close(avctx);
1238 return AVERROR(ENOMEM);
/* start state of the trellis: zero cost, no predecessor */
1240 for (i = min_quant; i < max_quant + 2; i++) {
1241 ctx->tdata[j].nodes[i].prev_node = -1;
1242 ctx->tdata[j].nodes[i].bits = 0;
1243 ctx->tdata[j].nodes[i].score = 0;
/* fixed-quantiser path */
1249 if (ctx->force_quant > 64) {
1250 av_log(avctx, AV_LOG_ERROR, "too large quantiser, maximum is 64\n");
1251 return AVERROR_INVALIDDATA;
/* build the single matrix and accumulate ls, from which bits_per_mb is
 * derived below */
1254 for (j = 0; j < 64; j++) {
1255 ctx->quants[0][j] = ctx->quant_mat[j] * ctx->force_quant;
1256 ls += av_log2((1 << 11) / ctx->quants[0][j]) * 2 + 1;
1259 ctx->bits_per_mb = ls * 8;
1260 if (ctx->chroma_factor == CFACTOR_Y444)
1261 ctx->bits_per_mb += ls * 4;
/* packet size bound: per slice a header plus the macroblock bit budget in
 * bytes, for all slices of all pictures plus one */
1264 ctx->frame_size_upper_bound = (ctx->pictures_per_frame *
1265 ctx->slices_per_picture + 1) *
1266 (2 + 2 * ctx->num_planes +
1267 (mps * ctx->bits_per_mb) / 8)
1270 if (ctx->alpha_bits) {
1271 // The alpha plane is run-coded and might exceed the bit budget.
/* "+" binds tighter than ">>": the bit count is rounded up to bytes */
1272 ctx->frame_size_upper_bound += (ctx->pictures_per_frame *
1273 ctx->slices_per_picture + 1) *
1274 /* num pixels per slice */ (ctx->mbs_per_slice * 256 *
1275 /* bits per pixel */ (1 + ctx->alpha_bits + 1) + 7 >> 3);
1278 avctx->codec_tag = ctx->profile_info->tag;
1280 av_log(avctx, AV_LOG_DEBUG,
1281 "profile %d, %d slices, interlacing: %s, %d bits per MB\n",
1282 ctx->profile, ctx->slices_per_picture * ctx->pictures_per_frame,
1283 interlaced ? "yes" : "no", ctx->bits_per_mb);
1284 av_log(avctx, AV_LOG_DEBUG, "frame size upper bound: %d\n",
1285 ctx->frame_size_upper_bound);
/* Byte offset of a ProresContext member, for the AVOption table. */
1290 #define OFFSET(x) offsetof(ProresContext, x)
/* Flags shared by every option: video, encoding. */
1291 #define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
/*
 * Private options of the encoder. Entries of type AV_OPT_TYPE_CONST give
 * names to the values of the option whose unit string ("profile" or
 * "quant_mat") they share.
 */
1293 static const AVOption options[] = {
/* slice width in macroblocks; encode_init() additionally requires a power of two */
1294 { "mbs_per_slice", "macroblocks per slice", OFFSET(mbs_per_slice),
1295 AV_OPT_TYPE_INT, { .i64 = 8 }, 1, MAX_MBS_PER_SLICE, VE },
1296 { "profile", NULL, OFFSET(profile), AV_OPT_TYPE_INT,
1297 { .i64 = PRORES_PROFILE_AUTO },
1298 PRORES_PROFILE_AUTO, PRORES_PROFILE_4444, VE, "profile" },
1299 { "auto", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_AUTO },
1300 0, 0, VE, "profile" },
1301 { "proxy", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_PROXY },
1302 0, 0, VE, "profile" },
1303 { "lt", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_LT },
1304 0, 0, VE, "profile" },
1305 { "standard", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_STANDARD },
1306 0, 0, VE, "profile" },
1307 { "hq", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_HQ },
1308 0, 0, VE, "profile" },
1309 { "4444", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_4444 },
1310 0, 0, VE, "profile" },
/* encode_init() rejects vendor strings that are not exactly 4 characters long */
1311 { "vendor", "vendor ID", OFFSET(vendor),
1312 AV_OPT_TYPE_STRING, { .str = "Lavc" }, CHAR_MIN, CHAR_MAX, VE },
/* 0 selects the profile default; encode_init() rejects other values below 128 */
1313 { "bits_per_mb", "desired bits per macroblock", OFFSET(bits_per_mb),
1314 AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 8192, VE },
/* -1 selects the matrix of the chosen profile */
1315 { "quant_mat", "quantiser matrix", OFFSET(quant_sel), AV_OPT_TYPE_INT,
1316 { .i64 = -1 }, -1, QUANT_MAT_DEFAULT, VE, "quant_mat" },
1317 { "auto", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = -1 },
1318 0, 0, VE, "quant_mat" },
1319 { "proxy", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_PROXY },
1320 0, 0, VE, "quant_mat" },
1321 { "lt", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_LT },
1322 0, 0, VE, "quant_mat" },
1323 { "standard", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_STANDARD },
1324 0, 0, VE, "quant_mat" },
1325 { "hq", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_HQ },
1326 0, 0, VE, "quant_mat" },
1327 { "default", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_DEFAULT },
1328 0, 0, VE, "quant_mat" },
/* encode_init() accepts only multiples of 8, i.e. 0, 8 or 16 */
1329 { "alpha_bits", "bits for alpha plane", OFFSET(alpha_bits), AV_OPT_TYPE_INT,
1330 { .i64 = 16 }, 0, 16, VE },
/* AVClass referenced by the codec's .priv_class. Only part of the
 * initialiser is visible in this excerpt. */
1334 static const AVClass proresenc_class = {
1335 .class_name = "ProRes encoder",
1336 .item_name = av_default_item_name,
1338 .version = LIBAVUTIL_VERSION_INT,
/* Codec registration: entry points, slice threading capability and the
 * accepted pixel formats (10-bit 4:2:2, 4:4:4 and 4:4:4 with alpha). */
1341 AVCodec ff_prores_ks_encoder = {
1342 .name = "prores_ks",
1343 .long_name = NULL_IF_CONFIG_SMALL("Apple ProRes (iCodec Pro)"),
1344 .type = AVMEDIA_TYPE_VIDEO,
1345 .id = AV_CODEC_ID_PRORES,
1346 .priv_data_size = sizeof(ProresContext),
1347 .init = encode_init,
1348 .close = encode_close,
1349 .encode2 = encode_frame,
1350 .capabilities = AV_CODEC_CAP_SLICE_THREADS,
1351 .pix_fmts = (const enum AVPixelFormat[]) {
1352 AV_PIX_FMT_YUV422P10, AV_PIX_FMT_YUV444P10,
1353 AV_PIX_FMT_YUVA444P10, AV_PIX_FMT_NONE
1355 .priv_class = &proresenc_class,