4 * Copyright (c) 2012 Konstantin Shishkov
6 * This encoder appears to be based on Anatoliy Wasserman's encoder, considering
7 * the similarities in the bugs.
9 * This file is part of FFmpeg.
11 * FFmpeg is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public
13 * License as published by the Free Software Foundation; either
14 * version 2.1 of the License, or (at your option) any later version.
16 * FFmpeg is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with FFmpeg; if not, write to the Free Software
23 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
26 #include "libavutil/opt.h"
27 #include "libavutil/pixdesc.h"
31 #include "bytestream.h"
33 #include "proresdata.h"
/*
 * Chroma format codes. encode_frame stores chroma_factor in the top bits of
 * the frame flags byte (chroma_factor << 6), and the slice code compares it
 * with CFACTOR_Y444 to decide whether chroma planes are full or half width.
 */
35 #define CFACTOR_Y422 2
36 #define CFACTOR_Y444 3
/*
 * Largest slice width in macroblocks: it sizes the per-slice coefficient
 * buffers and is the upper bound of the "mbs_per_slice" option.
 */
38 #define MAX_MBS_PER_SLICE 8
/*
 * Values of the "profile" option. PRORES_PROFILE_AUTO (-1) asks encode_init
 * to pick a profile from the pixel format; the non-negative values are used
 * as indices into prores_profile_info[].
 * NOTE(review): the enum header and some enumerators are not visible in this
 * listing.
 */
43 PRORES_PROFILE_AUTO = -1,
44 PRORES_PROFILE_PROXY = 0,
46 PRORES_PROFILE_STANDARD,
/*
 * Base quantisation matrices, 64 coefficients each (shown here as 8 rows of
 * 8). encode_init selects one row of this table through the profile's .quant
 * field or the "quant_mat" option, and multiplies every coefficient by the
 * quantiser to build the per-quantiser tables.
 * NOTE(review): the braces and per-matrix labels are not visible in this
 * listing; the matrices presumably follow the QUANT_MAT_* order -- confirm.
 */
59 static const uint8_t prores_quant_matrices[][64] = {
61 4, 7, 9, 11, 13, 14, 15, 63,
62 7, 7, 11, 12, 14, 15, 63, 63,
63 9, 11, 13, 14, 15, 63, 63, 63,
64 11, 11, 13, 14, 63, 63, 63, 63,
65 11, 13, 14, 63, 63, 63, 63, 63,
66 13, 14, 63, 63, 63, 63, 63, 63,
67 13, 63, 63, 63, 63, 63, 63, 63,
68 63, 63, 63, 63, 63, 63, 63, 63,
71 4, 5, 6, 7, 9, 11, 13, 15,
72 5, 5, 7, 8, 11, 13, 15, 17,
73 6, 7, 9, 11, 13, 15, 15, 17,
74 7, 7, 9, 11, 13, 15, 17, 19,
75 7, 9, 11, 13, 14, 16, 19, 23,
76 9, 11, 13, 14, 16, 19, 23, 29,
77 9, 11, 13, 15, 17, 21, 28, 35,
78 11, 13, 16, 17, 21, 28, 35, 41,
81 4, 4, 5, 5, 6, 7, 7, 9,
82 4, 4, 5, 6, 7, 7, 9, 9,
83 5, 5, 6, 7, 7, 9, 9, 10,
84 5, 5, 6, 7, 7, 9, 9, 10,
85 5, 6, 7, 7, 8, 9, 10, 12,
86 6, 7, 7, 8, 9, 10, 12, 15,
87 6, 7, 7, 9, 10, 11, 14, 17,
88 7, 7, 9, 10, 11, 14, 17, 21,
91 4, 4, 4, 4, 4, 4, 4, 4,
92 4, 4, 4, 4, 4, 4, 4, 4,
93 4, 4, 4, 4, 4, 4, 4, 4,
94 4, 4, 4, 4, 4, 4, 4, 5,
95 4, 4, 4, 4, 4, 4, 5, 5,
96 4, 4, 4, 4, 4, 5, 5, 6,
97 4, 4, 4, 4, 5, 5, 6, 7,
98 4, 4, 4, 4, 5, 6, 7, 7,
101 4, 4, 4, 4, 4, 4, 4, 4,
102 4, 4, 4, 4, 4, 4, 4, 4,
103 4, 4, 4, 4, 4, 4, 4, 4,
104 4, 4, 4, 4, 4, 4, 4, 4,
105 4, 4, 4, 4, 4, 4, 4, 4,
106 4, 4, 4, 4, 4, 4, 4, 4,
107 4, 4, 4, 4, 4, 4, 4, 4,
108 4, 4, 4, 4, 4, 4, 4, 4,
/*
 * Macroblock-count thresholds, one per column of a profile's br_tab[].
 * When no "bits_per_mb" is given, encode_init scans the first
 * NUM_MB_LIMITS - 1 entries against
 * mb_width * mb_height * pictures_per_frame and uses the resulting index to
 * read the default bits-per-macroblock value.
 */
112 #define NUM_MB_LIMITS 4
113 static const int prores_mb_limits[NUM_MB_LIMITS] = {
114 1620, // up to 720x576
115 2700, // up to 960x720
116 6075, // up to 1440x1080
117 9216, // up to 2048x1152
/*
 * Per-profile parameters, indexed by the profile number (encode_init computes
 * prores_profile_info + ctx->profile).
 *   full_name - human-readable profile name
 *   tag       - FourCC copied into avctx->codec_tag
 *   br_tab    - default bits per macroblock, one entry per prores_mb_limits[]
 *               column
 *   quant     - index of the default matrix in prores_quant_matrices[]
 * NOTE(review): other members (the code also reads min_quant and max_quant)
 * and some initialisers are not visible in this listing.
 */
120 static const struct prores_profile {
121 const char *full_name;
125 int br_tab[NUM_MB_LIMITS];
127 } prores_profile_info[5] = {
129 .full_name = "proxy",
130 .tag = MKTAG('a', 'p', 'c', 'o'),
133 .br_tab = { 300, 242, 220, 194 },
134 .quant = QUANT_MAT_PROXY,
138 .tag = MKTAG('a', 'p', 'c', 's'),
141 .br_tab = { 720, 560, 490, 440 },
142 .quant = QUANT_MAT_LT,
145 .full_name = "standard",
146 .tag = MKTAG('a', 'p', 'c', 'n'),
149 .br_tab = { 1050, 808, 710, 632 },
150 .quant = QUANT_MAT_STANDARD,
153 .full_name = "high quality",
154 .tag = MKTAG('a', 'p', 'c', 'h'),
157 .br_tab = { 1566, 1216, 1070, 950 },
158 .quant = QUANT_MAT_HQ,
162 .tag = MKTAG('a', 'p', '4', 'h'),
165 .br_tab = { 2350, 1828, 1600, 1425 },
166 .quant = QUANT_MAT_HQ,
/*
 * Number of trellis nodes reserved per slice column: node indices are formed
 * as (slice + 1) * TRELLIS_WIDTH + quantiser.
 */
#define TRELLIS_WIDTH 16
/*
 * Saturation value for trellis scores. The replacement list is parenthesised
 * so the macro keeps its value next to operators such as %, / or unary minus.
 */
#define SCORE_LIMIT (INT_MAX / 2)
/* Quantisers below this value have a precomputed matrix in ctx->quants[]. */
#define MAX_STORED_Q 16
/*
 * Scratch state for one worker thread of the quantiser search;
 * find_quant_thread picks its instance with ctx->tdata + threadnr.
 */
182 typedef struct ProresThreadData {
/* coefficient storage per plane: up to 4 blocks of 64 values per macroblock */
183 DECLARE_ALIGNED(16, int16_t, blocks)[MAX_PLANES][64 * 4 * MAX_MBS_PER_SLICE];
/* 16x16 scratch area used by get_slice_data for edge padding */
184 DECLARE_ALIGNED(16, uint16_t, emu_buf)[16 * 16];
185 int16_t custom_q[64];
/* trellis nodes, allocated per thread in encode_init */
186 struct TrellisNode *nodes;
/*
 * Encoder private context (AVCodecContext.priv_data).
 * NOTE(review): several members referenced elsewhere in the file (for example
 * fdsp, vendor, profile, alpha_bits, bits_per_mb, slice_q) are declared on
 * lines not visible in this listing.
 */
189 typedef struct ProresContext {
/* coefficient and edge-padding buffers used by encode_slice */
191 DECLARE_ALIGNED(16, int16_t, blocks)[MAX_PLANES][64 * 4 * MAX_MBS_PER_SLICE];
192 DECLARE_ALIGNED(16, uint16_t, emu_buf)[16*16];
/* quant_mat scaled by each stored quantiser; filled in encode_init */
193 int16_t quants[MAX_STORED_Q][64];
/* matrix built on the fly for quantisers >= MAX_STORED_Q */
194 int16_t custom_q[64];
195 const uint8_t *quant_mat;
196 const uint8_t *scantable;
/* forward DCT entry point; encode_init sets it to prores_fdct */
198 void (*fdct)(FDCTDSPContext *fdsp, const uint16_t *src,
199 int linesize, int16_t *block);
202 int mb_width, mb_height;
204 int num_chroma_blocks, chroma_factor;
206 int slices_per_picture;
207 int pictures_per_frame; // 1 for progressive, 2 for interlaced
/* packet size requested for each frame; may grow in encode_frame */
218 int frame_size_upper_bound;
221 const struct prores_profile *profile_info;
/* one ProresThreadData per thread (avctx->thread_count entries) */
225 ProresThreadData *tdata;
/*
 * Fetch the samples of one slice from a single plane and run the forward DCT
 * on them, one macroblock per loop iteration (src advances by mb_width
 * samples each time).
 *
 * A macroblock lying completely inside the w x h plane is transformed with
 * the picture's own line size. Otherwise the valid bw x bh part is copied
 * into emu_buf, the last valid sample of every row is repeated to the right,
 * and the last valid row is repeated downwards.
 *
 * linesize is in bytes; x and y are the slice position in samples; blocks
 * receives 64 coefficients per transformed block.
 *
 * NOTE(review): this listing omits original lines (local declarations,
 * braces, else branches, the condition guarding the memset and the pointer
 * increments between fdct calls). The two fdct call sequences below differ
 * only in block order; which one runs presumably depends on is_chroma --
 * confirm against the full file.
 */
228 static void get_slice_data(ProresContext *ctx, const uint16_t *src,
229 int linesize, int x, int y, int w, int h,
230 int16_t *blocks, uint16_t *emu_buf,
231 int mbs_per_slice, int blocks_per_mb, int is_chroma)
233 const uint16_t *esrc;
/* macroblock width in samples: 8 for two blocks per MB, 16 for four */
234 const int mb_width = 4 * blocks_per_mb;
238 for (i = 0; i < mbs_per_slice; i++, src += mb_width) {
/* zero the coefficients of this and all remaining macroblocks of the slice */
240 memset(blocks, 0, 64 * (mbs_per_slice - i) * blocks_per_mb
244 if (x + mb_width <= w && y + 16 <= h) {
246 elinesize = linesize;
/* emu_buf rows are 16 samples wide; elinesize is kept in bytes */
251 elinesize = 16 * sizeof(*emu_buf);
253 bw = FFMIN(w - x, mb_width);
254 bh = FFMIN(h - y, 16);
256 for (j = 0; j < bh; j++) {
257 memcpy(emu_buf + j * 16,
258 (const uint8_t*)src + j * linesize,
/* replicate the last valid sample across the rest of the row */
260 pix = emu_buf[j * 16 + bw - 1];
261 for (k = bw; k < mb_width; k++)
262 emu_buf[j * 16 + k] = pix;
/* replicate the last valid row */
265 memcpy(emu_buf + j * 16,
266 emu_buf + (bh - 1) * 16,
267 mb_width * sizeof(*emu_buf));
/* esrc is a uint16_t pointer and elinesize is in bytes, so
 * esrc + elinesize * 4 points 8 rows further down */
270 ctx->fdct(&ctx->fdsp, esrc, elinesize, blocks);
272 if (blocks_per_mb > 2) {
273 ctx->fdct(&ctx->fdsp, esrc + 8, elinesize, blocks);
276 ctx->fdct(&ctx->fdsp, esrc + elinesize * 4, elinesize, blocks);
278 if (blocks_per_mb > 2) {
279 ctx->fdct(&ctx->fdsp, esrc + elinesize * 4 + 8, elinesize, blocks);
283 ctx->fdct(&ctx->fdsp, esrc, elinesize, blocks);
285 ctx->fdct(&ctx->fdsp, esrc + elinesize * 4, elinesize, blocks);
287 if (blocks_per_mb > 2) {
288 ctx->fdct(&ctx->fdsp, esrc + 8, elinesize, blocks);
290 ctx->fdct(&ctx->fdsp, esrc + elinesize * 4 + 8, elinesize, blocks);
/*
 * Copy the alpha samples of one slice into blocks as 16 rows of
 * slice_width (16 * mbs_per_slice) values.
 *
 * Only the part inside the w x h plane is read from src; each row is then
 * padded on the right with its last valid sample, and missing rows are filled
 * by repeating the previous row. linesize is in bytes.
 *
 * NOTE(review): the branch on abits and the body of the first per-sample loop
 * are not visible in this listing. The visible conversion
 * (v << 6) | (v >> 4) widens a value by 6 bits -- presumably 10-bit input to
 * 16-bit output; confirm.
 */
299 static void get_alpha_data(ProresContext *ctx, const uint16_t *src,
300 int linesize, int x, int y, int w, int h,
301 int16_t *blocks, int mbs_per_slice, int abits)
303 const int slice_width = 16 * mbs_per_slice;
304 int i, j, copy_w, copy_h;
306 copy_w = FFMIN(w - x, slice_width);
307 copy_h = FFMIN(h - y, 16);
308 for (i = 0; i < copy_h; i++) {
309 memcpy(blocks, src, copy_w * sizeof(*src));
311 for (j = 0; j < copy_w; j++)
314 for (j = 0; j < copy_w; j++)
315 blocks[j] = (blocks[j] << 6) | (blocks[j] >> 4);
/* pad the row to the full slice width with the last valid sample */
316 for (j = copy_w; j < slice_width; j++)
317 blocks[j] = blocks[copy_w - 1];
318 blocks += slice_width;
/* linesize is in bytes, src points to 16-bit samples */
319 src += linesize >> 1;
/* fill the rows below the picture by repeating the previous row */
321 for (; i < 16; i++) {
322 memcpy(blocks, blocks - slice_width, slice_width * sizeof(*blocks));
323 blocks += slice_width;
328 * Write an unsigned rice/exp golomb codeword.
/*
 * Emit val with the hybrid Rice / exp-Golomb code described by codebook.
 *
 * Codebook layout: bits 0-1 hold switch_bits - 1, bits 2-4 the exp-Golomb
 * order, and the bits from 5 upwards the Rice order.
 * Values of at least switch_bits << rice_order are rebased and written as
 * exp-Golomb: (exponent - exp_order + switch_bits) zero bits followed by the
 * exponent + 1 low bits of the value. Smaller values take the Rice path: a
 * run of (val >> rice_order) zero bits and the rice_order low bits of val.
 *
 * NOTE(review): a few lines (the exponent declaration, else/braces and
 * whatever is written between the zero run and the remainder) are not
 * visible in this listing.
 */
330 static inline void encode_vlc_codeword(PutBitContext *pb, unsigned codebook, int val)
332 unsigned int rice_order, exp_order, switch_bits, switch_val;
335 /* number of prefix bits to switch between Rice and expGolomb */
336 switch_bits = (codebook & 3) + 1;
337 rice_order = codebook >> 5; /* rice code order */
338 exp_order = (codebook >> 2) & 7; /* exp golomb code order */
340 switch_val = switch_bits << rice_order;
/* val is int, switch_val unsigned: the comparison is done as unsigned */
342 if (val >= switch_val) {
343 val -= switch_val - (1 << exp_order);
344 exponent = av_log2(val);
346 put_bits(pb, exponent - exp_order + switch_bits, 0);
347 put_bits(pb, exponent + 1, val);
349 exponent = val >> rice_order;
352 put_bits(pb, exponent, 0);
355 put_sbits(pb, rice_order, val);
/*
 * Sign mask of a 32-bit int: -1 (all ones) for negative x, 0 otherwise.
 * Relies on >> being an arithmetic shift for negative values.
 */
#define GET_SIGN(x) ((x) >> 31)
/*
 * Fold a signed value into a non-negative code: 0, -1, 1, -2, 2, ... map to
 * 0, 1, 2, 3, 4, ... Multiplying by 2 instead of shifting left avoids the
 * undefined behaviour of left-shifting a negative int. x is evaluated twice,
 * so it must not have side effects.
 */
#define MAKE_CODE(x) (((x) * 2) ^ GET_SIGN(x))
/*
 * Write the DC coefficients of all blocks of a slice plane.
 *
 * Each DC is computed as (blocks[0] - 0x4000) / scale. The first one is
 * coded directly with FIRST_DC_CB. For the following blocks (64 coefficients
 * apart) the difference from the previous DC is computed, conditionally
 * negated with the mask held in `sign`, folded with MAKE_CODE and written
 * with a codebook chosen from the previous code,
 * min((code + (code & 1)) >> 1, 3).
 *
 * NOTE(review): the lines that initialise and update `sign` and `prev_dc`
 * and that advance `blocks` before the loop are not visible in this listing.
 */
362 static void encode_dcs(PutBitContext *pb, int16_t *blocks,
363 int blocks_per_slice, int scale)
366 int codebook = 3, code, dc, prev_dc, delta, sign, new_sign;
368 prev_dc = (blocks[0] - 0x4000) / scale;
369 encode_vlc_codeword(pb, FIRST_DC_CB, MAKE_CODE(prev_dc));
374 for (i = 1; i < blocks_per_slice; i++, blocks += 64) {
375 dc = (blocks[0] - 0x4000) / scale;
376 delta = dc - prev_dc;
377 new_sign = GET_SIGN(delta);
/* sign is 0 or -1: (delta ^ sign) - sign negates delta when sign is -1 */
378 delta = (delta ^ sign) - sign;
379 code = MAKE_CODE(delta);
380 encode_vlc_codeword(pb, ff_prores_dc_codebook[codebook], code);
381 codebook = (code + (code & 1)) >> 1;
382 codebook = FFMIN(codebook, 3);
/*
 * Write the AC coefficients of a slice plane as (run, level, sign) triples.
 *
 * Coefficients are visited scan position by scan position (1..63), and for
 * each position across every block of the slice (stride 64). Each value is
 * divided by the matching qmat entry; a coded level emits the current run,
 * a level codeword and one sign bit, after which the run and level codebooks
 * are re-selected from min(run, 15) and min(abs_level, 9).
 *
 * NOTE(review): the zero-level test, the run bookkeeping and the value
 * passed to the level codeword are on lines not visible in this listing.
 */
388 static void encode_acs(PutBitContext *pb, int16_t *blocks,
389 int blocks_per_slice,
390 int plane_size_factor,
391 const uint8_t *scan, const int16_t *qmat)
394 int run, level, run_cb, lev_cb;
395 int max_coeffs, abs_level;
/* total number of coefficients in the slice plane (64 per block) */
397 max_coeffs = blocks_per_slice << 6;
/* initial codebooks */
398 run_cb = ff_prores_run_to_cb_index[4];
399 lev_cb = ff_prores_lev_to_cb_index[2];
402 for (i = 1; i < 64; i++) {
403 for (idx = scan[i]; idx < max_coeffs; idx += 64) {
404 level = blocks[idx] / qmat[scan[i]];
406 abs_level = FFABS(level);
407 encode_vlc_codeword(pb, ff_prores_ac_codebook[run_cb], run);
408 encode_vlc_codeword(pb, ff_prores_ac_codebook[lev_cb],
/* GET_SIGN yields 0 or -1; put_sbits keeps only the lowest bit */
410 put_sbits(pb, 1, GET_SIGN(level));
412 run_cb = ff_prores_run_to_cb_index[FFMIN(run, 15)];
413 lev_cb = ff_prores_lev_to_cb_index[FFMIN(abs_level, 9)];
/*
 * Encode one plane of a slice (DC coefficients, then AC coefficients) into
 * pb and return the number of bytes it occupies, computed as the growth of
 * the bit count divided by 8.
 *
 * qmat[0] is used as the DC scale.
 *
 * NOTE(review): the end of the parameter list and the statement between the
 * AC pass and the return are not visible in this listing; presumably the
 * writer is flushed to a byte boundary there -- confirm.
 */
422 static int encode_slice_plane(ProresContext *ctx, PutBitContext *pb,
423 const uint16_t *src, int linesize,
424 int mbs_per_slice, int16_t *blocks,
425 int blocks_per_mb, int plane_size_factor,
428 int blocks_per_slice, saved_pos;
430 saved_pos = put_bits_count(pb);
431 blocks_per_slice = mbs_per_slice * blocks_per_mb;
433 encode_dcs(pb, blocks, blocks_per_slice, qmat[0]);
434 encode_acs(pb, blocks, blocks_per_slice, plane_size_factor,
435 ctx->scantable, qmat);
438 return (put_bits_count(pb) - saved_pos) >> 3;
/*
 * Write the difference between the current and previous alpha sample.
 *
 * The difference is reduced modulo 2^abits. If it is zero or its magnitude
 * exceeds dsize it is written with abits bits; otherwise it is written in
 * short form as (|diff| - 1) in dbits - 1 bits followed by a sign bit.
 * dbits is 4 for 8-bit alpha and 7 otherwise.
 *
 * NOTE(review): the statement executed for wrapped differences in the upper
 * range and the bits written ahead of each form are on lines not visible in
 * this listing.
 */
441 static void put_alpha_diff(PutBitContext *pb, int cur, int prev, int abits)
443 const int dbits = (abits == 8) ? 4 : 7;
/* '-' binds tighter than '<<': this is 1 << (dbits - 1) */
444 const int dsize = 1 << dbits - 1;
445 int diff = cur - prev;
447 diff = av_mod_uintp2(diff, abits);
448 if (diff >= (1 << abits) - dsize)
450 if (diff < -dsize || diff > dsize || !diff) {
452 put_bits(pb, abits, diff);
455 put_bits(pb, dbits - 1, FFABS(diff) - 1);
456 put_bits(pb, 1, diff < 0);
/*
 * Write a run length for the alpha plane, using either a 4-bit or a 15-bit
 * field.
 * NOTE(review): the conditions choosing between the two field sizes, and any
 * prefix bits, are on lines not visible in this listing.
 */
460 static void put_alpha_run(PutBitContext *pb, int run)
465 put_bits(pb, 4, run);
467 put_bits(pb, 15, run);
/*
 * Encode the alpha samples of one slice (mbs_per_slice * 256 values) as
 * sample differences interleaved with run lengths, and return the number of
 * bytes written (bit-count growth divided by 8).
 *
 * The predictor starts at the maximum alpha value, (1 << abits) - 1.
 *
 * NOTE(review): the loop that reads samples and measures runs, and the rest
 * of the parameter list, are on lines not visible in this listing.
 */
473 // todo alpha quantisation for high quants
474 static int encode_alpha_plane(ProresContext *ctx, PutBitContext *pb,
475 int mbs_per_slice, uint16_t *blocks,
478 const int abits = ctx->alpha_bits;
479 const int mask = (1 << abits) - 1;
/* 16x16 alpha samples per macroblock */
480 const int num_coeffs = mbs_per_slice * 256;
481 int saved_pos = put_bits_count(pb);
482 int prev = mask, cur;
487 put_alpha_diff(pb, cur, prev, abits);
492 put_alpha_run (pb, run);
493 put_alpha_diff(pb, cur, prev, abits);
499 } while (idx < num_coeffs);
501 put_alpha_run(pb, run);
503 return (put_bits_count(pb) - saved_pos) >> 3;
/*
 * Encode all planes of one slice into pb and store each plane's byte size in
 * sizes[].
 *
 * The quantisation matrix is ctx->quants[0] when a fixed quantiser is forced,
 * ctx->quants[quant] for stored quantisers, and otherwise ctx->custom_q
 * filled with quant_mat * quant. Each plane is fetched with get_slice_data
 * and coded with encode_slice_plane, or with get_alpha_data and
 * encode_alpha_plane. An error is logged when the bit writer has run past
 * its buffer.
 *
 * NOTE(review): part of the parameter list, the luma/chroma geometry
 * assignments, the test separating picture planes from alpha and the return
 * statements are on lines not visible in this listing.
 */
506 static int encode_slice(AVCodecContext *avctx, const AVFrame *pic,
508 int sizes[4], int x, int y, int quant,
511 ProresContext *ctx = avctx->priv_data;
515 int slice_width_factor = av_log2(mbs_per_slice);
516 int num_cblocks, pwidth, linesize, line_add;
517 int plane_factor, is_chroma;
520 if (ctx->pictures_per_frame == 1)
/* first source line of the current field within the interleaved frame */
523 line_add = ctx->cur_picture_idx ^ !pic->top_field_first;
525 if (ctx->force_quant) {
526 qmat = ctx->quants[0];
527 } else if (quant < MAX_STORED_Q) {
528 qmat = ctx->quants[quant];
530 qmat = ctx->custom_q;
531 for (i = 0; i < 64; i++)
532 qmat[i] = ctx->quant_mat[i] * quant;
535 for (i = 0; i < ctx->num_planes; i++) {
536 is_chroma = (i == 1 || i == 2);
537 plane_factor = slice_width_factor + 2;
539 plane_factor += ctx->chroma_factor - 3;
540 if (!is_chroma || ctx->chroma_factor == CFACTOR_Y444) {
544 pwidth = avctx->width;
549 pwidth = avctx->width >> 1;
/* with two pictures per frame, rows of one field are two frame lines apart */
552 linesize = pic->linesize[i] * ctx->pictures_per_frame;
553 src = (const uint16_t*)(pic->data[i] + yp * linesize +
554 line_add * pic->linesize[i]) + xp;
557 get_slice_data(ctx, src, linesize, xp, yp,
558 pwidth, avctx->height / ctx->pictures_per_frame,
559 ctx->blocks[0], ctx->emu_buf,
560 mbs_per_slice, num_cblocks, is_chroma);
561 sizes[i] = encode_slice_plane(ctx, pb, src, linesize,
562 mbs_per_slice, ctx->blocks[0],
563 num_cblocks, plane_factor,
566 get_alpha_data(ctx, src, linesize, xp, yp,
567 pwidth, avctx->height / ctx->pictures_per_frame,
568 ctx->blocks[0], mbs_per_slice, ctx->alpha_bits);
569 sizes[i] = encode_alpha_plane(ctx, pb, mbs_per_slice,
570 ctx->blocks[0], quant);
572 total_size += sizes[i];
573 if (put_bits_left(pb) < 0) {
574 av_log(avctx, AV_LOG_ERROR,
575 "Underestimated required buffer size.\n");
/*
 * Return the number of bits needed to code val with the given codebook,
 * without writing anything. The codebook fields are decoded exactly as in
 * encode_vlc_codeword.
 *
 * exp-Golomb path: exponent * 2 - exp_order + switch_bits + 1 bits.
 * Rice path: (val >> rice_order) + rice_order + 1 bits.
 */
582 static inline int estimate_vlc(unsigned codebook, int val)
584 unsigned int rice_order, exp_order, switch_bits, switch_val;
587 /* number of prefix bits to switch between Rice and expGolomb */
588 switch_bits = (codebook & 3) + 1;
589 rice_order = codebook >> 5; /* rice code order */
590 exp_order = (codebook >> 2) & 7; /* exp golomb code order */
592 switch_val = switch_bits << rice_order;
/* val is int, switch_val unsigned: the comparison is done as unsigned */
594 if (val >= switch_val) {
595 val -= switch_val - (1 << exp_order);
596 exponent = av_log2(val);
598 return exponent * 2 - exp_order + switch_bits + 1;
600 return (val >> rice_order) + rice_order + 1;
/*
 * Bit-count counterpart of encode_dcs: accumulate the bits needed for the DC
 * coefficients of a slice plane and add the quantisation remainder of every
 * DC, |blocks[0] - 0x4000| % scale, to *error.
 *
 * NOTE(review): the end of the parameter list, the `sign`/`prev_dc` updates,
 * the `blocks` advance before the loop and the return statement are on lines
 * not visible in this listing.
 */
604 static int estimate_dcs(int *error, int16_t *blocks, int blocks_per_slice,
608 int codebook = 3, code, dc, prev_dc, delta, sign, new_sign;
611 prev_dc = (blocks[0] - 0x4000) / scale;
612 bits = estimate_vlc(FIRST_DC_CB, MAKE_CODE(prev_dc));
616 *error += FFABS(blocks[0] - 0x4000) % scale;
618 for (i = 1; i < blocks_per_slice; i++, blocks += 64) {
619 dc = (blocks[0] - 0x4000) / scale;
620 *error += FFABS(blocks[0] - 0x4000) % scale;
621 delta = dc - prev_dc;
622 new_sign = GET_SIGN(delta);
/* sign is 0 or -1: (delta ^ sign) - sign negates delta when sign is -1 */
623 delta = (delta ^ sign) - sign;
624 code = MAKE_CODE(delta);
625 bits += estimate_vlc(ff_prores_dc_codebook[codebook], code);
626 codebook = (code + (code & 1)) >> 1;
627 codebook = FFMIN(codebook, 3);
/*
 * Bit-count counterpart of encode_acs: visit the AC coefficients in the same
 * order, add the size of the run and level codewords to the total, and add
 * every coefficient's quantisation remainder, |coef| % qmat entry, to *error.
 *
 * NOTE(review): the zero-level test, run bookkeeping, sign-bit accounting
 * and the return statement are on lines not visible in this listing.
 */
635 static int estimate_acs(int *error, int16_t *blocks, int blocks_per_slice,
636 int plane_size_factor,
637 const uint8_t *scan, const int16_t *qmat)
640 int run, level, run_cb, lev_cb;
641 int max_coeffs, abs_level;
/* total number of coefficients in the slice plane (64 per block) */
644 max_coeffs = blocks_per_slice << 6;
645 run_cb = ff_prores_run_to_cb_index[4];
646 lev_cb = ff_prores_lev_to_cb_index[2];
649 for (i = 1; i < 64; i++) {
650 for (idx = scan[i]; idx < max_coeffs; idx += 64) {
651 level = blocks[idx] / qmat[scan[i]];
652 *error += FFABS(blocks[idx]) % qmat[scan[i]];
654 abs_level = FFABS(level);
655 bits += estimate_vlc(ff_prores_ac_codebook[run_cb], run);
656 bits += estimate_vlc(ff_prores_ac_codebook[lev_cb],
659 run_cb = ff_prores_run_to_cb_index[FFMIN(run, 15)];
660 lev_cb = ff_prores_lev_to_cb_index[FFMIN(abs_level, 9)];
/*
 * Estimate the coded size in bits of one plane of a slice at the quantiser
 * represented by qmat, reading coefficients from td->blocks[plane], and add
 * the quantisation error to *error. The result is rounded up to a whole
 * number of bytes (multiple of 8 bits).
 */
671 static int estimate_slice_plane(ProresContext *ctx, int *error, int plane,
672 const uint16_t *src, int linesize,
674 int blocks_per_mb, int plane_size_factor,
675 const int16_t *qmat, ProresThreadData *td)
677 int blocks_per_slice;
680 blocks_per_slice = mbs_per_slice * blocks_per_mb;
682 bits = estimate_dcs(error, td->blocks[plane], blocks_per_slice, qmat[0]);
683 bits += estimate_acs(error, td->blocks[plane], blocks_per_slice,
684 plane_size_factor, ctx->scantable, qmat);
686 return FFALIGN(bits, 8);
/*
 * Bit-count counterpart of put_alpha_diff: classify the wrapped difference
 * between cur and prev with the same thresholds and return the size of the
 * form that would be written.
 * NOTE(review): the return statements are on lines not visible in this
 * listing.
 */
689 static int est_alpha_diff(int cur, int prev, int abits)
691 const int dbits = (abits == 8) ? 4 : 7;
/* '-' binds tighter than '<<': this is 1 << (dbits - 1) */
692 const int dsize = 1 << dbits - 1;
693 int diff = cur - prev;
695 diff = av_mod_uintp2(diff, abits);
696 if (diff >= (1 << abits) - dsize)
698 if (diff < -dsize || diff > dsize || !diff)
/*
 * Bit-count counterpart of encode_alpha_plane for one slice of
 * mbs_per_slice * 256 alpha samples; the predictor starts at the maximum
 * alpha value.
 * NOTE(review): the sample loop, run accounting, the rest of the parameter
 * list and the return statement are on lines not visible in this listing.
 */
704 static int estimate_alpha_plane(ProresContext *ctx, int *error,
705 const uint16_t *src, int linesize,
706 int mbs_per_slice, int quant,
709 const int abits = ctx->alpha_bits;
710 const int mask = (1 << abits) - 1;
711 const int num_coeffs = mbs_per_slice * 256;
712 int prev = mask, cur;
719 bits = est_alpha_diff(cur, prev, abits);
730 bits += est_alpha_diff(cur, prev, abits);
736 } while (idx < num_coeffs);
/*
 * Evaluate every candidate quantiser for one slice and extend the trellis of
 * the current macroblock row with the results.
 *
 * Steps visible here:
 *  1. Transform all planes of the slice into td->blocks[].
 *  2. For each q in [min_quant, max_quant], estimate the coded size and the
 *     quantisation error of the slice.
 *  3. Fill the extra candidate max_quant + 1: reuse max_quant when that
 *     already fits the slice budget (bits_per_mb * mbs_per_slice), otherwise
 *     search quantisers up to 127 for one that fits.
 *  4. For every (previous node, current quantiser) pair, accumulate bits and
 *     score and keep the best predecessor of each current node; bits_limit is
 *     the cumulative budget for all macroblocks up to the end of this slice.
 *  5. Select the node of this slice with the lowest score (ties go to the
 *     higher quantiser because of the <= comparison).
 *
 * trellis_node is the index of this slice's first node in td->nodes[]; the
 * previous slice's nodes start TRELLIS_WIDTH entries earlier.
 *
 * NOTE(review): several original lines (declarations, geometry assignments,
 * penalty assignments after the size checks, loop exits and the return
 * statement) are not visible in this listing. find_quant_thread uses the
 * return value as a node index, so the function presumably returns pq.
 */
748 static int find_slice_quant(AVCodecContext *avctx, const AVFrame *pic,
749 int trellis_node, int x, int y, int mbs_per_slice,
750 ProresThreadData *td)
752 ProresContext *ctx = avctx->priv_data;
753 int i, q, pq, xp, yp;
755 int slice_width_factor = av_log2(mbs_per_slice);
756 int num_cblocks[MAX_PLANES], pwidth;
757 int plane_factor[MAX_PLANES], is_chroma[MAX_PLANES];
758 const int min_quant = ctx->profile_info->min_quant;
759 const int max_quant = ctx->profile_info->max_quant;
760 int error, bits, bits_limit;
761 int mbs, prev, cur, new_score;
762 int slice_bits[TRELLIS_WIDTH], slice_score[TRELLIS_WIDTH];
765 int linesize[4], line_add;
767 if (ctx->pictures_per_frame == 1)
770 line_add = ctx->cur_picture_idx ^ !pic->top_field_first;
/* macroblocks of this row covered up to and including this slice */
771 mbs = x + mbs_per_slice;
/* step 1: fetch and transform every plane once; reused for all quantisers */
773 for (i = 0; i < ctx->num_planes; i++) {
774 is_chroma[i] = (i == 1 || i == 2);
775 plane_factor[i] = slice_width_factor + 2;
777 plane_factor[i] += ctx->chroma_factor - 3;
778 if (!is_chroma[i] || ctx->chroma_factor == CFACTOR_Y444) {
782 pwidth = avctx->width;
787 pwidth = avctx->width >> 1;
790 linesize[i] = pic->linesize[i] * ctx->pictures_per_frame;
791 src = (const uint16_t*)(pic->data[i] + yp * linesize[i] +
792 line_add * pic->linesize[i]) + xp;
795 get_slice_data(ctx, src, linesize[i], xp, yp,
796 pwidth, avctx->height / ctx->pictures_per_frame,
797 td->blocks[i], td->emu_buf,
798 mbs_per_slice, num_cblocks[i], is_chroma[i]);
800 get_alpha_data(ctx, src, linesize[i], xp, yp,
801 pwidth, avctx->height / ctx->pictures_per_frame,
802 td->blocks[i], mbs_per_slice, ctx->alpha_bits);
/* reset this slice's nodes, including the extra max_quant + 1 candidate */
806 for (q = min_quant; q < max_quant + 2; q++) {
807 td->nodes[trellis_node + q].prev_node = -1;
808 td->nodes[trellis_node + q].quant = q;
811 // todo: maybe perform coarser quantising to fit into frame size when needed
/* step 2: size and error of the slice at every regular quantiser */
812 for (q = min_quant; q <= max_quant; q++) {
815 for (i = 0; i < ctx->num_planes - !!ctx->alpha_bits; i++) {
816 bits += estimate_slice_plane(ctx, &error, i,
819 num_cblocks[i], plane_factor[i],
823 bits += estimate_alpha_plane(ctx, &error, src, linesize[3],
824 mbs_per_slice, q, td->blocks[3]);
825 if (bits > 65000 * 8)
828 slice_bits[q] = bits;
829 slice_score[q] = error;
/* step 3: the extra candidate reuses max_quant if that already fits */
831 if (slice_bits[max_quant] <= ctx->bits_per_mb * mbs_per_slice) {
832 slice_bits[max_quant + 1] = slice_bits[max_quant];
833 slice_score[max_quant + 1] = slice_score[max_quant] + 1;
834 overquant = max_quant;
/* otherwise look for a larger quantiser that meets the slice budget */
836 for (q = max_quant + 1; q < 128; q++) {
839 if (q < MAX_STORED_Q) {
840 qmat = ctx->quants[q];
843 for (i = 0; i < 64; i++)
844 qmat[i] = ctx->quant_mat[i] * q;
846 for (i = 0; i < ctx->num_planes - !!ctx->alpha_bits; i++) {
847 bits += estimate_slice_plane(ctx, &error, i,
850 num_cblocks[i], plane_factor[i],
854 bits += estimate_alpha_plane(ctx, &error, src, linesize[3],
855 mbs_per_slice, q, td->blocks[3]);
856 if (bits <= ctx->bits_per_mb * mbs_per_slice)
860 slice_bits[max_quant + 1] = bits;
861 slice_score[max_quant + 1] = error;
864 td->nodes[trellis_node + max_quant + 1].quant = overquant;
/* step 4: connect each current node to its best predecessor */
866 bits_limit = mbs * ctx->bits_per_mb;
867 for (pq = min_quant; pq < max_quant + 2; pq++) {
868 prev = trellis_node - TRELLIS_WIDTH + pq;
870 for (q = min_quant; q < max_quant + 2; q++) {
871 cur = trellis_node + q;
873 bits = td->nodes[prev].bits + slice_bits[q];
874 error = slice_score[q];
875 if (bits > bits_limit)
/* scores saturate at SCORE_LIMIT instead of being summed further */
878 if (td->nodes[prev].score < SCORE_LIMIT && error < SCORE_LIMIT)
879 new_score = td->nodes[prev].score + error;
881 new_score = SCORE_LIMIT;
882 if (td->nodes[cur].prev_node == -1 ||
883 td->nodes[cur].score >= new_score) {
885 td->nodes[cur].bits = bits;
886 td->nodes[cur].score = new_score;
887 td->nodes[cur].prev_node = prev;
/* step 5: pick the node with the lowest accumulated score */
892 error = td->nodes[trellis_node + min_quant].score;
893 pq = trellis_node + min_quant;
894 for (q = min_quant + 1; q < max_quant + 2; q++) {
895 if (td->nodes[trellis_node + q].score <= error) {
896 error = td->nodes[trellis_node + q].score;
897 pq = trellis_node + q;
/*
 * Worker callback: choose the quantisers for one macroblock row.
 *
 * jobnr is the row index y and threadnr selects the per-thread scratch data;
 * arg is the frame being encoded. Every slice of the row is passed to
 * find_slice_quant with trellis column (mb + 1) * TRELLIS_WIDTH. The row is
 * then walked backwards through the prev_node links, starting from the node
 * returned for the last slice, and the quantiser of each node is stored in
 * ctx->slice_q[].
 *
 * NOTE(review): the body of the while loop (presumably halving
 * mbs_per_slice for the narrower slices at the end of the row, as
 * encode_frame does) and the return statement are on lines not visible in
 * this listing.
 */
904 static int find_quant_thread(AVCodecContext *avctx, void *arg,
905 int jobnr, int threadnr)
907 ProresContext *ctx = avctx->priv_data;
908 ProresThreadData *td = ctx->tdata + threadnr;
909 int mbs_per_slice = ctx->mbs_per_slice;
910 int x, y = jobnr, mb, q = 0;
912 for (x = mb = 0; x < ctx->mb_width; x += mbs_per_slice, mb++) {
913 while (ctx->mb_width - x < mbs_per_slice)
915 q = find_slice_quant(avctx, arg,
916 (mb + 1) * TRELLIS_WIDTH, x, y,
/* backtrack from the best final node to recover one quantiser per slice */
920 for (x = ctx->slices_width - 1; x >= 0; x--) {
921 ctx->slice_q[x + y * ctx->slices_width] = td->nodes[q].quant;
922 q = td->nodes[q].prev_node;
/*
 * Encode one frame into pkt.
 *
 * Output layout as written here: a 4-byte frame size, the frame container
 * ID, then the frame header (header size, version, vendor, width, height,
 * flags, colour description, alpha info and optional quantisation matrices).
 * Each picture (one for progressive, two for interlaced) follows with a
 * picture header, a seek table of 16-bit slice sizes, and the slices
 * themselves. Size fields are reserved first and written back once the
 * sizes are known.
 *
 * Unless a fixed quantiser is forced, per-slice quantisers are computed
 * first by running find_quant_thread through avctx->execute2. While
 * encoding, the packet is grown when fewer than two maximum-size slices of
 * space remain, and every pointer into the old buffer is rebased.
 *
 * NOTE(review): several original lines (declarations, error checks after
 * calls, pointer set-up such as tmp and slice_sizes, closing braces and the
 * final return) are not visible in this listing.
 */
928 static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
929 const AVFrame *pic, int *got_packet)
931 ProresContext *ctx = avctx->priv_data;
932 uint8_t *orig_buf, *buf, *slice_hdr, *slice_sizes, *tmp;
933 uint8_t *picture_size_pos;
935 int x, y, i, mb, q = 0;
936 int sizes[4] = { 0 };
/* quantiser/size byte pair plus a 16-bit size for every plane but the last */
937 int slice_hdr_size = 2 + 2 * (ctx->num_planes - 1);
938 int frame_size, picture_size, slice_size;
/* initial per-slice size guess derived from the frame upper bound */
940 int max_slice_size = (ctx->frame_size_upper_bound - 200) / (ctx->pictures_per_frame * ctx->slices_per_picture + 1);
943 pkt_size = ctx->frame_size_upper_bound;
945 if ((ret = ff_alloc_packet2(avctx, pkt, pkt_size + FF_MIN_BUFFER_SIZE)) < 0)
948 orig_buf = pkt->data;
951 orig_buf += 4; // frame size
952 bytestream_put_be32 (&orig_buf, FRAME_ID); // frame container ID
957 buf += 2; // frame header size will be stored here
958 bytestream_put_be16 (&buf, 0); // version 1
959 bytestream_put_buffer(&buf, ctx->vendor, 4);
960 bytestream_put_be16 (&buf, avctx->width);
961 bytestream_put_be16 (&buf, avctx->height);
963 frame_flags = ctx->chroma_factor << 6;
964 if (avctx->flags & CODEC_FLAG_INTERLACED_DCT)
965 frame_flags |= pic->top_field_first ? 0x04 : 0x08;
966 bytestream_put_byte (&buf, frame_flags);
968 bytestream_put_byte (&buf, 0); // reserved
969 bytestream_put_byte (&buf, avctx->color_primaries);
970 bytestream_put_byte (&buf, avctx->color_trc);
971 bytestream_put_byte (&buf, avctx->colorspace);
/* low bits: alpha_bits >> 3, i.e. 0, 1 or 2 for 0, 8 or 16 alpha bits */
972 bytestream_put_byte (&buf, 0x40 | (ctx->alpha_bits >> 3));
973 bytestream_put_byte (&buf, 0); // reserved
974 if (ctx->quant_sel != QUANT_MAT_DEFAULT) {
975 bytestream_put_byte (&buf, 0x03); // matrix flags - both matrices are present
976 // luma quantisation matrix
977 for (i = 0; i < 64; i++)
978 bytestream_put_byte(&buf, ctx->quant_mat[i]);
979 // chroma quantisation matrix
/* the same matrix is written for chroma as for luma */
980 for (i = 0; i < 64; i++)
981 bytestream_put_byte(&buf, ctx->quant_mat[i]);
983 bytestream_put_byte (&buf, 0x00); // matrix flags - default matrices are used
985 bytestream_put_be16 (&tmp, buf - orig_buf); // write back frame header size
987 for (ctx->cur_picture_idx = 0;
988 ctx->cur_picture_idx < ctx->pictures_per_frame;
989 ctx->cur_picture_idx++) {
/* the picture data size goes right after the one-byte header size */
991 picture_size_pos = buf + 1;
992 bytestream_put_byte (&buf, 0x40); // picture header size (in bits)
993 buf += 4; // picture data size will be stored here
994 bytestream_put_be16 (&buf, ctx->slices_per_picture);
995 bytestream_put_byte (&buf, av_log2(ctx->mbs_per_slice) << 4); // slice width and height in MBs
997 // seek table - will be filled during slice encoding
999 buf += ctx->slices_per_picture * 2;
/* per-slice quantisers are only searched when none is forced */
1002 if (!ctx->force_quant) {
1003 ret = avctx->execute2(avctx, find_quant_thread, (void*)pic, NULL,
1009 for (y = 0; y < ctx->mb_height; y++) {
1010 int mbs_per_slice = ctx->mbs_per_slice;
1011 for (x = mb = 0; x < ctx->mb_width; x += mbs_per_slice, mb++) {
1012 q = ctx->force_quant ? ctx->force_quant
1013 : ctx->slice_q[mb + y * ctx->slices_width];
/* shrink the slice width until it fits the macroblocks left in the row */
1015 while (ctx->mb_width - x < mbs_per_slice)
1016 mbs_per_slice >>= 1;
1018 bytestream_put_byte(&buf, slice_hdr_size << 3);
1020 buf += slice_hdr_size - 1;
/* grow the packet if fewer than two maximum-size slices of room remain */
1021 if (pkt_size <= buf - orig_buf + 2 * max_slice_size) {
1022 uint8_t *start = pkt->data;
1023 // Recompute new size according to max_slice_size
1025 int delta = 200 + (ctx->pictures_per_frame *
1026 ctx->slices_per_picture + 1) *
1027 max_slice_size - pkt_size;
1029 delta = FFMAX(delta, 2 * max_slice_size);
1030 ctx->frame_size_upper_bound += delta;
1033 avpriv_request_sample(avctx,
1034 "Packet too small: is %i,"
1035 " needs %i (slice: %i). "
1036 "Correct allocation",
1037 pkt_size, delta, max_slice_size);
1041 ret = av_grow_packet(pkt, delta);
/* the buffer may have moved: rebase every pointer into it */
1047 orig_buf = pkt->data + (orig_buf - start);
1048 buf = pkt->data + (buf - start);
1049 picture_size_pos = pkt->data + (picture_size_pos - start);
1050 slice_sizes = pkt->data + (slice_sizes - start);
1051 slice_hdr = pkt->data + (slice_hdr - start);
1052 tmp = pkt->data + (tmp - start);
1054 init_put_bits(&pb, buf, (pkt_size - (buf - orig_buf)));
1055 ret = encode_slice(avctx, pic, &pb, sizes, x, y, q,
/* fill in the slice header: quantiser, then plane sizes except the last */
1060 bytestream_put_byte(&slice_hdr, q);
1061 slice_size = slice_hdr_size + sizes[ctx->num_planes - 1];
1062 for (i = 0; i < ctx->num_planes - 1; i++) {
1063 bytestream_put_be16(&slice_hdr, sizes[i]);
1064 slice_size += sizes[i];
/* seek table entry for this slice */
1066 bytestream_put_be16(&slice_sizes, slice_size);
1067 buf += slice_size - slice_hdr_size;
1068 if (max_slice_size < slice_size)
1069 max_slice_size = slice_size;
/* picture size is counted from the picture header's first byte */
1073 picture_size = buf - (picture_size_pos - 1);
1074 bytestream_put_be32(&picture_size_pos, picture_size);
1078 frame_size = buf - orig_buf;
1079 bytestream_put_be32(&orig_buf, frame_size);
1081 pkt->size = frame_size;
1082 pkt->flags |= AV_PKT_FLAG_KEY;
/*
 * Release everything allocated by encode_init: the coded frame, the trellis
 * nodes of every thread, the per-thread data array and the slice quantiser
 * table. encode_init also calls this on its allocation-failure paths.
 * NOTE(review): lines around the per-thread loop (such as a guard on
 * ctx->tdata) and the return statement are not visible in this listing.
 */
1088 static av_cold int encode_close(AVCodecContext *avctx)
1090 ProresContext *ctx = avctx->priv_data;
1093 av_frame_free(&avctx->coded_frame);
1096 for (i = 0; i < avctx->thread_count; i++)
1097 av_freep(&ctx->tdata[i].nodes);
1099 av_freep(&ctx->tdata);
1100 av_freep(&ctx->slice_q);
/*
 * Copy an 8x8 tile of 16-bit samples from src into block in row-major order.
 * linesize is in bytes, hence the >> 1 when stepping the sample pointer.
 * NOTE(review): the statement following the copy loop is not visible in this
 * listing; presumably the transform itself is invoked through fdsp there --
 * confirm.
 */
1105 static void prores_fdct(FDCTDSPContext *fdsp, const uint16_t *src,
1106 int linesize, int16_t *block)
1109 const uint16_t *tsrc = src;
1111 for (y = 0; y < 8; y++) {
1112 for (x = 0; x < 8; x++)
1113 block[y * 8 + x] = tsrc[x];
1114 tsrc += linesize >> 1;
/*
 * Initialise the encoder: validate options, choose profile and quantisation
 * matrix, derive the macroblock and slice geometry, set up rate control or
 * the fixed quantiser, and compute the packet size upper bound.
 *
 * Returns a negative AVERROR code on invalid options or allocation failure.
 *
 * NOTE(review): several original lines (declarations, else branches, loop
 * exits, some conditions and the final return) are not visible in this
 * listing.
 */
1119 static av_cold int encode_init(AVCodecContext *avctx)
1121 ProresContext *ctx = avctx->priv_data;
1124 int min_quant, max_quant;
1125 int interlaced = !!(avctx->flags & CODEC_FLAG_INTERLACED_DCT);
1127 avctx->bits_per_raw_sample = 10;
1128 avctx->coded_frame = av_frame_alloc();
1129 if (!avctx->coded_frame)
1130 return AVERROR(ENOMEM);
1131 avctx->coded_frame->pict_type = AV_PICTURE_TYPE_I;
1132 avctx->coded_frame->key_frame = 1;
1134 ctx->fdct = prores_fdct;
1135 ctx->scantable = interlaced ? ff_prores_interlaced_scan
1136 : ff_prores_progressive_scan;
1137 ff_fdctdsp_init(&ctx->fdsp, avctx);
1139 mps = ctx->mbs_per_slice;
/* a power of two has exactly one bit set, so mps & (mps - 1) must be zero */
1140 if (mps & (mps - 1)) {
1141 av_log(avctx, AV_LOG_ERROR,
1142 "there should be an integer power of two MBs per slice\n");
1143 return AVERROR(EINVAL);
/* automatic profile: 4444 for alpha or non-subsampled chroma, else HQ */
1145 if (ctx->profile == PRORES_PROFILE_AUTO) {
1146 const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(avctx->pix_fmt);
1147 ctx->profile = (desc->flags & AV_PIX_FMT_FLAG_ALPHA ||
1148 !(desc->log2_chroma_w + desc->log2_chroma_h))
1149 ? PRORES_PROFILE_4444 : PRORES_PROFILE_HQ;
1150 av_log(avctx, AV_LOG_INFO, "Autoselected %s. It can be overridden "
1151 "through -profile option.\n", ctx->profile == PRORES_PROFILE_4444
1152 ? "4:4:4:4 profile because of the used input colorspace"
1153 : "HQ profile to keep best quality");
1155 if (av_pix_fmt_desc_get(avctx->pix_fmt)->flags & AV_PIX_FMT_FLAG_ALPHA) {
1156 if (ctx->profile != PRORES_PROFILE_4444) {
1157 // this profile will not encode alpha: warn and disable it
1158 av_log(avctx, AV_LOG_WARNING, "Profile selected will not "
1159 "encode alpha. Override with -profile if needed.\n");
1160 ctx->alpha_bits = 0;
/* only multiples of 8 are accepted; the option range caps the value at 16 */
1162 if (ctx->alpha_bits & 7) {
1163 av_log(avctx, AV_LOG_ERROR, "alpha bits should be 0, 8 or 16\n");
1164 return AVERROR(EINVAL);
1167 ctx->alpha_bits = 0;
1170 ctx->chroma_factor = avctx->pix_fmt == AV_PIX_FMT_YUV422P10
1173 ctx->profile_info = prores_profile_info + ctx->profile;
1174 ctx->num_planes = 3 + !!ctx->alpha_bits;
/* picture size in 16x16 macroblocks, rounded up */
1176 ctx->mb_width = FFALIGN(avctx->width, 16) >> 4;
/* two height computations: per 32 lines or per 16 lines; the condition
 * selecting between them is not visible here (presumably interlacing) */
1179 ctx->mb_height = FFALIGN(avctx->height, 32) >> 5;
1181 ctx->mb_height = FFALIGN(avctx->height, 16) >> 4;
/* full-width slices, plus one narrower slice per set bit of the remainder */
1183 ctx->slices_width = ctx->mb_width / mps;
1184 ctx->slices_width += av_popcount(ctx->mb_width - ctx->slices_width * mps);
1185 ctx->slices_per_picture = ctx->mb_height * ctx->slices_width;
1186 ctx->pictures_per_frame = 1 + interlaced;
/* quant_sel == -1 means: use the matrix belonging to the profile */
1188 if (ctx->quant_sel == -1)
1189 ctx->quant_mat = prores_quant_matrices[ctx->profile_info->quant];
1191 ctx->quant_mat = prores_quant_matrices[ctx->quant_sel];
1193 if (strlen(ctx->vendor) != 4) {
1194 av_log(avctx, AV_LOG_ERROR, "vendor ID should be 4 bytes\n");
1195 return AVERROR_INVALIDDATA;
/* a non-zero global quality selects a fixed quantiser for every slice */
1198 ctx->force_quant = avctx->global_quality / FF_QP2LAMBDA;
1199 if (!ctx->force_quant) {
/* no explicit budget: derive it from the profile table and picture size */
1200 if (!ctx->bits_per_mb) {
1201 for (i = 0; i < NUM_MB_LIMITS - 1; i++)
1202 if (prores_mb_limits[i] >= ctx->mb_width * ctx->mb_height *
1203 ctx->pictures_per_frame)
1205 ctx->bits_per_mb = ctx->profile_info->br_tab[i];
1206 } else if (ctx->bits_per_mb < 128) {
1207 av_log(avctx, AV_LOG_ERROR, "too few bits per MB, please set at least 128\n");
1208 return AVERROR_INVALIDDATA;
1211 min_quant = ctx->profile_info->min_quant;
1212 max_quant = ctx->profile_info->max_quant;
/* precompute the scaled matrix for every stored quantiser */
1213 for (i = min_quant; i < MAX_STORED_Q; i++) {
1214 for (j = 0; j < 64; j++)
1215 ctx->quants[i][j] = ctx->quant_mat[j] * i;
1218 ctx->slice_q = av_malloc(ctx->slices_per_picture * sizeof(*ctx->slice_q));
1219 if (!ctx->slice_q) {
1220 encode_close(avctx);
1221 return AVERROR(ENOMEM);
1224 ctx->tdata = av_mallocz(avctx->thread_count * sizeof(*ctx->tdata));
1226 encode_close(avctx);
1227 return AVERROR(ENOMEM);
1230 for (j = 0; j < avctx->thread_count; j++) {
1231 ctx->tdata[j].nodes = av_malloc((ctx->slices_width + 1)
1233 * sizeof(*ctx->tdata->nodes));
1234 if (!ctx->tdata[j].nodes) {
1235 encode_close(avctx);
1236 return AVERROR(ENOMEM);
/* the first trellis column is the zero-cost start state */
1238 for (i = min_quant; i < max_quant + 2; i++) {
1239 ctx->tdata[j].nodes[i].prev_node = -1;
1240 ctx->tdata[j].nodes[i].bits = 0;
1241 ctx->tdata[j].nodes[i].score = 0;
1247 if (ctx->force_quant > 64) {
1248 av_log(avctx, AV_LOG_ERROR, "too large quantiser, maximum is 64\n");
1249 return AVERROR_INVALIDDATA;
/* fixed quantiser: quants[0] holds its matrix, ls accumulates a size estimate */
1252 for (j = 0; j < 64; j++) {
1253 ctx->quants[0][j] = ctx->quant_mat[j] * ctx->force_quant;
1254 ls += av_log2((1 << 11) / ctx->quants[0][j]) * 2 + 1;
1257 ctx->bits_per_mb = ls * 8;
1258 if (ctx->chroma_factor == CFACTOR_Y444)
1259 ctx->bits_per_mb += ls * 4;
/* one slice header plus the slice budget in bytes, for every slice and one spare */
1262 ctx->frame_size_upper_bound = (ctx->pictures_per_frame *
1263 ctx->slices_per_picture + 1) *
1264 (2 + 2 * ctx->num_planes +
1265 (mps * ctx->bits_per_mb) / 8)
1268 if (ctx->alpha_bits) {
1269 // The alpha plane is run-coded and might exceed the bit budget.
1270 ctx->frame_size_upper_bound += (ctx->pictures_per_frame *
1271 ctx->slices_per_picture + 1) *
1272 /* num pixels per slice */ (ctx->mbs_per_slice * 256 *
1273 /* bits per pixel */ (1 + ctx->alpha_bits + 1) + 7 >> 3);
1276 avctx->codec_tag = ctx->profile_info->tag;
1278 av_log(avctx, AV_LOG_DEBUG,
1279 "profile %d, %d slices, interlacing: %s, %d bits per MB\n",
1280 ctx->profile, ctx->slices_per_picture * ctx->pictures_per_frame,
1281 interlaced ? "yes" : "no", ctx->bits_per_mb);
1282 av_log(avctx, AV_LOG_DEBUG, "frame size upper bound: %d\n",
1283 ctx->frame_size_upper_bound);
/* Byte offset of a ProresContext member, for the AVOption table. */
#define OFFSET(x) offsetof(ProresContext, x)
/*
 * Flags shared by every option of this encoder. The replacement list is
 * parenthesised so the macro stays a single operand inside larger
 * expressions such as `VE & flag`.
 */
#define VE (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM)
/*
 * Private options of the encoder. Each entry binds an option name to a
 * ProresContext member through OFFSET(); AV_OPT_TYPE_CONST entries are named
 * values for the option sharing their unit string ("profile" or
 * "quant_mat").
 *   mbs_per_slice - 1..MAX_MBS_PER_SLICE, default 8 (encode_init additionally
 *                   requires a power of two)
 *   profile       - PRORES_PROFILE_AUTO..PRORES_PROFILE_4444, default auto
 *   vendor        - string, default "Lavc" (encode_init requires 4 characters)
 *   bits_per_mb   - 0..8192, default 0 (derive from the profile)
 *   quant_mat     - -1..QUANT_MAT_DEFAULT, default -1 (use the profile's)
 *   alpha_bits    - 0..16, default 16
 */
1291 static const AVOption options[] = {
1292 { "mbs_per_slice", "macroblocks per slice", OFFSET(mbs_per_slice),
1293 AV_OPT_TYPE_INT, { .i64 = 8 }, 1, MAX_MBS_PER_SLICE, VE },
1294 { "profile", NULL, OFFSET(profile), AV_OPT_TYPE_INT,
1295 { .i64 = PRORES_PROFILE_AUTO },
1296 PRORES_PROFILE_AUTO, PRORES_PROFILE_4444, VE, "profile" },
1297 { "auto", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_AUTO },
1298 0, 0, VE, "profile" },
1299 { "proxy", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_PROXY },
1300 0, 0, VE, "profile" },
1301 { "lt", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_LT },
1302 0, 0, VE, "profile" },
1303 { "standard", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_STANDARD },
1304 0, 0, VE, "profile" },
1305 { "hq", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_HQ },
1306 0, 0, VE, "profile" },
1307 { "4444", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_4444 },
1308 0, 0, VE, "profile" },
1309 { "vendor", "vendor ID", OFFSET(vendor),
1310 AV_OPT_TYPE_STRING, { .str = "Lavc" }, CHAR_MIN, CHAR_MAX, VE },
1311 { "bits_per_mb", "desired bits per macroblock", OFFSET(bits_per_mb),
1312 AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 8192, VE },
1313 { "quant_mat", "quantiser matrix", OFFSET(quant_sel), AV_OPT_TYPE_INT,
1314 { .i64 = -1 }, -1, QUANT_MAT_DEFAULT, VE, "quant_mat" },
1315 { "auto", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = -1 },
1316 0, 0, VE, "quant_mat" },
1317 { "proxy", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_PROXY },
1318 0, 0, VE, "quant_mat" },
1319 { "lt", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_LT },
1320 0, 0, VE, "quant_mat" },
1321 { "standard", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_STANDARD },
1322 0, 0, VE, "quant_mat" },
1323 { "hq", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_HQ },
1324 0, 0, VE, "quant_mat" },
1325 { "default", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_DEFAULT },
1326 0, 0, VE, "quant_mat" },
1327 { "alpha_bits", "bits for alpha plane", OFFSET(alpha_bits), AV_OPT_TYPE_INT,
1328 { .i64 = 16 }, 0, 16, VE },
/*
 * AVClass describing the encoder's private context; referenced by the codec
 * definition through .priv_class.
 * NOTE(review): one initialiser line (presumably the link to the options
 * table) is not visible in this listing.
 */
1332 static const AVClass proresenc_class = {
1333 .class_name = "ProRes encoder",
1334 .item_name = av_default_item_name,
1336 .version = LIBAVUTIL_VERSION_INT,
/*
 * Codec registration entry for the "prores_ks" encoder: wires up the init,
 * close and encode callbacks, declares slice-threading capability, and lists
 * the accepted pixel formats (10-bit 4:2:2, 4:4:4 and 4:4:4 with alpha).
 */
1339 AVCodec ff_prores_ks_encoder = {
1340 .name = "prores_ks",
1341 .long_name = NULL_IF_CONFIG_SMALL("Apple ProRes (iCodec Pro)"),
1342 .type = AVMEDIA_TYPE_VIDEO,
1343 .id = AV_CODEC_ID_PRORES,
1344 .priv_data_size = sizeof(ProresContext),
1345 .init = encode_init,
1346 .close = encode_close,
1347 .encode2 = encode_frame,
1348 .capabilities = CODEC_CAP_SLICE_THREADS,
1349 .pix_fmts = (const enum AVPixelFormat[]) {
1350 AV_PIX_FMT_YUV422P10, AV_PIX_FMT_YUV444P10,
1351 AV_PIX_FMT_YUVA444P10, AV_PIX_FMT_NONE
1353 .priv_class = &proresenc_class,