4 * Copyright (c) 2012 Konstantin Shishkov
6 * This file is part of Libav.
8 * Libav is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
13 * Libav is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with Libav; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23 #include "libavutil/opt.h"
24 #include "libavutil/pixdesc.h"
28 #include "bytestream.h"
30 #include "proresdata.h"
/* Chroma format codes. encode_init() stores one of them in
 * ctx->chroma_factor and encode_frame() writes it into the top bits of the
 * frame flags byte (chroma_factor << 6). */
32 #define CFACTOR_Y422 2
33 #define CFACTOR_Y444 3
/* Largest value accepted by the "mbs_per_slice" option; also used to size
 * the per-slice coefficient buffers. */
35 #define MAX_MBS_PER_SLICE 8
/* Profile identifiers (the enumerator list is only partially visible here).
 * They bound the "profile" option and index prores_profile_info[]. */
40 PRORES_PROFILE_PROXY = 0,
42 PRORES_PROFILE_STANDARD,
/*
 * Quantisation matrices of 64 coefficients each. encode_init() selects one
 * row, indexed either by the "quant_mat" option (ctx->quant_sel) or by the
 * profile's .quant field, and encode_frame() writes the selected matrix
 * into the frame header when quant_sel is not QUANT_MAT_DEFAULT.
 * NOTE(review): row order presumably follows the QUANT_MAT_* constants,
 * whose definitions are not visible here - confirm.
 */
55 static const uint8_t prores_quant_matrices[][64] = {
57 4, 7, 9, 11, 13, 14, 15, 63,
58 7, 7, 11, 12, 14, 15, 63, 63,
59 9, 11, 13, 14, 15, 63, 63, 63,
60 11, 11, 13, 14, 63, 63, 63, 63,
61 11, 13, 14, 63, 63, 63, 63, 63,
62 13, 14, 63, 63, 63, 63, 63, 63,
63 13, 63, 63, 63, 63, 63, 63, 63,
64 63, 63, 63, 63, 63, 63, 63, 63,
67 4, 5, 6, 7, 9, 11, 13, 15,
68 5, 5, 7, 8, 11, 13, 15, 17,
69 6, 7, 9, 11, 13, 15, 15, 17,
70 7, 7, 9, 11, 13, 15, 17, 19,
71 7, 9, 11, 13, 14, 16, 19, 23,
72 9, 11, 13, 14, 16, 19, 23, 29,
73 9, 11, 13, 15, 17, 21, 28, 35,
74 11, 13, 16, 17, 21, 28, 35, 41,
77 4, 4, 5, 5, 6, 7, 7, 9,
78 4, 4, 5, 6, 7, 7, 9, 9,
79 5, 5, 6, 7, 7, 9, 9, 10,
80 5, 5, 6, 7, 7, 9, 9, 10,
81 5, 6, 7, 7, 8, 9, 10, 12,
82 6, 7, 7, 8, 9, 10, 12, 15,
83 6, 7, 7, 9, 10, 11, 14, 17,
84 7, 7, 9, 10, 11, 14, 17, 21,
87 4, 4, 4, 4, 4, 4, 4, 4,
88 4, 4, 4, 4, 4, 4, 4, 4,
89 4, 4, 4, 4, 4, 4, 4, 4,
90 4, 4, 4, 4, 4, 4, 4, 5,
91 4, 4, 4, 4, 4, 4, 5, 5,
92 4, 4, 4, 4, 4, 5, 5, 6,
93 4, 4, 4, 4, 5, 5, 6, 7,
94 4, 4, 4, 4, 5, 6, 7, 7,
97 4, 4, 4, 4, 4, 4, 4, 4,
98 4, 4, 4, 4, 4, 4, 4, 4,
99 4, 4, 4, 4, 4, 4, 4, 4,
100 4, 4, 4, 4, 4, 4, 4, 4,
101 4, 4, 4, 4, 4, 4, 4, 4,
102 4, 4, 4, 4, 4, 4, 4, 4,
103 4, 4, 4, 4, 4, 4, 4, 4,
104 4, 4, 4, 4, 4, 4, 4, 4,
/*
 * Macroblock-count thresholds. When no bits_per_mb is given, encode_init()
 * compares mb_width * mb_height * pictures_per_frame against these values
 * to choose which entry of a profile's br_tab[] to use.
 */
108 #define NUM_MB_LIMITS 4
109 static const int prores_mb_limits[NUM_MB_LIMITS] = {
110 1620, // up to 720x576
111 2700, // up to 960x720
112 6075, // up to 1440x1080
113 9216, // up to 2048x1152
/*
 * Per-profile parameters, indexed by the "profile" option value
 * (encode_init() uses prores_profile_info + ctx->profile).
 *   full_name - descriptive profile name
 *   tag       - value copied to avctx->codec_tag by encode_init()
 *   br_tab    - default bits per macroblock, one entry per
 *               prores_mb_limits[] threshold
 *   quant     - index into prores_quant_matrices[] used when the
 *               "quant_mat" option is left at -1
 * Other members (e.g. min_quant / max_quant, read elsewhere in this file)
 * are not visible in this listing.
 */
116 static const struct prores_profile {
117 const char *full_name;
121 int br_tab[NUM_MB_LIMITS];
123 } prores_profile_info[5] = {
125 .full_name = "proxy",
126 .tag = MKTAG('a', 'p', 'c', 'o'),
129 .br_tab = { 300, 242, 220, 194 },
130 .quant = QUANT_MAT_PROXY,
134 .tag = MKTAG('a', 'p', 'c', 's'),
137 .br_tab = { 720, 560, 490, 440 },
138 .quant = QUANT_MAT_LT,
141 .full_name = "standard",
142 .tag = MKTAG('a', 'p', 'c', 'n'),
145 .br_tab = { 1050, 808, 710, 632 },
146 .quant = QUANT_MAT_STANDARD,
149 .full_name = "high quality",
150 .tag = MKTAG('a', 'p', 'c', 'h'),
153 .br_tab = { 1566, 1216, 1070, 950 },
154 .quant = QUANT_MAT_HQ,
/* This entry reuses the HQ quantisation matrix. */
158 .tag = MKTAG('a', 'p', '4', 'h'),
161 .br_tab = { 2350, 1828, 1600, 1425 },
162 .quant = QUANT_MAT_HQ,
/* Number of trellis nodes reserved per slice: node groups are addressed as
 * (slice + 1) * TRELLIS_WIDTH + q, and the previous slice's group is
 * TRELLIS_WIDTH entries earlier. Also sizes slice_bits[] / slice_score[]. */
166 #define TRELLIS_WIDTH 16
/* Score used to mark a path as unusable; scores at or above it are not
 * accumulated further.
 * NOTE(review): the expansion is not parenthesised; current uses only
 * compare against it or assign it, so precedence is not an issue today. */
167 #define SCORE_LIMIT INT_MAX / 2
/* Number of rows in ctx->quants[]; quantisers at or above this value get
 * their matrix computed on demand into custom_q. */
176 #define MAX_STORED_Q 16
/*
 * Per-thread scratch state for the quantiser search; find_quant_thread()
 * picks the instance at ctx->tdata + threadnr.
 */
178 typedef struct ProresThreadData {
/* Transform coefficients of the current slice, one buffer per plane. */
179 DECLARE_ALIGNED(16, int16_t, blocks)[MAX_PLANES][64 * 4 * MAX_MBS_PER_SLICE];
/* 16x16 sample scratch area used by get_slice_data() for edge macroblocks. */
180 DECLARE_ALIGNED(16, uint16_t, emu_buf)[16 * 16];
/* Matrix storage for quantisers not covered by ctx->quants[]. */
181 int16_t custom_q[64];
/* Trellis nodes, allocated in encode_init() and freed in encode_close(). */
182 struct TrellisNode *nodes;
/*
 * Encoder private context (AVCodecContext.priv_data). Only part of the
 * member list is visible here; option-backed members such as profile,
 * vendor, bits_per_mb, quant_sel and alpha_bits are referenced by the
 * options table and by encode_init().
 */
185 typedef struct ProresContext {
/* Coefficient and edge-emulation scratch used by encode_slice(). */
187 DECLARE_ALIGNED(16, int16_t, blocks)[MAX_PLANES][64 * 4 * MAX_MBS_PER_SLICE];
188 DECLARE_ALIGNED(16, uint16_t, emu_buf)[16*16];
/* Precomputed quant_mat[] * q; row 0 holds the forced-quantiser matrix. */
189 int16_t quants[MAX_STORED_Q][64];
/* Matrix built on demand for quantisers >= MAX_STORED_Q. */
190 int16_t custom_q[64];
/* Selected row of prores_quant_matrices[]. */
191 const uint8_t *quant_mat;
/* Coefficient scan order: interlaced or progressive table. */
192 const uint8_t *scantable;
/* Forward transform callback; encode_init() sets it to prores_fdct. */
194 void (*fdct)(FDCTDSPContext *fdsp, const uint16_t *src,
195 int linesize, int16_t *block);
/* Picture size in macroblocks (width and height rounded up). */
199 int mb_width, mb_height;
201 int num_chroma_blocks, chroma_factor;
/* mb_height * slices_width. */
203 int slices_per_picture;
204 int pictures_per_frame; // 1 for progressive, 2 for interlaced
/* Initial packet size estimate; encode_frame() may increase it. */
215 int frame_size_upper_bound;
/* Entry of prores_profile_info[] selected by the "profile" option. */
218 const struct prores_profile *profile_info;
/* Array of avctx->thread_count per-thread work areas. */
222 ProresThreadData *tdata;
/*
 * Load the samples of one slice of a plane and transform them with
 * ctx->fdct, writing coefficients to blocks.
 *
 * src / linesize address the plane data at position (x, y); w and h are the
 * plane dimensions used for edge handling. emu_buf is a 16-sample-wide
 * scratch area used when a macroblock does not lie fully inside the plane.
 * blocks_per_mb determines the macroblock width in samples (4 per block).
 */
225 static void get_slice_data(ProresContext *ctx, const uint16_t *src,
226 int linesize, int x, int y, int w, int h,
227 int16_t *blocks, uint16_t *emu_buf,
228 int mbs_per_slice, int blocks_per_mb, int is_chroma)
230 const uint16_t *esrc;
231 const int mb_width = 4 * blocks_per_mb;
/* One iteration per macroblock; src advances by one macroblock width. */
235 for (i = 0; i < mbs_per_slice; i++, src += mb_width) {
/* Zero the coefficients of the remaining (mbs_per_slice - i) macroblocks.
 * NOTE(review): the guarding condition is not shown here - confirm. */
237 memset(blocks, 0, 64 * (mbs_per_slice - i) * blocks_per_mb
/* Macroblock lies completely inside the plane: use the source stride. */
241 if (x + mb_width <= w && y + 16 <= h) {
243 elinesize = linesize;
/* Edge case: the stride becomes that of emu_buf (16 samples, in bytes). */
248 elinesize = 16 * sizeof(*emu_buf);
/* Width and height of the part that is actually inside the plane. */
250 bw = FFMIN(w - x, mb_width);
251 bh = FFMIN(h - y, 16);
/* Copy the available rows, then extend each to the right by repeating
 * its last valid sample. */
253 for (j = 0; j < bh; j++) {
254 memcpy(emu_buf + j * 16,
255 (const uint8_t*)src + j * linesize,
257 pix = emu_buf[j * 16 + bw - 1];
258 for (k = bw; k < mb_width; k++)
259 emu_buf[j * 16 + k] = pix;
/* Rows past the bottom edge are copies of the last valid row (bh - 1). */
262 memcpy(emu_buf + j * 16,
263 emu_buf + (bh - 1) * 16,
264 mb_width * sizeof(*emu_buf));
/* Transform order A: top-left, top-right (when blocks_per_mb > 2),
 * bottom-left, bottom-right.
 * NOTE(review): the choice between order A and order B presumably
 * depends on is_chroma; the selecting condition is not visible here. */
267 ctx->fdct(&ctx->fdsp, esrc, elinesize, blocks);
269 if (blocks_per_mb > 2) {
270 ctx->fdct(&ctx->fdsp, esrc + 8, elinesize, blocks);
273 ctx->fdct(&ctx->fdsp, esrc + elinesize * 4, elinesize, blocks);
275 if (blocks_per_mb > 2) {
276 ctx->fdct(&ctx->fdsp, esrc + elinesize * 4 + 8, elinesize, blocks);
/* Transform order B: top-left, bottom-left, then the right-hand pair
 * when blocks_per_mb > 2. */
280 ctx->fdct(&ctx->fdsp, esrc, elinesize, blocks);
282 ctx->fdct(&ctx->fdsp, esrc + elinesize * 4, elinesize, blocks);
284 if (blocks_per_mb > 2) {
285 ctx->fdct(&ctx->fdsp, esrc + 8, elinesize, blocks);
287 ctx->fdct(&ctx->fdsp, esrc + elinesize * 4 + 8, elinesize, blocks);
/*
 * Copy the alpha samples of one slice (16 rows of 16 * mbs_per_slice
 * samples) into blocks, converting the sample range and padding the area
 * that lies outside the w x h plane.
 */
296 static void get_alpha_data(ProresContext *ctx, const uint16_t *src,
297 int linesize, int x, int y, int w, int h,
298 int16_t *blocks, int mbs_per_slice, int abits)
300 const int slice_width = 16 * mbs_per_slice;
301 int i, j, copy_w, copy_h;
/* Portion of the slice that lies inside the plane. */
303 copy_w = FFMIN(w - x, slice_width);
304 copy_h = FFMIN(h - y, 16);
305 for (i = 0; i < copy_h; i++) {
306 memcpy(blocks, src, copy_w * sizeof(*src));
/* Two alternative per-sample conversions follow.
 * NOTE(review): the selector (presumably abits) and the body of the
 * first loop are not visible here. */
308 for (j = 0; j < copy_w; j++)
311 for (j = 0; j < copy_w; j++)
/* Shift left by 6 and fill the low bits from the sample's high bits. */
312 blocks[j] = (blocks[j] << 6) | (blocks[j] >> 4);
/* Pad to the right with the last valid sample of the row. */
313 for (j = copy_w; j < slice_width; j++)
314 blocks[j] = blocks[copy_w - 1];
315 blocks += slice_width;
/* src counts uint16_t elements, hence half of linesize per row. */
316 src += linesize >> 1;
/* Rows below the plane: repeat the previously written row. */
318 for (; i < 16; i++) {
319 memcpy(blocks, blocks - slice_width, slice_width * sizeof(*blocks));
320 blocks += slice_width;
325 * Write an unsigned rice/exp golomb codeword.
/*
 * Emit val using the code described by the packed codebook byte:
 *   bits 0-1 : switch_bits - 1
 *   bits 2-4 : exp-Golomb order
 *   bits 5+  : Rice order
 * Values below switch_bits << rice_order use the Rice form, larger values
 * the exp-Golomb form. estimate_vlc() computes the matching bit length.
 */
327 static inline void encode_vlc_codeword(PutBitContext *pb, unsigned codebook, int val)
329 unsigned int rice_order, exp_order, switch_bits, switch_val;
332 /* number of prefix bits to switch between Rice and expGolomb */
333 switch_bits = (codebook & 3) + 1;
334 rice_order = codebook >> 5; /* rice code order */
335 exp_order = (codebook >> 2) & 7; /* exp golomb code order */
337 switch_val = switch_bits << rice_order;
/* exp-Golomb branch: rebase val, write a zero prefix and then val itself
 * in exponent + 1 bits. */
339 if (val >= switch_val) {
340 val -= switch_val - (1 << exp_order);
341 exponent = av_log2(val);
343 put_bits(pb, exponent - exp_order + switch_bits, 0);
344 put_bits(pb, exponent + 1, val);
/* Rice branch: the quotient is written as a run of zero bits, the
 * remainder in rice_order bits.
 * NOTE(review): the terminating bit and the guards around these writes
 * are not visible here. */
346 exponent = val >> rice_order;
349 put_bits(pb, exponent, 0);
352 put_sbits(pb, rice_order, val);
/* Sign mask of x via a right shift by 31.
 * NOTE(review): assumes a 32-bit int and an arithmetic right shift, giving
 * 0 for non-negative and -1 for negative values - confirm for the target
 * platforms. */
356 #define GET_SIGN(x) ((x) >> 31)
/* Map a signed value to a non-negative code: 0, -1, 1, -2, ... become
 * 0, 1, 2, 3, ... */
357 #define MAKE_CODE(x) (((x) << 1) ^ GET_SIGN(x))
/*
 * Write the DC coefficients of a slice. The first DC is coded directly;
 * later ones are coded as differences from the previous DC using an
 * adaptive codebook. blocks points at the first 64-coefficient block;
 * scale is the DC quantiser. estimate_dcs() mirrors this to count bits.
 */
359 static void encode_dcs(PutBitContext *pb, int16_t *blocks,
360 int blocks_per_slice, int scale)
363 int codebook = 3, code, dc, prev_dc, delta, sign, new_sign;
/* First DC: remove the 0x4000 offset, quantise, code with FIRST_DC_CB. */
365 prev_dc = (blocks[0] - 0x4000) / scale;
366 encode_vlc_codeword(pb, FIRST_DC_CB, MAKE_CODE(prev_dc));
/* NOTE(review): initialisation of sign and the per-iteration updates of
 * sign / prev_dc are not visible here. */
371 for (i = 1; i < blocks_per_slice; i++, blocks += 64) {
372 dc = (blocks[0] - 0x4000) / scale;
373 delta = dc - prev_dc;
374 new_sign = GET_SIGN(delta);
/* Negate delta when sign is -1 (sign is 0 or -1). */
375 delta = (delta ^ sign) - sign;
376 code = MAKE_CODE(delta);
377 encode_vlc_codeword(pb, ff_prores_dc_codebook[codebook], code);
/* Next codebook index: ceil(code / 2), capped at 3. */
378 codebook = (code + (code & 1)) >> 1;
379 codebook = FFMIN(codebook, 3);
/*
 * Write the AC coefficients of a slice as (run, level, sign) triples.
 * Coefficients are visited position by position in scan order, across all
 * blocks of the slice. qmat holds the quantiser for each coefficient.
 * estimate_acs() mirrors this to count bits.
 */
385 static void encode_acs(PutBitContext *pb, int16_t *blocks,
386 int blocks_per_slice,
387 int plane_size_factor,
388 const uint8_t *scan, const int16_t *qmat)
391 int run, level, run_cb, lev_cb;
392 int max_coeffs, abs_level;
/* Total coefficient count in the slice (64 per block). */
394 max_coeffs = blocks_per_slice << 6;
/* Initial run / level codebook selectors. */
395 run_cb = ff_prores_run_to_cb_index[4];
396 lev_cb = ff_prores_lev_to_cb_index[2];
/* Position 0 (DC) is skipped; the inner loop steps block by block. */
399 for (i = 1; i < 64; i++) {
400 for (idx = scan[i]; idx < max_coeffs; idx += 64) {
401 level = blocks[idx] / qmat[scan[i]];
/* NOTE(review): the zero-level test and run bookkeeping around the
 * following writes are not visible here. */
403 abs_level = FFABS(level);
404 encode_vlc_codeword(pb, ff_prores_ac_codebook[run_cb], run);
405 encode_vlc_codeword(pb, ff_prores_ac_codebook[lev_cb],
/* One sign bit. */
407 put_sbits(pb, 1, GET_SIGN(level));
/* Adapt codebooks from the last run (capped at 15) and level (capped at 9). */
409 run_cb = ff_prores_run_to_cb_index[FFMIN(run, 15)];
410 lev_cb = ff_prores_lev_to_cb_index[FFMIN(abs_level, 9)];
/*
 * Encode the DC and AC coefficients of one plane of a slice.
 * Returns the size of the data written, in bytes, derived from the change
 * in the bit position of pb.
 */
419 static int encode_slice_plane(ProresContext *ctx, PutBitContext *pb,
420 const uint16_t *src, int linesize,
421 int mbs_per_slice, int16_t *blocks,
422 int blocks_per_mb, int plane_size_factor,
425 int blocks_per_slice, saved_pos;
427 saved_pos = put_bits_count(pb);
428 blocks_per_slice = mbs_per_slice * blocks_per_mb;
/* qmat[0] is the DC quantiser. */
430 encode_dcs(pb, blocks, blocks_per_slice, qmat[0]);
431 encode_acs(pb, blocks, blocks_per_slice, plane_size_factor,
432 ctx->scantable, qmat);
/* NOTE(review): the >> 3 drops any partial byte, so the writer is
 * presumably flushed / byte-aligned before this point - confirm. */
435 return (put_bits_count(pb) - saved_pos) >> 3;
/*
 * Write the difference between alpha value cur and its predecessor prev.
 * Differences that are zero or outside +/- dsize are written in abits
 * bits; others are written as (magnitude - 1) followed by a sign bit.
 */
438 static void put_alpha_diff(PutBitContext *pb, int cur, int prev, int abits)
440 const int mask = (1 << abits) - 1;
/* Short-form width: 4 bits for 8-bit alpha, 7 bits otherwise. */
441 const int dbits = (abits == 8) ? 4 : 7;
/* Parsed as 1 << (dbits - 1): '-' binds tighter than '<<'. */
442 const int dsize = 1 << dbits - 1;
443 int diff = cur - prev;
/* NOTE(review): the adjustment applied in this case is not visible here. */
446 if (diff >= (1 << abits) - dsize)
/* Long form. */
448 if (diff < -dsize || diff > dsize || !diff) {
450 put_bits(pb, abits, diff);
/* Short form: magnitude minus one in dbits - 1 bits, then the sign. */
453 put_bits(pb, dbits - 1, FFABS(diff) - 1);
454 put_bits(pb, 1, diff < 0);
/*
 * Write the length of a run of identical alpha values, either in 4 bits or
 * in 15 bits.
 * NOTE(review): the conditions selecting the form and any flag bits are
 * not visible here.
 */
458 static void put_alpha_run(PutBitContext *pb, int run)
463 put_bits(pb, 4, run);
465 put_bits(pb, 15, run);
471 // todo alpha quantisation for high quants
/*
 * Encode the alpha samples of one slice (mbs_per_slice * 256 values) as
 * value differences and runs. Returns the number of bytes written, derived
 * from the change in the bit position of pb.
 */
472 static int encode_alpha_plane(ProresContext *ctx, PutBitContext *pb,
473 int mbs_per_slice, uint16_t *blocks,
476 const int abits = ctx->alpha_bits;
477 const int mask = (1 << abits) - 1;
478 const int num_coeffs = mbs_per_slice * 256;
479 int saved_pos = put_bits_count(pb);
/* The predictor starts at the maximum alpha value. */
480 int prev = mask, cur;
/* First sample is always written as a difference against prev. */
485 put_alpha_diff(pb, cur, prev, abits);
/* On a value change: write the pending run, then the new difference.
 * NOTE(review): the loop head and run counting are not visible here. */
490 put_alpha_run (pb, run);
491 put_alpha_diff(pb, cur, prev, abits);
497 } while (idx < num_coeffs);
/* Write the final run. */
499 put_alpha_run(pb, run);
501 return (put_bits_count(pb) - saved_pos) >> 3;
/*
 * Encode every plane of the slice starting at macroblock (x, y) with
 * quantiser quant. The size of each plane's data is stored in sizes[].
 * An error is logged when the output buffer is overrun.
 */
504 static int encode_slice(AVCodecContext *avctx, const AVFrame *pic,
506 int sizes[4], int x, int y, int quant,
509 ProresContext *ctx = avctx->priv_data;
513 int slice_width_factor = av_log2(mbs_per_slice);
514 int num_cblocks, pwidth, linesize, line_add;
515 int plane_factor, is_chroma;
/* line_add is the first source line of the current picture: for
 * two-picture frames it depends on the field order. */
518 if (ctx->pictures_per_frame == 1)
521 line_add = ctx->cur_picture_idx ^ !pic->top_field_first;
/* Select the quantisation matrix: row 0 for a forced quantiser, a
 * precomputed row when quant < MAX_STORED_Q, otherwise build it in
 * ctx->custom_q. */
523 if (ctx->force_quant) {
524 qmat = ctx->quants[0];
525 } else if (quant < MAX_STORED_Q) {
526 qmat = ctx->quants[quant];
528 qmat = ctx->custom_q;
529 for (i = 0; i < 64; i++)
530 qmat[i] = ctx->quant_mat[i] * quant;
533 for (i = 0; i < ctx->num_planes; i++) {
/* Planes 1 and 2 are chroma. */
534 is_chroma = (i == 1 || i == 2);
535 plane_factor = slice_width_factor + 2;
537 plane_factor += ctx->chroma_factor - 3;
/* Luma, alpha and 4:4:4 chroma use the full picture width ... */
538 if (!is_chroma || ctx->chroma_factor == CFACTOR_Y444) {
542 pwidth = avctx->width;
/* ... other chroma planes use half of it. */
547 pwidth = avctx->width >> 1;
/* The stride skips pictures_per_frame lines per row of the picture. */
550 linesize = pic->linesize[i] * ctx->pictures_per_frame;
551 src = (const uint16_t*)(pic->data[i] + yp * linesize +
552 line_add * pic->linesize[i]) + xp;
/* Transform-coded plane; ctx->blocks[0] is reused as scratch for
 * every plane. */
555 get_slice_data(ctx, src, linesize, xp, yp,
556 pwidth, avctx->height / ctx->pictures_per_frame,
557 ctx->blocks[0], ctx->emu_buf,
558 mbs_per_slice, num_cblocks, is_chroma);
559 sizes[i] = encode_slice_plane(ctx, pb, src, linesize,
560 mbs_per_slice, ctx->blocks[0],
561 num_cblocks, plane_factor,
/* Alpha plane: run/difference coded rather than transformed. */
564 get_alpha_data(ctx, src, linesize, xp, yp,
565 pwidth, avctx->height / ctx->pictures_per_frame,
566 ctx->blocks[0], mbs_per_slice, ctx->alpha_bits);
567 sizes[i] = encode_alpha_plane(ctx, pb, mbs_per_slice,
568 ctx->blocks[0], quant);
570 total_size += sizes[i];
/* A negative remainder means more was written than the buffer holds. */
571 if (put_bits_left(pb) < 0) {
572 av_log(avctx, AV_LOG_ERROR,
573 "Underestimated required buffer size.\n");
/*
 * Return the number of bits encode_vlc_codeword() would use for val with
 * the given codebook, without writing anything.
 */
580 static inline int estimate_vlc(unsigned codebook, int val)
582 unsigned int rice_order, exp_order, switch_bits, switch_val;
585 /* number of prefix bits to switch between Rice and expGolomb */
586 switch_bits = (codebook & 3) + 1;
587 rice_order = codebook >> 5; /* rice code order */
588 exp_order = (codebook >> 2) & 7; /* exp golomb code order */
590 switch_val = switch_bits << rice_order;
592 if (val >= switch_val) {
593 val -= switch_val - (1 << exp_order);
594 exponent = av_log2(val);
/* Zero prefix (exponent - exp_order + switch_bits) plus the value
 * itself (exponent + 1 bits). */
596 return exponent * 2 - exp_order + switch_bits + 1;
/* Rice form: quotient bits, one more bit, and rice_order remainder bits. */
598 return (val >> rice_order) + rice_order + 1;
/*
 * Count the bits encode_dcs() would produce for the DC coefficients of a
 * slice, and add the quantisation remainders to *error.
 */
602 static int estimate_dcs(int *error, int16_t *blocks, int blocks_per_slice,
606 int codebook = 3, code, dc, prev_dc, delta, sign, new_sign;
609 prev_dc = (blocks[0] - 0x4000) / scale;
610 bits = estimate_vlc(FIRST_DC_CB, MAKE_CODE(prev_dc));
/* The error contribution is the remainder of the quantising division. */
614 *error += FFABS(blocks[0] - 0x4000) % scale;
616 for (i = 1; i < blocks_per_slice; i++, blocks += 64) {
617 dc = (blocks[0] - 0x4000) / scale;
618 *error += FFABS(blocks[0] - 0x4000) % scale;
619 delta = dc - prev_dc;
620 new_sign = GET_SIGN(delta);
/* Negate delta when sign is -1. */
621 delta = (delta ^ sign) - sign;
622 code = MAKE_CODE(delta);
623 bits += estimate_vlc(ff_prores_dc_codebook[codebook], code);
/* Same codebook adaptation as in encode_dcs(). */
624 codebook = (code + (code & 1)) >> 1;
625 codebook = FFMIN(codebook, 3);
/*
 * Count the bits encode_acs() would produce for the AC coefficients of a
 * slice, and add the quantisation remainders to *error.
 */
633 static int estimate_acs(int *error, int16_t *blocks, int blocks_per_slice,
634 int plane_size_factor,
635 const uint8_t *scan, const int16_t *qmat)
638 int run, level, run_cb, lev_cb;
639 int max_coeffs, abs_level;
642 max_coeffs = blocks_per_slice << 6;
643 run_cb = ff_prores_run_to_cb_index[4];
644 lev_cb = ff_prores_lev_to_cb_index[2];
/* Same traversal as encode_acs(): scan position by position, across all
 * blocks of the slice. */
647 for (i = 1; i < 64; i++) {
648 for (idx = scan[i]; idx < max_coeffs; idx += 64) {
649 level = blocks[idx] / qmat[scan[i]];
650 *error += FFABS(blocks[idx]) % qmat[scan[i]];
/* NOTE(review): the zero-level test and run bookkeeping are not
 * visible here. */
652 abs_level = FFABS(level);
653 bits += estimate_vlc(ff_prores_ac_codebook[run_cb], run);
654 bits += estimate_vlc(ff_prores_ac_codebook[lev_cb],
657 run_cb = ff_prores_run_to_cb_index[FFMIN(run, 15)];
658 lev_cb = ff_prores_lev_to_cb_index[FFMIN(abs_level, 9)];
/*
 * Estimate the coded size in bits of one plane of a slice, using the
 * coefficients already stored in td->blocks[plane]. The quantisation error
 * is accumulated into *error. The result is rounded up to a multiple of 8.
 */
669 static int estimate_slice_plane(ProresContext *ctx, int *error, int plane,
670 const uint16_t *src, int linesize,
672 int blocks_per_mb, int plane_size_factor,
673 const int16_t *qmat, ProresThreadData *td)
675 int blocks_per_slice;
678 blocks_per_slice = mbs_per_slice * blocks_per_mb;
680 bits = estimate_dcs(error, td->blocks[plane], blocks_per_slice, qmat[0]);
681 bits += estimate_acs(error, td->blocks[plane], blocks_per_slice,
682 plane_size_factor, ctx->scantable, qmat);
684 return FFALIGN(bits, 8);
/*
 * Bit-count counterpart of put_alpha_diff(): classifies the difference
 * between cur and prev with the same tests.
 * NOTE(review): the returned sizes are not visible here.
 */
687 static int est_alpha_diff(int cur, int prev, int abits)
689 const int mask = (1 << abits) - 1;
690 const int dbits = (abits == 8) ? 4 : 7;
/* Parsed as 1 << (dbits - 1). */
691 const int dsize = 1 << dbits - 1;
692 int diff = cur - prev;
695 if (diff >= (1 << abits) - dsize)
697 if (diff < -dsize || diff > dsize || !diff)
/*
 * Estimate the coded size in bits of the alpha samples of one slice
 * (mbs_per_slice * 256 values), following the structure of
 * encode_alpha_plane().
 * NOTE(review): run accounting and the return statement are not visible
 * here.
 */
703 static int estimate_alpha_plane(ProresContext *ctx, int *error,
704 const uint16_t *src, int linesize,
705 int mbs_per_slice, int quant,
708 const int abits = ctx->alpha_bits;
709 const int mask = (1 << abits) - 1;
710 const int num_coeffs = mbs_per_slice * 256;
/* The predictor starts at the maximum alpha value. */
711 int prev = mask, cur;
718 bits = est_alpha_diff(cur, prev, abits);
729 bits += est_alpha_diff(cur, prev, abits);
735 } while (idx < num_coeffs);
/*
 * Evaluate all candidate quantisers for the slice at macroblock (x, y) and
 * extend the trellis by one stage. trellis_node is the index of this
 * slice's node group in td->nodes; the previous slice's group starts
 * TRELLIS_WIDTH entries earlier.
 * NOTE(review): the return statement is not visible here; the caller
 * treats the result as a node index for backtracking.
 */
747 static int find_slice_quant(AVCodecContext *avctx,
748 int trellis_node, int x, int y, int mbs_per_slice,
749 ProresThreadData *td)
751 ProresContext *ctx = avctx->priv_data;
752 int i, q, pq, xp, yp;
754 int slice_width_factor = av_log2(mbs_per_slice);
755 int num_cblocks[MAX_PLANES], pwidth;
756 int plane_factor[MAX_PLANES], is_chroma[MAX_PLANES];
757 const int min_quant = ctx->profile_info->min_quant;
758 const int max_quant = ctx->profile_info->max_quant;
759 int error, bits, bits_limit;
760 int mbs, prev, cur, new_score;
761 int slice_bits[TRELLIS_WIDTH], slice_score[TRELLIS_WIDTH];
764 int linesize[4], line_add;
/* First source line of the current picture (field order dependent). */
766 if (ctx->pictures_per_frame == 1)
769 line_add = ctx->cur_picture_idx ^ !ctx->pic->top_field_first;
/* Macroblocks in this row up to and including this slice; scales the
 * cumulative bit budget below. */
770 mbs = x + mbs_per_slice;
/* Load and transform every plane once; the results are reused for all
 * candidate quantisers. */
772 for (i = 0; i < ctx->num_planes; i++) {
773 is_chroma[i] = (i == 1 || i == 2);
774 plane_factor[i] = slice_width_factor + 2;
776 plane_factor[i] += ctx->chroma_factor - 3;
777 if (!is_chroma[i] || ctx->chroma_factor == CFACTOR_Y444) {
781 pwidth = avctx->width;
786 pwidth = avctx->width >> 1;
789 linesize[i] = ctx->pic->linesize[i] * ctx->pictures_per_frame;
790 src = (const uint16_t *)(ctx->pic->data[i] + yp * linesize[i] +
791 line_add * ctx->pic->linesize[i]) + xp;
794 get_slice_data(ctx, src, linesize[i], xp, yp,
795 pwidth, avctx->height / ctx->pictures_per_frame,
796 td->blocks[i], td->emu_buf,
797 mbs_per_slice, num_cblocks[i], is_chroma[i]);
799 get_alpha_data(ctx, src, linesize[i], xp, yp,
800 pwidth, avctx->height / ctx->pictures_per_frame,
801 td->blocks[i], mbs_per_slice, ctx->alpha_bits);
/* Reset this slice's nodes; slot max_quant + 1 is the extra
 * "overquant" candidate. */
805 for (q = min_quant; q < max_quant + 2; q++) {
806 td->nodes[trellis_node + q].prev_node = -1;
807 td->nodes[trellis_node + q].quant = q;
810 // todo: maybe perform coarser quantising to fit into frame size when needed
/* Size and error for each regular quantiser. */
811 for (q = min_quant; q <= max_quant; q++) {
/* Transform-coded planes (everything except alpha). */
814 for (i = 0; i < ctx->num_planes - !!ctx->alpha_bits; i++) {
815 bits += estimate_slice_plane(ctx, &error, i,
818 num_cblocks[i], plane_factor[i],
822 bits += estimate_alpha_plane(ctx, &error, src, linesize[3],
823 mbs_per_slice, q, td->blocks[3]);
/* NOTE(review): the action taken for over-long slices is not visible
 * here. */
824 if (bits > 65000 * 8)
827 slice_bits[q] = bits;
828 slice_score[q] = error;
/* If max_quant already fits the per-slice budget, the extra slot repeats
 * it with a score one higher. */
830 if (slice_bits[max_quant] <= ctx->bits_per_mb * mbs_per_slice) {
831 slice_bits[max_quant + 1] = slice_bits[max_quant];
832 slice_score[max_quant + 1] = slice_score[max_quant] + 1;
833 overquant = max_quant;
/* Otherwise try larger quantisers (up to 127) until the slice fits. */
835 for (q = max_quant + 1; q < 128; q++) {
838 if (q < MAX_STORED_Q) {
839 qmat = ctx->quants[q];
842 for (i = 0; i < 64; i++)
843 qmat[i] = ctx->quant_mat[i] * q;
845 for (i = 0; i < ctx->num_planes - !!ctx->alpha_bits; i++) {
846 bits += estimate_slice_plane(ctx, &error, i,
849 num_cblocks[i], plane_factor[i],
853 bits += estimate_alpha_plane(ctx, &error, src, linesize[3],
854 mbs_per_slice, q, td->blocks[3]);
855 if (bits <= ctx->bits_per_mb * mbs_per_slice)
859 slice_bits[max_quant + 1] = bits;
860 slice_score[max_quant + 1] = error;
/* The extra slot records the quantiser actually chosen for it. */
863 td->nodes[trellis_node + max_quant + 1].quant = overquant;
/* Trellis update: try every (previous node, current quantiser) pair. */
865 bits_limit = mbs * ctx->bits_per_mb;
866 for (pq = min_quant; pq < max_quant + 2; pq++) {
867 prev = trellis_node - TRELLIS_WIDTH + pq;
869 for (q = min_quant; q < max_quant + 2; q++) {
870 cur = trellis_node + q;
872 bits = td->nodes[prev].bits + slice_bits[q];
873 error = slice_score[q];
/* NOTE(review): the penalty for exceeding the cumulative budget is
 * not visible here. */
874 if (bits > bits_limit)
/* Saturating accumulation: a limit on either side stays a limit. */
877 if (td->nodes[prev].score < SCORE_LIMIT && error < SCORE_LIMIT)
878 new_score = td->nodes[prev].score + error;
880 new_score = SCORE_LIMIT;
/* Keep the path when the node is unset or the new score is not worse. */
881 if (td->nodes[cur].prev_node == -1 ||
882 td->nodes[cur].score >= new_score) {
884 td->nodes[cur].bits = bits;
885 td->nodes[cur].score = new_score;
886 td->nodes[cur].prev_node = prev;
/* Pick the node with the lowest score; on ties the later q wins (<=). */
891 error = td->nodes[trellis_node + min_quant].score;
892 pq = trellis_node + min_quant;
893 for (q = min_quant + 1; q < max_quant + 2; q++) {
894 if (td->nodes[trellis_node + q].score <= error) {
895 error = td->nodes[trellis_node + q].score;
896 pq = trellis_node + q;
/*
 * Worker run through avctx->execute2(): chooses the quantisers for all
 * slices of macroblock row jobnr, using the work area of thread threadnr,
 * and stores them in ctx->slice_q.
 */
903 static int find_quant_thread(AVCodecContext *avctx, void *arg,
904 int jobnr, int threadnr)
906 ProresContext *ctx = avctx->priv_data;
907 ProresThreadData *td = ctx->tdata + threadnr;
908 int mbs_per_slice = ctx->mbs_per_slice;
909 int x, y = jobnr, mb, q = 0;
/* Forward pass: one trellis stage per slice. */
911 for (x = mb = 0; x < ctx->mb_width; x += mbs_per_slice, mb++) {
/* NOTE(review): the loop body is not visible here; encode_frame()
 * halves mbs_per_slice under the same condition. */
912 while (ctx->mb_width - x < mbs_per_slice)
/* Slice mb uses node group (mb + 1) * TRELLIS_WIDTH. */
914 q = find_slice_quant(avctx,
915 (mb + 1) * TRELLIS_WIDTH, x, y,
/* Backward pass: follow prev_node links from the last slice's chosen
 * node and record each slice's quantiser. */
919 for (x = ctx->slices_width - 1; x >= 0; x--) {
920 ctx->slice_q[x + y * ctx->slices_width] = td->nodes[q].quant;
921 q = td->nodes[q].prev_node;
/*
 * Encode one frame into pkt: frame header, then per picture a picture
 * header, a table of slice sizes and the slices themselves. Each packet is
 * flagged as a key frame.
 */
927 static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
928 const AVFrame *pic, int *got_packet)
930 ProresContext *ctx = avctx->priv_data;
931 uint8_t *orig_buf, *buf, *slice_hdr, *slice_sizes, *tmp;
932 uint8_t *picture_size_pos;
934 int x, y, i, mb, q = 0;
935 int sizes[4] = { 0 };
/* Slice header size: 2 bytes plus a 16-bit size for every plane except
 * the last. */
936 int slice_hdr_size = 2 + 2 * (ctx->num_planes - 1);
937 int frame_size, picture_size, slice_size;
938 int pkt_size, ret, max_slice_size = 0;
942 #if FF_API_CODED_FRAME
943 FF_DISABLE_DEPRECATION_WARNINGS
944 avctx->coded_frame->pict_type = AV_PICTURE_TYPE_I;
945 avctx->coded_frame->key_frame = 1;
946 FF_ENABLE_DEPRECATION_WARNINGS
/* Allocate using the current estimate; it may be grown while encoding. */
949 pkt_size = ctx->frame_size_upper_bound;
951 if ((ret = ff_alloc_packet(pkt, pkt_size + FF_MIN_BUFFER_SIZE)) < 0) {
952 av_log(avctx, AV_LOG_ERROR, "Error getting output packet.\n");
956 orig_buf = pkt->data;
959 orig_buf += 4; // frame size
960 bytestream_put_be32 (&orig_buf, FRAME_ID); // frame container ID
/* Frame header. */
965 buf += 2; // frame header size will be stored here
966 bytestream_put_be16 (&buf, 0); // version 1
967 bytestream_put_buffer(&buf, ctx->vendor, 4);
968 bytestream_put_be16 (&buf, avctx->width);
969 bytestream_put_be16 (&buf, avctx->height);
/* Flags: chroma format in the top bits, plus the field order when
 * encoding interlaced. */
971 frame_flags = ctx->chroma_factor << 6;
972 if (avctx->flags & CODEC_FLAG_INTERLACED_DCT)
973 frame_flags |= pic->top_field_first ? 0x04 : 0x08;
974 bytestream_put_byte (&buf, frame_flags);
976 bytestream_put_byte (&buf, 0); // reserved
977 bytestream_put_byte (&buf, avctx->color_primaries);
978 bytestream_put_byte (&buf, avctx->color_trc);
979 bytestream_put_byte (&buf, avctx->colorspace);
/* 0x40 combined with the alpha depth in bytes (alpha_bits >> 3). */
980 bytestream_put_byte (&buf, 0x40 | (ctx->alpha_bits >> 3));
981 bytestream_put_byte (&buf, 0); // reserved
982 if (ctx->quant_sel != QUANT_MAT_DEFAULT) {
983 bytestream_put_byte (&buf, 0x03); // matrix flags - both matrices are present
984 // luma quantisation matrix
985 for (i = 0; i < 64; i++)
986 bytestream_put_byte(&buf, ctx->quant_mat[i]);
987 // chroma quantisation matrix
/* The same matrix is written for chroma as for luma. */
988 for (i = 0; i < 64; i++)
989 bytestream_put_byte(&buf, ctx->quant_mat[i]);
991 bytestream_put_byte (&buf, 0x00); // matrix flags - default matrices are used
993 bytestream_put_be16 (&tmp, buf - orig_buf); // write back frame header size
/* One pass per picture. */
995 for (ctx->cur_picture_idx = 0;
996 ctx->cur_picture_idx < ctx->pictures_per_frame;
997 ctx->cur_picture_idx++) {
/* Points just past the header-size byte, where the 32-bit picture size
 * is written once known. */
999 picture_size_pos = buf + 1;
1000 bytestream_put_byte (&buf, 0x40); // picture header size (in bits)
1001 buf += 4; // picture data size will be stored here
1002 bytestream_put_be16 (&buf, ctx->slices_per_picture);
1003 bytestream_put_byte (&buf, av_log2(ctx->mbs_per_slice) << 4); // slice width and height in MBs
1005 // seek table - will be filled during slice encoding
1007 buf += ctx->slices_per_picture * 2;
/* Without a forced quantiser, run the per-row quantiser search first. */
1010 if (!ctx->force_quant) {
1011 ret = avctx->execute2(avctx, find_quant_thread, NULL, NULL,
1017 for (y = 0; y < ctx->mb_height; y++) {
1018 int mbs_per_slice = ctx->mbs_per_slice;
1019 for (x = mb = 0; x < ctx->mb_width; x += mbs_per_slice, mb++) {
1020 q = ctx->force_quant ? ctx->force_quant
1021 : ctx->slice_q[mb + y * ctx->slices_width];
/* Halve the slice width until it fits the remaining macroblocks. */
1023 while (ctx->mb_width - x < mbs_per_slice)
1024 mbs_per_slice >>= 1;
/* Slice header size in bits. */
1026 bytestream_put_byte(&buf, slice_hdr_size << 3);
1028 buf += slice_hdr_size - 1;
/* Grow the packet when fewer than 2 * max_slice_size bytes remain. */
1029 if (pkt_size <= buf - orig_buf + 2 * max_slice_size) {
1030 uint8_t *start = pkt->data;
1031 // Recompute new size according to max_slice_size
1033 int delta = 200 + ctx->pictures_per_frame *
1034 ctx->slices_per_picture * max_slice_size -
1037 delta = FFMAX(delta, 2 * max_slice_size);
/* Remember the larger bound for later frames. */
1038 ctx->frame_size_upper_bound += delta;
1041 avpriv_request_sample(avctx,
1042 "Packet too small: is %i,"
1043 " needs %i (slice: %i). "
1044 "Correct allocation",
1045 pkt_size, delta, max_slice_size);
1049 ret = av_grow_packet(pkt, delta);
/* pkt->data may have moved: rebase every pointer into the packet. */
1055 orig_buf = pkt->data + (orig_buf - start);
1056 buf = pkt->data + (buf - start);
1057 picture_size_pos = pkt->data + (picture_size_pos - start);
1058 slice_sizes = pkt->data + (slice_sizes - start);
1059 slice_hdr = pkt->data + (slice_hdr - start);
1060 tmp = pkt->data + (tmp - start);
/* The bit writer covers the remainder of the packet. */
1062 init_put_bits(&pb, buf, (pkt_size - (buf - orig_buf)) * 8);
1063 ret = encode_slice(avctx, pic, &pb, sizes, x, y, q,
/* Fill in the slice header: quantiser, then plane sizes for all planes
 * but the last. */
1068 bytestream_put_byte(&slice_hdr, q);
1069 slice_size = slice_hdr_size + sizes[ctx->num_planes - 1];
1070 for (i = 0; i < ctx->num_planes - 1; i++) {
1071 bytestream_put_be16(&slice_hdr, sizes[i]);
1072 slice_size += sizes[i];
/* Record the total slice size in the seek table. */
1074 bytestream_put_be16(&slice_sizes, slice_size);
1075 buf += slice_size - slice_hdr_size;
1076 if (max_slice_size < slice_size)
1077 max_slice_size = slice_size;
/* NOTE(review): the offsets applied to the picture size differ between
 * the one-picture and two-picture cases (-6 vs +1) - confirm intent. */
1081 if (ctx->pictures_per_frame == 1)
1082 picture_size = buf - picture_size_pos - 6;
1084 picture_size = buf - picture_size_pos + 1;
1085 bytestream_put_be32(&picture_size_pos, picture_size);
/* Write the total frame size and finalise the packet. */
1089 frame_size = buf - orig_buf;
1090 bytestream_put_be32(&orig_buf, frame_size);
1092 pkt->size = frame_size;
1093 pkt->flags |= AV_PKT_FLAG_KEY;
/*
 * Release everything allocated by encode_init(): each thread's trellis
 * nodes, the thread data array and the slice quantiser array. It is also
 * called from encode_init() on allocation failure.
 */
1099 static av_cold int encode_close(AVCodecContext *avctx)
1101 ProresContext *ctx = avctx->priv_data;
/* NOTE(review): ctx->tdata is dereferenced here; a NULL guard is
 * presumably present on a line not shown - confirm. */
1105 for (i = 0; i < avctx->thread_count; i++)
1106 av_free(ctx->tdata[i].nodes);
1108 av_freep(&ctx->tdata);
1109 av_freep(&ctx->slice_q);
/*
 * Transform callback installed in ctx->fdct: copies an 8x8 area of 16-bit
 * samples from src into block.
 * NOTE(review): the transform call itself is presumably made through fdsp
 * on a line not shown here.
 */
1114 static void prores_fdct(FDCTDSPContext *fdsp, const uint16_t *src,
1115 int linesize, int16_t *block)
1118 const uint16_t *tsrc = src;
1120 for (y = 0; y < 8; y++) {
1121 for (x = 0; x < 8; x++)
1122 block[y * 8 + x] = tsrc[x];
/* tsrc counts uint16_t elements, hence half of linesize per row. */
1123 tsrc += linesize >> 1;
/*
 * Validate the options, derive the picture / slice geometry, select the
 * quantisation matrix and bit budget, allocate the per-thread search state
 * and compute the initial packet size estimate.
 * Returns a negative AVERROR code on invalid options or allocation failure.
 */
1128 static av_cold int encode_init(AVCodecContext *avctx)
1130 ProresContext *ctx = avctx->priv_data;
1133 int min_quant, max_quant;
1134 int interlaced = !!(avctx->flags & CODEC_FLAG_INTERLACED_DCT);
1136 avctx->bits_per_raw_sample = 10;
1138 ctx->fdct = prores_fdct;
1139 ctx->scantable = interlaced ? ff_prores_interlaced_scan
1140 : ff_prores_progressive_scan;
1141 ff_fdctdsp_init(&ctx->fdsp, avctx);
/* mps & (mps - 1) is non-zero unless mps is a power of two. */
1143 mps = ctx->mbs_per_slice;
1144 if (mps & (mps - 1)) {
1145 av_log(avctx, AV_LOG_ERROR,
1146 "there should be an integer power of two MBs per slice\n");
1147 return AVERROR(EINVAL);
/* Alpha is coded only for pixel formats that carry it; the depth must be
 * a multiple of 8. */
1149 if (av_pix_fmt_desc_get(avctx->pix_fmt)->flags & AV_PIX_FMT_FLAG_ALPHA) {
1150 if (ctx->alpha_bits & 7) {
1151 av_log(avctx, AV_LOG_ERROR, "alpha bits should be 0, 8 or 16\n");
1152 return AVERROR(EINVAL);
1154 avctx->bits_per_coded_sample = 32;
1156 ctx->alpha_bits = 0;
1159 ctx->chroma_factor = avctx->pix_fmt == AV_PIX_FMT_YUV422P10
1162 ctx->profile_info = prores_profile_info + ctx->profile;
1163 ctx->num_planes = 3 + !!ctx->alpha_bits;
/* Picture size in 16x16 macroblocks, rounded up. The height is either
 * aligned to 32 and divided by 32, or aligned to 16 and divided by 16.
 * NOTE(review): the selector (presumably interlaced) is not shown. */
1165 ctx->mb_width = FFALIGN(avctx->width, 16) >> 4;
1168 ctx->mb_height = FFALIGN(avctx->height, 32) >> 5;
1170 ctx->mb_height = FFALIGN(avctx->height, 16) >> 4;
/* Full-width slices per row, plus one narrower slice for each set bit of
 * the remaining macroblock count. */
1172 ctx->slices_width = ctx->mb_width / mps;
1173 ctx->slices_width += av_popcount(ctx->mb_width - ctx->slices_width * mps);
1174 ctx->slices_per_picture = ctx->mb_height * ctx->slices_width;
1175 ctx->pictures_per_frame = 1 + interlaced;
/* quant_sel == -1 ("auto") uses the profile's default matrix. */
1177 if (ctx->quant_sel == -1)
1178 ctx->quant_mat = prores_quant_matrices[ctx->profile_info->quant];
1180 ctx->quant_mat = prores_quant_matrices[ctx->quant_sel];
/* Exactly four vendor bytes are written to the frame header. */
1182 if (strlen(ctx->vendor) != 4) {
1183 av_log(avctx, AV_LOG_ERROR, "vendor ID should be 4 bytes\n");
1184 return AVERROR_INVALIDDATA;
/* A non-zero global_quality forces a fixed quantiser. */
1187 ctx->force_quant = avctx->global_quality / FF_QP2LAMBDA;
1188 if (!ctx->force_quant) {
/* No explicit budget: take it from the profile table using the first
 * size limit that covers the frame. */
1189 if (!ctx->bits_per_mb) {
1190 for (i = 0; i < NUM_MB_LIMITS - 1; i++)
1191 if (prores_mb_limits[i] >= ctx->mb_width * ctx->mb_height *
1192 ctx->pictures_per_frame)
1194 ctx->bits_per_mb = ctx->profile_info->br_tab[i];
1195 } else if (ctx->bits_per_mb < 128) {
1196 av_log(avctx, AV_LOG_ERROR, "too few bits per MB, please set at least 128\n");
1197 return AVERROR_INVALIDDATA;
/* Precompute the scaled matrices for the storable quantisers. */
1200 min_quant = ctx->profile_info->min_quant;
1201 max_quant = ctx->profile_info->max_quant;
1202 for (i = min_quant; i < MAX_STORED_Q; i++) {
1203 for (j = 0; j < 64; j++)
1204 ctx->quants[i][j] = ctx->quant_mat[j] * i;
/* One quantiser per slice of a picture. */
1207 ctx->slice_q = av_malloc(ctx->slices_per_picture * sizeof(*ctx->slice_q));
1208 if (!ctx->slice_q) {
1209 encode_close(avctx);
1210 return AVERROR(ENOMEM);
/* Zero-initialised, so unallocated nodes pointers are NULL. */
1213 ctx->tdata = av_mallocz(avctx->thread_count * sizeof(*ctx->tdata));
1215 encode_close(avctx);
1216 return AVERROR(ENOMEM);
/* Per thread: trellis nodes for slices_width + 1 node groups. */
1219 for (j = 0; j < avctx->thread_count; j++) {
1220 ctx->tdata[j].nodes = av_malloc((ctx->slices_width + 1)
1222 * sizeof(*ctx->tdata->nodes));
1223 if (!ctx->tdata[j].nodes) {
1224 encode_close(avctx);
1225 return AVERROR(ENOMEM);
/* Group 0 is the trellis start: no predecessor, zero bits and score. */
1227 for (i = min_quant; i < max_quant + 2; i++) {
1228 ctx->tdata[j].nodes[i].prev_node = -1;
1229 ctx->tdata[j].nodes[i].bits = 0;
1230 ctx->tdata[j].nodes[i].score = 0;
/* Fixed-quantiser mode. */
1236 if (ctx->force_quant > 64) {
1237 av_log(avctx, AV_LOG_ERROR, "too large quantiser, maximum is 64\n");
1238 return AVERROR_INVALIDDATA;
/* Row 0 of quants[] holds the forced matrix; ls accumulates a
 * per-coefficient size estimate used for the bit budget. */
1241 for (j = 0; j < 64; j++) {
1242 ctx->quants[0][j] = ctx->quant_mat[j] * ctx->force_quant;
1243 ls += av_log2((1 << 11) / ctx->quants[0][j]) * 2 + 1;
1246 ctx->bits_per_mb = ls * 8;
1247 if (ctx->chroma_factor == CFACTOR_Y444)
1248 ctx->bits_per_mb += ls * 4;
/* Packet size estimate: per-slice header bytes plus the slice's bit
 * budget converted to bytes, for every slice of every picture. */
1251 ctx->frame_size_upper_bound = ctx->pictures_per_frame *
1252 ctx->slices_per_picture *
1253 (2 + 2 * ctx->num_planes +
1254 (mps * ctx->bits_per_mb) / 8)
1257 if (ctx->alpha_bits) {
1258 // The alpha plane is run-coded and might exceed the bit budget.
1259 ctx->frame_size_upper_bound += ctx->pictures_per_frame *
1260 ctx->slices_per_picture *
1261 /* num pixels per slice */ (ctx->mbs_per_slice * 256 *
/* '+' binds tighter than '>>': 7 is added to the product before the
 * shift, i.e. the bit count is rounded up to bytes. */
1262 /* bits per pixel */ (1 + ctx->alpha_bits + 1) + 7 >> 3);
1265 avctx->codec_tag = ctx->profile_info->tag;
1267 av_log(avctx, AV_LOG_DEBUG,
1268 "profile %d, %d slices, interlacing: %s, %d bits per MB\n",
1269 ctx->profile, ctx->slices_per_picture * ctx->pictures_per_frame,
1270 interlaced ? "yes" : "no", ctx->bits_per_mb);
1271 av_log(avctx, AV_LOG_DEBUG, "frame size upper bound: %d\n",
1272 ctx->frame_size_upper_bound);
/* Byte offset of an option's backing field inside ProresContext. */
1277 #define OFFSET(x) offsetof(ProresContext, x)
/* Flags shared by all options below: video + encoding parameter. */
1278 #define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
/*
 * Private encoder options. AV_OPT_TYPE_CONST entries give symbolic names
 * to the values of the option that shares their unit string ("profile" or
 * "quant_mat").
 */
1280 static const AVOption options[] = {
/* Slice width in macroblocks; encode_init() requires a power of two. */
1281 { "mbs_per_slice", "macroblocks per slice", OFFSET(mbs_per_slice),
1282 AV_OPT_TYPE_INT, { .i64 = 8 }, 1, MAX_MBS_PER_SLICE, VE },
/* Index into prores_profile_info[]. */
1283 { "profile", NULL, OFFSET(profile), AV_OPT_TYPE_INT,
1284 { .i64 = PRORES_PROFILE_STANDARD },
1285 PRORES_PROFILE_PROXY, PRORES_PROFILE_4444, VE, "profile" },
1286 { "proxy", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_PROXY },
1287 0, 0, VE, "profile" },
1288 { "lt", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_LT },
1289 0, 0, VE, "profile" },
1290 { "standard", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_STANDARD },
1291 0, 0, VE, "profile" },
1292 { "hq", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_HQ },
1293 0, 0, VE, "profile" },
1294 { "4444", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_4444 },
1295 0, 0, VE, "profile" },
/* Written to the frame header; encode_init() requires exactly 4 bytes. */
1296 { "vendor", "vendor ID", OFFSET(vendor),
1297 AV_OPT_TYPE_STRING, { .str = "Lavc" }, CHAR_MIN, CHAR_MAX, VE },
/* 0 selects the profile default; other values below 128 are rejected
 * by encode_init(). */
1298 { "bits_per_mb", "desired bits per macroblock", OFFSET(bits_per_mb),
1299 AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 8192, VE },
/* -1 ("auto") uses the matrix named by the selected profile. */
1300 { "quant_mat", "quantiser matrix", OFFSET(quant_sel), AV_OPT_TYPE_INT,
1301 { .i64 = -1 }, -1, QUANT_MAT_DEFAULT, VE, "quant_mat" },
1302 { "auto", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = -1 },
1303 0, 0, VE, "quant_mat" },
1304 { "proxy", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_PROXY },
1305 0, 0, VE, "quant_mat" },
1306 { "lt", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_LT },
1307 0, 0, VE, "quant_mat" },
1308 { "standard", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_STANDARD },
1309 0, 0, VE, "quant_mat" },
1310 { "hq", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_HQ },
1311 0, 0, VE, "quant_mat" },
1312 { "default", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_DEFAULT },
1313 0, 0, VE, "quant_mat" },
/* encode_init() rejects values that are not a multiple of 8 and forces
 * 0 for pixel formats without alpha. */
1314 { "alpha_bits", "bits for alpha plane", OFFSET(alpha_bits), AV_OPT_TYPE_INT,
1315 { .i64 = 16 }, 0, 16, VE },
/* AVClass describing the encoder's private context; referenced by
 * ff_prores_encoder.priv_class. */
1319 static const AVClass proresenc_class = {
1320 .class_name = "ProRes encoder",
1321 .item_name = av_default_item_name,
1323 .version = LIBAVUTIL_VERSION_INT,
/* Codec registration entry: binds the init / encode / close callbacks,
 * declares slice-threading support and lists the accepted pixel formats. */
1326 AVCodec ff_prores_encoder = {
1328 .long_name = NULL_IF_CONFIG_SMALL("Apple ProRes (iCodec Pro)"),
1329 .type = AVMEDIA_TYPE_VIDEO,
1330 .id = AV_CODEC_ID_PRORES,
1331 .priv_data_size = sizeof(ProresContext),
1332 .init = encode_init,
1333 .close = encode_close,
1334 .encode2 = encode_frame,
1335 .capabilities = CODEC_CAP_SLICE_THREADS,
/* Pixel format list, terminated by AV_PIX_FMT_NONE. */
1336 .pix_fmts = (const enum AVPixelFormat[]) {
1337 AV_PIX_FMT_YUV422P10, AV_PIX_FMT_YUV444P10,
1338 AV_PIX_FMT_YUVA444P10, AV_PIX_FMT_NONE
1340 .priv_class = &proresenc_class,