4 * Copyright (c) 2012 Konstantin Shishkov
6 * This file is part of Libav.
8 * Libav is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
13 * Libav is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with Libav; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23 #include "libavutil/opt.h"
24 #include "libavutil/pixdesc.h"
28 #include "bytestream.h"
30 #include "proresdata.h"
/* Chroma format codes. ctx->chroma_factor holds one of these; encode_frame
 * shifts it into the top bits of the frame flags byte. */
32 #define CFACTOR_Y422 2
33 #define CFACTOR_Y444 3
/* Largest number of macroblocks a slice may contain. It sizes the coefficient
 * buffers and is the upper limit of the "mbs_per_slice" option. */
35 #define MAX_MBS_PER_SLICE 8
/* Profile identifiers. The value is stored in ctx->profile by the "profile"
 * option and used by encode_init as an index into prores_profile_info[]. */
40 PRORES_PROFILE_PROXY = 0,
42 PRORES_PROFILE_STANDARD,
/*
 * Base quantisation matrices, 64 coefficients each.
 *
 * encode_init selects one row either through ctx->quant_sel or through the
 * .quant field of the active profile; the chosen row is then multiplied by
 * the quantiser to build the working matrices and, when a non-default matrix
 * is selected, written verbatim into the frame header by encode_frame.
 */
55 static const uint8_t prores_quant_matrices[][64] = {
57 4, 7, 9, 11, 13, 14, 15, 63,
58 7, 7, 11, 12, 14, 15, 63, 63,
59 9, 11, 13, 14, 15, 63, 63, 63,
60 11, 11, 13, 14, 63, 63, 63, 63,
61 11, 13, 14, 63, 63, 63, 63, 63,
62 13, 14, 63, 63, 63, 63, 63, 63,
63 13, 63, 63, 63, 63, 63, 63, 63,
64 63, 63, 63, 63, 63, 63, 63, 63,
67 4, 5, 6, 7, 9, 11, 13, 15,
68 5, 5, 7, 8, 11, 13, 15, 17,
69 6, 7, 9, 11, 13, 15, 15, 17,
70 7, 7, 9, 11, 13, 15, 17, 19,
71 7, 9, 11, 13, 14, 16, 19, 23,
72 9, 11, 13, 14, 16, 19, 23, 29,
73 9, 11, 13, 15, 17, 21, 28, 35,
74 11, 13, 16, 17, 21, 28, 35, 41,
77 4, 4, 5, 5, 6, 7, 7, 9,
78 4, 4, 5, 6, 7, 7, 9, 9,
79 5, 5, 6, 7, 7, 9, 9, 10,
80 5, 5, 6, 7, 7, 9, 9, 10,
81 5, 6, 7, 7, 8, 9, 10, 12,
82 6, 7, 7, 8, 9, 10, 12, 15,
83 6, 7, 7, 9, 10, 11, 14, 17,
84 7, 7, 9, 10, 11, 14, 17, 21,
87 4, 4, 4, 4, 4, 4, 4, 4,
88 4, 4, 4, 4, 4, 4, 4, 4,
89 4, 4, 4, 4, 4, 4, 4, 4,
90 4, 4, 4, 4, 4, 4, 4, 5,
91 4, 4, 4, 4, 4, 4, 5, 5,
92 4, 4, 4, 4, 4, 5, 5, 6,
93 4, 4, 4, 4, 5, 5, 6, 7,
94 4, 4, 4, 4, 5, 6, 7, 7,
97 4, 4, 4, 4, 4, 4, 4, 4,
98 4, 4, 4, 4, 4, 4, 4, 4,
99 4, 4, 4, 4, 4, 4, 4, 4,
100 4, 4, 4, 4, 4, 4, 4, 4,
101 4, 4, 4, 4, 4, 4, 4, 4,
102 4, 4, 4, 4, 4, 4, 4, 4,
103 4, 4, 4, 4, 4, 4, 4, 4,
104 4, 4, 4, 4, 4, 4, 4, 4,
/*
 * Macroblock-count thresholds, in ascending order.
 *
 * When no explicit bits_per_mb is given, encode_init compares the number of
 * macroblocks per frame against these limits and uses the index of the first
 * limit that is large enough to pick a column of the profile's br_tab[].
 */
108 #define NUM_MB_LIMITS 4
109 static const int prores_mb_limits[NUM_MB_LIMITS] = {
110 1620, // up to 720x576
111 2700, // up to 960x720
112 6075, // up to 1440x1080
113 9216, // up to 2048x1152
/*
 * Per-profile encoder parameters, indexed by ctx->profile.
 *
 *   full_name : human readable profile name
 *   tag       : FourCC copied to avctx->codec_tag by encode_init
 *   br_tab    : default bits per macroblock, one entry per prores_mb_limits[]
 *               threshold
 *   quant     : row of prores_quant_matrices[] used when the "quant_mat"
 *               option is left at its automatic setting
 */
116 static const struct prores_profile {
117 const char *full_name;
121 int br_tab[NUM_MB_LIMITS];
123 } prores_profile_info[5] = {
125 .full_name = "proxy",
126 .tag = MKTAG('a', 'p', 'c', 'o'),
129 .br_tab = { 300, 242, 220, 194 },
130 .quant = QUANT_MAT_PROXY,
134 .tag = MKTAG('a', 'p', 'c', 's'),
137 .br_tab = { 720, 560, 490, 440 },
138 .quant = QUANT_MAT_LT,
141 .full_name = "standard",
142 .tag = MKTAG('a', 'p', 'c', 'n'),
145 .br_tab = { 1050, 808, 710, 632 },
146 .quant = QUANT_MAT_STANDARD,
149 .full_name = "high quality",
150 .tag = MKTAG('a', 'p', 'c', 'h'),
153 .br_tab = { 1566, 1216, 1070, 950 },
154 .quant = QUANT_MAT_HQ,
158 .tag = MKTAG('a', 'p', '4', 'h'),
161 .br_tab = { 2350, 1828, 1600, 1425 },
162 .quant = QUANT_MAT_HQ,
/* Number of trellis nodes reserved per slice: the node groups of consecutive
 * slices are TRELLIS_WIDTH entries apart in the per-thread node array. */
#define TRELLIS_WIDTH 16
/* Saturation value for trellis scores. The expansion is parenthesised so the
 * macro keeps its value when used inside a larger expression. */
#define SCORE_LIMIT   (INT_MAX / 2)

/* Number of precomputed quantiser matrices kept in ProresContext.quants. */
#define MAX_STORED_Q 16
/*
 * Scratch state owned by one worker thread during the quantiser search.
 *
 *   blocks   : transformed coefficients of the current slice, one buffer per
 *              plane (filled by find_slice_quant)
 *   emu_buf  : 16x16 pixel buffer used for edge emulation in get_slice_data
 *   custom_q : scratch quantiser matrix
 *              NOTE(review): presumably the per-thread counterpart of
 *              ProresContext.custom_q - confirm against find_slice_quant
 *   nodes    : trellis nodes, allocated in encode_init
 */
178 typedef struct ProresThreadData {
179 DECLARE_ALIGNED(16, int16_t, blocks)[MAX_PLANES][64 * 4 * MAX_MBS_PER_SLICE];
180 DECLARE_ALIGNED(16, uint16_t, emu_buf)[16 * 16];
181 int16_t custom_q[64];
182 struct TrellisNode *nodes;
/* Private encoder state, stored in avctx->priv_data. */
185 typedef struct ProresContext {
/* coefficient and edge-emulation buffers used by encode_slice */
187 DECLARE_ALIGNED(16, int16_t, blocks)[MAX_PLANES][64 * 4 * MAX_MBS_PER_SLICE];
188 DECLARE_ALIGNED(16, uint16_t, emu_buf)[16*16];
/* quant_mat scaled by each quantiser index; row 0 holds the forced quantiser */
189 int16_t quants[MAX_STORED_Q][64];
/* matrix built on the fly in encode_slice for quantisers >= MAX_STORED_Q */
190 int16_t custom_q[64];
/* selected row of prores_quant_matrices[] */
191 const uint8_t *quant_mat;
/* coefficient scan order, chosen in encode_init from the interlacing flag */
192 const uint8_t *scantable;
/* 8x8 forward transform entry point; encode_init sets it to prores_fdct */
194 void (*fdct)(FDCTDSPContext *fdsp, const uint16_t *src,
195 ptrdiff_t linesize, int16_t *block);
/* picture size in macroblocks */
199 int mb_width, mb_height;
201 int num_chroma_blocks, chroma_factor;
203 int slices_per_picture;
204 int pictures_per_frame; // 1 for progressive, 2 for interlaced
/* packet size allocated per frame; may be enlarged by encode_frame */
215 int frame_size_upper_bound;
218 const struct prores_profile *profile_info;
/* one ProresThreadData per thread, allocated in encode_init */
222 ProresThreadData *tdata;
/*
 * Transform the macroblocks of one slice of a single plane into DCT blocks.
 *
 * Each macroblock is handed to ctx->fdct in 8x8 units. A macroblock that lies
 * completely inside the w x h plane is read in place; otherwise its available
 * part is copied into emu_buf and the last valid column and row are
 * replicated to fill the rest.
 *
 *   src, linesize : first pixel of the slice and the row stride in bytes
 *   x, y, w, h    : slice position and plane dimensions in pixels
 *   blocks        : destination coefficient storage
 *   emu_buf       : 16x16 scratch buffer for edge emulation
 *   blocks_per_mb : blocks per macroblock; the macroblock is
 *                   4 * blocks_per_mb pixels wide and values above 2 add
 *                   the second 8-pixel column of blocks
 *   is_chroma     : NOTE(review): presumably selects between the two block
 *                   orderings found at the end of the loop body - confirm
 */
225 static void get_slice_data(ProresContext *ctx, const uint16_t *src,
226 ptrdiff_t linesize, int x, int y, int w, int h,
227 int16_t *blocks, uint16_t *emu_buf,
228 int mbs_per_slice, int blocks_per_mb, int is_chroma)
230 const uint16_t *esrc;
231 const int mb_width = 4 * blocks_per_mb;
235 for (i = 0; i < mbs_per_slice; i++, src += mb_width) {
/* clear the coefficient storage of all macroblocks not yet processed */
237 memset(blocks, 0, 64 * (mbs_per_slice - i) * blocks_per_mb
/* macroblock lies completely inside the plane */
241 if (x + mb_width <= w && y + 16 <= h) {
243 elinesize = linesize;
/* edge macroblock: work on the 16-pixel-wide scratch buffer instead */
248 elinesize = 16 * sizeof(*emu_buf);
250 bw = FFMIN(w - x, mb_width);
251 bh = FFMIN(h - y, 16);
253 for (j = 0; j < bh; j++) {
254 memcpy(emu_buf + j * 16,
255 (const uint8_t*)src + j * linesize,
/* replicate the last valid pixel of the row to the right */
257 pix = emu_buf[j * 16 + bw - 1];
258 for (k = bw; k < mb_width; k++)
259 emu_buf[j * 16 + k] = pix;
/* replicate the last valid row downwards */
262 memcpy(emu_buf + j * 16,
263 emu_buf + (bh - 1) * 16,
264 mb_width * sizeof(*emu_buf));
267 ctx->fdct(&ctx->fdsp, esrc, elinesize, blocks);
269 if (blocks_per_mb > 2) {
270 ctx->fdct(&ctx->fdsp, esrc + 8, elinesize, blocks);
/* elinesize is in bytes and esrc counts 16-bit samples, so elinesize * 4
 * advances by eight rows */
273 ctx->fdct(&ctx->fdsp, esrc + elinesize * 4, elinesize, blocks);
275 if (blocks_per_mb > 2) {
276 ctx->fdct(&ctx->fdsp, esrc + elinesize * 4 + 8, elinesize, blocks);
280 ctx->fdct(&ctx->fdsp, esrc, elinesize, blocks);
282 ctx->fdct(&ctx->fdsp, esrc + elinesize * 4, elinesize, blocks);
284 if (blocks_per_mb > 2) {
285 ctx->fdct(&ctx->fdsp, esrc + 8, elinesize, blocks);
287 ctx->fdct(&ctx->fdsp, esrc + elinesize * 4 + 8, elinesize, blocks);
/*
 * Copy the alpha samples of one slice into blocks as a 16-row raster of
 * 16 * mbs_per_slice samples per row.
 *
 * Only the part of the slice inside the w x h plane is read from src; each
 * row is padded on the right with its last valid sample and missing rows at
 * the bottom are filled by repeating the previous row. The samples are
 * rescaled according to abits before padding.
 *
 *   src, linesize : first alpha sample of the slice and row stride in bytes
 *   x, y, w, h    : slice position and plane dimensions in pixels
 *   blocks        : destination, 16 * 16 * mbs_per_slice samples
 *   abits         : alpha bit depth to produce
 */
296 static void get_alpha_data(ProresContext *ctx, const uint16_t *src,
297 ptrdiff_t linesize, int x, int y, int w, int h,
298 int16_t *blocks, int mbs_per_slice, int abits)
300 const int slice_width = 16 * mbs_per_slice;
301 int i, j, copy_w, copy_h;
303 copy_w = FFMIN(w - x, slice_width);
304 copy_h = FFMIN(h - y, 16);
305 for (i = 0; i < copy_h; i++) {
306 memcpy(blocks, src, copy_w * sizeof(*src));
308 for (j = 0; j < copy_w; j++)
/* widen the sample by 6 bits, refilling the low bits from its high bits
 * (the supported pixel formats carry 10-bit samples) */
311 for (j = 0; j < copy_w; j++)
312 blocks[j] = (blocks[j] << 6) | (blocks[j] >> 4);
/* pad the row to the full slice width with the last valid sample */
313 for (j = copy_w; j < slice_width; j++)
314 blocks[j] = blocks[copy_w - 1];
315 blocks += slice_width;
/* linesize is in bytes, src counts 16-bit samples */
316 src += linesize >> 1;
/* pad the slice to 16 rows by repeating the previous row */
318 for (; i < 16; i++) {
319 memcpy(blocks, blocks - slice_width, slice_width * sizeof(*blocks));
320 blocks += slice_width;
325 * Write an unsigned rice/exp golomb codeword.
/*
 * Emit val using the adaptive Rice / exp-Golomb code described by codebook.
 *
 * codebook packs three fields: bits 0-1 hold switch_bits - 1, bits 2-4 the
 * exp-Golomb order and the bits from 5 upwards the Rice order. Values of at
 * least switch_bits << rice_order take the exp-Golomb branch, smaller values
 * the Rice branch.
 */
327 static inline void encode_vlc_codeword(PutBitContext *pb, unsigned codebook, int val)
329 unsigned int rice_order, exp_order, switch_bits, switch_val;
332 /* number of prefix bits to switch between Rice and expGolomb */
333 switch_bits = (codebook & 3) + 1;
334 rice_order = codebook >> 5; /* rice code order */
335 exp_order = (codebook >> 2) & 7; /* exp golomb code order */
337 switch_val = switch_bits << rice_order;
339 if (val >= switch_val) {
/* rebase val so that the smallest exp-Golomb value is 1 << exp_order */
340 val -= switch_val - (1 << exp_order);
341 exponent = av_log2(val);
/* zero prefix, then val itself in exponent + 1 bits */
343 put_bits(pb, exponent - exp_order + switch_bits, 0);
344 put_bits(pb, exponent + 1, val);
/* Rice branch: the quotient is sent as a run of zero bits */
346 exponent = val >> rice_order;
349 put_bits(pb, exponent, 0);
352 put_sbits(pb, rice_order, val);
/* Sign mask of a 32-bit int: 0 for non-negative x and, with the arithmetic
 * right shift the encoder relies on, -1 for negative x. */
#define GET_SIGN(x)  ((x) >> 31)
/* Map a signed value to an unsigned code: non-negative x becomes 2 * x and
 * negative x becomes -2 * x - 1. A multiplication is used instead of a left
 * shift because left-shifting a negative signed value is undefined in C. */
#define MAKE_CODE(x) (((x) * 2) ^ GET_SIGN(x))
/*
 * Write the DC coefficients of all blocks of a slice plane.
 *
 * Every DC value has the 0x4000 bias removed and is divided by scale. The
 * first one is coded directly with FIRST_DC_CB; the following ones are coded
 * as differences to the previous DC with a codebook derived from the code
 * written just before (index capped at 3).
 *
 *   blocks           : coefficients, 64 per block, DC first
 *   blocks_per_slice : number of blocks to code
 *   scale            : DC quantiser
 */
359 static void encode_dcs(PutBitContext *pb, int16_t *blocks,
360 int blocks_per_slice, int scale)
363 int codebook = 3, code, dc, prev_dc, delta, sign, new_sign;
365 prev_dc = (blocks[0] - 0x4000) / scale;
366 encode_vlc_codeword(pb, FIRST_DC_CB, MAKE_CODE(prev_dc));
371 for (i = 1; i < blocks_per_slice; i++, blocks += 64) {
372 dc = (blocks[0] - 0x4000) / scale;
373 delta = dc - prev_dc;
374 new_sign = GET_SIGN(delta);
/* negate delta when sign is -1, leave it unchanged when sign is 0 */
375 delta = (delta ^ sign) - sign;
376 code = MAKE_CODE(delta);
377 encode_vlc_codeword(pb, ff_prores_dc_codebook[codebook], code);
/* next codebook index: half of the code, rounded up, at most 3 */
378 codebook = (code + (code & 1)) >> 1;
379 codebook = FFMIN(codebook, 3);
/*
 * Write the AC coefficients of a slice plane as run / level / sign symbols.
 *
 * Coefficients are visited scan position by scan position (1..63) and, for
 * each position, across all blocks of the slice (stride 64). Each one is
 * divided by the matching qmat entry. The codebooks for the next run and
 * level are chosen from the run and absolute level just coded, clamped to
 * 15 and 9 respectively.
 *
 *   scan : coefficient scan order
 *   qmat : quantiser matrix, indexed by coefficient position
 */
385 static void encode_acs(PutBitContext *pb, int16_t *blocks,
386 int blocks_per_slice,
387 int plane_size_factor,
388 const uint8_t *scan, const int16_t *qmat)
391 int run, level, run_cb, lev_cb;
392 int max_coeffs, abs_level;
394 max_coeffs = blocks_per_slice << 6;
/* initial codebooks */
395 run_cb = ff_prores_run_to_cb_index[4];
396 lev_cb = ff_prores_lev_to_cb_index[2];
399 for (i = 1; i < 64; i++) {
400 for (idx = scan[i]; idx < max_coeffs; idx += 64) {
401 level = blocks[idx] / qmat[scan[i]];
403 abs_level = FFABS(level);
404 encode_vlc_codeword(pb, ff_prores_ac_codebook[run_cb], run);
405 encode_vlc_codeword(pb, ff_prores_ac_codebook[lev_cb],
/* one sign bit: GET_SIGN yields -1 for negative levels, 0 otherwise */
407 put_sbits(pb, 1, GET_SIGN(level));
409 run_cb = ff_prores_run_to_cb_index[FFMIN(run, 15)];
410 lev_cb = ff_prores_lev_to_cb_index[FFMIN(abs_level, 9)];
/*
 * Code one colour plane of a slice: DC coefficients first, then the AC
 * coefficients in ctx->scantable order.
 *
 * Returns the number of whole bytes added to pb, computed from the bit
 * position before and after coding.
 */
419 static int encode_slice_plane(ProresContext *ctx, PutBitContext *pb,
420 const uint16_t *src, ptrdiff_t linesize,
421 int mbs_per_slice, int16_t *blocks,
422 int blocks_per_mb, int plane_size_factor,
425 int blocks_per_slice, saved_pos;
427 saved_pos = put_bits_count(pb);
428 blocks_per_slice = mbs_per_slice * blocks_per_mb;
/* qmat[0] is the DC quantiser */
430 encode_dcs(pb, blocks, blocks_per_slice, qmat[0]);
431 encode_acs(pb, blocks, blocks_per_slice, plane_size_factor,
432 ctx->scantable, qmat);
435 return (put_bits_count(pb) - saved_pos) >> 3;
/*
 * Write the change from the previous alpha value prev to cur.
 *
 * Two forms exist: a long one carrying abits bits and a short one carrying
 * dbits - 1 magnitude bits (|diff| - 1) followed by one sign bit. The long
 * form is used when the difference is zero or outside [-dsize, dsize].
 */
438 static void put_alpha_diff(PutBitContext *pb, int cur, int prev, int abits)
440 const int mask = (1 << abits) - 1;
/* short-form width: 4 bits for 8-bit alpha, 7 bits otherwise */
441 const int dbits = (abits == 8) ? 4 : 7;
/* '-' binds tighter than '<<', so this is 1 << (dbits - 1) */
442 const int dsize = 1 << dbits - 1;
443 int diff = cur - prev;
446 if (diff >= (1 << abits) - dsize)
448 if (diff < -dsize || diff > dsize || !diff) {
450 put_bits(pb, abits, diff);
453 put_bits(pb, dbits - 1, FFABS(diff) - 1);
454 put_bits(pb, 1, diff < 0);
/* Write the length of a run of identical alpha values, using either a 4-bit
 * or a 15-bit field for the run value. */
458 static void put_alpha_run(PutBitContext *pb, int run)
463 put_bits(pb, 4, run);
465 put_bits(pb, 15, run);
471 // todo alpha quantisation for high quants
/*
 * Code the alpha samples of a slice as value differences and run lengths.
 *
 * The mbs_per_slice * 256 samples in blocks are walked in order; the
 * predictor starts at the maximum alpha value (all abits bits set). Value
 * changes are written with put_alpha_diff and runs with put_alpha_run.
 *
 * Returns the number of whole bytes added to pb.
 */
472 static int encode_alpha_plane(ProresContext *ctx, PutBitContext *pb,
473 int mbs_per_slice, uint16_t *blocks,
476 const int abits = ctx->alpha_bits;
477 const int mask = (1 << abits) - 1;
478 const int num_coeffs = mbs_per_slice * 256;
479 int saved_pos = put_bits_count(pb);
480 int prev = mask, cur;
485 put_alpha_diff(pb, cur, prev, abits);
490 put_alpha_run (pb, run);
491 put_alpha_diff(pb, cur, prev, abits);
497 } while (idx < num_coeffs);
499 put_alpha_run(pb, run);
501 return (put_bits_count(pb) - saved_pos) >> 3;
/*
 * Encode all planes of one slice with the given quantiser.
 *
 *   pic      : source frame
 *   sizes    : receives the coded size in bytes of each plane
 *   x, y     : slice position in macroblocks
 *   quant    : quantiser for this slice
 *
 * The quantiser matrix is ctx->quants[0] when a quantiser is forced, a
 * precomputed row of ctx->quants for quant < MAX_STORED_Q, and otherwise
 * built in ctx->custom_q. Colour planes go through get_slice_data and
 * encode_slice_plane, the alpha plane through get_alpha_data and
 * encode_alpha_plane. An error is logged when the bit writer runs out of
 * space.
 */
504 static int encode_slice(AVCodecContext *avctx, const AVFrame *pic,
506 int sizes[4], int x, int y, int quant,
509 ProresContext *ctx = avctx->priv_data;
513 int slice_width_factor = av_log2(mbs_per_slice);
514 int num_cblocks, pwidth, line_add;
516 int plane_factor, is_chroma;
/* line_add is the extra line offset applied to the plane start when the
 * frame holds two pictures */
519 if (ctx->pictures_per_frame == 1)
522 line_add = ctx->cur_picture_idx ^ !pic->top_field_first;
524 if (ctx->force_quant) {
525 qmat = ctx->quants[0];
526 } else if (quant < MAX_STORED_Q) {
527 qmat = ctx->quants[quant];
529 qmat = ctx->custom_q;
530 for (i = 0; i < 64; i++)
531 qmat[i] = ctx->quant_mat[i] * quant;
534 for (i = 0; i < ctx->num_planes; i++) {
535 is_chroma = (i == 1 || i == 2);
536 plane_factor = slice_width_factor + 2;
538 plane_factor += ctx->chroma_factor - 3;
/* luma, alpha and 4:4:4 chroma use the full picture width */
539 if (!is_chroma || ctx->chroma_factor == CFACTOR_Y444) {
543 pwidth = avctx->width;
/* subsampled chroma is half as wide */
548 pwidth = avctx->width >> 1;
/* with two pictures per frame each picture uses every second line */
551 linesize = pic->linesize[i] * ctx->pictures_per_frame;
552 src = (const uint16_t*)(pic->data[i] + yp * linesize +
553 line_add * pic->linesize[i]) + xp;
556 get_slice_data(ctx, src, linesize, xp, yp,
557 pwidth, avctx->height / ctx->pictures_per_frame,
558 ctx->blocks[0], ctx->emu_buf,
559 mbs_per_slice, num_cblocks, is_chroma);
560 sizes[i] = encode_slice_plane(ctx, pb, src, linesize,
561 mbs_per_slice, ctx->blocks[0],
562 num_cblocks, plane_factor,
565 get_alpha_data(ctx, src, linesize, xp, yp,
566 pwidth, avctx->height / ctx->pictures_per_frame,
567 ctx->blocks[0], mbs_per_slice, ctx->alpha_bits);
568 sizes[i] = encode_alpha_plane(ctx, pb, mbs_per_slice,
569 ctx->blocks[0], quant);
571 total_size += sizes[i];
572 if (put_bits_left(pb) < 0) {
573 av_log(avctx, AV_LOG_ERROR,
574 "Underestimated required buffer size.\n");
/*
 * Return the number of bits needed to code val with the given codebook,
 * without writing anything.
 *
 * The codebook fields are decoded exactly as in encode_vlc_codeword: bits 0-1
 * hold switch_bits - 1, bits 2-4 the exp-Golomb order and the bits from 5
 * upwards the Rice order.
 */
581 static inline int estimate_vlc(unsigned codebook, int val)
583 unsigned int rice_order, exp_order, switch_bits, switch_val;
586 /* number of prefix bits to switch between Rice and expGolomb */
587 switch_bits = (codebook & 3) + 1;
588 rice_order = codebook >> 5; /* rice code order */
589 exp_order = (codebook >> 2) & 7; /* exp golomb code order */
591 switch_val = switch_bits << rice_order;
593 if (val >= switch_val) {
594 val -= switch_val - (1 << exp_order);
595 exponent = av_log2(val);
/* zero prefix of exponent - exp_order + switch_bits bits plus the
 * exponent + 1 value bits */
597 return exponent * 2 - exp_order + switch_bits + 1;
/* Rice cost: quotient bits, one more bit and rice_order remainder bits */
599 return (val >> rice_order) + rice_order + 1;
/*
 * Estimate the bit cost of the DC coefficients of a slice plane.
 *
 * Follows the same prediction and codebook adaptation as encode_dcs but sums
 * estimate_vlc results instead of writing bits. The remainder of each
 * unbiased DC value modulo scale is added to *error.
 */
603 static int estimate_dcs(int *error, int16_t *blocks, int blocks_per_slice,
607 int codebook = 3, code, dc, prev_dc, delta, sign, new_sign;
610 prev_dc = (blocks[0] - 0x4000) / scale;
611 bits = estimate_vlc(FIRST_DC_CB, MAKE_CODE(prev_dc));
615 *error += FFABS(blocks[0] - 0x4000) % scale;
617 for (i = 1; i < blocks_per_slice; i++, blocks += 64) {
618 dc = (blocks[0] - 0x4000) / scale;
619 *error += FFABS(blocks[0] - 0x4000) % scale;
620 delta = dc - prev_dc;
621 new_sign = GET_SIGN(delta);
/* negate delta when sign is -1, leave it unchanged when sign is 0 */
622 delta = (delta ^ sign) - sign;
623 code = MAKE_CODE(delta);
624 bits += estimate_vlc(ff_prores_dc_codebook[codebook], code);
625 codebook = (code + (code & 1)) >> 1;
626 codebook = FFMIN(codebook, 3);
/*
 * Estimate the bit cost of the AC coefficients of a slice plane.
 *
 * Walks the coefficients in the same order as encode_acs and uses the same
 * codebook adaptation, summing estimate_vlc results. The remainder of each
 * coefficient magnitude modulo its quantiser is added to *error.
 */
634 static int estimate_acs(int *error, int16_t *blocks, int blocks_per_slice,
635 int plane_size_factor,
636 const uint8_t *scan, const int16_t *qmat)
639 int run, level, run_cb, lev_cb;
640 int max_coeffs, abs_level;
643 max_coeffs = blocks_per_slice << 6;
644 run_cb = ff_prores_run_to_cb_index[4];
645 lev_cb = ff_prores_lev_to_cb_index[2];
648 for (i = 1; i < 64; i++) {
649 for (idx = scan[i]; idx < max_coeffs; idx += 64) {
650 level = blocks[idx] / qmat[scan[i]];
651 *error += FFABS(blocks[idx]) % qmat[scan[i]];
653 abs_level = FFABS(level);
654 bits += estimate_vlc(ff_prores_ac_codebook[run_cb], run);
655 bits += estimate_vlc(ff_prores_ac_codebook[lev_cb],
658 run_cb = ff_prores_run_to_cb_index[FFMIN(run, 15)];
659 lev_cb = ff_prores_lev_to_cb_index[FFMIN(abs_level, 9)];
/*
 * Estimate the coded size of one colour plane of a slice.
 *
 * Uses the coefficients already stored in td->blocks[plane] and adds the
 * quantisation remainder to *error. Returns the DC plus AC bit cost rounded
 * up to a multiple of 8.
 */
670 static int estimate_slice_plane(ProresContext *ctx, int *error, int plane,
671 const uint16_t *src, ptrdiff_t linesize,
673 int blocks_per_mb, int plane_size_factor,
674 const int16_t *qmat, ProresThreadData *td)
676 int blocks_per_slice;
679 blocks_per_slice = mbs_per_slice * blocks_per_mb;
681 bits = estimate_dcs(error, td->blocks[plane], blocks_per_slice, qmat[0]);
682 bits += estimate_acs(error, td->blocks[plane], blocks_per_slice,
683 plane_size_factor, ctx->scantable, qmat);
685 return FFALIGN(bits, 8);
/*
 * Bit-cost counterpart of put_alpha_diff: classifies the difference between
 * cur and prev with the same dbits / dsize thresholds.
 */
688 static int est_alpha_diff(int cur, int prev, int abits)
690 const int mask = (1 << abits) - 1;
691 const int dbits = (abits == 8) ? 4 : 7;
/* '-' binds tighter than '<<', so this is 1 << (dbits - 1) */
692 const int dsize = 1 << dbits - 1;
693 int diff = cur - prev;
696 if (diff >= (1 << abits) - dsize)
698 if (diff < -dsize || diff > dsize || !diff)
/*
 * Estimate the coded size of the alpha plane of a slice.
 *
 * Walks mbs_per_slice * 256 alpha samples with the predictor starting at the
 * maximum alpha value, accumulating est_alpha_diff costs.
 */
704 static int estimate_alpha_plane(ProresContext *ctx, int *error,
705 const uint16_t *src, ptrdiff_t linesize,
706 int mbs_per_slice, int quant,
709 const int abits = ctx->alpha_bits;
710 const int mask = (1 << abits) - 1;
711 const int num_coeffs = mbs_per_slice * 256;
712 int prev = mask, cur;
719 bits = est_alpha_diff(cur, prev, abits);
730 bits += est_alpha_diff(cur, prev, abits);
736 } while (idx < num_coeffs);
/*
 * Evaluate the candidate quantisers of one slice and extend the trellis.
 *
 *   trellis_node  : index of this slice's node group in td->nodes; the
 *                   previous slice's group starts TRELLIS_WIDTH entries
 *                   earlier
 *   x, y          : slice position in macroblocks
 *   mbs_per_slice : slice width in macroblocks
 *   td            : per-thread scratch data
 *
 * The planes of the slice are transformed once into td->blocks. For every
 * quantiser between the profile's min_quant and max_quant the coded size and
 * the quantisation error are estimated. One extra candidate is stored at
 * index max_quant + 1; its quantiser ("overquant") may exceed max_quant and
 * is searched up to 127. Finally each node of this slice is linked to the
 * predecessor giving the lowest accumulated score.
 */
748 static int find_slice_quant(AVCodecContext *avctx,
749 int trellis_node, int x, int y, int mbs_per_slice,
750 ProresThreadData *td)
752 ProresContext *ctx = avctx->priv_data;
753 int i, q, pq, xp, yp;
755 int slice_width_factor = av_log2(mbs_per_slice);
756 int num_cblocks[MAX_PLANES], pwidth;
757 int plane_factor[MAX_PLANES], is_chroma[MAX_PLANES];
758 const int min_quant = ctx->profile_info->min_quant;
759 const int max_quant = ctx->profile_info->max_quant;
760 int error, bits, bits_limit;
761 int mbs, prev, cur, new_score;
762 int slice_bits[TRELLIS_WIDTH], slice_score[TRELLIS_WIDTH];
765 int linesize[4], line_add;
767 if (ctx->pictures_per_frame == 1)
770 line_add = ctx->cur_picture_idx ^ !ctx->pic->top_field_first;
/* macroblocks of this row up to and including this slice; scales the
 * accumulated bit budget below */
771 mbs = x + mbs_per_slice;
/* fetch and transform the source data of every plane once */
773 for (i = 0; i < ctx->num_planes; i++) {
774 is_chroma[i] = (i == 1 || i == 2);
775 plane_factor[i] = slice_width_factor + 2;
777 plane_factor[i] += ctx->chroma_factor - 3;
778 if (!is_chroma[i] || ctx->chroma_factor == CFACTOR_Y444) {
782 pwidth = avctx->width;
787 pwidth = avctx->width >> 1;
790 linesize[i] = ctx->pic->linesize[i] * ctx->pictures_per_frame;
791 src = (const uint16_t *)(ctx->pic->data[i] + yp * linesize[i] +
792 line_add * ctx->pic->linesize[i]) + xp;
795 get_slice_data(ctx, src, linesize[i], xp, yp,
796 pwidth, avctx->height / ctx->pictures_per_frame,
797 td->blocks[i], td->emu_buf,
798 mbs_per_slice, num_cblocks[i], is_chroma[i]);
800 get_alpha_data(ctx, src, linesize[i], xp, yp,
801 pwidth, avctx->height / ctx->pictures_per_frame,
802 td->blocks[i], mbs_per_slice, ctx->alpha_bits);
/* reset this slice's nodes, including the extra one at max_quant + 1 */
806 for (q = min_quant; q < max_quant + 2; q++) {
807 td->nodes[trellis_node + q].prev_node = -1;
808 td->nodes[trellis_node + q].quant = q;
811 // todo: maybe perform coarser quantising to fit into frame size when needed
/* cost of every regular quantiser */
812 for (q = min_quant; q <= max_quant; q++) {
815 for (i = 0; i < ctx->num_planes - !!ctx->alpha_bits; i++) {
816 bits += estimate_slice_plane(ctx, &error, i,
819 num_cblocks[i], plane_factor[i],
823 bits += estimate_alpha_plane(ctx, &error, src, linesize[3],
824 mbs_per_slice, q, td->blocks[3]);
825 if (bits > 65000 * 8)
828 slice_bits[q] = bits;
829 slice_score[q] = error;
/* if max_quant already fits the per-slice budget, the extra candidate
 * reuses its cost with a score that is one higher */
831 if (slice_bits[max_quant] <= ctx->bits_per_mb * mbs_per_slice) {
832 slice_bits[max_quant + 1] = slice_bits[max_quant];
833 slice_score[max_quant + 1] = slice_score[max_quant] + 1;
834 overquant = max_quant;
/* otherwise search larger quantisers until the slice fits */
836 for (q = max_quant + 1; q < 128; q++) {
839 if (q < MAX_STORED_Q) {
840 qmat = ctx->quants[q];
843 for (i = 0; i < 64; i++)
844 qmat[i] = ctx->quant_mat[i] * q;
846 for (i = 0; i < ctx->num_planes - !!ctx->alpha_bits; i++) {
847 bits += estimate_slice_plane(ctx, &error, i,
850 num_cblocks[i], plane_factor[i],
854 bits += estimate_alpha_plane(ctx, &error, src, linesize[3],
855 mbs_per_slice, q, td->blocks[3]);
856 if (bits <= ctx->bits_per_mb * mbs_per_slice)
860 slice_bits[max_quant + 1] = bits;
861 slice_score[max_quant + 1] = error;
864 td->nodes[trellis_node + max_quant + 1].quant = overquant;
/* connect every node of this slice to its best predecessor */
866 bits_limit = mbs * ctx->bits_per_mb;
867 for (pq = min_quant; pq < max_quant + 2; pq++) {
868 prev = trellis_node - TRELLIS_WIDTH + pq;
870 for (q = min_quant; q < max_quant + 2; q++) {
871 cur = trellis_node + q;
873 bits = td->nodes[prev].bits + slice_bits[q];
874 error = slice_score[q];
875 if (bits > bits_limit)
/* scores saturate at SCORE_LIMIT instead of overflowing */
878 if (td->nodes[prev].score < SCORE_LIMIT && error < SCORE_LIMIT)
879 new_score = td->nodes[prev].score + error;
881 new_score = SCORE_LIMIT;
882 if (td->nodes[cur].prev_node == -1 ||
883 td->nodes[cur].score >= new_score) {
885 td->nodes[cur].bits = bits;
886 td->nodes[cur].score = new_score;
887 td->nodes[cur].prev_node = prev;
/* pick the node of this slice with the lowest score; ties go to the larger
 * quantiser index.
 * NOTE(review): presumably pq is the value returned to the caller - confirm */
892 error = td->nodes[trellis_node + min_quant].score;
893 pq = trellis_node + min_quant;
894 for (q = min_quant + 1; q < max_quant + 2; q++) {
895 if (td->nodes[trellis_node + q].score <= error) {
896 error = td->nodes[trellis_node + q].score;
897 pq = trellis_node + q;
/*
 * Worker run through avctx->execute2: chooses the quantisers for one
 * macroblock row.
 *
 *   jobnr    : macroblock row handled by this call
 *   threadnr : selects the ProresThreadData to use
 *
 * Each slice of the row is evaluated with find_slice_quant, using node group
 * (mb + 1) * TRELLIS_WIDTH for slice number mb. Afterwards the trellis is
 * traced back from the last result through the prev_node links and the
 * quantiser of each visited node is stored in ctx->slice_q for this row.
 */
904 static int find_quant_thread(AVCodecContext *avctx, void *arg,
905 int jobnr, int threadnr)
907 ProresContext *ctx = avctx->priv_data;
908 ProresThreadData *td = ctx->tdata + threadnr;
909 int mbs_per_slice = ctx->mbs_per_slice;
910 int x, y = jobnr, mb, q = 0;
912 for (x = mb = 0; x < ctx->mb_width; x += mbs_per_slice, mb++) {
913 while (ctx->mb_width - x < mbs_per_slice)
915 q = find_slice_quant(avctx,
916 (mb + 1) * TRELLIS_WIDTH, x, y,
/* walk the chosen path backwards, last slice first */
920 for (x = ctx->slices_width - 1; x >= 0; x--) {
921 ctx->slice_q[x + y * ctx->slices_width] = td->nodes[q].quant;
922 q = td->nodes[q].prev_node;
/*
 * Encode one frame into pkt.
 *
 * Layout written: frame size and container ID, frame header (version, vendor,
 * dimensions, flags, colour description, optional quantisation matrices),
 * then for each picture a picture header, a table of slice sizes and the
 * slices themselves. Size fields are left blank at first and filled in once
 * the corresponding data has been written.
 *
 * Unless a quantiser is forced, the per-slice quantisers are chosen by
 * find_quant_thread through avctx->execute2 before the slices are coded.
 * The packet is enlarged when the remaining space becomes too small.
 */
928 static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
929 const AVFrame *pic, int *got_packet)
931 ProresContext *ctx = avctx->priv_data;
932 uint8_t *orig_buf, *buf, *slice_hdr, *slice_sizes, *tmp;
933 uint8_t *picture_size_pos;
935 int x, y, i, mb, q = 0;
936 int sizes[4] = { 0 };
/* slice header: two bytes plus a 16-bit size for every plane but the last */
937 int slice_hdr_size = 2 + 2 * (ctx->num_planes - 1);
938 int frame_size, picture_size, slice_size;
939 int pkt_size, ret, max_slice_size = 0;
943 #if FF_API_CODED_FRAME
944 FF_DISABLE_DEPRECATION_WARNINGS
945 avctx->coded_frame->pict_type = AV_PICTURE_TYPE_I;
946 avctx->coded_frame->key_frame = 1;
947 FF_ENABLE_DEPRECATION_WARNINGS
950 pkt_size = ctx->frame_size_upper_bound;
952 if ((ret = ff_alloc_packet(pkt, pkt_size + AV_INPUT_BUFFER_MIN_SIZE)) < 0) {
953 av_log(avctx, AV_LOG_ERROR, "Error getting output packet.\n");
957 orig_buf = pkt->data;
960 orig_buf += 4; // frame size
961 bytestream_put_be32 (&orig_buf, FRAME_ID); // frame container ID
966 buf += 2; // frame header size will be stored here
967 bytestream_put_be16 (&buf, 0); // version 1
968 bytestream_put_buffer(&buf, ctx->vendor, 4);
969 bytestream_put_be16 (&buf, avctx->width);
970 bytestream_put_be16 (&buf, avctx->height);
/* chroma format in the top bits, field order in bits 2-3 */
972 frame_flags = ctx->chroma_factor << 6;
973 if (avctx->flags & AV_CODEC_FLAG_INTERLACED_DCT)
974 frame_flags |= pic->top_field_first ? 0x04 : 0x08;
975 bytestream_put_byte (&buf, frame_flags);
977 bytestream_put_byte (&buf, 0); // reserved
978 bytestream_put_byte (&buf, avctx->color_primaries);
979 bytestream_put_byte (&buf, avctx->color_trc);
980 bytestream_put_byte (&buf, avctx->colorspace);
/* low bits: alpha depth in bytes (0, 1 or 2) */
981 bytestream_put_byte (&buf, 0x40 | (ctx->alpha_bits >> 3));
982 bytestream_put_byte (&buf, 0); // reserved
983 if (ctx->quant_sel != QUANT_MAT_DEFAULT) {
984 bytestream_put_byte (&buf, 0x03); // matrix flags - both matrices are present
985 // luma quantisation matrix
986 for (i = 0; i < 64; i++)
987 bytestream_put_byte(&buf, ctx->quant_mat[i]);
988 // chroma quantisation matrix
/* the same matrix is written again for chroma */
989 for (i = 0; i < 64; i++)
990 bytestream_put_byte(&buf, ctx->quant_mat[i]);
992 bytestream_put_byte (&buf, 0x00); // matrix flags - default matrices are used
994 bytestream_put_be16 (&tmp, buf - orig_buf); // write back frame header size
996 for (ctx->cur_picture_idx = 0;
997 ctx->cur_picture_idx < ctx->pictures_per_frame;
998 ctx->cur_picture_idx++) {
1000 picture_size_pos = buf + 1;
1001 bytestream_put_byte (&buf, 0x40); // picture header size (in bits)
1002 buf += 4; // picture data size will be stored here
1003 bytestream_put_be16 (&buf, ctx->slices_per_picture);
1004 bytestream_put_byte (&buf, av_log2(ctx->mbs_per_slice) << 4); // slice width and height in MBs
1006 // seek table - will be filled during slice encoding
1008 buf += ctx->slices_per_picture * 2;
1011 if (!ctx->force_quant) {
1012 ret = avctx->execute2(avctx, find_quant_thread, NULL, NULL,
1018 for (y = 0; y < ctx->mb_height; y++) {
1019 int mbs_per_slice = ctx->mbs_per_slice;
1020 for (x = mb = 0; x < ctx->mb_width; x += mbs_per_slice, mb++) {
1021 q = ctx->force_quant ? ctx->force_quant
1022 : ctx->slice_q[mb + y * ctx->slices_width];
/* halve the slice width until it fits into what is left of the row */
1024 while (ctx->mb_width - x < mbs_per_slice)
1025 mbs_per_slice >>= 1;
1027 bytestream_put_byte(&buf, slice_hdr_size << 3);
1029 buf += slice_hdr_size - 1;
/* less than two maximum-size slices of room left: enlarge the packet */
1030 if (pkt_size <= buf - orig_buf + 2 * max_slice_size) {
1031 uint8_t *start = pkt->data;
1032 // Recompute new size according to max_slice_size
1034 int delta = 200 + ctx->pictures_per_frame *
1035 ctx->slices_per_picture * max_slice_size -
1038 delta = FFMAX(delta, 2 * max_slice_size);
1039 ctx->frame_size_upper_bound += delta;
1042 avpriv_request_sample(avctx,
1043 "Packet too small: is %i,"
1044 " needs %i (slice: %i). "
1045 "Correct allocation",
1046 pkt_size, delta, max_slice_size);
1050 ret = av_grow_packet(pkt, delta);
/* rebase every pointer into the packet onto the current pkt->data */
1056 orig_buf = pkt->data + (orig_buf - start);
1057 buf = pkt->data + (buf - start);
1058 picture_size_pos = pkt->data + (picture_size_pos - start);
1059 slice_sizes = pkt->data + (slice_sizes - start);
1060 slice_hdr = pkt->data + (slice_hdr - start);
1061 tmp = pkt->data + (tmp - start);
1063 init_put_bits(&pb, buf, (pkt_size - (buf - orig_buf)) * 8);
1064 ret = encode_slice(avctx, pic, &pb, sizes, x, y, q,
/* fill in the slice header: quantiser, then the plane sizes; the size of
 * the last plane is not stored */
1069 bytestream_put_byte(&slice_hdr, q);
1070 slice_size = slice_hdr_size + sizes[ctx->num_planes - 1];
1071 for (i = 0; i < ctx->num_planes - 1; i++) {
1072 bytestream_put_be16(&slice_hdr, sizes[i]);
1073 slice_size += sizes[i];
/* seek table entry for this slice */
1075 bytestream_put_be16(&slice_sizes, slice_size);
1076 buf += slice_size - slice_hdr_size;
1077 if (max_slice_size < slice_size)
1078 max_slice_size = slice_size;
1082 if (ctx->pictures_per_frame == 1)
1083 picture_size = buf - picture_size_pos - 6;
1085 picture_size = buf - picture_size_pos + 1;
1086 bytestream_put_be32(&picture_size_pos, picture_size);
/* write back the total frame size and finish the packet */
1090 frame_size = buf - orig_buf;
1091 bytestream_put_be32(&orig_buf, frame_size);
1093 pkt->size = frame_size;
1094 pkt->flags |= AV_PKT_FLAG_KEY;
/*
 * Release everything allocated by encode_init: the trellis node array of
 * every thread, the per-thread data array and the slice quantiser table.
 * Also called from encode_init on allocation failure.
 */
1100 static av_cold int encode_close(AVCodecContext *avctx)
1102 ProresContext *ctx = avctx->priv_data;
1106 for (i = 0; i < avctx->thread_count; i++)
1107 av_free(ctx->tdata[i].nodes);
/* av_freep also resets the pointers */
1109 av_freep(&ctx->tdata);
1110 av_freep(&ctx->slice_q);
/*
 * Transform entry point installed in ProresContext.fdct.
 *
 * Copies an 8x8 area of 16-bit samples from src into block in row-major
 * order; linesize is the source row stride in bytes.
 */
1115 static void prores_fdct(FDCTDSPContext *fdsp, const uint16_t *src,
1116 ptrdiff_t linesize, int16_t *block)
1119 const uint16_t *tsrc = src;
1121 for (y = 0; y < 8; y++) {
1122 for (x = 0; x < 8; x++)
1123 block[y * 8 + x] = tsrc[x];
/* linesize is in bytes, tsrc counts 16-bit samples */
1124 tsrc += linesize >> 1;
/*
 * Validate the options and set up the encoder state.
 *
 * Checks mbs_per_slice (must be a power of two), alpha_bits (0, 8 or 16),
 * the vendor string (exactly four characters) and the rate settings.
 * Derives the picture geometry in macroblocks and slices, selects the scan
 * table and quantisation matrix, prepares either the rate-control tables or
 * the forced quantiser matrix, and computes the per-frame packet size bound.
 *
 * Returns a negative AVERROR code on invalid options or allocation failure.
 */
1129 static av_cold int encode_init(AVCodecContext *avctx)
1131 ProresContext *ctx = avctx->priv_data;
1134 int min_quant, max_quant;
1135 int interlaced = !!(avctx->flags & AV_CODEC_FLAG_INTERLACED_DCT);
1137 avctx->bits_per_raw_sample = 10;
1139 ctx->fdct = prores_fdct;
1140 ctx->scantable = interlaced ? ff_prores_interlaced_scan
1141 : ff_prores_progressive_scan;
1142 ff_fdctdsp_init(&ctx->fdsp, avctx);
1144 mps = ctx->mbs_per_slice;
/* a power of two has no bit in common with its predecessor */
1145 if (mps & (mps - 1)) {
1146 av_log(avctx, AV_LOG_ERROR,
1147 "there should be an integer power of two MBs per slice\n");
1148 return AVERROR(EINVAL);
1150 if (av_pix_fmt_desc_get(avctx->pix_fmt)->flags & AV_PIX_FMT_FLAG_ALPHA) {
1151 if (ctx->alpha_bits & 7) {
1152 av_log(avctx, AV_LOG_ERROR, "alpha bits should be 0, 8 or 16\n");
1153 return AVERROR(EINVAL);
1155 avctx->bits_per_coded_sample = 32;
/* no alpha plane in the input: ignore the alpha_bits option */
1157 ctx->alpha_bits = 0;
1160 ctx->chroma_factor = avctx->pix_fmt == AV_PIX_FMT_YUV422P10
1163 ctx->profile_info = prores_profile_info + ctx->profile;
1164 ctx->num_planes = 3 + !!ctx->alpha_bits;
/* picture size in 16x16 macroblocks, rounded up */
1166 ctx->mb_width = FFALIGN(avctx->width, 16) >> 4;
1169 ctx->mb_height = FFALIGN(avctx->height, 32) >> 5;
1171 ctx->mb_height = FFALIGN(avctx->height, 16) >> 4;
/* full-width slices per row, plus one slice for every set bit of the
 * leftover macroblock count (each leftover slice is a power of two wide) */
1173 ctx->slices_width = ctx->mb_width / mps;
1174 ctx->slices_width += av_popcount(ctx->mb_width - ctx->slices_width * mps);
1175 ctx->slices_per_picture = ctx->mb_height * ctx->slices_width;
1176 ctx->pictures_per_frame = 1 + interlaced;
/* -1 selects the matrix belonging to the profile */
1178 if (ctx->quant_sel == -1)
1179 ctx->quant_mat = prores_quant_matrices[ctx->profile_info->quant];
1181 ctx->quant_mat = prores_quant_matrices[ctx->quant_sel];
1183 if (strlen(ctx->vendor) != 4) {
1184 av_log(avctx, AV_LOG_ERROR, "vendor ID should be 4 bytes\n");
1185 return AVERROR_INVALIDDATA;
/* a non-zero global_quality forces a fixed quantiser */
1188 ctx->force_quant = avctx->global_quality / FF_QP2LAMBDA;
1189 if (!ctx->force_quant) {
/* no explicit rate: take the profile default for this frame size */
1190 if (!ctx->bits_per_mb) {
1191 for (i = 0; i < NUM_MB_LIMITS - 1; i++)
1192 if (prores_mb_limits[i] >= ctx->mb_width * ctx->mb_height *
1193 ctx->pictures_per_frame)
1195 ctx->bits_per_mb = ctx->profile_info->br_tab[i];
1196 } else if (ctx->bits_per_mb < 128) {
1197 av_log(avctx, AV_LOG_ERROR, "too few bits per MB, please set at least 128\n");
1198 return AVERROR_INVALIDDATA;
/* precompute the scaled matrices for the stored quantiser range */
1201 min_quant = ctx->profile_info->min_quant;
1202 max_quant = ctx->profile_info->max_quant;
1203 for (i = min_quant; i < MAX_STORED_Q; i++) {
1204 for (j = 0; j < 64; j++)
1205 ctx->quants[i][j] = ctx->quant_mat[j] * i;
1208 ctx->slice_q = av_malloc(ctx->slices_per_picture * sizeof(*ctx->slice_q));
1209 if (!ctx->slice_q) {
1210 encode_close(avctx);
1211 return AVERROR(ENOMEM);
1214 ctx->tdata = av_mallocz(avctx->thread_count * sizeof(*ctx->tdata));
1216 encode_close(avctx);
1217 return AVERROR(ENOMEM);
1220 for (j = 0; j < avctx->thread_count; j++) {
1221 ctx->tdata[j].nodes = av_malloc((ctx->slices_width + 1)
1223 * sizeof(*ctx->tdata->nodes));
1224 if (!ctx->tdata[j].nodes) {
1225 encode_close(avctx);
1226 return AVERROR(ENOMEM);
/* initialise the first node group, which precedes the first slice */
1228 for (i = min_quant; i < max_quant + 2; i++) {
1229 ctx->tdata[j].nodes[i].prev_node = -1;
1230 ctx->tdata[j].nodes[i].bits = 0;
1231 ctx->tdata[j].nodes[i].score = 0;
1237 if (ctx->force_quant > 64) {
1238 av_log(avctx, AV_LOG_ERROR, "too large quantiser, maximum is 64\n");
1239 return AVERROR_INVALIDDATA;
/* forced quantiser: build its matrix in row 0 and accumulate ls, from which
 * bits_per_mb is derived below */
1242 for (j = 0; j < 64; j++) {
1243 ctx->quants[0][j] = ctx->quant_mat[j] * ctx->force_quant;
1244 ls += av_log2((1 << 11) / ctx->quants[0][j]) * 2 + 1;
1247 ctx->bits_per_mb = ls * 8;
1248 if (ctx->chroma_factor == CFACTOR_Y444)
1249 ctx->bits_per_mb += ls * 4;
/* packet size bound: per slice, a header allowance plus the bit budget of
 * its macroblocks */
1252 ctx->frame_size_upper_bound = ctx->pictures_per_frame *
1253 ctx->slices_per_picture *
1254 (2 + 2 * ctx->num_planes +
1255 (mps * ctx->bits_per_mb) / 8)
1258 if (ctx->alpha_bits) {
1259 // The alpha plane is run-coded and might exceed the bit budget.
1260 ctx->frame_size_upper_bound += ctx->pictures_per_frame *
1261 ctx->slices_per_picture *
1262 /* num pixels per slice */ (ctx->mbs_per_slice * 256 *
1263 /* bits per pixel */ (1 + ctx->alpha_bits + 1) + 7 >> 3);
1266 avctx->codec_tag = ctx->profile_info->tag;
1268 av_log(avctx, AV_LOG_DEBUG,
1269 "profile %d, %d slices, interlacing: %s, %d bits per MB\n",
1270 ctx->profile, ctx->slices_per_picture * ctx->pictures_per_frame,
1271 interlaced ? "yes" : "no", ctx->bits_per_mb);
1272 av_log(avctx, AV_LOG_DEBUG, "frame size upper bound: %d\n",
1273 ctx->frame_size_upper_bound);
/* Byte offset of an option's backing field inside ProresContext. */
#define OFFSET(x) offsetof(ProresContext, x)
/* Flags shared by every option of this encoder. The expansion is
 * parenthesised so the macro stays a single value inside larger expressions
 * such as VE & flag. */
#define VE (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM)
/*
 * Private options of the encoder. Each entry with an OFFSET() stores its
 * value in the named ProresContext field; the AV_OPT_TYPE_CONST entries are
 * named values for the option unit given in their last column.
 */
1281 static const AVOption options[] = {
/* slice width in macroblocks; encode_init requires a power of two */
1282 { "mbs_per_slice", "macroblocks per slice", OFFSET(mbs_per_slice),
1283 AV_OPT_TYPE_INT, { .i64 = 8 }, 1, MAX_MBS_PER_SLICE, VE },
/* index into prores_profile_info[] */
1284 { "profile", NULL, OFFSET(profile), AV_OPT_TYPE_INT,
1285 { .i64 = PRORES_PROFILE_STANDARD },
1286 PRORES_PROFILE_PROXY, PRORES_PROFILE_4444, VE, "profile" },
1287 { "proxy", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_PROXY },
1288 0, 0, VE, "profile" },
1289 { "lt", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_LT },
1290 0, 0, VE, "profile" },
1291 { "standard", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_STANDARD },
1292 0, 0, VE, "profile" },
1293 { "hq", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_HQ },
1294 0, 0, VE, "profile" },
1295 { "4444", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_4444 },
1296 0, 0, VE, "profile" },
/* written into the frame header; encode_init requires four characters */
1297 { "vendor", "vendor ID", OFFSET(vendor),
1298 AV_OPT_TYPE_STRING, { .str = "Lavc" }, CHAR_MIN, CHAR_MAX, VE },
/* 0 lets encode_init pick the profile default; other values below 128 are
 * rejected there */
1299 { "bits_per_mb", "desired bits per macroblock", OFFSET(bits_per_mb),
1300 AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 8192, VE },
/* row of prores_quant_matrices[]; -1 uses the matrix of the profile */
1301 { "quant_mat", "quantiser matrix", OFFSET(quant_sel), AV_OPT_TYPE_INT,
1302 { .i64 = -1 }, -1, QUANT_MAT_DEFAULT, VE, "quant_mat" },
1303 { "auto", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = -1 },
1304 0, 0, VE, "quant_mat" },
1305 { "proxy", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_PROXY },
1306 0, 0, VE, "quant_mat" },
1307 { "lt", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_LT },
1308 0, 0, VE, "quant_mat" },
1309 { "standard", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_STANDARD },
1310 0, 0, VE, "quant_mat" },
1311 { "hq", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_HQ },
1312 0, 0, VE, "quant_mat" },
1313 { "default", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_DEFAULT },
1314 0, 0, VE, "quant_mat" },
/* encode_init accepts only 0, 8 or 16 and forces 0 for inputs without alpha */
1315 { "alpha_bits", "bits for alpha plane", OFFSET(alpha_bits), AV_OPT_TYPE_INT,
1316 { .i64 = 16 }, 0, 16, VE },
/* AVClass of the encoder's private context; referenced by the priv_class
 * field of ff_prores_encoder. */
1320 static const AVClass proresenc_class = {
1321 .class_name = "ProRes encoder",
1322 .item_name = av_default_item_name,
1324 .version = LIBAVUTIL_VERSION_INT,
/*
 * Codec registration entry: ties the ProRes codec ID to the init, encode and
 * close callbacks of this file, declares slice threading support and lists
 * the accepted 10-bit pixel formats.
 */
1327 AVCodec ff_prores_encoder = {
1329 .long_name = NULL_IF_CONFIG_SMALL("Apple ProRes (iCodec Pro)"),
1330 .type = AVMEDIA_TYPE_VIDEO,
1331 .id = AV_CODEC_ID_PRORES,
1332 .priv_data_size = sizeof(ProresContext),
1333 .init = encode_init,
1334 .close = encode_close,
1335 .encode2 = encode_frame,
1336 .capabilities = AV_CODEC_CAP_SLICE_THREADS,
/* list terminated by AV_PIX_FMT_NONE */
1337 .pix_fmts = (const enum AVPixelFormat[]) {
1338 AV_PIX_FMT_YUV422P10, AV_PIX_FMT_YUV444P10,
1339 AV_PIX_FMT_YUVA444P10, AV_PIX_FMT_NONE
1341 .priv_class = &proresenc_class,